Skip to content

Crash in recursive function with many intermediate allocations #55192

Closed as not planned

Description

While reading https://www.modular.com/blog/mojo-vs-rust-is-mojo-faster-than-rust, I wanted to see how Julia fares with the TCO example. I tried this translation:

julia> versioninfo()
Julia Version 1.12.0-DEV.878
Commit 04af4460e68 (2024-07-20 17:31 UTC)
Build Info:
  Official https://julialang.org/ release
Platform Info:
  OS: macOS (arm64-apple-darwin22.4.0)
  CPU: 8 × Apple M1
  WORD_SIZE: 64
  LLVM: libLLVM-17.0.6 (ORCJIT, apple-m1)
Threads: 1 default, 0 interactive, 1 GC (on 4 virtual cores)

julia> function recursive(x::Int)
           if x < 1
               return nothing
           end
           stuff = sizehint!(Int[], x)
           for idx in 1:x
               push!(stuff, idx)
           end
           recursive(x - 1)
       end
recursive (generic function with 1 method)

julia> @time recursive(50_000)
Warning: detected a stack overflow; program state may be corrupted, so further execution might be unreliable.

[4967] signal 4 (1): Illegal instruction: 4
in expression starting at REPL[10]:1
_os_unfair_lock_recursive_abort at /usr/lib/system/libsystem_platform.dylib (unknown line)
_os_unfair_lock_lock_slow at /usr/lib/system/libsystem_platform.dylib (unknown line)
Allocations: 1782749 (Pool: 1782702; Big: 47); GC: 2202

The stackoverflow error is new in Julia v1.11, it works up to Julia v1.10:

julia> versioninfo()
Julia Version 1.10.4
Commit 48d4fd48430 (2024-06-04 10:41 UTC)
Build Info:
  Official https://julialang.org/ release
Platform Info:
  OS: macOS (arm64-apple-darwin22.4.0)
  CPU: 8 × Apple M1
  WORD_SIZE: 64
  LIBM: libopenlibm
  LLVM: libLLVM-15.0.7 (ORCJIT, apple-m1)
Threads: 1 default, 0 interactive, 1 GC (on 4 virtual cores)

julia> function recursive(x::Int)
           if x < 1
               return nothing
           end
           stuff = sizehint!(Int[], x)
           for idx in 1:x
               push!(stuff, idx)
           end
           recursive(x - 1)
       end
recursive (generic function with 1 method)

julia> @time recursive(50_000)
  6.232747 seconds (100.00 k allocations: 9.322 GiB, 8.83% gc time)

For the record, LLVM IR of the julia version:

Julia LLVM IR
@jl_nothing = external constant ptr

@"+Core.Array#1605.jit" = private alias ptr, inttoptr (i64 129754708354704 to ptr)
@"+Core.GenericMemory#1602.jit" = private alias ptr, inttoptr (i64 129754708354896 to ptr)

define swiftcc void @julia_recursive_1597(ptr nonnull swiftself %pgcstack, i64 signext %"x::Int64") {
top:
  %gcframe1 = alloca [8 x ptr], align 16
  call void @llvm.memset.p0.i64(ptr align 16 %gcframe1, i8 0, i64 64, i1 true)
  %gc_slot_addr4 = getelementptr inbounds ptr, ptr %gcframe1, i64 6
  %gc_slot_addr3 = getelementptr inbounds ptr, ptr %gcframe1, i64 5
  %gc_slot_addr2 = getelementptr inbounds ptr, ptr %gcframe1, i64 4
  %0 = getelementptr inbounds ptr, ptr %gcframe1, i64 2
  %1 = alloca { ptr, ptr }, align 8
  %2 = alloca { ptr, ptr }, align 8
  %3 = alloca { ptr, i64, i64, i64, i64, i64, ptr, { ptr, ptr } }, align 8
  store i64 24, ptr %gcframe1, align 16
  %frame.prev = getelementptr inbounds ptr, ptr %gcframe1, i64 1
  %task.gcstack = load ptr, ptr %pgcstack, align 8
  store ptr %task.gcstack, ptr %frame.prev, align 8
  store ptr %gcframe1, ptr %pgcstack, align 8
   %4 = icmp sgt i64 %"x::Int64", 0
  br i1 %4, label %L4, label %common.ret

common.ret:
  %frame.prev47 = phi ptr [ %frame.prev47.pre, %L62 ], [ %task.gcstack, %top ]
  store ptr %frame.prev47, ptr %pgcstack, align 8
  ret void

L4:
  %5 = getelementptr inbounds ptr, ptr %gcframe1, i64 3
     %.instance = load atomic ptr, ptr getelementptr inbounds (ptr, ptr @"+Core.GenericMemory#1602.jit", i64 4) unordered, align 16
     %gc_slot_addr_5 = getelementptr inbounds ptr, ptr %gcframe1, i64 7
     store ptr %.instance, ptr %gc_slot_addr_5, align 8
    call swiftcc void @j_memoryref_1604(ptr noalias nocapture noundef nonnull sret({ ptr, ptr }) %1, ptr noalias nocapture noundef nonnull %5, ptr nonnull swiftself %pgcstack, ptr nonnull %.instance)
    %ptls_field = getelementptr inbounds ptr, ptr %pgcstack, i64 2
    %ptls_load = load ptr, ptr %ptls_field, align 8
    %"new::Array" = call noalias nonnull align 8 dereferenceable(32) ptr @ijl_gc_pool_alloc(ptr %ptls_load, i32 552, i32 32, i64 129754708354704) #10
    %"new::Array.tag_addr" = getelementptr inbounds i64, ptr %"new::Array", i64 -1
    store atomic i64 129754708354704, ptr %"new::Array.tag_addr" unordered, align 8
    %6 = getelementptr inbounds ptr, ptr %"new::Array", i64 1
    %.unbox.fca.0.load = load ptr, ptr %1, align 8
    %.unbox.fca.1.gep = getelementptr inbounds { ptr, ptr }, ptr %1, i64 0, i32 1
    %.unbox.fca.1.load = load ptr, ptr %.unbox.fca.1.gep, align 8
    store ptr %.unbox.fca.0.load, ptr %"new::Array", align 8
    store ptr %.unbox.fca.1.load, ptr %6, align 8
    %"new::Array.size_ptr" = getelementptr inbounds i8, ptr %"new::Array", i64 16
    store i64 0, ptr %"new::Array.size_ptr", align 8
    store ptr %"new::Array", ptr %gc_slot_addr_5, align 8
   %7 = call swiftcc nonnull ptr @"j_#sizehint!#139_1608"(ptr nonnull swiftself %pgcstack, i8 zeroext 0, i8 zeroext 1, ptr nonnull %"new::Array", i64 signext %"x::Int64")
   %8 = getelementptr inbounds { ptr, ptr }, ptr %7, i64 0, i32 1
   %9 = getelementptr inbounds i8, ptr %7, i64 16
   %.fca.1.gep = getelementptr inbounds { ptr, i64, i64, i64, i64, i64, ptr, { ptr, ptr } }, ptr %3, i64 0, i32 1
   %.fca.2.gep = getelementptr inbounds { ptr, i64, i64, i64, i64, i64, ptr, { ptr, ptr } }, ptr %3, i64 0, i32 2
   %.fca.3.gep = getelementptr inbounds { ptr, i64, i64, i64, i64, i64, ptr, { ptr, ptr } }, ptr %3, i64 0, i32 3
   %.fca.4.gep = getelementptr inbounds { ptr, i64, i64, i64, i64, i64, ptr, { ptr, ptr } }, ptr %3, i64 0, i32 4
   %.fca.5.gep = getelementptr inbounds { ptr, i64, i64, i64, i64, i64, ptr, { ptr, ptr } }, ptr %3, i64 0, i32 5
   %.fca.6.gep = getelementptr inbounds { ptr, i64, i64, i64, i64, i64, ptr, { ptr, ptr } }, ptr %3, i64 0, i32 6
   %.fca.7.0.gep = getelementptr inbounds { ptr, i64, i64, i64, i64, i64, ptr, { ptr, ptr } }, ptr %3, i64 0, i32 7, i32 0
   %.fca.7.1.gep = getelementptr inbounds { ptr, i64, i64, i64, i64, i64, ptr, { ptr, ptr } }, ptr %3, i64 0, i32 7, i32 1
     %.pre = load ptr, ptr %7, align 8
     %.pre32 = load ptr, ptr %8, align 8
     %.size.sroa.0.0.copyload.pre = load i64, ptr %9, align 8
  br label %L25

L25:
  %10 = phi ptr [ %.pre32, %L4 ], [ %20, %L44 ]
  %11 = phi ptr [ %.pre, %L4 ], [ %21, %L44 ]
     %.size.sroa.0.0.copyload = phi i64 [ %.size.sroa.0.0.copyload.pre, %L4 ], [ %.size18.sroa.0.0.copyload, %L44 ]
     %value_phi6 = phi i64 [ 1, %L4 ], [ %25, %L44 ]
     %12 = add i64 %.size.sroa.0.0.copyload, 1
    %.data_ptr = getelementptr inbounds { i64, ptr }, ptr %10, i64 0, i32 1
    %13 = load ptr, ptr %.data_ptr, align 8
    %14 = ptrtoint ptr %13 to i64
    %15 = ptrtoint ptr %11 to i64
    %16 = sub i64 %15, %14
    %17 = lshr exact i64 %16, 3
    %18 = add nuw nsw i64 %17, 1
    store i64 %12, ptr %9, align 8
     %19 = add i64 %18, %.size.sroa.0.0.copyload
     %.unbox14 = load i64, ptr %10, align 8
     %.not = icmp slt i64 %.unbox14, %19
    br i1 %.not, label %L41, label %L44

L41:
    store ptr %7, ptr %gc_slot_addr4, align 16
    store ptr %7, ptr %3, align 8
    store i64 %19, ptr %.fca.1.gep, align 8
    store i64 %18, ptr %.fca.2.gep, align 8
    store i64 %12, ptr %.fca.3.gep, align 8
    store i64 %.size.sroa.0.0.copyload, ptr %.fca.4.gep, align 8
    store i64 %.unbox14, ptr %.fca.5.gep, align 8
    store ptr %10, ptr %gc_slot_addr3, align 8
    store ptr %10, ptr %.fca.6.gep, align 8
    store ptr %11, ptr %.fca.7.0.gep, align 8
    store ptr %10, ptr %gc_slot_addr2, align 16
    store ptr %10, ptr %.fca.7.1.gep, align 8
    store ptr %7, ptr %gc_slot_addr_5, align 8
    call swiftcc void @"j_#133_1617"(ptr noalias nocapture noundef nonnull sret({ ptr, ptr }) %2, ptr noalias nocapture noundef nonnull %0, ptr nonnull swiftself %pgcstack, ptr nocapture nonnull readonly %3)
    %.size18.sroa.0.0.copyload.pre = load i64, ptr %9, align 8
     %.pre35 = load ptr, ptr %7, align 8
     %.pre36 = load ptr, ptr %8, align 8
    br label %L44

L44:
     %20 = phi ptr [ %10, %L25 ], [ %.pre36, %L41 ]
     %21 = phi ptr [ %11, %L25 ], [ %.pre35, %L41 ]
    %.size18.sroa.0.0.copyload = phi i64 [ %12, %L25 ], [ %.size18.sroa.0.0.copyload.pre, %L41 ]
    %22 = shl i64 %.size18.sroa.0.0.copyload, 3
    %23 = add i64 %22, -8
    %24 = getelementptr i8, ptr %21, i64 %23
    store i64 %value_phi6, ptr %24, align 8
    %.not31.not = icmp eq i64 %value_phi6, %"x::Int64"
   %25 = add nuw i64 %value_phi6, 1
  br i1 %.not31.not, label %L62, label %L25

L62:
   %26 = add i64 %"x::Int64", -1
  call swiftcc void @julia_recursive_1597(ptr nonnull swiftself %pgcstack, i64 signext %26)
  %frame.prev47.pre = load ptr, ptr %frame.prev, align 8
  br label %common.ret
}

define nonnull ptr @jfptr_recursive_1598(ptr %"function::Core.Function", ptr noalias nocapture noundef readonly %"args::Any[]", i32 %"nargs::UInt32") {
top:
  %thread_ptr = call ptr asm "movq %fs:0, $0", "=r"()
  %tls_ppgcstack = getelementptr i8, ptr %thread_ptr, i64 -8
  %tls_pgcstack = load ptr, ptr %tls_ppgcstack, align 8
  %0 = getelementptr inbounds ptr, ptr %"args::Any[]", i32 0
  %1 = load ptr, ptr %0, align 8
  %2 = load i64, ptr %1, align 8
  call swiftcc void @julia_recursive_1597(ptr nonnull swiftself %tls_pgcstack, i64 signext %2)
  %jl_nothing = load ptr, ptr @jl_nothing, align 8
  ret ptr %jl_nothing
}

declare void @llvm.dbg.value(metadata, metadata, metadata) #2

declare swiftcc void @j_memoryref_1604(ptr noalias nocapture noundef sret({ ptr, ptr }), ptr noalias nocapture noundef, ptr nonnull swiftself, ptr) #3

declare noalias nonnull ptr @julia.gc_alloc_obj(ptr, i64, ptr) #4

declare swiftcc nonnull ptr @"j_#sizehint!#139_1608"(ptr nonnull swiftself, i8 zeroext, i8 zeroext, ptr, i64 signext) #5

declare swiftcc void @"j_#133_1617"(ptr noalias nocapture noundef sret({ ptr, ptr }), ptr noalias nocapture noundef, ptr nonnull swiftself, ptr nocapture readonly) #6

declare noundef nonnull ptr @julia.gc_loaded(ptr nocapture noundef nonnull readnone, ptr noundef nonnull readnone) #7

declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #8

declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #8

declare noalias nonnull ptr @julia.new_gc_frame(i32)

declare void @julia.push_gc_frame(ptr, i32)

declare ptr @julia.get_gc_frame_slot(ptr, i32)

declare void @julia.pop_gc_frame(ptr)

declare noalias nonnull ptr @julia.gc_alloc_bytes(ptr, i64, i64) #4

declare void @ijl_gc_queue_root(ptr) #9

declare noalias nonnull ptr @ijl_gc_pool_alloc(ptr, i32, i32, i64) #10

declare noalias nonnull ptr @ijl_gc_big_alloc(ptr, i64, i64) #4

declare noalias nonnull ptr @ijl_gc_alloc_typed(ptr, i64, i64) #4

declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #11

For comparison, the LLVM IR of the Rust version, which doesn't blow up the stack, from the Mojo blogpost:

Rust LLVM IR
@__rust_no_alloc_shim_is_unstable = external global i8

define internal fastcc void @alloc::raw_vec::finish_grow::ha9fe1ec34f38f857(ptr dead_on_unwind noalias nocapture noundef writable writeonly align 8 dereferenceable(24) %_0, i64 noundef %0, i64 %1, ptr noalias nocapture noundef readonly align 8 dereferenceable(24) %current_memory) unnamed_addr {
start:
  %2 = icmp eq i64 %0, 0
  br i1 %2, label %bb8, label %bb9

bb9:
  %3 = getelementptr inbounds i8, ptr %current_memory, i64 8
  %4 = load i64, ptr %3, align 8
  %5 = icmp eq i64 %4, 0
  br i1 %5, label %bb2, label %bb3

bb8:
  %6 = getelementptr inbounds i8, ptr %_0, i64 8
  store i64 0, ptr %6, align 8
  br label %bb7

bb3:
  %ptr = load ptr, ptr %current_memory, align 8
  %7 = getelementptr inbounds i8, ptr %current_memory, i64 16
  %8 = load i64, ptr %7, align 8
  %cond = icmp eq i64 %4, %0
  tail call void @llvm.assume(i1 %cond)
  %9 = icmp eq i64 %8, 0
  br i1 %9, label %bb1.i.i, label %bb4.i.i

bb1.i.i:
  %10 = icmp eq i64 %1, 0
  br i1 %10, label %bb2.i.i.i, label %bb5.i.i.i

bb2.i.i.i:
  %ptr.i.i.i.i = getelementptr i8, ptr null, i64 %0
  br label %bb6

bb5.i.i.i:
  %11 = load volatile i8, ptr @__rust_no_alloc_shim_is_unstable, align 1
  %_0.i.i.i.i = tail call noalias noundef ptr @__rust_alloc(i64 noundef %1, i64 noundef %0) #12
  br label %bb6

bb4.i.i:
  %cond.i.i = icmp ule i64 %8, %1
  tail call void @llvm.assume(i1 %cond.i.i)
  %raw_ptr.i.i = tail call noundef ptr @__rust_realloc(ptr noundef nonnull %ptr, i64 noundef %8, i64 noundef %0, i64 noundef %1) #12
  br label %bb6

bb2:
  %12 = icmp eq i64 %1, 0
  br i1 %12, label %bb2.i.i, label %bb5.i.i25

bb2.i.i:
  %ptr.i.i.i = getelementptr i8, ptr null, i64 %0
  br label %bb6

bb5.i.i25:
  %13 = load volatile i8, ptr @__rust_no_alloc_shim_is_unstable, align 1
  %_0.i.i.i = tail call noalias noundef ptr @__rust_alloc(i64 noundef %1, i64 noundef %0) #12
  br label %bb6

bb6:
  %_0.sroa.0.0.i.i.pn = phi ptr [ %raw_ptr.i.i, %bb4.i.i ], [ %ptr.i.i.i.i, %bb2.i.i.i ], [ %_0.i.i.i.i, %bb5.i.i.i ], [ %ptr.i.i.i, %bb2.i.i ], [ %_0.i.i.i, %bb5.i.i25 ]
  %14 = icmp eq ptr %_0.sroa.0.0.i.i.pn, null
  %15 = getelementptr inbounds i8, ptr %_0, i64 8
  %16 = getelementptr inbounds i8, ptr %_0, i64 16
  br i1 %14, label %bb13, label %bb14

bb14:
  store ptr %_0.sroa.0.0.i.i.pn, ptr %15, align 8
  store i64 %1, ptr %16, align 8
  br label %bb7

bb13:
  store i64 %0, ptr %15, align 8
  store i64 %1, ptr %16, align 8
  br label %bb7

bb7:
  %storemerge24 = phi i64 [ 1, %bb8 ], [ 0, %bb14 ], [ 1, %bb13 ]
  store i64 %storemerge24, ptr %_0, align 8
  ret void
}

define internal fastcc void @"alloc::raw_vec::RawVec<T,A>::grow_one::h364980f97d6986be"(ptr noalias nocapture noundef align 8 dereferenceable(16) %self) unnamed_addr personality ptr @rust_eh_personality {
start:
  %_17.i = alloca [24 x i8], align 8
  %self3.i = alloca [24 x i8], align 8
  %_3 = load i64, ptr %self, align 8
  tail call void @llvm.experimental.noalias.scope.decl(metadata !133)
  %0 = tail call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %_3, i64 1)
  %_25.1.i = extractvalue { i64, i1 } %0, 1
  br i1 %_25.1.i, label %bb2, label %bb10.i

bb10.i:
  %_25.0.i = extractvalue { i64, i1 } %0, 0
  %v1.i = shl i64 %_3, 1
  %_0.sroa.0.0.sroa.speculated.i.i = tail call noundef i64 @llvm.umax.i64(i64 %v1.i, i64 %_25.0.i)
  %_0.sroa.0.0.sroa.speculated.i17.i = tail call noundef i64 @llvm.umax.i64(i64 %_0.sroa.0.0.sroa.speculated.i.i, i64 4)
  %_4.i.i = icmp ugt i64 %_0.sroa.0.0.sroa.speculated.i.i, 1152921504606846975
  %array_size.i.i = shl nuw nsw i64 %_0.sroa.0.0.sroa.speculated.i17.i, 3
  %_0.sroa.3.0.i.i = select i1 %_4.i.i, i64 undef, i64 %array_size.i.i
  %_0.sroa.0.0.i.i = select i1 %_4.i.i, i64 0, i64 8
  call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %self3.i)
  call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %_17.i)
  %1 = getelementptr inbounds i8, ptr %self, i64 8
  %2 = icmp eq i64 %_3, 0
  br i1 %2, label %"_ZN5alloc7raw_vec19RawVec$LT$T$C$A$GT$14current_memory17hfac323f7989e1023E.exit.i", label %bb4.i.i

bb4.i.i:
  %self.val16.i = load ptr, ptr %1, align 8
  %size.i.i = shl nuw i64 %_3, 3
  store ptr %self.val16.i, ptr %_17.i, align 8
  %_9.sroa.5.0._0.sroa_idx.i.i = getelementptr inbounds i8, ptr %_17.i, i64 16
  store i64 %size.i.i, ptr %_9.sroa.5.0._0.sroa_idx.i.i, align 8
  br label %"_ZN5alloc7raw_vec19RawVec$LT$T$C$A$GT$14current_memory17hfac323f7989e1023E.exit.i"

"_ZN5alloc7raw_vec19RawVec$LT$T$C$A$GT$14current_memory17hfac323f7989e1023E.exit.i":
  %.sink.i.i = phi i64 [ 8, %bb4.i.i ], [ 0, %bb10.i ]
  %3 = getelementptr inbounds i8, ptr %_17.i, i64 8
  store i64 %.sink.i.i, ptr %3, align 8
  call fastcc void @alloc::raw_vec::finish_grow::ha9fe1ec34f38f857(ptr noalias nocapture noundef nonnull align 8 dereferenceable(24) %self3.i, i64 noundef %_0.sroa.0.0.i.i, i64 %_0.sroa.3.0.i.i, ptr noalias nocapture noundef nonnull readonly align 8 dereferenceable(24) %_17.i)
  call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %_17.i)
  %_39.i = load i64, ptr %self3.i, align 8
  %trunc.not.i = icmp eq i64 %_39.i, 0
  %4 = getelementptr inbounds i8, ptr %self3.i, i64 8
  br i1 %trunc.not.i, label %bb3, label %bb14.i

bb14.i:
  %e.0.i = load i64, ptr %4, align 8
  %5 = getelementptr inbounds i8, ptr %self3.i, i64 16
  %e.1.i = load i64, ptr %5, align 8
  call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %self3.i)
  br label %bb2

bb2:
  %_0.sroa.4.0.i.ph = phi i64 [ undef, %start ], [ %e.1.i, %bb14.i ]
  %_0.sroa.0.0.i.ph = phi i64 [ 0, %start ], [ %e.0.i, %bb14.i ]
  tail call void @alloc::raw_vec::handle_error::h0fc9691652206c4f(i64 noundef %_0.sroa.0.0.i.ph, i64 %_0.sroa.4.0.i.ph) #13
  unreachable

bb3:
  %v.0.i = load ptr, ptr %4, align 8
  call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %self3.i)
  store ptr %v.0.i, ptr %1, align 8
  store i64 %_0.sroa.0.0.sroa.speculated.i17.i, ptr %self, align 8
  ret void
}

define void @recursive(i64 noundef %x) unnamed_addr personality ptr @rust_eh_personality {
start:
  %stuff = alloca [24 x i8], align 8
  %0 = icmp eq i64 %x, 0
  br i1 %0, label %bb8, label %bb2

bb2:
  call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %stuff)
  %_4.i.i = icmp ugt i64 %x, 1152921504606846975
  %array_size.i.i = shl nuw nsw i64 %x, 3
  br i1 %_4.i.i, label %bb12, label %"_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$8allocate17h5f1c0cec1dc28999E.exit.i"

"_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$8allocate17h5f1c0cec1dc28999E.exit.i":
  %1 = load volatile i8, ptr @__rust_no_alloc_shim_is_unstable, align 1
  %_0.i.i.i.i = tail call noalias noundef align 8 ptr @__rust_alloc(i64 noundef %array_size.i.i, i64 noundef 8) #12
  %2 = icmp eq ptr %_0.i.i.i.i, null
  br i1 %2, label %bb12, label %bb13

bb8:
  ret void

bb13:
  store i64 %x, ptr %stuff, align 8
  %3 = getelementptr inbounds i8, ptr %stuff, i64 8
  store ptr %_0.i.i.i.i, ptr %3, align 8
  %4 = getelementptr inbounds i8, ptr %stuff, i64 16
  store i64 0, ptr %4, align 8
  br label %bb15

bb12:
  %_12.sroa.4.0.ph = phi i64 [ 8, %"_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$8allocate17h5f1c0cec1dc28999E.exit.i" ], [ 0, %bb2 ]
  tail call void @alloc::raw_vec::handle_error::h0fc9691652206c4f(i64 noundef %_12.sroa.4.0.ph, i64 %array_size.i.i) #13
  unreachable

bb16:
  %_10 = add nsw i64 %x, -1
  invoke void @recursive(i64 noundef %_10)
          to label %bb6 unwind label %cleanup.loopexit.split-lp

cleanup.loopexit:
  %lpad.loopexit = landingpad { ptr, i32 }
          cleanup
  br label %cleanup

cleanup.loopexit.split-lp:
  %lpad.loopexit.split-lp = landingpad { ptr, i32 }
          cleanup
  br label %cleanup

cleanup:
  %lpad.phi = phi { ptr, i32 } [ %lpad.loopexit, %cleanup.loopexit ], [ %lpad.loopexit.split-lp, %cleanup.loopexit.split-lp ]
  %stuff.val = load i64, ptr %stuff, align 8
  %5 = icmp eq i64 %stuff.val, 0
  br i1 %5, label %bb10, label %bb2.i.i4.i

bb2.i.i4.i:
  %stuff.val2 = load ptr, ptr %3, align 8
  %size.i.i.i5.i = shl nuw i64 %stuff.val, 3
  tail call void @__rust_dealloc(ptr noundef nonnull %stuff.val2, i64 noundef %size.i.i.i5.i, i64 noundef 8) #12
  br label %bb10

bb6:
  %stuff.val3 = load i64, ptr %stuff, align 8
  %6 = icmp eq i64 %stuff.val3, 0
  br i1 %6, label %"_ZN4core3ptr49drop_in_place$LT$alloc..vec..Vec$LT$usize$GT$$GT$17h0c79e9ae257d3447E.exit7", label %bb2.i.i4.i5

bb2.i.i4.i5:
  %stuff.val4 = load ptr, ptr %3, align 8
  %size.i.i.i5.i6 = shl nuw i64 %stuff.val3, 3
  tail call void @__rust_dealloc(ptr noundef nonnull %stuff.val4, i64 noundef %size.i.i.i5.i6, i64 noundef 8) #12
  br label %"_ZN4core3ptr49drop_in_place$LT$alloc..vec..Vec$LT$usize$GT$$GT$17h0c79e9ae257d3447E.exit7"

"_ZN4core3ptr49drop_in_place$LT$alloc..vec..Vec$LT$usize$GT$$GT$17h0c79e9ae257d3447E.exit7":
  call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %stuff)
  br label %bb8

bb15:
  %self1.i18 = phi ptr [ %_0.i.i.i.i, %bb13 ], [ %self1.i, %"_ZN5alloc3vec16Vec$LT$T$C$A$GT$4push17h1114aa02267b6000E.exit" ]
  %len.i = phi i64 [ 0, %bb13 ], [ %8, %"_ZN5alloc3vec16Vec$LT$T$C$A$GT$4push17h1114aa02267b6000E.exit" ]
  %iter.sroa.0.016 = phi i64 [ 0, %bb13 ], [ %_0.i, %"_ZN5alloc3vec16Vec$LT$T$C$A$GT$4push17h1114aa02267b6000E.exit" ]
  %_0.i = add nuw i64 %iter.sroa.0.016, 1
  tail call void @llvm.experimental.noalias.scope.decl(metadata !327)
  %7 = load i64, ptr %stuff, align 8
  %_4.i = icmp eq i64 %len.i, %7
  br i1 %_4.i, label %bb1.i, label %"_ZN5alloc3vec16Vec$LT$T$C$A$GT$4push17h1114aa02267b6000E.exit"

bb1.i:
  invoke fastcc void @"alloc::raw_vec::RawVec<T,A>::grow_one::h364980f97d6986be"(ptr noalias noundef nonnull align 8 dereferenceable(16) %stuff)
          to label %"bb1.i._ZN5alloc3vec16Vec$LT$T$C$A$GT$4push17h1114aa02267b6000E.exit_crit_edge" unwind label %cleanup.loopexit

"bb1.i._ZN5alloc3vec16Vec$LT$T$C$A$GT$4push17h1114aa02267b6000E.exit_crit_edge":
  %self1.i.pre = load ptr, ptr %3, align 8
  br label %"_ZN5alloc3vec16Vec$LT$T$C$A$GT$4push17h1114aa02267b6000E.exit"

"_ZN5alloc3vec16Vec$LT$T$C$A$GT$4push17h1114aa02267b6000E.exit":
  %self1.i = phi ptr [ %self1.i.pre, %"bb1.i._ZN5alloc3vec16Vec$LT$T$C$A$GT$4push17h1114aa02267b6000E.exit_crit_edge" ], [ %self1.i18, %bb15 ]
  %end.i = getelementptr inbounds i64, ptr %self1.i, i64 %len.i
  store i64 %iter.sroa.0.016, ptr %end.i, align 8
  %8 = add i64 %len.i, 1
  store i64 %8, ptr %4, align 8
  %exitcond.not = icmp eq i64 %_0.i, %x
  br i1 %exitcond.not, label %bb16, label %bb15

bb10:
  resume { ptr, i32 } %lpad.phi
}

declare noundef i32 @rust_eh_personality(i32 noundef, i32 noundef, i64 noundef, ptr noundef, ptr noundef) unnamed_addr #2

declare void @llvm.assume(i1 noundef) #3

declare noalias noundef ptr @__rust_alloc(i64 noundef, i64 allocalign noundef) unnamed_addr #4

declare noalias noundef ptr @__rust_realloc(ptr allocptr noundef, i64 noundef, i64 allocalign noundef, i64 noundef) unnamed_addr #5

declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) #6

declare void @alloc::raw_vec::handle_error::h0fc9691652206c4f(i64 noundef, i64) unnamed_addr #7

declare void @__rust_dealloc(ptr allocptr noundef, i64 noundef, i64 noundef) unnamed_addr #8

declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #9

declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #9

declare void @llvm.experimental.noalias.scope.decl(metadata) #10

declare i64 @llvm.umax.i64(i64, i64) #11

Side note, it'd be amazing if the compiler would detect that all the allocations and stores are unused and it'd just elide them 🙂

Tangentially related, at the beginning I wrote a slightly different implementation of the recursive function which has much better performance, but on Julia nightly (same version as bove) it runs in a segfault in the garbage collector for larger arrays:

julia> function recursive_setindex(x::Int)
           if x < 1
               return nothing
           end
           stuff = Vector{Int}(undef, x)
           for idx in eachindex(stuff)
               stuff[idx] = x
           end
           recursive_setindex(x - 1)
       end
recursive_setindex (generic function with 1 method)

julia> @time recursive_setindex(50_000)
  0.948488 seconds (99.75 k allocations: 9.318 GiB, 25.54% gc time)

julia> @time recursive_setindex(100_000)
  3.406842 seconds (199.75 k allocations: 37.262 GiB, 18.83% gc time)

julia> @time recursive_setindex(200_000)
 11.615417 seconds (399.75 k allocations: 149.029 GiB, 17.08% gc time)

julia> @time recursive_setindex(400_000)

[5662] signal 11 (2): Segmentation fault: 11
in expression starting at REPL[6]:1
gc_sweep_page at /Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-HL2F7YQ3XH.0/build/default-honeycrisp-HL2F7YQ3XH-0/julialang/julia-master/src/gc.c:0 [inlined]
gc_sweep_pool_page at /Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-HL2F7YQ3XH.0/build/default-honeycrisp-HL2F7YQ3XH-0/julialang/julia-master/src/gc.c:1509
gc_sweep_prescan at /Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-HL2F7YQ3XH.0/build/default-honeycrisp-HL2F7YQ3XH-0/julialang/julia-master/src/gc.c:1578
gc_sweep_wake_all at /Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-HL2F7YQ3XH.0/build/default-honeycrisp-HL2F7YQ3XH-0/julialang/julia-master/src/gc.c:1599
gc_sweep_pool at /Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-HL2F7YQ3XH.0/build/default-honeycrisp-HL2F7YQ3XH-0/julialang/julia-master/src/gc.c:1779 [inlined]
_jl_gc_collect at /Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-HL2F7YQ3XH.0/build/default-honeycrisp-HL2F7YQ3XH-0/julialang/julia-master/src/gc.c:3583
ijl_gc_collect at /Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-HL2F7YQ3XH.0/build/default-honeycrisp-HL2F7YQ3XH-0/julialang/julia-master/src/gc.c:3847
maybe_collect at /Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-HL2F7YQ3XH.0/build/default-honeycrisp-HL2F7YQ3XH-0/julialang/julia-master/src/gc.c:861 [inlined]
jl_gc_pool_alloc_inner at /Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-HL2F7YQ3XH.0/build/default-honeycrisp-HL2F7YQ3XH-0/julialang/julia-master/src/gc.c:1276 [inlined]
jl_gc_pool_alloc_noinline at /Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-HL2F7YQ3XH.0/build/default-honeycrisp-HL2F7YQ3XH-0/julialang/julia-master/src/gc.c:1334 [inlined]
jl_gc_alloc_ at /Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-HL2F7YQ3XH.0/build/default-honeycrisp-HL2F7YQ3XH-0/julialang/julia-master/src/./julia_internal.h:509
_new_genericmemory_ at /Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-HL2F7YQ3XH.0/build/default-honeycrisp-HL2F7YQ3XH-0/julialang/julia-master/src/genericmemory.c:56
GenericMemory at ./boot.jl:537 [inlined]
Array at ./boot.jl:597 [inlined]
recursive_setindex at ./REPL[4]:5
recursive_setindex at ./REPL[4]:9
[...]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Metadata

Assignees

No one assigned

    Labels

    GCGarbage collectorregression 1.11Regression in the 1.11 release

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions