Description
openedon Jul 21, 2024
While reading https://www.modular.com/blog/mojo-vs-rust-is-mojo-faster-than-rust, I wanted to see how Julia fares with the TCO example. I tried this translation:
julia> versioninfo()
Julia Version 1.12.0-DEV.878
Commit 04af4460e68 (2024-07-20 17:31 UTC)
Build Info:
Official https://julialang.org/ release
Platform Info:
OS: macOS (arm64-apple-darwin22.4.0)
CPU: 8 × Apple M1
WORD_SIZE: 64
LLVM: libLLVM-17.0.6 (ORCJIT, apple-m1)
Threads: 1 default, 0 interactive, 1 GC (on 4 virtual cores)
julia> function recursive(x::Int)
if x < 1
return nothing
end
stuff = sizehint!(Int[], x)
for idx in 1:x
push!(stuff, idx)
end
recursive(x - 1)
end
recursive (generic function with 1 method)
julia> @time recursive(50_000)
Warning: detected a stack overflow; program state may be corrupted, so further execution might be unreliable.
[4967] signal 4 (1): Illegal instruction: 4
in expression starting at REPL[10]:1
_os_unfair_lock_recursive_abort at /usr/lib/system/libsystem_platform.dylib (unknown line)
_os_unfair_lock_lock_slow at /usr/lib/system/libsystem_platform.dylib (unknown line)
Allocations: 1782749 (Pool: 1782702; Big: 47); GC: 2202
The stackoverflow error is new in Julia v1.11, it works up to Julia v1.10:
julia> versioninfo()
Julia Version 1.10.4
Commit 48d4fd48430 (2024-06-04 10:41 UTC)
Build Info:
Official https://julialang.org/ release
Platform Info:
OS: macOS (arm64-apple-darwin22.4.0)
CPU: 8 × Apple M1
WORD_SIZE: 64
LIBM: libopenlibm
LLVM: libLLVM-15.0.7 (ORCJIT, apple-m1)
Threads: 1 default, 0 interactive, 1 GC (on 4 virtual cores)
julia> function recursive(x::Int)
if x < 1
return nothing
end
stuff = sizehint!(Int[], x)
for idx in 1:x
push!(stuff, idx)
end
recursive(x - 1)
end
recursive (generic function with 1 method)
julia> @time recursive(50_000)
6.232747 seconds (100.00 k allocations: 9.322 GiB, 8.83% gc time)
For the record, LLVM IR of the julia version:
Julia LLVM IR
@jl_nothing = external constant ptr
@"+Core.Array#1605.jit" = private alias ptr, inttoptr (i64 129754708354704 to ptr)
@"+Core.GenericMemory#1602.jit" = private alias ptr, inttoptr (i64 129754708354896 to ptr)
define swiftcc void @julia_recursive_1597(ptr nonnull swiftself %pgcstack, i64 signext %"x::Int64") {
top:
%gcframe1 = alloca [8 x ptr], align 16
call void @llvm.memset.p0.i64(ptr align 16 %gcframe1, i8 0, i64 64, i1 true)
%gc_slot_addr4 = getelementptr inbounds ptr, ptr %gcframe1, i64 6
%gc_slot_addr3 = getelementptr inbounds ptr, ptr %gcframe1, i64 5
%gc_slot_addr2 = getelementptr inbounds ptr, ptr %gcframe1, i64 4
%0 = getelementptr inbounds ptr, ptr %gcframe1, i64 2
%1 = alloca { ptr, ptr }, align 8
%2 = alloca { ptr, ptr }, align 8
%3 = alloca { ptr, i64, i64, i64, i64, i64, ptr, { ptr, ptr } }, align 8
store i64 24, ptr %gcframe1, align 16
%frame.prev = getelementptr inbounds ptr, ptr %gcframe1, i64 1
%task.gcstack = load ptr, ptr %pgcstack, align 8
store ptr %task.gcstack, ptr %frame.prev, align 8
store ptr %gcframe1, ptr %pgcstack, align 8
%4 = icmp sgt i64 %"x::Int64", 0
br i1 %4, label %L4, label %common.ret
common.ret:
%frame.prev47 = phi ptr [ %frame.prev47.pre, %L62 ], [ %task.gcstack, %top ]
store ptr %frame.prev47, ptr %pgcstack, align 8
ret void
L4:
%5 = getelementptr inbounds ptr, ptr %gcframe1, i64 3
%.instance = load atomic ptr, ptr getelementptr inbounds (ptr, ptr @"+Core.GenericMemory#1602.jit", i64 4) unordered, align 16
%gc_slot_addr_5 = getelementptr inbounds ptr, ptr %gcframe1, i64 7
store ptr %.instance, ptr %gc_slot_addr_5, align 8
call swiftcc void @j_memoryref_1604(ptr noalias nocapture noundef nonnull sret({ ptr, ptr }) %1, ptr noalias nocapture noundef nonnull %5, ptr nonnull swiftself %pgcstack, ptr nonnull %.instance)
%ptls_field = getelementptr inbounds ptr, ptr %pgcstack, i64 2
%ptls_load = load ptr, ptr %ptls_field, align 8
%"new::Array" = call noalias nonnull align 8 dereferenceable(32) ptr @ijl_gc_pool_alloc(ptr %ptls_load, i32 552, i32 32, i64 129754708354704) #10
%"new::Array.tag_addr" = getelementptr inbounds i64, ptr %"new::Array", i64 -1
store atomic i64 129754708354704, ptr %"new::Array.tag_addr" unordered, align 8
%6 = getelementptr inbounds ptr, ptr %"new::Array", i64 1
%.unbox.fca.0.load = load ptr, ptr %1, align 8
%.unbox.fca.1.gep = getelementptr inbounds { ptr, ptr }, ptr %1, i64 0, i32 1
%.unbox.fca.1.load = load ptr, ptr %.unbox.fca.1.gep, align 8
store ptr %.unbox.fca.0.load, ptr %"new::Array", align 8
store ptr %.unbox.fca.1.load, ptr %6, align 8
%"new::Array.size_ptr" = getelementptr inbounds i8, ptr %"new::Array", i64 16
store i64 0, ptr %"new::Array.size_ptr", align 8
store ptr %"new::Array", ptr %gc_slot_addr_5, align 8
%7 = call swiftcc nonnull ptr @"j_#sizehint!#139_1608"(ptr nonnull swiftself %pgcstack, i8 zeroext 0, i8 zeroext 1, ptr nonnull %"new::Array", i64 signext %"x::Int64")
%8 = getelementptr inbounds { ptr, ptr }, ptr %7, i64 0, i32 1
%9 = getelementptr inbounds i8, ptr %7, i64 16
%.fca.1.gep = getelementptr inbounds { ptr, i64, i64, i64, i64, i64, ptr, { ptr, ptr } }, ptr %3, i64 0, i32 1
%.fca.2.gep = getelementptr inbounds { ptr, i64, i64, i64, i64, i64, ptr, { ptr, ptr } }, ptr %3, i64 0, i32 2
%.fca.3.gep = getelementptr inbounds { ptr, i64, i64, i64, i64, i64, ptr, { ptr, ptr } }, ptr %3, i64 0, i32 3
%.fca.4.gep = getelementptr inbounds { ptr, i64, i64, i64, i64, i64, ptr, { ptr, ptr } }, ptr %3, i64 0, i32 4
%.fca.5.gep = getelementptr inbounds { ptr, i64, i64, i64, i64, i64, ptr, { ptr, ptr } }, ptr %3, i64 0, i32 5
%.fca.6.gep = getelementptr inbounds { ptr, i64, i64, i64, i64, i64, ptr, { ptr, ptr } }, ptr %3, i64 0, i32 6
%.fca.7.0.gep = getelementptr inbounds { ptr, i64, i64, i64, i64, i64, ptr, { ptr, ptr } }, ptr %3, i64 0, i32 7, i32 0
%.fca.7.1.gep = getelementptr inbounds { ptr, i64, i64, i64, i64, i64, ptr, { ptr, ptr } }, ptr %3, i64 0, i32 7, i32 1
%.pre = load ptr, ptr %7, align 8
%.pre32 = load ptr, ptr %8, align 8
%.size.sroa.0.0.copyload.pre = load i64, ptr %9, align 8
br label %L25
L25:
%10 = phi ptr [ %.pre32, %L4 ], [ %20, %L44 ]
%11 = phi ptr [ %.pre, %L4 ], [ %21, %L44 ]
%.size.sroa.0.0.copyload = phi i64 [ %.size.sroa.0.0.copyload.pre, %L4 ], [ %.size18.sroa.0.0.copyload, %L44 ]
%value_phi6 = phi i64 [ 1, %L4 ], [ %25, %L44 ]
%12 = add i64 %.size.sroa.0.0.copyload, 1
%.data_ptr = getelementptr inbounds { i64, ptr }, ptr %10, i64 0, i32 1
%13 = load ptr, ptr %.data_ptr, align 8
%14 = ptrtoint ptr %13 to i64
%15 = ptrtoint ptr %11 to i64
%16 = sub i64 %15, %14
%17 = lshr exact i64 %16, 3
%18 = add nuw nsw i64 %17, 1
store i64 %12, ptr %9, align 8
%19 = add i64 %18, %.size.sroa.0.0.copyload
%.unbox14 = load i64, ptr %10, align 8
%.not = icmp slt i64 %.unbox14, %19
br i1 %.not, label %L41, label %L44
L41:
store ptr %7, ptr %gc_slot_addr4, align 16
store ptr %7, ptr %3, align 8
store i64 %19, ptr %.fca.1.gep, align 8
store i64 %18, ptr %.fca.2.gep, align 8
store i64 %12, ptr %.fca.3.gep, align 8
store i64 %.size.sroa.0.0.copyload, ptr %.fca.4.gep, align 8
store i64 %.unbox14, ptr %.fca.5.gep, align 8
store ptr %10, ptr %gc_slot_addr3, align 8
store ptr %10, ptr %.fca.6.gep, align 8
store ptr %11, ptr %.fca.7.0.gep, align 8
store ptr %10, ptr %gc_slot_addr2, align 16
store ptr %10, ptr %.fca.7.1.gep, align 8
store ptr %7, ptr %gc_slot_addr_5, align 8
call swiftcc void @"j_#133_1617"(ptr noalias nocapture noundef nonnull sret({ ptr, ptr }) %2, ptr noalias nocapture noundef nonnull %0, ptr nonnull swiftself %pgcstack, ptr nocapture nonnull readonly %3)
%.size18.sroa.0.0.copyload.pre = load i64, ptr %9, align 8
%.pre35 = load ptr, ptr %7, align 8
%.pre36 = load ptr, ptr %8, align 8
br label %L44
L44:
%20 = phi ptr [ %10, %L25 ], [ %.pre36, %L41 ]
%21 = phi ptr [ %11, %L25 ], [ %.pre35, %L41 ]
%.size18.sroa.0.0.copyload = phi i64 [ %12, %L25 ], [ %.size18.sroa.0.0.copyload.pre, %L41 ]
%22 = shl i64 %.size18.sroa.0.0.copyload, 3
%23 = add i64 %22, -8
%24 = getelementptr i8, ptr %21, i64 %23
store i64 %value_phi6, ptr %24, align 8
%.not31.not = icmp eq i64 %value_phi6, %"x::Int64"
%25 = add nuw i64 %value_phi6, 1
br i1 %.not31.not, label %L62, label %L25
L62:
%26 = add i64 %"x::Int64", -1
call swiftcc void @julia_recursive_1597(ptr nonnull swiftself %pgcstack, i64 signext %26)
%frame.prev47.pre = load ptr, ptr %frame.prev, align 8
br label %common.ret
}
define nonnull ptr @jfptr_recursive_1598(ptr %"function::Core.Function", ptr noalias nocapture noundef readonly %"args::Any[]", i32 %"nargs::UInt32") {
top:
%thread_ptr = call ptr asm "movq %fs:0, $0", "=r"()
%tls_ppgcstack = getelementptr i8, ptr %thread_ptr, i64 -8
%tls_pgcstack = load ptr, ptr %tls_ppgcstack, align 8
%0 = getelementptr inbounds ptr, ptr %"args::Any[]", i32 0
%1 = load ptr, ptr %0, align 8
%2 = load i64, ptr %1, align 8
call swiftcc void @julia_recursive_1597(ptr nonnull swiftself %tls_pgcstack, i64 signext %2)
%jl_nothing = load ptr, ptr @jl_nothing, align 8
ret ptr %jl_nothing
}
declare void @llvm.dbg.value(metadata, metadata, metadata) #2
declare swiftcc void @j_memoryref_1604(ptr noalias nocapture noundef sret({ ptr, ptr }), ptr noalias nocapture noundef, ptr nonnull swiftself, ptr) #3
declare noalias nonnull ptr @julia.gc_alloc_obj(ptr, i64, ptr) #4
declare swiftcc nonnull ptr @"j_#sizehint!#139_1608"(ptr nonnull swiftself, i8 zeroext, i8 zeroext, ptr, i64 signext) #5
declare swiftcc void @"j_#133_1617"(ptr noalias nocapture noundef sret({ ptr, ptr }), ptr noalias nocapture noundef, ptr nonnull swiftself, ptr nocapture readonly) #6
declare noundef nonnull ptr @julia.gc_loaded(ptr nocapture noundef nonnull readnone, ptr noundef nonnull readnone) #7
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #8
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #8
declare noalias nonnull ptr @julia.new_gc_frame(i32)
declare void @julia.push_gc_frame(ptr, i32)
declare ptr @julia.get_gc_frame_slot(ptr, i32)
declare void @julia.pop_gc_frame(ptr)
declare noalias nonnull ptr @julia.gc_alloc_bytes(ptr, i64, i64) #4
declare void @ijl_gc_queue_root(ptr) #9
declare noalias nonnull ptr @ijl_gc_pool_alloc(ptr, i32, i32, i64) #10
declare noalias nonnull ptr @ijl_gc_big_alloc(ptr, i64, i64) #4
declare noalias nonnull ptr @ijl_gc_alloc_typed(ptr, i64, i64) #4
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #11
For comparison, the LLVM IR of the Rust version, which doesn't blow up the stack, from the Mojo blogpost:
Rust LLVM IR
@__rust_no_alloc_shim_is_unstable = external global i8
define internal fastcc void @alloc::raw_vec::finish_grow::ha9fe1ec34f38f857(ptr dead_on_unwind noalias nocapture noundef writable writeonly align 8 dereferenceable(24) %_0, i64 noundef %0, i64 %1, ptr noalias nocapture noundef readonly align 8 dereferenceable(24) %current_memory) unnamed_addr {
start:
%2 = icmp eq i64 %0, 0
br i1 %2, label %bb8, label %bb9
bb9:
%3 = getelementptr inbounds i8, ptr %current_memory, i64 8
%4 = load i64, ptr %3, align 8
%5 = icmp eq i64 %4, 0
br i1 %5, label %bb2, label %bb3
bb8:
%6 = getelementptr inbounds i8, ptr %_0, i64 8
store i64 0, ptr %6, align 8
br label %bb7
bb3:
%ptr = load ptr, ptr %current_memory, align 8
%7 = getelementptr inbounds i8, ptr %current_memory, i64 16
%8 = load i64, ptr %7, align 8
%cond = icmp eq i64 %4, %0
tail call void @llvm.assume(i1 %cond)
%9 = icmp eq i64 %8, 0
br i1 %9, label %bb1.i.i, label %bb4.i.i
bb1.i.i:
%10 = icmp eq i64 %1, 0
br i1 %10, label %bb2.i.i.i, label %bb5.i.i.i
bb2.i.i.i:
%ptr.i.i.i.i = getelementptr i8, ptr null, i64 %0
br label %bb6
bb5.i.i.i:
%11 = load volatile i8, ptr @__rust_no_alloc_shim_is_unstable, align 1
%_0.i.i.i.i = tail call noalias noundef ptr @__rust_alloc(i64 noundef %1, i64 noundef %0) #12
br label %bb6
bb4.i.i:
%cond.i.i = icmp ule i64 %8, %1
tail call void @llvm.assume(i1 %cond.i.i)
%raw_ptr.i.i = tail call noundef ptr @__rust_realloc(ptr noundef nonnull %ptr, i64 noundef %8, i64 noundef %0, i64 noundef %1) #12
br label %bb6
bb2:
%12 = icmp eq i64 %1, 0
br i1 %12, label %bb2.i.i, label %bb5.i.i25
bb2.i.i:
%ptr.i.i.i = getelementptr i8, ptr null, i64 %0
br label %bb6
bb5.i.i25:
%13 = load volatile i8, ptr @__rust_no_alloc_shim_is_unstable, align 1
%_0.i.i.i = tail call noalias noundef ptr @__rust_alloc(i64 noundef %1, i64 noundef %0) #12
br label %bb6
bb6:
%_0.sroa.0.0.i.i.pn = phi ptr [ %raw_ptr.i.i, %bb4.i.i ], [ %ptr.i.i.i.i, %bb2.i.i.i ], [ %_0.i.i.i.i, %bb5.i.i.i ], [ %ptr.i.i.i, %bb2.i.i ], [ %_0.i.i.i, %bb5.i.i25 ]
%14 = icmp eq ptr %_0.sroa.0.0.i.i.pn, null
%15 = getelementptr inbounds i8, ptr %_0, i64 8
%16 = getelementptr inbounds i8, ptr %_0, i64 16
br i1 %14, label %bb13, label %bb14
bb14:
store ptr %_0.sroa.0.0.i.i.pn, ptr %15, align 8
store i64 %1, ptr %16, align 8
br label %bb7
bb13:
store i64 %0, ptr %15, align 8
store i64 %1, ptr %16, align 8
br label %bb7
bb7:
%storemerge24 = phi i64 [ 1, %bb8 ], [ 0, %bb14 ], [ 1, %bb13 ]
store i64 %storemerge24, ptr %_0, align 8
ret void
}
define internal fastcc void @"alloc::raw_vec::RawVec<T,A>::grow_one::h364980f97d6986be"(ptr noalias nocapture noundef align 8 dereferenceable(16) %self) unnamed_addr personality ptr @rust_eh_personality {
start:
%_17.i = alloca [24 x i8], align 8
%self3.i = alloca [24 x i8], align 8
%_3 = load i64, ptr %self, align 8
tail call void @llvm.experimental.noalias.scope.decl(metadata !133)
%0 = tail call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %_3, i64 1)
%_25.1.i = extractvalue { i64, i1 } %0, 1
br i1 %_25.1.i, label %bb2, label %bb10.i
bb10.i:
%_25.0.i = extractvalue { i64, i1 } %0, 0
%v1.i = shl i64 %_3, 1
%_0.sroa.0.0.sroa.speculated.i.i = tail call noundef i64 @llvm.umax.i64(i64 %v1.i, i64 %_25.0.i)
%_0.sroa.0.0.sroa.speculated.i17.i = tail call noundef i64 @llvm.umax.i64(i64 %_0.sroa.0.0.sroa.speculated.i.i, i64 4)
%_4.i.i = icmp ugt i64 %_0.sroa.0.0.sroa.speculated.i.i, 1152921504606846975
%array_size.i.i = shl nuw nsw i64 %_0.sroa.0.0.sroa.speculated.i17.i, 3
%_0.sroa.3.0.i.i = select i1 %_4.i.i, i64 undef, i64 %array_size.i.i
%_0.sroa.0.0.i.i = select i1 %_4.i.i, i64 0, i64 8
call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %self3.i)
call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %_17.i)
%1 = getelementptr inbounds i8, ptr %self, i64 8
%2 = icmp eq i64 %_3, 0
br i1 %2, label %"_ZN5alloc7raw_vec19RawVec$LT$T$C$A$GT$14current_memory17hfac323f7989e1023E.exit.i", label %bb4.i.i
bb4.i.i:
%self.val16.i = load ptr, ptr %1, align 8
%size.i.i = shl nuw i64 %_3, 3
store ptr %self.val16.i, ptr %_17.i, align 8
%_9.sroa.5.0._0.sroa_idx.i.i = getelementptr inbounds i8, ptr %_17.i, i64 16
store i64 %size.i.i, ptr %_9.sroa.5.0._0.sroa_idx.i.i, align 8
br label %"_ZN5alloc7raw_vec19RawVec$LT$T$C$A$GT$14current_memory17hfac323f7989e1023E.exit.i"
"_ZN5alloc7raw_vec19RawVec$LT$T$C$A$GT$14current_memory17hfac323f7989e1023E.exit.i":
%.sink.i.i = phi i64 [ 8, %bb4.i.i ], [ 0, %bb10.i ]
%3 = getelementptr inbounds i8, ptr %_17.i, i64 8
store i64 %.sink.i.i, ptr %3, align 8
call fastcc void @alloc::raw_vec::finish_grow::ha9fe1ec34f38f857(ptr noalias nocapture noundef nonnull align 8 dereferenceable(24) %self3.i, i64 noundef %_0.sroa.0.0.i.i, i64 %_0.sroa.3.0.i.i, ptr noalias nocapture noundef nonnull readonly align 8 dereferenceable(24) %_17.i)
call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %_17.i)
%_39.i = load i64, ptr %self3.i, align 8
%trunc.not.i = icmp eq i64 %_39.i, 0
%4 = getelementptr inbounds i8, ptr %self3.i, i64 8
br i1 %trunc.not.i, label %bb3, label %bb14.i
bb14.i:
%e.0.i = load i64, ptr %4, align 8
%5 = getelementptr inbounds i8, ptr %self3.i, i64 16
%e.1.i = load i64, ptr %5, align 8
call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %self3.i)
br label %bb2
bb2:
%_0.sroa.4.0.i.ph = phi i64 [ undef, %start ], [ %e.1.i, %bb14.i ]
%_0.sroa.0.0.i.ph = phi i64 [ 0, %start ], [ %e.0.i, %bb14.i ]
tail call void @alloc::raw_vec::handle_error::h0fc9691652206c4f(i64 noundef %_0.sroa.0.0.i.ph, i64 %_0.sroa.4.0.i.ph) #13
unreachable
bb3:
%v.0.i = load ptr, ptr %4, align 8
call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %self3.i)
store ptr %v.0.i, ptr %1, align 8
store i64 %_0.sroa.0.0.sroa.speculated.i17.i, ptr %self, align 8
ret void
}
define void @recursive(i64 noundef %x) unnamed_addr personality ptr @rust_eh_personality {
start:
%stuff = alloca [24 x i8], align 8
%0 = icmp eq i64 %x, 0
br i1 %0, label %bb8, label %bb2
bb2:
call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %stuff)
%_4.i.i = icmp ugt i64 %x, 1152921504606846975
%array_size.i.i = shl nuw nsw i64 %x, 3
br i1 %_4.i.i, label %bb12, label %"_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$8allocate17h5f1c0cec1dc28999E.exit.i"
"_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$8allocate17h5f1c0cec1dc28999E.exit.i":
%1 = load volatile i8, ptr @__rust_no_alloc_shim_is_unstable, align 1
%_0.i.i.i.i = tail call noalias noundef align 8 ptr @__rust_alloc(i64 noundef %array_size.i.i, i64 noundef 8) #12
%2 = icmp eq ptr %_0.i.i.i.i, null
br i1 %2, label %bb12, label %bb13
bb8:
ret void
bb13:
store i64 %x, ptr %stuff, align 8
%3 = getelementptr inbounds i8, ptr %stuff, i64 8
store ptr %_0.i.i.i.i, ptr %3, align 8
%4 = getelementptr inbounds i8, ptr %stuff, i64 16
store i64 0, ptr %4, align 8
br label %bb15
bb12:
%_12.sroa.4.0.ph = phi i64 [ 8, %"_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$8allocate17h5f1c0cec1dc28999E.exit.i" ], [ 0, %bb2 ]
tail call void @alloc::raw_vec::handle_error::h0fc9691652206c4f(i64 noundef %_12.sroa.4.0.ph, i64 %array_size.i.i) #13
unreachable
bb16:
%_10 = add nsw i64 %x, -1
invoke void @recursive(i64 noundef %_10)
to label %bb6 unwind label %cleanup.loopexit.split-lp
cleanup.loopexit:
%lpad.loopexit = landingpad { ptr, i32 }
cleanup
br label %cleanup
cleanup.loopexit.split-lp:
%lpad.loopexit.split-lp = landingpad { ptr, i32 }
cleanup
br label %cleanup
cleanup:
%lpad.phi = phi { ptr, i32 } [ %lpad.loopexit, %cleanup.loopexit ], [ %lpad.loopexit.split-lp, %cleanup.loopexit.split-lp ]
%stuff.val = load i64, ptr %stuff, align 8
%5 = icmp eq i64 %stuff.val, 0
br i1 %5, label %bb10, label %bb2.i.i4.i
bb2.i.i4.i:
%stuff.val2 = load ptr, ptr %3, align 8
%size.i.i.i5.i = shl nuw i64 %stuff.val, 3
tail call void @__rust_dealloc(ptr noundef nonnull %stuff.val2, i64 noundef %size.i.i.i5.i, i64 noundef 8) #12
br label %bb10
bb6:
%stuff.val3 = load i64, ptr %stuff, align 8
%6 = icmp eq i64 %stuff.val3, 0
br i1 %6, label %"_ZN4core3ptr49drop_in_place$LT$alloc..vec..Vec$LT$usize$GT$$GT$17h0c79e9ae257d3447E.exit7", label %bb2.i.i4.i5
bb2.i.i4.i5:
%stuff.val4 = load ptr, ptr %3, align 8
%size.i.i.i5.i6 = shl nuw i64 %stuff.val3, 3
tail call void @__rust_dealloc(ptr noundef nonnull %stuff.val4, i64 noundef %size.i.i.i5.i6, i64 noundef 8) #12
br label %"_ZN4core3ptr49drop_in_place$LT$alloc..vec..Vec$LT$usize$GT$$GT$17h0c79e9ae257d3447E.exit7"
"_ZN4core3ptr49drop_in_place$LT$alloc..vec..Vec$LT$usize$GT$$GT$17h0c79e9ae257d3447E.exit7":
call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %stuff)
br label %bb8
bb15:
%self1.i18 = phi ptr [ %_0.i.i.i.i, %bb13 ], [ %self1.i, %"_ZN5alloc3vec16Vec$LT$T$C$A$GT$4push17h1114aa02267b6000E.exit" ]
%len.i = phi i64 [ 0, %bb13 ], [ %8, %"_ZN5alloc3vec16Vec$LT$T$C$A$GT$4push17h1114aa02267b6000E.exit" ]
%iter.sroa.0.016 = phi i64 [ 0, %bb13 ], [ %_0.i, %"_ZN5alloc3vec16Vec$LT$T$C$A$GT$4push17h1114aa02267b6000E.exit" ]
%_0.i = add nuw i64 %iter.sroa.0.016, 1
tail call void @llvm.experimental.noalias.scope.decl(metadata !327)
%7 = load i64, ptr %stuff, align 8
%_4.i = icmp eq i64 %len.i, %7
br i1 %_4.i, label %bb1.i, label %"_ZN5alloc3vec16Vec$LT$T$C$A$GT$4push17h1114aa02267b6000E.exit"
bb1.i:
invoke fastcc void @"alloc::raw_vec::RawVec<T,A>::grow_one::h364980f97d6986be"(ptr noalias noundef nonnull align 8 dereferenceable(16) %stuff)
to label %"bb1.i._ZN5alloc3vec16Vec$LT$T$C$A$GT$4push17h1114aa02267b6000E.exit_crit_edge" unwind label %cleanup.loopexit
"bb1.i._ZN5alloc3vec16Vec$LT$T$C$A$GT$4push17h1114aa02267b6000E.exit_crit_edge":
%self1.i.pre = load ptr, ptr %3, align 8
br label %"_ZN5alloc3vec16Vec$LT$T$C$A$GT$4push17h1114aa02267b6000E.exit"
"_ZN5alloc3vec16Vec$LT$T$C$A$GT$4push17h1114aa02267b6000E.exit":
%self1.i = phi ptr [ %self1.i.pre, %"bb1.i._ZN5alloc3vec16Vec$LT$T$C$A$GT$4push17h1114aa02267b6000E.exit_crit_edge" ], [ %self1.i18, %bb15 ]
%end.i = getelementptr inbounds i64, ptr %self1.i, i64 %len.i
store i64 %iter.sroa.0.016, ptr %end.i, align 8
%8 = add i64 %len.i, 1
store i64 %8, ptr %4, align 8
%exitcond.not = icmp eq i64 %_0.i, %x
br i1 %exitcond.not, label %bb16, label %bb15
bb10:
resume { ptr, i32 } %lpad.phi
}
declare noundef i32 @rust_eh_personality(i32 noundef, i32 noundef, i64 noundef, ptr noundef, ptr noundef) unnamed_addr #2
declare void @llvm.assume(i1 noundef) #3
declare noalias noundef ptr @__rust_alloc(i64 noundef, i64 allocalign noundef) unnamed_addr #4
declare noalias noundef ptr @__rust_realloc(ptr allocptr noundef, i64 noundef, i64 allocalign noundef, i64 noundef) unnamed_addr #5
declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) #6
declare void @alloc::raw_vec::handle_error::h0fc9691652206c4f(i64 noundef, i64) unnamed_addr #7
declare void @__rust_dealloc(ptr allocptr noundef, i64 noundef, i64 noundef) unnamed_addr #8
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #9
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #9
declare void @llvm.experimental.noalias.scope.decl(metadata) #10
declare i64 @llvm.umax.i64(i64, i64) #11
Side note, it'd be amazing if the compiler would detect that all the allocations and stores are unused and it'd just elide them 🙂
Tangentially related, at the beginning I wrote a slightly different implementation of the recursive function which has much better performance, but on Julia nightly (same version as bove) it runs in a segfault in the garbage collector for larger arrays:
julia> function recursive_setindex(x::Int)
if x < 1
return nothing
end
stuff = Vector{Int}(undef, x)
for idx in eachindex(stuff)
stuff[idx] = x
end
recursive_setindex(x - 1)
end
recursive_setindex (generic function with 1 method)
julia> @time recursive_setindex(50_000)
0.948488 seconds (99.75 k allocations: 9.318 GiB, 25.54% gc time)
julia> @time recursive_setindex(100_000)
3.406842 seconds (199.75 k allocations: 37.262 GiB, 18.83% gc time)
julia> @time recursive_setindex(200_000)
11.615417 seconds (399.75 k allocations: 149.029 GiB, 17.08% gc time)
julia> @time recursive_setindex(400_000)
[5662] signal 11 (2): Segmentation fault: 11
in expression starting at REPL[6]:1
gc_sweep_page at /Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-HL2F7YQ3XH.0/build/default-honeycrisp-HL2F7YQ3XH-0/julialang/julia-master/src/gc.c:0 [inlined]
gc_sweep_pool_page at /Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-HL2F7YQ3XH.0/build/default-honeycrisp-HL2F7YQ3XH-0/julialang/julia-master/src/gc.c:1509
gc_sweep_prescan at /Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-HL2F7YQ3XH.0/build/default-honeycrisp-HL2F7YQ3XH-0/julialang/julia-master/src/gc.c:1578
gc_sweep_wake_all at /Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-HL2F7YQ3XH.0/build/default-honeycrisp-HL2F7YQ3XH-0/julialang/julia-master/src/gc.c:1599
gc_sweep_pool at /Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-HL2F7YQ3XH.0/build/default-honeycrisp-HL2F7YQ3XH-0/julialang/julia-master/src/gc.c:1779 [inlined]
_jl_gc_collect at /Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-HL2F7YQ3XH.0/build/default-honeycrisp-HL2F7YQ3XH-0/julialang/julia-master/src/gc.c:3583
ijl_gc_collect at /Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-HL2F7YQ3XH.0/build/default-honeycrisp-HL2F7YQ3XH-0/julialang/julia-master/src/gc.c:3847
maybe_collect at /Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-HL2F7YQ3XH.0/build/default-honeycrisp-HL2F7YQ3XH-0/julialang/julia-master/src/gc.c:861 [inlined]
jl_gc_pool_alloc_inner at /Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-HL2F7YQ3XH.0/build/default-honeycrisp-HL2F7YQ3XH-0/julialang/julia-master/src/gc.c:1276 [inlined]
jl_gc_pool_alloc_noinline at /Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-HL2F7YQ3XH.0/build/default-honeycrisp-HL2F7YQ3XH-0/julialang/julia-master/src/gc.c:1334 [inlined]
jl_gc_alloc_ at /Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-HL2F7YQ3XH.0/build/default-honeycrisp-HL2F7YQ3XH-0/julialang/julia-master/src/./julia_internal.h:509
_new_genericmemory_ at /Users/julia/.julia/scratchspaces/a66863c6-20e8-4ff4-8a62-49f30b1f605e/agent-cache/default-honeycrisp-HL2F7YQ3XH.0/build/default-honeycrisp-HL2F7YQ3XH-0/julialang/julia-master/src/genericmemory.c:56
GenericMemory at ./boot.jl:537 [inlined]
Array at ./boot.jl:597 [inlined]
recursive_setindex at ./REPL[4]:5
recursive_setindex at ./REPL[4]:9
[...]