Closed
Description
Follow up from #68 (comment):
julia> code_llvm((FixedSizeVector{Float64,Memory{Float64}},)) do v for idx in eachindex(v) v[idx] = idx end endLLVM IR
; Function Signature: var"#23"(FixedSizeArrays.FixedSizeArray{Float64, 1, Memory{Float64}}) ; @ REPL[16]:2 within `#23` define void @"julia_#23_3098"(ptr nocapture noundef nonnull readonly align 8 dereferenceable(16) %"v::FixedSizeArray", ptr nocapture readonly %.roots.v) #0 { top: ; ┌ @ abstractarray.jl:321 within `eachindex` ; │┌ @ abstractarray.jl:137 within `axes1` ; ││┌ @ abstractarray.jl:98 within `axes` ; │││┌ @ /Users/mose/.julia/dev/FixedSizeArrays/src/FixedSizeArrays.jl:64 within `size` ; ││││┌ @ Base.jl:49 within `getproperty` %0 = getelementptr inbounds i8, ptr %"v::FixedSizeArray", i64 8 ; └└└└└ ; ┌ @ range.jl:911 within `iterate` ; │┌ @ range.jl:688 within `isempty` ; ││┌ @ operators.jl:425 within `>` ; │││┌ @ int.jl:83 within `<` %.unbox = load i64, ptr %0, align 8 %1 = icmp slt i64 %.unbox, 1 ; └└└└ br i1 %1, label %L48, label %L13.preheader16 L13.preheader16: ; preds = %top %memoryref_mem = load ptr, ptr %.roots.v, align 8 %memory_data_ptr = getelementptr inbounds { i64, ptr }, ptr %memoryref_mem, i64 0, i32 1 ; @ REPL[16]:3 within `#23` ; ┌ @ /Users/mose/.julia/dev/FixedSizeArrays/src/FixedSizeArrays.jl:60 within `setindex!` @ genericmemory.jl:240 ; │┌ @ boot.jl:544 within `memoryref` %memoryref_data.pre = load ptr, ptr %memory_data_ptr, align 8 ; │└ ; │ @ /Users/mose/.julia/dev/FixedSizeArrays/src/FixedSizeArrays.jl:59 within `setindex!` br label %L29 L29: ; preds = %L29, %L13.preheader16 %value_phi3 = phi i64 [ %4, %L29 ], [ 1, %L13.preheader16 ] ; │ @ /Users/mose/.julia/dev/FixedSizeArrays/src/FixedSizeArrays.jl:60 within `setindex!` @ genericmemory.jl:239 ; │┌ @ number.jl:7 within `convert` ; ││┌ @ float.jl:239 within `Float64` %2 = sitofp i64 %value_phi3 to double ; │└└ ; │ @ /Users/mose/.julia/dev/FixedSizeArrays/src/FixedSizeArrays.jl:60 within `setindex!` @ genericmemory.jl:240 %memoryref_offset = shl i64 %value_phi3, 3 ; │ @ /Users/mose/.julia/dev/FixedSizeArrays/src/FixedSizeArrays.jl:60 within `setindex!` @ genericmemory.jl:241 %3 = getelementptr i8, ptr %memoryref_data.pre, i64 %memoryref_offset %memoryref_data6 = getelementptr i8, ptr %3, i64 -8 store double %2, ptr %memoryref_data6, align 8 ; └ ; @ REPL[16]:4 within `#23` ; ┌ @ range.jl:915 within `iterate` %4 = add nuw i64 %value_phi3, 1 ; └ %5 = icmp ult i64 %value_phi3, %.unbox br i1 %5, label %L29, label %L48 L48: ; preds = %L29, %top ret void }Compare with
julia> code_llvm((FixedSizeVector{Float64,Memory{Float64}},)) do v for idx in eachindex(v) @inbounds v[idx] = idx end endLLVM IR
; Function Signature: var"#26"(FixedSizeArrays.FixedSizeArray{Float64, 1, Memory{Float64}}) ; @ REPL[17]:2 within `#26` define void @"julia_#26_3146"(ptr nocapture noundef nonnull readonly align 8 dereferenceable(16) %"v::FixedSizeArray", ptr nocapture readonly %.roots.v) #0 { top: ; ┌ @ abstractarray.jl:321 within `eachindex` ; │┌ @ abstractarray.jl:137 within `axes1` ; ││┌ @ abstractarray.jl:98 within `axes` ; │││┌ @ /Users/mose/.julia/dev/FixedSizeArrays/src/FixedSizeArrays.jl:64 within `size` ; ││││┌ @ Base.jl:49 within `getproperty` %0 = getelementptr inbounds i8, ptr %"v::FixedSizeArray", i64 8 ; └└└└└ ; ┌ @ range.jl:911 within `iterate` ; │┌ @ range.jl:688 within `isempty` ; ││┌ @ operators.jl:425 within `>` ; │││┌ @ int.jl:83 within `<` %.unbox = load i64, ptr %0, align 8 %1 = icmp slt i64 %.unbox, 1 ; └└└└ br i1 %1, label %L48, label %L13.preheader L13.preheader: ; preds = %top %memoryref_mem = load ptr, ptr %.roots.v, align 8 %memory_data_ptr = getelementptr inbounds { i64, ptr }, ptr %memoryref_mem, i64 0, i32 1 %memoryref_data = load ptr, ptr %memory_data_ptr, align 8 ; @ REPL[17]:4 within `#26` %invariant.gep = getelementptr i8, ptr %memoryref_data, i64 -8 %min.iters.check = icmp ult i64 %.unbox, 8 br i1 %min.iters.check, label %scalar.ph, label %vector.ph vector.ph: ; preds = %L13.preheader %n.vec = and i64 %.unbox, 9223372036854775800 %ind.end = or disjoint i64 %n.vec, 1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.ind = phi <2 x i64> [ <i64 1, i64 2>, %vector.ph ], [ %vec.ind.next, %vector.body ] %step.add = add <2 x i64> %vec.ind, <i64 2, i64 2> %step.add13 = add <2 x i64> %vec.ind, <i64 4, i64 4> %step.add14 = add <2 x i64> %vec.ind, <i64 6, i64 6> ; @ REPL[17]:3 within `#26` ; ┌ @ /Users/mose/.julia/dev/FixedSizeArrays/src/FixedSizeArrays.jl:60 within `setindex!` @ genericmemory.jl:239 ; │┌ @ number.jl:7 within `convert` ; ││┌ @ float.jl:239 within `Float64` %2 = sitofp <2 x i64> %vec.ind to <2 x double> %3 = sitofp <2 x i64> %step.add to <2 x double> %4 = sitofp <2 x i64> %step.add13 to <2 x double> %5 = sitofp <2 x i64> %step.add14 to <2 x double> ; │└└ ; │ @ /Users/mose/.julia/dev/FixedSizeArrays/src/FixedSizeArrays.jl:60 within `setindex!` @ genericmemory.jl:240 %offset.idx = shl i64 %index, 3 %6 = or disjoint i64 %offset.idx, 8 ; │ @ /Users/mose/.julia/dev/FixedSizeArrays/src/FixedSizeArrays.jl:60 within `setindex!` @ genericmemory.jl:241 %7 = getelementptr i8, ptr %invariant.gep, i64 %6 %8 = getelementptr double, ptr %7, i64 2 %9 = getelementptr double, ptr %7, i64 4 %10 = getelementptr double, ptr %7, i64 6 store <2 x double> %2, ptr %7, align 8 store <2 x double> %3, ptr %8, align 8 store <2 x double> %4, ptr %9, align 8 store <2 x double> %5, ptr %10, align 8 %index.next = add nuw i64 %index, 8 %vec.ind.next = add <2 x i64> %vec.ind, <i64 8, i64 8> %11 = icmp eq i64 %index.next, %n.vec br i1 %11, label %middle.block, label %vector.body middle.block: ; preds = %vector.body ; └ ; @ REPL[17]:4 within `#26` %cmp.n = icmp eq i64 %.unbox, %n.vec br i1 %cmp.n, label %L48, label %scalar.ph scalar.ph: ; preds = %middle.block, %L13.preheader %bc.resume.val = phi i64 [ %ind.end, %middle.block ], [ 1, %L13.preheader ] br label %L13 L13: ; preds = %L13, %scalar.ph %value_phi3 = phi i64 [ %13, %L13 ], [ %bc.resume.val, %scalar.ph ] ; @ REPL[17]:3 within `#26` ; ┌ @ /Users/mose/.julia/dev/FixedSizeArrays/src/FixedSizeArrays.jl:60 within `setindex!` @ genericmemory.jl:239 ; │┌ @ number.jl:7 within `convert` ; ││┌ @ float.jl:239 within `Float64` %12 = sitofp i64 %value_phi3 to double ; │└└ ; │ @ /Users/mose/.julia/dev/FixedSizeArrays/src/FixedSizeArrays.jl:60 within `setindex!` @ genericmemory.jl:240 %memoryref_offset = shl i64 %value_phi3, 3 ; │ @ /Users/mose/.julia/dev/FixedSizeArrays/src/FixedSizeArrays.jl:60 within `setindex!` @ genericmemory.jl:241 %gep = getelementptr i8, ptr %invariant.gep, i64 %memoryref_offset store double %12, ptr %gep, align 8 ; └ ; @ REPL[17]:4 within `#26` ; ┌ @ range.jl:915 within `iterate` ; │┌ @ promotion.jl:639 within `==` %.not.not = icmp eq i64 %value_phi3, %.unbox ; │└ %13 = add nuw i64 %value_phi3, 1 ; └ br i1 %.not.not, label %L48, label %L13 L48: ; preds = %L13, %middle.block, %top ret void }
I believe this is entirely due to the fact we're using Memory
instead of MemoryRef
, and MemoryRef
is somehow better optimised for stores (it doesn't need to call memoryrefnew(mem)
). As a proof of concept (don't take this code seriously!), with this patch:
diff --git a/src/FixedSizeArrays.jl b/src/FixedSizeArrays.jl
index 6aeac57..3fd1506 100644
--- a/src/FixedSizeArrays.jl
+++ b/src/FixedSizeArrays.jl
@@ -13,9 +13,11 @@ struct Internal end
struct FixedSizeArray{T,N,Mem<:GenericMemory{<:Any,T}} <: DenseArray{T,N}
mem::Mem
+ ref::MemoryRef{T}
size::NTuple{N,Int}
function FixedSizeArray{T,N,M}(::Internal, mem::M, size::NTuple{N,Int}) where {T,N,M<:GenericMemory{<:Any,T}}
- new{T,N,M}(mem, size)
+ ref = Base.memoryref(mem)
+ new{T,N,M}(mem, ref, size)
end
end
@@ -57,7 +59,7 @@ Base.IndexStyle(::Type{<:FixedSizeArray}) = IndexLinear()
Base.@propagate_inbounds Base.getindex(A::FixedSizeArray, i::Int) = A.mem[i]
Base.@propagate_inbounds Base.@assume_effects :noub_if_noinbounds function Base.setindex!(A::FixedSizeArray{T}, x, i::Int) where {T}
@boundscheck checkbounds(A, i)
- @inbounds A.mem[i] = x
+ Base.memoryrefset!(Base.memoryrefnew(A.ref, i, false), x isa T ? x : convert(T,x)::T, :not_atomic, false)
return A
end
I get
julia> code_llvm((FixedSizeVector{Float64,Memory{Float64}},)) do v
for idx in eachindex(v)
v[idx] = 1.0
end
end
; Function Signature: var"#2"(FixedSizeArrays.FixedSizeArray{Float64, 1, Memory{Float64}})
; @ REPL[2]:2 within `#2`
define void @"julia_#2_2000"(ptr nocapture noundef nonnull readonly align 8 dereferenceable(32) %"v::FixedSizeArray", ptr nocapture readonly %.roots.v) #0 {
top:
; ┌ @ abstractarray.jl:321 within `eachindex`
; │┌ @ abstractarray.jl:137 within `axes1`
; ││┌ @ abstractarray.jl:98 within `axes`
; │││┌ @ /Users/mose/.julia/dev/FixedSizeArrays/src/FixedSizeArrays.jl:66 within `size`
; ││││┌ @ Base.jl:49 within `getproperty`
%0 = getelementptr inbounds i8, ptr %"v::FixedSizeArray", i64 24
; └└└└└
; ┌ @ range.jl:911 within `iterate`
; │┌ @ range.jl:688 within `isempty`
; ││┌ @ operators.jl:425 within `>`
; │││┌ @ int.jl:83 within `<`
%.unbox = load i64, ptr %0, align 8
%1 = icmp slt i64 %.unbox, 1
; └└└└
br i1 %1, label %L45, label %L13.preheader14
L13.preheader14: ; preds = %top
%2 = getelementptr inbounds i8, ptr %"v::FixedSizeArray", i64 8
%memoryref_data = load ptr, ptr %2, align 8
; @ REPL[2]:3 within `#2`
; ┌ @ /Users/mose/.julia/dev/FixedSizeArrays/src/FixedSizeArrays.jl:61 within `setindex!`
; │┌ @ abstractarray.jl:699 within `checkbounds`
%invariant.gep = getelementptr i8, ptr %memoryref_data, i64 -8
%min.iters.check = icmp ult i64 %.unbox, 8
br i1 %min.iters.check, label %scalar.ph, label %vector.ph
vector.ph: ; preds = %L13.preheader14
%n.vec = and i64 %.unbox, 9223372036854775800
%ind.end = or disjoint i64 %n.vec, 1
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; │└
; │ @ /Users/mose/.julia/dev/FixedSizeArrays/src/FixedSizeArrays.jl:62 within `setindex!`
%offset.idx = shl i64 %index, 3
%3 = or disjoint i64 %offset.idx, 8
%4 = getelementptr i8, ptr %invariant.gep, i64 %3
%5 = getelementptr i64, ptr %4, i64 2
%6 = getelementptr i64, ptr %4, i64 4
%7 = getelementptr i64, ptr %4, i64 6
store <2 x i64> <i64 4607182418800017408, i64 4607182418800017408>, ptr %4, align 8
store <2 x i64> <i64 4607182418800017408, i64 4607182418800017408>, ptr %5, align 8
store <2 x i64> <i64 4607182418800017408, i64 4607182418800017408>, ptr %6, align 8
store <2 x i64> <i64 4607182418800017408, i64 4607182418800017408>, ptr %7, align 8
%index.next = add nuw i64 %index, 8
%8 = icmp eq i64 %index.next, %n.vec
br i1 %8, label %middle.block, label %vector.body
middle.block: ; preds = %vector.body
; └
; @ REPL[2]:4 within `#2`
%cmp.n = icmp eq i64 %.unbox, %n.vec
br i1 %cmp.n, label %L45, label %scalar.ph
scalar.ph: ; preds = %middle.block, %L13.preheader14
%bc.resume.val = phi i64 [ %ind.end, %middle.block ], [ 1, %L13.preheader14 ]
; @ REPL[2]:3 within `#2`
; ┌ @ /Users/mose/.julia/dev/FixedSizeArrays/src/FixedSizeArrays.jl:61 within `setindex!`
; │┌ @ abstractarray.jl:699 within `checkbounds`
br label %L30
L30: ; preds = %L30, %scalar.ph
%value_phi3 = phi i64 [ %9, %L30 ], [ %bc.resume.val, %scalar.ph ]
; │└
; │ @ /Users/mose/.julia/dev/FixedSizeArrays/src/FixedSizeArrays.jl:62 within `setindex!`
%memoryref_offset = shl i64 %value_phi3, 3
%gep = getelementptr i8, ptr %invariant.gep, i64 %memoryref_offset
store i64 4607182418800017408, ptr %gep, align 8
; └
; @ REPL[2]:4 within `#2`
; ┌ @ range.jl:915 within `iterate`
%9 = add nuw i64 %value_phi3, 1
; └
%10 = icmp ult i64 %value_phi3, %.unbox
br i1 %10, label %L30, label %L45
L45: ; preds = %L30, %middle.block, %top
ret void
}
which is pretty much what you'd get for Vector
s. It looks like using Memory
instead of MemoryRef
adds an extra layer of indirection, no idea where to go from here. CC: @oscardssmith who may have opinions about this (I already checked, JuliaLang/julia#55913 doesn't change anything here)