Skip to content

Commit 27e21c8

Browse files
hvncat: Added inbounds annotations that improve performance (#41200)
* Added judicious inbounds/inline decorations
* add inline to other one
* bump
* grammar

Co-authored-by: Jeff Bezanson <jeff.bezanson@gmail.com>

* Remove `@inline`
* bump CI
* bump CI 2
* Merge fix
* Ensure `hvncat_fill!` can't execute when N < 2
* Bounds check in three-arg `hvncat_fill!`
* Narrow inbounds
* Moved bounds check up

---------

Co-authored-by: Jeff Bezanson <jeff.bezanson@gmail.com>
1 parent 36a39b0 commit 27e21c8

File tree

1 file changed

+22
-10
lines changed

1 file changed

+22
-10
lines changed

base/abstractarray.jl

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2404,26 +2404,30 @@ function _typed_hvncat(::Type{T}, dims::NTuple{N, Int}, row_first::Bool, xs::Num
24042404
end
24052405

24062406
# Fill the preallocated array `A` with the elements of the tuple `xs`.
#
# Arguments:
# - `A`: destination array; its size determines the expected number of elements.
# - `row_first`: when `true`, `xs` is consumed row by row within each
#   (dim 1, dim 2) slice of `A`, one slice after another; when `false`, `xs` is
#   copied to `A` in plain linear-index order.
# - `xs`: the values to store.
#
# Throws an `ArgumentError` when `length(xs)` differs from the number of elements
# implied by `size(A)`. Returns `nothing`; `A` is modified in place.
function hvncat_fill!(A::Array, row_first::Bool, xs::Tuple)
    nr, nc = size(A, 1), size(A, 2)
    # number of (nr x nc) slices spanned by the dimensions beyond the second
    na = prod(size(A)[3:end])
    len = length(xs)
    nrc = nr * nc
    # This check is what makes the `@inbounds` accesses below safe: every index
    # used on `A` and `xs` is at most `nrc * na == len`.
    if nrc * na != len
        throw(ArgumentError("argument count $(len) does not match specified shape $(size(A))"))
    end
    # putting these in separate functions leads to unnecessary allocations
    if row_first
        k = 1
        for d in 1:na
            dd = nrc * (d - 1)          # linear offset of slice `d`
            for i in 1:nr
                Ai = dd + i             # first element of row `i` in this slice
                for j in 1:nc
                    @inbounds A[Ai] = xs[k]
                    k += 1
                    Ai += nr            # step to the next column of the same row
                end
            end
        end
    else
        for k in eachindex(xs)
            @inbounds A[k] = xs[k]
        end
    end
    return nothing
end
@@ -2609,28 +2613,36 @@ function _typed_hvncat_shape(::Type{T}, shape::NTuple{N, Tuple}, row_first, as::
26092613
return A
26102614
end
26112615

2612-
function hvncat_fill!(A::AbstractArray{T, N}, scratch1::Vector{Int}, scratch2::Vector{Int}, d1::Int, d2::Int, as::Tuple{Vararg}) where {T, N}
2616+
function hvncat_fill!(A::AbstractArray{T, N}, scratch1::Vector{Int}, scratch2::Vector{Int},
2617+
d1::Int, d2::Int, as::Tuple) where {T, N}
2618+
N > 1 || throw(ArgumentError("dimensions of the destination array must be at least 2"))
2619+
length(scratch1) == length(scratch2) == N ||
2620+
throw(ArgumentError("scratch vectors must have as many elements as the destination array has dimensions"))
2621+
0 < d1 < 3 &&
2622+
0 < d2 < 3 &&
2623+
d1 != d2 ||
2624+
throw(ArgumentError("d1 and d2 must be either 1 or 2, exclusive."))
26132625
outdims = size(A)
26142626
offsets = scratch1
26152627
inneroffsets = scratch2
26162628
for a as
26172629
if isa(a, AbstractArray)
26182630
for ai a
2619-
Ai = hvncat_calcindex(offsets, inneroffsets, outdims, N)
2631+
@inbounds Ai = hvncat_calcindex(offsets, inneroffsets, outdims, N)
26202632
A[Ai] = ai
26212633

2622-
for j 1:N
2634+
@inbounds for j 1:N
26232635
inneroffsets[j] += 1
26242636
inneroffsets[j] < cat_size(a, j) && break
26252637
inneroffsets[j] = 0
26262638
end
26272639
end
26282640
else
2629-
Ai = hvncat_calcindex(offsets, inneroffsets, outdims, N)
2641+
@inbounds Ai = hvncat_calcindex(offsets, inneroffsets, outdims, N)
26302642
A[Ai] = a
26312643
end
26322644

2633-
for j (d1, d2, 3:N...)
2645+
@inbounds for j (d1, d2, 3:N...)
26342646
offsets[j] += cat_size(a, j)
26352647
offsets[j] < outdims[j] && break
26362648
offsets[j] = 0

0 commit comments

Comments
 (0)