Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extending Base.stack for DimArrays #645

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
Open
105 changes: 99 additions & 6 deletions src/array/methods.jl
Original file line number Diff line number Diff line change
Expand Up @@ -131,15 +131,15 @@
"""
function Base.eachslice(A::AbstractDimArray; dims)
dimtuple = _astuple(dims)
if !(dimtuple == ())
if !(dimtuple == ())

Check warning on line 134 in src/array/methods.jl

View check run for this annotation

Codecov / codecov/patch

src/array/methods.jl#L134

Added line #L134 was not covered by tests
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be nice to remove all these whitespace changes so we can see the real changes

all(hasdim(A, dimtuple...)) || throw(DimensionMismatch("A doesn't have all dimensions $dims"))
end
_eachslice(A, dimtuple)
end
else
@inline function Base.eachslice(A::AbstractDimArray; dims, drop=true)
dimtuple = _astuple(dims)
if !(dimtuple == ())
if !(dimtuple == ())
all(hasdim(A, dimtuple...)) || throw(DimensionMismatch("A doesn't have all dimensions $dims"))
end
_eachslice(A, dimtuple, drop)
Expand Down Expand Up @@ -195,7 +195,7 @@
return rebuild(A, newdata, newdims)
end


Base.cumsum(A::AbstractDimVector) = rebuild(A, Base.cumsum(parent(A)))
Base.cumsum(A::AbstractDimArray; dims) = rebuild(A, cumsum(parent(A); dims=dimnum(A, dims)))
Base.cumsum!(B::AbstractArray, A::AbstractDimVector) = cumsum!(B, parent(A))
Expand Down Expand Up @@ -293,7 +293,7 @@
else
# vcat the index for the catdim in each of Xin
joindims = map(A -> dims(A, catdim), Xin)
if !check_cat_lookups(joindims...)
if !check_cat_lookups(joindims...)
return rebuild(catdim, NoLookup())
end
_vcat_dims(joindims...)
Expand Down Expand Up @@ -367,7 +367,7 @@
(catdim,)
else
# Make sure this is the same dimension for all arrays
if !comparedims(Bool, map(x -> dims(x, 2), As)...;
if !comparedims(Bool, map(x -> dims(x, 2), As)...;
val=true, warn = " Can't `vcat` AbstractDimArray, applying to `parent` object."
)
return Base.vcat(map(parent, As)...)
Expand Down Expand Up @@ -542,11 +542,104 @@
_span_string(D, S, span) = _cat_warn_string(D, "not all lookups have `$S` spans. Found $(basetypeof(span))")
_cat_warn_string(D, message) = """
`cat` cannot concatenate `Dimension`s, falling back to `parent` type:
$message on dimension $D.
$message on dimension $D.

To fix for `AbstractDimArray`, pass new lookup values as `cat(As...; dims=$D(newlookupvals))` keyword or `dims=$D()` for empty `NoLookup`.
"""

function Base._typed_stack(::Colon, ::Type{T}, ::Type{S}, A, Aax=_iterator_axes(A)) where {T,S<:AbstractDimArray}
Copy link
Owner

@rafaqz rafaqz Mar 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How stable do you think these methods are... could we add a method to Base.stack instead? What do we gain from touching these internals?

I know we use internals elsewhere, but we should stop:
#522

origdims = map(dims, A)
_A = parent.(A)
t = eltype(_A)
_A = Base._typed_stack(:, T, t, A)

if !comparedims(Bool, origdims...;
order=true, val=true, warn=" Can't `stack` AbstractDimArray, applying to `parent` object."
)
return _A
else
DimArray(_A, (first(origdims)..., AnonDim()))
rafaqz marked this conversation as resolved.
Show resolved Hide resolved
end
end

function Base._dim_stack(newdim::Integer, ::Type{T}, ::Type{S}, A) where {T,S<:AbstractDimArray}
origdims = dims.(A)
_A = parent.(A)
t = eltype(_A)
_A = Base._dim_stack(newdim, T, t, A)

if !comparedims(Bool, origdims...;
order=true, val=true, warn=" Can't `stack` AbstractDimArray, applying to `parent` object."
)
return _A
end

newdims = first(origdims)
newdims = ntuple(length(newdims) + 1) do d
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this type-stable?

if d == newdim
AnonDim()
else # Return the old dimension, shifted across once if it comes after the new dim
newdims[d-(d>newdim)]
end
end
DimArray(_A, newdims)
rafaqz marked this conversation as resolved.
Show resolved Hide resolved
end

"""
Base.stack(A::AbstractVector{<:AbstractDimArray}; dims=Pair(ndims(A[1]), AnonDim()))

Stack arrays along a new axis while preserving the dimensional information of other axes.

The optional keyword argument `dims` has the following behavior:
- `dims isa Integer`: The dimension of the new axis is an `AnonDim` at position `dims`
- `dims isa Dimension`: The new axis is at `ndims(A[1])+1` and has a dimension of `dims`.
- `dims isa Pair{Integer, Dimension}`: The new axis is at `first(dims)` and has a dimension
of `last(dims)`.

If `dims` contains a `Dimension`, that `Dimension` must have the same length as A.

# Examples
```julia-repl
julia> da = DimArray([1 2 3; 4 5 6], (X(10:10:20), Y(300:-100:100)));
julia> db = DimArray([6 5 4; 3 2 1], (X(10:10:20), Y(300:-100:100)));

# Stack along a new dimension `Z`
julia> dc = stack(Z(1:2), [da, db], dims=3)
╭─────────────────────────╮
│ 2×3×2 DimArray{Int64,3} │
├─────────────────────────┴──────────────────────────────── dims ┐
↓ X Sampled{Int64} 10:10:20 ForwardOrdered Regular Points,
→ Y Sampled{Int64} 300:-100:100 ReverseOrdered Regular Points,
↗ Z 1:2
└────────────────────────────────────────────────────────────────┘

julia> dims(dc, 3) == Z(1:2)
true
julia> parent(dc) == stack(map(parent, [da, db]), dims=3)
true
```
"""
function Base.stack(A::AbstractVector{<:AbstractDimArray}; dims=Pair(ndims(A[1]), AnonDim()), kwargs...)
Copy link
Owner

@rafaqz rafaqz Mar 3, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
function Base.stack(A::AbstractVector{<:AbstractDimArray}; dims=Pair(ndims(A[1]), AnonDim()), kwargs...)
function Base.stack(A::AbstractVector{<:AbstractDimArray}; dims=Pair(ndims(A[1]) + 1, AnonDim()), kwargs...)

Isn't the default ndims(A) + 1 ?

dims isa Integer && (dims = dims => AnonDim())
dims isa Dimension && (dims = ndims(A[1])+1 => dims)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
dims isa Integer && (dims = dims => AnonDim())
dims isa Dimension && (dims = ndims(A[1])+1 => dims)
if dims isa Integer
dims = dims => AnonDim())
elseif dims isa Dimension
dims = ndims(A[1])+1 => dims)
end

Assigning after && is best avoided as the assignment is hard to see when skimming code


B = Base._stack(first(dims), A)

if B isa AbstractDimArray
newdims = ntuple(ndims(B)) do d
if d == first(dims) # Use the new provided dimension
last(dims)
else
DimensionalData.dims(B, d)
end
end
newdims = format(newdims, B)

B = rebuild(B; dims=newdims)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
B = rebuild(B; dims=newdims)
B = rebuild(B; dims=format(newdims, B))

User specified dims are usually incomplete and possibly incorrect

end
return B
end

function Base.inv(A::AbstractDimArray{T,2}) where T
newdata = inv(parent(A))
newdims = reverse(dims(A))
Expand Down
100 changes: 65 additions & 35 deletions test/methods.jl
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ end
@test dropdims(da[X(1:1)]; dims=X) == [1, 2, 3]
@test dropdims(da[2:2, 1:1]; dims=(X(), Y()))[] == 4
@test typeof(dropdims(da[2:2, 1:1]; dims=(X(), Y()))) <: DimArray{Int,0,Tuple{}}
@test refdims(dropdims(da[X(1:1)]; dims=X)) ==
@test refdims(dropdims(da[X(1:1)]; dims=X)) ==
(X(Sampled(143:2:143, ForwardOrdered(), Regular(2), Points(), NoMetadata())),)
dropped = dropdims(da[X(1:1)]; dims=X)
@test dropped[1:2] == [1, 2]
Expand Down Expand Up @@ -240,7 +240,7 @@ end
@test tda == transpose(parent(da))
resultdims = (X(Sampled(1:4, ForwardOrdered(), Regular(1), Points(), NoMetadata())),
Y(Sampled(LinRange(10.0, 20.0, 5), ForwardOrdered(), Regular(2.5), Points(), NoMetadata())))
@test typeof(dims(tda)) == typeof(resultdims)
@test typeof(dims(tda)) == typeof(resultdims)
@test dims(tda) == resultdims
@test size(tda) == (4, 5)

Expand Down Expand Up @@ -319,14 +319,14 @@ end
for dims in xs
cvda = cov(da; dims=X)
@test cvda == cov(a; dims=2)
@test DimensionalData.dims(cvda) ==
@test DimensionalData.dims(cvda) ==
(Y(Sampled(LinRange(10.0, 20.0, 5), ForwardOrdered(), Regular(2.5), Points(), NoMetadata())),
Y(Sampled(LinRange(10.0, 20.0, 5), ForwardOrdered(), Regular(2.5), Points(), NoMetadata())))
end
for dims in ys
crda = cor(da; dims)
@test crda == cor(a; dims=1)
@test DimensionalData.dims(crda) ==
@test DimensionalData.dims(crda) ==
(X(Sampled(1:4, ForwardOrdered(), Regular(1), Points(), NoMetadata())),
X(Sampled(1:4, ForwardOrdered(), Regular(1), Points(), NoMetadata())))
end
Expand All @@ -336,15 +336,15 @@ end
a = [1 2 3 4
3 4 5 6
5 6 7 8]
y = Y(Sampled(10:10:30; sampling=Intervals()))
y = Y(Sampled(10:10:30; sampling=Intervals()))
ti = Ti(Sampled(1:4; sampling=Intervals()))
da = DimArray(a, (y, ti))
ys = (1, Y, Y(), :Y, y)
tis = (2, Ti, Ti(), :Ti, ti)
for dims in ys
ms = mapslices(sum, da; dims)
@test ms == [9 12 15 18]
@test DimensionalData.dims(ms) ==
@test DimensionalData.dims(ms) ==
(Y(NoLookup(Base.OneTo(1))), Ti(Sampled(1:4, ForwardOrdered(), Regular(1), Intervals(Start()), NoMetadata())))
@test refdims(ms) == ()
end
Expand Down Expand Up @@ -390,13 +390,13 @@ end
@test dims(sort(v)) == (X(NoLookup(Base.OneTo(10))),)
A = rand((X(5:-1:1), Y(11:15)))
@test sort(A; dims=X) == sort(parent(A); dims=1)
@test dims(sort(A; dims=X)) == (X(NoLookup(Base.OneTo(5))), dims(A, Y))
@test dims(sort(A; dims=X)) == (X(NoLookup(Base.OneTo(5))), dims(A, Y))
end

@testset "sortslices" begin
M = rand((X(5:-1:1), Y(11:15)))
@test sortslices(M; dims=X) == sortslices(parent(M); dims=1)
@test dims(sort(M; dims=X)) == (X(NoLookup(Base.OneTo(5))), dims(M, Y))
@test dims(sort(M; dims=X)) == (X(NoLookup(Base.OneTo(5))), dims(M, Y))
M = rand((X(5:-1:1), Y(11:15), Ti(3:10)))
@test sortslices(M; dims=(X, Y)) == sortslices(parent(M); dims=(1, 2))
@test dims(sortslices(M; dims=(X, Y))) == (X(NoLookup(Base.OneTo(5))), Y(NoLookup(Base.OneTo(5))), dims(M, Ti))
Expand All @@ -419,7 +419,7 @@ end
@test_throws DimensionMismatch vcat(dims(da, 1), dims(de, 1))
testdims = (X(Sampled([4.0, 5.0, 6.0, 7.0], ForwardOrdered(), Regular(1.0), Points(), NoMetadata())),
Y(Sampled(6.0:8.0, ForwardOrdered(), Regular(1.0), Points(), NoMetadata())))
@test cat(da, db; dims=(X(),)) == cat(da, db; dims=X())
@test cat(da, db; dims=(X(),)) == cat(da, db; dims=X())
@test cat(da, db; dims=X) == cat(da, db; dims=(X,)) == cat(da, db; dims=1) == cat(da, db; dims=(1,))
@test dims(cat(da, db; dims=X)) == testdims
@test val(cat(da, db; dims=X)) == val(testdims)
Expand Down Expand Up @@ -460,7 +460,7 @@ end

@testset "Irregular Sampled" begin
@testset "Intervals" begin
d1 = X(Sampled([1, 3, 4], ForwardOrdered(), Irregular(1, 5), Intervals(), NoMetadata()))
d1 = X(Sampled([1, 3, 4], ForwardOrdered(), Irregular(1, 5), Intervals(), NoMetadata()))
d2 = X(Sampled([7, 8], ForwardOrdered(), Irregular(7, 9), Intervals(), NoMetadata()))
iri_dim = vcat(d1, d2)
@test span(iri_dim) == Irregular(1, 9)
Expand Down Expand Up @@ -488,7 +488,7 @@ end
d2 = X(Sampled([7.5, 9], ForwardOrdered(), Explicit([7 8; 8 10]), Intervals(Center()), NoMetadata()))
ed = vcat(d1, d2)
@test span(ed) == Explicit([1 3 4 7 8; 3 4 7 8 10])
@test index(ed) == [2, 3.5, 5, 7.5, 9]
@test index(ed) == [2, 3.5, 5, 7.5, 9]
@test lookup(ed) == Sampled([2, 3.5, 5, 7.5, 9], ForwardOrdered(), Explicit([1 3 4 7 8; 3 4 7 8 10]), Intervals(Center()), NoMetadata())
@test_warn "lookups are mixed `ForwardOrdered` and `ReverseOrdered`" vcat(d1, reverse(d2))
@test_warn "lookups are misaligned" vcat(d2, d1)
Expand Down Expand Up @@ -556,16 +556,16 @@ end

@test vcat(da) isa DimArray{Int,1}
@test vcat(da) == da
@test dims(vcat(da)) ==
@test dims(vcat(da)) ==
dims(cat(da; dims=1)) ==
(X(Sampled(4.0:5.0, ForwardOrdered(), Regular(1.0), Points(), NoMetadata())),)
@test vcat(da, db) == cat(da, db; dims=1)
@test dims(vcat(da, db)) ==
@test dims(vcat(da, db)) ==
dims(cat(da, db; dims=1)) ==
(X(Sampled(4.0:7.0, ForwardOrdered(), Regular(1.0), Points(), NoMetadata())),)
@test_warn "X and Y dims on the same axis" hcat(da, dd)
@test vcat(da, db, dc) == cat(da, db, dc; dims=1)
@test dims(vcat(da, db, dc)) ==
@test dims(vcat(da, db, dc)) ==
dims(cat(da, db, dc; dims=1)) ==
(X(Sampled(4.0:9.0, ForwardOrdered(), Regular(1.0), Points(), NoMetadata())),)
@test_warn "do not match" hcat(da, db, dd)
Expand All @@ -582,21 +582,21 @@ end
dd = DimArray(c, (X(8.0:9.0), Z(6.0:8.0)))

@test vcat(da) == da
@test dims(vcat(da)) ==
@test dims(vcat(da)) ==
dims(cat(da; dims=1)) ==
(X(Sampled(4.0:5.0, ForwardOrdered(), Regular(1.0), Points(), NoMetadata())),
Y(Sampled(6.0:8.0, ForwardOrdered(), Regular(1.0), Points(), NoMetadata())))
(X(Sampled(4.0:5.0, ForwardOrdered(), Regular(1.0), Points(), NoMetadata())),
Y(Sampled(6.0:8.0, ForwardOrdered(), Regular(1.0), Points(), NoMetadata())))
@test vcat(da, db) == cat(da, db; dims=1)
@test dims(vcat(da, db)) ==
@test dims(vcat(da, db)) ==
dims(cat(da, db; dims=1)) ==
(X(Sampled(4.0:7.0, ForwardOrdered(), Regular(1.0), Points(), NoMetadata())),
Y(Sampled(6.0:8.0, ForwardOrdered(), Regular(1.0), Points(), NoMetadata())))
(X(Sampled(4.0:7.0, ForwardOrdered(), Regular(1.0), Points(), NoMetadata())),
Y(Sampled(6.0:8.0, ForwardOrdered(), Regular(1.0), Points(), NoMetadata())))
@test_warn "lookups are misaligned" hcat(da, dd)
@test vcat(da, db, dc) == cat(da, db, dc; dims=1)
@test dims(vcat(da, db, dc)) ==
@test dims(vcat(da, db, dc)) ==
dims(cat(da, db, dc; dims=1)) ==
(X(Sampled(4.0:9.0, ForwardOrdered(), Regular(1.0), Points(), NoMetadata())),
Y(Sampled(6.0:8.0, ForwardOrdered(), Regular(1.0), Points(), NoMetadata())))
(X(Sampled(4.0:9.0, ForwardOrdered(), Regular(1.0), Points(), NoMetadata())),
Y(Sampled(6.0:8.0, ForwardOrdered(), Regular(1.0), Points(), NoMetadata())))
@test_warn "lookups are misaligned" hcat(da, db, dd)
end
end
Expand All @@ -612,16 +612,16 @@ end
dd = DimArray(c, X(8.0:9.0))

@test hcat(da) == permutedims([1 2])
@test dims(hcat(da)) ==
@test dims(hcat(da)) ==
dims(cat(da; dims=2)) ==
(X(Sampled(4.0:5.0, ForwardOrdered(), Regular(1.0), Points(), NoMetadata())), AnonDim(NoLookup(Base.OneTo(1))))
@test hcat(da, db) == cat(da, db; dims=2)
@test dims(hcat(da, db)) ==
@test dims(hcat(da, db)) ==
dims(cat(da, db; dims=2)) ==
(X(Sampled(4.0:5.0, ForwardOrdered(), Regular(1.0), Points(), NoMetadata())), AnonDim(NoLookup(Base.OneTo(2))))
@test_warn "do not match" hcat(da, dd)
@test hcat(da, db, dc) == cat(da, db, dc; dims=2)
@test dims(hcat(da, db, dc)) ==
@test dims(hcat(da, db, dc)) ==
dims(cat(da, db, dc; dims=2)) ==
(X(Sampled(4.0:5.0, ForwardOrdered(), Regular(1.0), Points(), NoMetadata())), AnonDim(NoLookup(Base.OneTo(3))))
@test_warn "do not match" hcat(da, db, dd)
Expand All @@ -637,24 +637,54 @@ end

@test hcat(da) isa DimArray{Int,2}
@test hcat(da) == da
@test dims(hcat(da)) ==
@test dims(hcat(da)) ==
dims(cat(da; dims=2)) ==
(X(Sampled(4.0:5.0, ForwardOrdered(), Regular(1.0), Points(), NoMetadata())),
Y(Sampled(6.0:8.0, ForwardOrdered(), Regular(1.0), Points(), NoMetadata())))
(X(Sampled(4.0:5.0, ForwardOrdered(), Regular(1.0), Points(), NoMetadata())),
Y(Sampled(6.0:8.0, ForwardOrdered(), Regular(1.0), Points(), NoMetadata())))
@test hcat(da, db) == cat(da, db; dims=2)
@test dims(hcat(da, db)) ==
@test dims(hcat(da, db)) ==
dims(cat(da, db; dims=2)) ==
(X(Sampled(4.0:5.0, ForwardOrdered(), Regular(1.0), Points(), NoMetadata())),
Y(Sampled(6.0:11.0, ForwardOrdered(), Regular(1.0), Points(), NoMetadata())))
(X(Sampled(4.0:5.0, ForwardOrdered(), Regular(1.0), Points(), NoMetadata())),
Y(Sampled(6.0:11.0, ForwardOrdered(), Regular(1.0), Points(), NoMetadata())))
@test_warn "do not join with the correct step size" hcat(da, dd)
@test hcat(da, db, dc) == cat(da, db, dc; dims=2)
@test dims(hcat(da, db, dc)) == dims(cat(da, db, dc; dims=2)) ==
(X(Sampled(4.0:5.0, ForwardOrdered(), Regular(1.0), Points(), NoMetadata())),
Y(Sampled(6.0:14.0, ForwardOrdered(), Regular(1.0), Points(), NoMetadata())))
(X(Sampled(4.0:5.0, ForwardOrdered(), Regular(1.0), Points(), NoMetadata())),
Y(Sampled(6.0:14.0, ForwardOrdered(), Regular(1.0), Points(), NoMetadata())))
@test_warn "do not match" hcat(da, db, dd)
end
end

@testset "Base.stack" begin
a = [1 2 3; 4 5 6]
da = DimArray(a, (X(4.0:5.0), Y(6.0:8.0)))
b = [7 8 9; 10 11 12]
ca = DimArray(b, (X(4.0:5.0), Y(6.0:8.0)))
db = DimArray(b, (X(6.0:7.0), Y(6.0:8.0)))

@test stack([da, db]; dims=3) == stack([parent(da), parent(db)], dims=3)
@test_warn "Lookup values for X" stack([da, db]; dims=3)

@test stack([da, ca]; dims=1) == stack([parent(da), parent(ca)], dims=1)
@test_warn "Lookup values for X" stack([da, db]; dims=1)

dc = stack([da, ca], dims=Z(1:2))
@test dims(dc, ndims(da)+1) == Z(1:2)
@test parent(dc) == stack(map(parent, [da, db]))

for d = 1:3
dc = stack([da, ca], dims=d)
@test dims(dc, d) isa AnonDim
@test parent(dc) == stack(map(parent, [da, db]), dims=d)
end

for d = 1:3
dc = stack([da, ca], dims=d=>Z(1:2))
@test dims(dc, d) == Z(1:2)
@test parent(dc) == stack(map(parent, [da, db]), dims=d)
end
end

@testset "unique" begin
a = [1 1 6; 1 1 6]
xs = (X, X(), :X)
Expand Down Expand Up @@ -684,7 +714,7 @@ end
@test diff(A; dims) == DimArray([111 93 -169 142; 98 -55 110 -131], (Y(['a', 'b']), ti))
end
for dims in tis
@test diff(A; dims) ==
@test diff(A; dims) ==
DimArray([38 156 -125; 20 -106 186; -133 59 -55], (y, Ti(DateTime(2021, 1):Month(1):DateTime(2021, 3))))
end
@test_throws ArgumentError diff(A; dims='X')
Expand Down
Loading