Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Materialize DimArray or DimStack From a Table #739

Open
wants to merge 40 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
60256a0
Table Materializer Methods
JoshuaBillson Jun 18, 2024
3526b96
Merged Main
JoshuaBillson Jun 18, 2024
eab2fa0
Made col Optional for DimArray
JoshuaBillson Jun 18, 2024
d4892df
Apply suggestions from code review
JoshuaBillson Jun 20, 2024
ea6751a
Handle coordinates with different loci
JoshuaBillson Jun 20, 2024
13c80da
Merge branch 'materialize' of github.com:JoshuaBillson/DimensionalDat…
JoshuaBillson Jun 20, 2024
6a9d26e
replaced At() with Contains() in _coords_to_ords
JoshuaBillson Jun 20, 2024
9164c22
Added optional selectors and public methods for table materializer
JoshuaBillson Jun 25, 2024
2ebec1c
Updated table constructors for DimArray and DimStack
JoshuaBillson Jun 25, 2024
8e791bf
Updated DimArray and DimStack docs to include table materializer methods
JoshuaBillson Jul 5, 2024
4cd5f9d
Table materializer test cases
JoshuaBillson Jul 5, 2024
0c1991a
export table materializer methods
JoshuaBillson Jul 5, 2024
8758ba9
Merge branch 'rafaqz:main' into materialize
JoshuaBillson Jul 5, 2024
4534de5
Added Random to tables.jl test cases
JoshuaBillson Jul 5, 2024
119fa30
Merge branch 'rafaqz:main' into materialize
JoshuaBillson Aug 8, 2024
ed395ca
Update src/array/array.jl
JoshuaBillson Aug 8, 2024
00336af
Update src/table_ops.jl
JoshuaBillson Aug 8, 2024
532f887
Removed exports
JoshuaBillson Aug 8, 2024
c98dcb0
Merge branch 'materialize' of github.com:JoshuaBillson/DimensionalDat…
JoshuaBillson Aug 8, 2024
06a2c91
Update src/table_ops.jl
JoshuaBillson Aug 8, 2024
3bacf33
Update src/table_ops.jl
JoshuaBillson Aug 8, 2024
4ced6f7
Update src/table_ops.jl
JoshuaBillson Aug 8, 2024
c846dfd
Update src/table_ops.jl
JoshuaBillson Aug 8, 2024
fe2c871
Update src/table_ops.jl
JoshuaBillson Aug 8, 2024
61f8220
Replaced selector type with instance.
JoshuaBillson Aug 8, 2024
3d28b43
Merge branch 'materialize' of github.com:JoshuaBillson/DimensionalDat…
JoshuaBillson Aug 8, 2024
dbe7b99
Table materializer can now infer dimensions from the coordinates.
JoshuaBillson Aug 12, 2024
f410988
Update src/stack/stack.jl
JoshuaBillson Sep 18, 2024
a17f069
Update src/table_ops.jl
JoshuaBillson Sep 18, 2024
9bdded9
Update src/table_ops.jl
JoshuaBillson Sep 18, 2024
5451087
Update src/table_ops.jl
JoshuaBillson Sep 18, 2024
faf4d76
Update src/table_ops.jl
JoshuaBillson Sep 18, 2024
02f60a3
Update src/table_ops.jl
JoshuaBillson Sep 18, 2024
fafd357
Update src/table_ops.jl
JoshuaBillson Sep 22, 2024
d7f15f5
Update src/array/array.jl
JoshuaBillson Sep 25, 2024
34a0a69
Update src/table_ops.jl
JoshuaBillson Sep 26, 2024
d0b9eb7
Added support for guessing the dimension ordering and span for Dates …
JoshuaBillson Sep 26, 2024
32b0c00
Merge branch 'materialize' of github.com:JoshuaBillson/DimensionalDat…
JoshuaBillson Sep 26, 2024
0ea72a0
Replaced LinRange with StepRangeLen in _build_dim
JoshuaBillson Sep 27, 2024
bc62932
Added Tables.istable check to DimArray constructor
JoshuaBillson Oct 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/DimensionalData.jl
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ const DD = DimensionalData
# Common
include("interface.jl")
include("name.jl")
include("table_ops.jl")

# Arrays
include("array/array.jl")
Expand Down
8 changes: 8 additions & 0 deletions src/array/array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,14 @@ function DimArray(A::AbstractBasicDimArray;
newdata = collect(data)
DimArray(newdata, format(dims, newdata); refdims, name, metadata)
end
# Write a single column from a table with one or more coordinate columns to a DimArray
# Arguments
# - `table`: an object satisfying the Tables.jl interface, holding one column per
#   dimension in `dims` plus one or more data columns.
# - `dims`: tuple of dimensions describing the shape of the resulting array.
#
# Keywords
# - `col`: name of the data column to materialize; defaults to the first column that
#   is not named after one of `dims`.
# - `missingval`: value written to cells that no table row maps to.
#
# Throws an `ArgumentError` when `table` is not a Tables.jl table, or when `col` is
# not given and the table has no data columns.
function DimArray(table, dims; col=nothing, missingval=missing)
    # Fail early with a clear message rather than an obscure error from the
    # column accessors further down.
    Tables.istable(table) || throw(ArgumentError(
        "`table` must satisfy the Tables.jl interface, got $(typeof(table))"
    ))

    # Linear index into the destination array for every table row
    perm = _sort_coords(table, dims)

    # Resolve the data column without rebinding the `col` keyword
    data_col = if isnothing(col)
        candidates = _data_col_names(table, dims)
        isempty(candidates) && throw(ArgumentError(
            "`table` has no data columns besides the dimension columns"
        ))
        first(candidates)
    else
        col
    end

    data = Tables.getcolumn(table, data_col)
    dst = _write_vals(data, dims, perm, missingval)
    return DimArray(reshape(dst, size(dims)), dims; name=data_col)
end
"""
DimArray(f::Function, dim::Dimension; [name])

Expand Down
11 changes: 11 additions & 0 deletions src/stack/stack.jl
Original file line number Diff line number Diff line change
Expand Up @@ -423,5 +423,16 @@ function DimStack(data::NamedTuple, dims::Tuple;
all(map(d -> axes(d) == axes(first(data)), data)) || _stack_size_mismatch()
DimStack(data, format(dims, first(data)), refdims, layerdims, metadata, layermetadata)
end
# Write each column from a table with one or more coordinate columns to a layer in a DimStack
# Arguments
# - `table`: an object satisfying the Tables.jl interface, holding one column per
#   dimension in `dims`; every remaining column becomes a layer.
# - `dims`: tuple of dimensions shared by all layers.
#
# Keywords
# - `missingval`: value written to cells that no table row maps to.
#
# Throws an `ArgumentError` when `table` is not a Tables.jl table.
function DimStack(table, dims::Tuple; missingval=missing)
    # Guard requested in review: confirm the Tables.jl interface is implemented
    # before touching any column accessor.
    Tables.istable(table) || throw(ArgumentError(
        "`table` must satisfy the Tables.jl interface, got $(typeof(table))"
    ))

    # Linear index into each layer for every table row (shared by all layers)
    perm = _sort_coords(table, dims)
    data_cols = _data_cols(table, dims)

    # `map` over a NamedTuple keeps the column names as layer names
    layers = map(data_cols) do data
        reshape(_write_vals(data, dims, perm, missingval), size(dims))
    end
    return DimStack(layers, dims)
end

layerdims(s::DimStack{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,Nothing}, name::Symbol) = dims(s)
77 changes: 77 additions & 0 deletions src/table_ops.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Scatter the values of `data` into a flat vector with one slot per cell of `dims`.
#
# - `data`: column of values, one per table row.
# - `dims`: tuple whose element lengths define the destination size.
# - `perm`: destination linear index for each element of `data`.
# - `missingval`: value used for destination cells that `perm` never addresses.
#
# Returns a vector of length `prod(length, dims)`.
function _write_vals(data, dims::Tuple, perm, missingval)
    # Allocate Destination Array
    dst_size = prod(length, dims)
    dst = Vector{eltype(data)}(undef, dst_size)
    dst[perm] .= data

    # Handle Missing Rows
    _missingval = _cast_missing(data, missingval)
    missing_rows = trues(dst_size)
    missing_rows[perm] .= false

    # Only read `dst[i]` for cells that were actually written: touching an
    # unassigned slot of a non-isbits vector (e.g. `Vector{String}`) throws
    # `UndefRefError`, which an eager `ifelse.(...)` over the whole vector would hit.
    return map(eachindex(dst)) do i
        missing_rows[i] ? _missingval : dst[i]
    end
end

# Find the order of the table's rows according to the coordinate values
# Given the coordinate columns as a NamedTuple, convert each coordinate to its
# ordinal position along the matching dimension and combine those ordinals into
# one index per row.
function _sort_coords(coords::NamedTuple, dims::Tuple)
    return _ords_to_indices(_coords_to_ords(coords, dims), dims)
end

# Table entry point: pull out the coordinate columns first, then defer to the
# NamedTuple method.
function _sort_coords(table, dims::Tuple)
    return _sort_coords(_dim_cols(table, dims), dims)
end

# Extract coordinate columns from table
# Return a NamedTuple mapping each dimension name in `dims` to the table column
# of the same name.
function _dim_cols(table, dims::Tuple)
    coord_names = map(name, dims)
    columns = map(col -> Tables.getcolumn(table, col), coord_names)
    return NamedTuple{coord_names}(columns)
end

# Extract data columns from table
# Return a NamedTuple of every table column that is not named after one of `dims`.
function _data_cols(table, dims::Tuple)
    col_names = Tuple(_data_col_names(table, dims))
    columns = map(col -> Tables.getcolumn(table, col), col_names)
    return NamedTuple{col_names}(columns)
end

# Get names of data columns from table
# List the table's column names, dropping those that match a dimension name in `dims`.
function _data_col_names(table, dims::Tuple)
    coord_names = map(name, dims)
    return filter(col -> col ∉ coord_names, Tables.columnnames(table))
end

# Determine the ordinality of a set of numerical coordinates
# Map numerical coordinates to their 1-based position along `dim`.
#
# - `coords`: coordinate value of each table row.
# - `dim`: the dimension's lookup values; the spacing is derived from the first and
#   last element and the length, so the values are treated as evenly spaced.
#
# Returns a `Vector{UInt32}` with one ordinal per element of `coords`.
function _coords_to_ords(coords::AbstractVector, dim::AbstractVector{<:Real})
    # A single-element dimension has no spacing: the stride below would be 0/0 = NaN
    # and `round(UInt32, NaN)` throws. Every coordinate maps to the only position.
    length(dim) == 1 && return fill(one(UInt32), length(coords))

    stride = (last(dim) - first(dim)) / (length(dim) - 1)
    return round.(UInt32, ((coords .- first(dim)) ./ stride) .+ 1)
end

JoshuaBillson marked this conversation as resolved.
Show resolved Hide resolved
# Determine the ordinality of a set of categorical coordinates
# Map categorical coordinates to their 1-based position along `dim` using a
# value => position lookup table. A coordinate that does not occur in `dim`
# raises a `KeyError`.
function _coords_to_ords(coords::AbstractVector, dim::AbstractVector)
    lookup = Dict{eltype(dim),UInt32}(val => pos for (pos, val) in enumerate(dim))
    return map(coords) do c
        lookup[c]
    end
end

# Preprocessing methods for _coords_to_ords
# Dimension input: materialize the dimension's values and dispatch on their eltype.
function _coords_to_ords(coords::AbstractVector, dim::Dimension)
    return _coords_to_ords(coords, collect(dim))
end

# Tuple input: pair each coordinate column with its dimension, in order.
function _coords_to_ords(coords::Tuple, dims::Tuple)
    return Tuple(_coords_to_ords(col, dim) for (col, dim) in zip(coords, dims))
end

# NamedTuple input: look the columns up by dimension name so they follow the order of `dims`.
function _coords_to_ords(coords::NamedTuple, dims::Tuple)
    ordered = map(dim -> coords[name(dim)], dims)
    return _coords_to_ords(ordered, dims)
end

# Determine the index from a tuple of coordinate orders
# Combine per-dimension ordinals into one 1-based index per row. The first
# dimension varies fastest: each dimension's (ordinal - 1) is scaled by the
# product of the lengths of the dimensions before it.
function _ords_to_indices(ords, dims)
    linear = ones(Int, length(ords[1]))
    step = 1
    for (o, d) in zip(ords, dims)
        @. linear += (o - 1) * step
        step *= length(d)
    end
    return linear
end

# Try to express `missingval` in the element type `T` of the data array.
# If the conversion throws for any reason, `missingval` is handed back unchanged.
function _cast_missing(::AbstractArray{T}, missingval) where {T}
    casted = try
        convert(T, missingval)
    catch
        missingval
    end
    return casted
end
Loading