Skip to content
This repository was archived by the owner on Dec 8, 2020. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,9 @@ while `A.keys isa Tuple` for matrices & higher. But `axiskeys(A)` always returns
* Named tuples can be converted to and from keyed vectors,
with `collect(keys(nt)) == Symbol.(axiskeys(V),1)`

* The [Tables.jl](https://github.com/JuliaData/Tables.jl) interface is supported,
with `wrapdims(df, :val, :x, :y)` creating a matrix from 3 columns.

* [FFTW](https://github.com/JuliaMath/FFTW.jl)`.fft` transforms the keys;
if these are times such as [Unitful](https://github.com/PainterQubits/Unitful.jl)`.s`
then the results are frequency labels. ([PR#15](https://github.com/mcabbott/AxisKeys.jl/pull/15).)
Expand Down
59 changes: 42 additions & 17 deletions src/tables.jl
Original file line number Diff line number Diff line change
Expand Up @@ -149,45 +149,70 @@ function populate!(A, table, value::Symbol; force=false)
end

"""
wrapdims(table, value, keys...; default=undef, sort=false, force=false) -> KeyedArray
wrapdims(T, table, value, keys...; default=undef, sort=false, force=false) -> T

Construct a `KeyedArray`/`NamedDimsArray` (specified by type `T`) from a `table` matching
the [Tables.jl](https://github.com/JuliaData/Tables.jl) API. The `table` should support both
`Tables.columns` and `Tables.rows`. The `default` value is used in cases where no
value is identified for a given keypair. If the `keys` columns do not uniquely identify
rows in the table then an `ArgumentError` is thrown. If `force` is true then the duplicate
(non-unique) entries will be overwritten.
wrapdims(table, value, names...; default=undef, sort=false, force=false)

Construct `KeyedArray(NamedDimsArray(A,names),keys)` from a `table` matching
the [Tables.jl](https://github.com/JuliaData/Tables.jl) API.
(It must support both `Tables.columns` and `Tables.rows`.)

The contents of the array are taken from the column `value::Symbol` of the table.
Each symbol in `names` specifies a column whose unique entries
become the keys along a dimension of the array.

If there is no row in the table matching a possible set of keys,
then this element of the array is undefined, unless you provide the `default` keyword.
If several rows share the same set of keys, then by default an `ArgumentError` is thrown.
Keyword `force=true` will instead cause these non-unique entries to be overwritten.

Setting `AxisKeys.nameouter() = false` will reverse the order of wrappers produced.
"""
function wrapdims(table, value::Symbol, names::Symbol...; kw...)
    # `nameouter()` decides which wrapper type is handed to `_wrap_table`:
    # `KeyedArray` when it returns `false`, `NamedDimsArray` otherwise.
    wrapper = nameouter() == false ? KeyedArray : NamedDimsArray
    # `identity` is passed as the key-vector wrapper, so the unique column
    # entries are used as keys unchanged.
    return _wrap_table(wrapper, identity, table, value, names...; kw...)
end

"""
wrapdims(df, UniqueVector, :val, :x, :y)

Converts a Tables.jl table to a `KeyedArray` + `NamedDimsArray` pair,
using column `:val` for values, and columns `:x, :y` for names & keys.
Optional 2nd argument applies this type to all the key-vectors.
"""
function wrapdims(table, value::Symbol, keys::Symbol...; kwargs...)
wrapdims(KeyedArray, table, value, keys...; kwargs...)
function wrapdims(table, KT::Type, value::Symbol, names::Symbol...; kw...)
    # Same as the method without `KT`, except that the type `KT` is forwarded
    # to `_wrap_table`, which applies it to each key vector.
    # `nameouter()` decides which wrapper type is passed:
    # `KeyedArray` when it returns `false`, `NamedDimsArray` otherwise.
    wrapper = nameouter() == false ? KeyedArray : NamedDimsArray
    return _wrap_table(wrapper, KT, table, value, names...; kw...)
end

function wrapdims(T::Type, table, value::Symbol, keys::Symbol...; default=undef, sort::Bool=false, kwargs...)
function _wrap_table(AT::Type, KT, table, value::Symbol, names::Symbol...; default=undef, sort::Bool=false, kwargs...)
# get columns of the input table source
cols = Tables.columns(table)

# Extract key columns
pairs = map(keys) do k
pairs = map(names) do k
col = unique(Tables.getcolumn(cols, k))
sort && Base.sort!(col)
return k => col
return k => KT(col)
end

# Extract data/value column
vals = Tables.getcolumn(cols, value)

# Initialize the KeyedArray
sz = length.(last.(pairs))

A = if default === undef
if default === undef
data = similar(vals, sz)
else
data = similar(vals, Union{eltype(vals), typeof(default)}, sz)
fill!(data, default)
end
A = AT(data; pairs...)

A = T(data; pairs...)
populate!(A, table, value; kwargs...)
return A
end
19 changes: 14 additions & 5 deletions test/_packages.jl
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,19 @@ end
@test Tables.columns(N).a == [11, 12, 11, 12, 11, 12]
end
@testset "sink" begin
A = KeyedArray(rand(24, 11, 3); :time => 0:23, :loc => -5:5, :id => ["a", "b", "c"])
A = KeyedArray(rand(24, 11, 3); time = 0:23, loc = -5:5, id = ["a", "b", "c"])
table = Tables.columntable(A)

# Test fully constructing from a table
# Common when working with adhoc data
B = wrapdims(table, :value, :time, :loc, :id)
@test B == A

# Test wrapping of key vectors, and wrong order:
U = wrapdims(table, UniqueVector, :value, :id, :time, :loc)
@test axiskeys(U, :time) isa UniqueVector
@test U(time=3, id="b") == A(time=3, id="b")

# Test populating an existing array (e.g., expected data based on calculated targets/offsets)
C = KeyedArray(
zeros(Float64, size(A));
Expand All @@ -60,14 +65,18 @@ end

# Constructing a NamedDimsArray with different default value and table type
# Partial populating
table = Tables.rowtable(A)
n = length(table)
r_table = Tables.rowtable(A)
n = length(r_table)
idx = rand(Bool, n)
D = wrapdims(table[idx], :value, :time, :loc, :id; default=missing)
D = wrapdims(r_table[idx], :value, :time, :loc, :id; default=missing)
# dimnames should still match, but we'll have missing values
@test dimnames(D) == dimnames(A)
@test any(ismissing, D)

# BTW, this is why it's a method of wrapdims, not KeyedArray:
# @code_warntype wrapdims(table, :value, :time, :loc, :id) # ::Any
# @code_warntype wrapdims(r_table[idx], :value, :time, :loc, :id; default=missing)

# Construction with invalid columns error as expected, but the specific error is
# dependent on the table type.
# ERROR: ArgumentError: wrong number of names, got (:q, :time, :loc, :id) with ndims(A) == 1
Expand All @@ -82,7 +91,7 @@ end
# Construction with duplicates
# ERROR: ArgumentError: Key (Date("2019-01-01"), -5) is not unique
@test_throws ArgumentError wrapdims(table, :value, :time, :loc)
@test wrapdims(table, :value, :time, :loc; force=true) == C(:, :, Key("c"))
@test wrapdims(r_table, :value, :time, :loc; force=true) == C(:, :, Key("c"))
end
end
@testset "stack" begin
Expand Down
6 changes: 3 additions & 3 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@ using Statistics, OffsetArrays, Tables, UniqueVectors, LazyStack
AxisKeys.nameouter() = false
end

# include("_basic.jl")
include("_basic.jl")

include("_functions.jl")

# include("_fast.jl")
include("_fast.jl")

# include("_packages.jl")
include("_packages.jl")

end
@testset "fast findfirst & findall" begin
Expand Down