Skip to content

Commit

Permalink
Merge pull request #54 from cjprybol/cjp/stacktypes
Browse files Browse the repository at this point in the history
Stack should use similar_nullable, not NullableArray
  • Loading branch information
ararslan authored Apr 23, 2017
2 parents 2671be7 + 7b59074 commit 817ab76
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 20 deletions.
15 changes: 9 additions & 6 deletions src/abstractdatatable/join.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,21 @@
##

# Like `similar`, but returns a nullable array: a `NullableArray` with element type `T`
# constructed from `dims`. `dv` contributes only its element type; its contents are not read.
# NOTE(review): the two definitions below share one signature (assuming
# `@compat(Union{...})` is equivalent to a plain `Union{...}` -- confirm) and differ only in
# the constructor call form, `NullableArray(T, dims)` vs `NullableArray{T}(dims)`. This looks
# like the before/after of a rendered diff; if both are loaded, the later definition
# replaces the earlier one.
similar_nullable{T}(dv::AbstractArray{T}, dims::@compat(Union{Int, Tuple{Vararg{Int}}})) =
NullableArray(T, dims)
similar_nullable{T}(dv::AbstractArray{T}, dims::Union{Int, Tuple{Vararg{Int}}}) =
NullableArray{T}(dims)

# Method for arrays whose element type `T` is itself a `Nullable`: the result is a
# `NullableArray` parameterized by `eltype(T)` rather than by `T`, built from `dims`.
# `dv` contributes only its element type.
# NOTE(review): the two definitions below share one signature (assuming
# `@compat(Union{...})` is equivalent to a plain `Union{...}` -- confirm) and differ only in
# the constructor call form. This looks like the before/after of a rendered diff; if both
# are loaded, the later definition replaces the earlier one.
similar_nullable{T<:Nullable}(dv::AbstractArray{T}, dims::@compat(Union{Int, Tuple{Vararg{Int}}})) =
NullableArray(eltype(T), dims)
similar_nullable{T<:Nullable}(dv::AbstractArray{T}, dims::Union{Int, Tuple{Vararg{Int}}}) =
NullableArray{eltype(T)}(dims)

# Method for `CategoricalArray{T,R}` inputs: returns a `NullableCategoricalArray`
# parameterized by `T` and built from `dims`. The type parameter `R` is bound by the
# signature but not used in the body; `dv` contributes only its type parameters.
# NOTE(review): the two definitions below share one signature (assuming
# `@compat(Union{...})` is equivalent to a plain `Union{...}` -- confirm) and differ only in
# the constructor call form. This looks like the before/after of a rendered diff; if both
# are loaded, the later definition replaces the earlier one.
similar_nullable{T,R}(dv::CategoricalArray{T,R}, dims::@compat(Union{Int, Tuple{Vararg{Int}}})) =
NullableCategoricalArray(T, dims)
similar_nullable{T,R}(dv::CategoricalArray{T,R}, dims::Union{Int, Tuple{Vararg{Int}}}) =
NullableCategoricalArray{T}(dims)

# Table-level method: build a `DataTable` in which every column of `dt` is replaced by
# `similar_nullable(column, dims)`, paired with a copy of `dt`'s index so the result does
# not share its index object with `dt`.
function similar_nullable(dt::AbstractDataTable, dims::Int)
    nullable_cols = Any[similar_nullable(col, dims) for col in columns(dt)]
    return DataTable(nullable_cols, copy(index(dt)))
end

# Method for inputs that are already `NullableCategoricalArray{T,R}`: returns a new
# `NullableCategoricalArray` parameterized by `T` and built from `dims`. `R` is bound by
# the signature but unused in the body, and the contents of `dv` are not read.
function similar_nullable{T,R}(dv::NullableCategoricalArray{T,R},
                               dims::Union{Int, Tuple{Vararg{Int}}})
    return NullableCategoricalArray{T}(dims)
end

# helper structure for DataTables joining
immutable DataTableJoiner{DT1<:AbstractDataTable, DT2<:AbstractDataTable}
dtl::DT1
Expand Down
20 changes: 6 additions & 14 deletions src/abstractdatatable/reshape.jl
Original file line number Diff line number Diff line change
Expand Up @@ -198,12 +198,7 @@ function unstack(dt::AbstractDataTable, rowkey::Int, colkey::Int, value::Int)
keycol = NullableCategoricalArray(dt[colkey])
Nrow = length(refkeycol.pool)
Ncol = length(keycol.pool)
T = eltype(valuecol)
if T <: Nullable
T = eltype(T)
end
payload = DataTable(Any[NullableArray(T, Nrow) for i in 1:Ncol],
map(Symbol, levels(keycol)))
payload = DataTable(Any[similar_nullable(valuecol, Nrow) for i in 1:Ncol], map(Symbol, levels(keycol)))
nowarning = true
for k in 1:nrow(dt)
j = Int(CategoricalArrays.order(keycol.pool)[keycol.refs[k]])
Expand All @@ -216,7 +211,9 @@ function unstack(dt::AbstractDataTable, rowkey::Int, colkey::Int, value::Int)
payload[j][i] = valuecol[k]
end
end
insert!(payload, 1, NullableArray(levels(refkeycol)), _names(dt)[rowkey])
levs = levels(refkeycol)
col = similar_nullable(dt[rowkey], length(levs))
insert!(payload, 1, copy!(col, levs), _names(dt)[rowkey])
end
unstack(dt::AbstractDataTable, rowkey, colkey, value) =
unstack(dt, index(dt)[rowkey], index(dt)[colkey], index(dt)[value])
Expand All @@ -235,15 +232,10 @@ function unstack(dt::AbstractDataTable, colkey::Int, value::Int)
end
keycol = NullableCategoricalArray(dt[colkey])
valuecol = dt[value]
dt1 = dt[g.idx[g.starts], g.cols]
dt1 = nullable!(dt[g.idx[g.starts], g.cols], g.cols)
Nrow = length(g)
Ncol = length(levels(keycol))
T = eltype(valuecol)
if T <: Nullable
T = eltype(T)
end
dt2 = DataTable(Any[NullableArray(T, Nrow) for i in 1:Ncol],
map(@compat(Symbol), levels(keycol)))
dt2 = DataTable(Any[similar_nullable(valuecol, Nrow) for i in 1:Ncol], map(Symbol, levels(keycol)))
nowarning = true
for k in 1:nrow(dt)
j = Int(CategoricalArrays.order(keycol.pool)[keycol.refs[k]])
Expand Down
38 changes: 38 additions & 0 deletions test/datatable.jl
Original file line number Diff line number Diff line change
Expand Up @@ -341,4 +341,42 @@ module TestDataTable
@test find(c -> isa(c, NullableCategoricalArray), categorical!(DataTable(A=1:3, B=4:6), :A).columns) == [1]
@test find(c -> isa(c, NullableCategoricalArray), categorical!(DataTable(A=1:3, B=4:6), [1]).columns) == [1]
@test find(c -> isa(c, NullableCategoricalArray), categorical!(DataTable(A=1:3, B=4:6), 1).columns) == [1]

@testset "unstack nullable promotion" begin
    # Both cases below start from the same long-format data (ids 1,1,1,1,2,2,2,2;
    # variables a..d repeated twice; values 1..8) and must unstack to the same wide table.
    long_names = [:id, :variable, :value]
    expected_wide() = DataTable(Any[Nullable[1, 2], Nullable[1, 5], Nullable[2, 6],
                                    Nullable[3, 7], Nullable[4, 8]], [:id, :a, :b, :c, :d])

    # Plain (non-categorical) input columns: every output column is a NullableVector{Int}.
    long = DataTable(Any[repeat(1:2, inner=4), repeat('a':'d', outer=2), collect(1:8)],
                     long_names)
    wide = unstack(long)
    @test wide == unstack(long, :variable, :value) == unstack(long, :id, :variable, :value)
    @test wide == expected_wide()
    @test all(col -> typeof(col) == NullableVector{Int}, wide.columns)

    # Categorical input columns: every output column is a nullable categorical vector.
    long = DataTable(Any[categorical(repeat(1:2, inner=4)),
                         categorical(repeat('a':'d', outer=2)),
                         categorical(1:8)],
                     long_names)
    wide = unstack(long)
    @test wide == unstack(long, :variable, :value) == unstack(long, :id, :variable, :value)
    @test wide == expected_wide()
    @test all(col -> typeof(col) == NullableCategoricalVector{Int, UInt32}, wide.columns)
end

@testset "duplicate entries in unstack warnings" begin
    # Case 1: each (id, variable) pair occurs twice, so both unstack forms must warn.
    dup = DataTable(id=NullableArray([1, 2, 1, 2]),
                    variable=["a", "b", "a", "b"],
                    value=[3, 4, 5, 6])
    # The warning assertions are only compiled on Julia versions at or after this build.
    @static if VERSION >= v"0.6.0-dev.1980"
        @test_warn "Duplicate entries in unstack." unstack(dup, :id, :variable, :value)
        @test_warn "Duplicate entries in unstack at row 3." unstack(dup, :variable, :value)
    end
    with_rowkey = unstack(dup, :id, :variable, :value)
    without_rowkey = unstack(dup, :variable, :value)
    # The expected table holds the values from the later duplicate rows (5 and 6).
    @test with_rowkey == without_rowkey == DataTable(id = Nullable[1, 2], a = [5, Nullable()], b = [Nullable(), 6])

    # Case 2: every (id, variable) pair is unique, so neither form may warn.
    uniq = DataTable(id=NullableArray(1:2), variable=["a", "b"], value=3:4)
    @static if VERSION >= v"0.6.0-dev.1980"
        @test_nowarn unstack(uniq, :id, :variable, :value)
        @test_nowarn unstack(uniq, :variable, :value)
    end
    with_rowkey = unstack(uniq, :id, :variable, :value)
    without_rowkey = unstack(uniq, :variable, :value)
    @test with_rowkey == without_rowkey == DataTable(id = Nullable[1, 2], a = [3, Nullable()], b = [Nullable(), 4])
end
end

0 comments on commit 817ab76

Please sign in to comment.