Skip to content
This repository has been archived by the owner on Jan 2, 2023. It is now read-only.

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
tk3369 committed Sep 19, 2020
1 parent 78be66a commit 1fb1e06
Show file tree
Hide file tree
Showing 3 changed files with 185 additions and 2 deletions.
4 changes: 4 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ uuid = "fa347795-2256-4518-8e2a-87a426559093"
authors = ["Tom Kwong <tk3369@gmail.com> and contributors"]
version = "0.1.0"

[deps]
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Lazy = "50d2b5c4-7a5e-59d5-8109-a42b560f39c0"

[compat]
julia = "1"

Expand Down
107 changes: 106 additions & 1 deletion src/AbstractWrappedDataFrames.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,110 @@
module AbstractWrappedDataFrames

# Write your package code here.
export AbstractWrappedDataFrame

using DataFrames
using Lazy: @forward

# Abstract supertype for types that wrap a data frame. It is itself a subtype of
# `AbstractDataFrame`. The functions in this module read the wrapped data frame
# from a field named `df`, so concrete subtypes are expected to have that field.
abstract type AbstractWrappedDataFrame <: AbstractDataFrame end

# Functions for AbstractWrappedDataFrame

# Return the data frame wrapped by `sf`, read from its `df` field with
# `getfield` (which bypasses `getproperty`).
# TODO Not ideal to hard code `df` field
function dataframe(sf::AbstractWrappedDataFrame)
    return getfield(sf, :df)
end

# Implement the "unofficial" AbstractDataFrame interface
# See https://github.com/invenia/KeyedFrames.jl/issues/19#issuecomment-674753267

# Using Lazy.jl, we can forward these functions to the underlying data frame.

# For every function `f` in the tuples below, `@forward` defines
# `f(::AbstractWrappedDataFrame, args...; kwargs...)`, which calls `f` on the
# `df` field of the wrapper instead.

# Functions owned by Base.
@forward AbstractWrappedDataFrame.df (
    Base.getindex,
    Base.setindex!,
    Base.propertynames,
    Base.push!,
    Base.copy,
    Base.empty!,
    Base.hcat,
    Base.vcat,
    Base.sort!,
    Base.append!,
    Base.delete!,
    Base.parent,
    Base.parentindices,
)

# Functions owned by DataFrames.
@forward AbstractWrappedDataFrame.df (
    DataFrames.insertcols!,
    DataFrames.ncol,
    DataFrames.nrow,
    DataFrames.transform!,
    DataFrames.select!,
    DataFrames.index,
)

# Look up property `s` on a wrapped data frame.
#
# If `s` names a field of the concrete wrapper type `T`, the wrapper's own field
# is returned (this is how `sf.df` reaches the wrapped data frame). Any other
# name is delegated to `getproperty` on the wrapped data frame.
#
# TODO This is not ideal because I would rather return only the properties
# of the underlying data frame. But if I don't support getting the object's
# own properties then I run into other problems with the `parent` function.
function Base.getproperty(sf::T, s::Symbol) where {T <: AbstractWrappedDataFrame}
    if s in fieldnames(T)
        # The wrapper's own fields take precedence over same-named columns.
        return getfield(sf, s)
    else
        return getproperty(dataframe(sf), s)
    end
end

# Custom forwarders to avoid ambiguity

# Setting a property on a wrapped data frame sets it on the wrapped data frame.
# This method is written by hand to avoid an ambiguity, since
# 1) Base defines setproperty!(::Any, ::Symbol, ::Any)
# 2) @forward defines setproperty!(::AbstractWrappedDataFrame, args...; kwargs...)
Base.setproperty!(sf::AbstractWrappedDataFrame, s::Symbol, v::Any) =
    setproperty!(dataframe(sf), s, v)

# Two-argument `propertynames`: report the property names of the wrapped data
# frame, passing the `private` flag through unchanged.
# Required by REPL completion; without it you get this error:
# "propertynames(::SF, ::Bool) is ambiguous."
Base.propertynames(sf::AbstractWrappedDataFrame, private::Bool) =
    propertynames(dataframe(sf), private)

# Convert a data frame to the wrapper type `S` by calling `S(df)`, so `S` must
# have a constructor that accepts a single data frame.
# Required by all joins.
function Base.convert(
    ::Type{S}, df::D
) where {S <: AbstractWrappedDataFrame, D <: AbstractDataFrame}
    return S(df)
end

# Convert a wrapped data frame to an `AbstractDataFrame` type by unwrapping it.
# NOTE(review): the wrapped data frame is returned as-is and is not itself
# converted to `D` — confirm callers only request the type that is wrapped.
# Required by eachcol.
function Base.convert(
    ::Type{D}, sf::S
) where {S <: AbstractWrappedDataFrame, D <: AbstractDataFrame}
    return dataframe(sf)
end

# Converting a wrapped data frame to its own type returns the same object.
# Required by eachcol.
Base.convert(::Type{S}, sf::S) where {S <: AbstractWrappedDataFrame} = sf

# Custom extensions as these functions do not take AbstractDataFrame

using DataFrames: RowGroupDict, MultiColumnIndex

# Unwrap `sf` and call `DataFrames.findrow` on the wrapped data frame; `gd` and
# the trailing positional arguments are passed through unchanged.
# Required by semi/anti-joins.
function DataFrames.findrow(gd::RowGroupDict, sf::AbstractWrappedDataFrame, args...)
    unwrapped = dataframe(sf)
    return DataFrames.findrow(gd, unwrapped, args...)
end

# Unwrap `sf` and call `DataFrames.findrows` on the wrapped data frame; `gd` and
# the trailing positional arguments are passed through unchanged.
# Required by inner/outer/left/right-joins.
function DataFrames.findrows(gd::RowGroupDict, sf::AbstractWrappedDataFrame, args...)
    unwrapped = dataframe(sf)
    return DataFrames.findrows(gd, unwrapped, args...)
end

# Call `DataFrames.manipulate` on the wrapped data frame, passing every
# positional and keyword argument through unchanged.
# Required by select and many others.
DataFrames.manipulate(sf::AbstractWrappedDataFrame, args...; kwargs...) =
    DataFrames.manipulate(dataframe(sf), args...; kwargs...)

# Construct a `SubDataFrame` from the wrapped data frame rather than from the
# wrapper; all other arguments are passed through unchanged.
# Required for groupby.
DataFrames.SubDataFrame(sf::AbstractWrappedDataFrame, args...; kwargs...) =
    DataFrames.SubDataFrame(dataframe(sf), args...; kwargs...)

end
76 changes: 75 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,80 @@
using AbstractWrappedDataFrames
using DataFrames
using Test

# Test fixture: a wrapper whose only field is the wrapped data frame.
struct WDF <: AbstractWrappedDataFrame
    df::DataFrame
end

# Test fixture: a wrapper that carries an extra `name` field next to the
# wrapped data frame.
struct NDF <: AbstractWrappedDataFrame
    df::DataFrame
    name::String
end

# Single-argument constructor that fills in a placeholder name.
# This is required to support various DataFrames join operations.
function NDF(df::DataFrame)
    return NDF(df, "No name")
end

@testset "AbstractWrappedDataFrames.jl" begin

    # Run the same set of checks against one kind of wrapper. `make_wdf` is
    # called with a `DataFrame` followed by `args...` to build each wrapper;
    # `label` names the enclosing test set.
    function test_operations(label::AbstractString, make_wdf::Function, args...)
        @testset "$label" begin
            # Two wrappers that share the column `x`, used as the join key below.
            df1 = make_wdf(DataFrame(x = [1,2], y = [4,5]), args...)
            df2 = make_wdf(DataFrame(x = [1,4], z = [7,8]), args...)

            @testset "Metadata" begin
                @test sort(names(df1)) == ["x", "y"]
                @test sort(map(string, propertynames(df1))) == ["x", "y"]
                @test nrow(df1) == 2
                @test ncol(df1) == 2
            end

            @testset "Filter" begin
                @test first(df1, 1).x == [1]
                @test last(df1, 1).x == [2]
                @test nrow(filter(:x => ==(1), df1)) == 1
            end

            @testset "Add columns" begin
                copied = copy(df1)
                @test_nowarn copied.z1 = 10
                @test_nowarn copied.z2 = [10,11]
                @test_nowarn copied.z3 = copied.z2 .* 2
            end

            @testset "Summarize" begin
                @test ncol(select(df1, :x)) == 1
                @test ncol(transform(df1, :x => (x -> x .+ 1) => :q)) == 3
                @test nrow(combine(df1, :x => sum)) == 1
                @test length(keys(groupby(df1, :x))) == 2
            end

            @testset "Joins" begin
                # Every join flavour is expected to return a plain DataFrame.
                all_joins = (innerjoin, leftjoin, rightjoin, outerjoin, semijoin, antijoin)
                for joinfn in all_joins
                    @test joinfn(df1, df2, on = :x) isa DataFrame
                end
            end

            @testset "Return types" begin
                @test select(df1, :x) isa DataFrame
                @test transform(df1, :x => (x -> x .+ 1) => :q) isa DataFrame
                @test combine(df1, :x => sum) isa DataFrame
                @test groupby(df1, :x) isa GroupedDataFrame

                # The mutating variants operate on a copy so `df1` stays intact.
                copied = copy(df1)
                @test select!(copied, :x) isa DataFrame
                @test transform!(copied, :x => identity) isa DataFrame
            end
        end
    end

    @testset "Wrapped Data Frames" begin
        test_operations("Simple", df -> WDF(df))
        test_operations("Complex", df -> NDF(df, "MyName"))
    end

end

0 comments on commit 1fb1e06

Please sign in to comment.