From 1fb1e0659306ce81b675151cc2925433149d67a6 Mon Sep 17 00:00:00 2001
From: Tom Kwong
Date: Fri, 18 Sep 2020 19:19:33 -0700
Subject: [PATCH] Initial commit

---
 Project.toml                     |   4 ++
 src/AbstractWrappedDataFrames.jl | 107 ++++++++++++++++++++++++++++++-
 test/runtests.jl                 |  76 +++++++++++++++++++++-
 3 files changed, 185 insertions(+), 2 deletions(-)

diff --git a/Project.toml b/Project.toml
index a2e429e..c7cd52b 100644
--- a/Project.toml
+++ b/Project.toml
@@ -3,6 +3,10 @@ uuid = "fa347795-2256-4518-8e2a-87a426559093"
 authors = ["Tom Kwong and contributors"]
 version = "0.1.0"
 
+[deps]
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
+Lazy = "50d2b5c4-7a5e-59d5-8109-a42b560f39c0"
+
 [compat]
 julia = "1"
 
diff --git a/src/AbstractWrappedDataFrames.jl b/src/AbstractWrappedDataFrames.jl
index 64dd930..c3dc2ce 100644
--- a/src/AbstractWrappedDataFrames.jl
+++ b/src/AbstractWrappedDataFrames.jl
@@ -1,5 +1,110 @@
 module AbstractWrappedDataFrames
 
-# Write your package code here.
+export AbstractWrappedDataFrame
+
+using DataFrames
+using Lazy: @forward
+
+abstract type AbstractWrappedDataFrame <: AbstractDataFrame end  # supertype for structs that keep a data frame in a `df` field
+
+# Functions for AbstractWrappedDataFrame
+
+# TODO Not ideal to hard-code the `df` field name: every subtype must store its wrapped data frame in a field called `df`.
+dataframe(sf::AbstractWrappedDataFrame) = getfield(sf, :df)  # getfield reads the field directly, without going through getproperty
+
+# Implement the "unofficial" AbstractDataFrame interface
+# See https://github.com/invenia/KeyedFrames.jl/issues/19#issuecomment-674753267
+
+# Using Lazy.jl, we forward these functions to the underlying data frame held in the `df` field.
+
+@forward AbstractWrappedDataFrame.df Base.getindex
+@forward AbstractWrappedDataFrame.df Base.setindex!
+@forward AbstractWrappedDataFrame.df Base.propertynames
+@forward AbstractWrappedDataFrame.df Base.push!
+@forward AbstractWrappedDataFrame.df Base.copy
+@forward AbstractWrappedDataFrame.df Base.empty!
+@forward AbstractWrappedDataFrame.df Base.hcat
+@forward AbstractWrappedDataFrame.df Base.vcat
+@forward AbstractWrappedDataFrame.df Base.sort!
+@forward AbstractWrappedDataFrame.df Base.append!
+@forward AbstractWrappedDataFrame.df Base.delete!
+@forward AbstractWrappedDataFrame.df Base.parent
+@forward AbstractWrappedDataFrame.df Base.parentindices
+
+@forward AbstractWrappedDataFrame.df DataFrames.insertcols!
+@forward AbstractWrappedDataFrame.df DataFrames.ncol
+@forward AbstractWrappedDataFrame.df DataFrames.nrow
+@forward AbstractWrappedDataFrame.df DataFrames.transform!
+@forward AbstractWrappedDataFrame.df DataFrames.select!
+@forward AbstractWrappedDataFrame.df DataFrames.index
+
+# TODO This is not ideal because I would rather return only the properties
+# of the underlying data frame. But if the object's own fields are not reachable
+# through getproperty then I run into other problems with the `parent` function.
+function Base.getproperty(sf::T, s::Symbol) where {T <: AbstractWrappedDataFrame}
+    if s ∈ fieldnames(T)  # the wrapper's own fields (e.g. `df`) are checked before column names
+        return getfield(sf, s)
+    else
+        return getproperty(dataframe(sf), s)  # anything else is looked up on the wrapped data frame
+    end
+end
+
+# Custom forwarders to avoid ambiguity
+
+# Written by hand instead of using @forward, to avoid an ambiguity:
+# 1) Base defines setproperty!(::Any, ::Symbol, ::Any)
+# 2) @forward would define setproperty!(::AbstractWrappedDataFrame, args...; kwargs...)
+function Base.setproperty!(sf::AbstractWrappedDataFrame, s::Symbol, v::Any)
+    return setproperty!(dataframe(sf), s, v)  # always targets the wrapped data frame, never the wrapper's own fields
+end
+
+# Required by REPL completion; without the two-argument method below the call fails with
+# the error "propertynames(::SF, ::Bool) is ambiguous."
+function Base.propertynames(sf::AbstractWrappedDataFrame, private::Bool)
+    return propertynames(dataframe(sf), private)  # delegates to the wrapped data frame
+end
+
+# Required by all joins; relies on S having a constructor that accepts a single data frame.
+function Base.convert(::Type{S}, df::D) where {S <: AbstractWrappedDataFrame, D <: AbstractDataFrame}
+    return S(df)
+end
+
+# Required by eachcol; returns the wrapped data frame whatever D is. NOTE(review): looks ambiguous with the method above when both types are different wrappers — confirm.
+function Base.convert(::Type{D}, sf::S) where {S <: AbstractWrappedDataFrame, D <: AbstractDataFrame}
+    return dataframe(sf)
+end
+
+# Required by eachcol; converting a wrapper to its own type returns it unchanged.
+function Base.convert(::Type{S}, sf::S) where {S <: AbstractWrappedDataFrame}
+    return sf
+end
+
+# Custom extensions as these functions do not take AbstractDataFrame
+
+using DataFrames: RowGroupDict, MultiColumnIndex  # NOTE(review): MultiColumnIndex is not referenced in the code visible here — confirm it is needed
+
+# Required by semi/anti-joins; unwraps the data frame and calls the DataFrames method.
+function DataFrames.findrow(gd::RowGroupDict,
+                            sf::AbstractWrappedDataFrame,
+                            args...)
+    return DataFrames.findrow(gd, dataframe(sf), args...)
+end
+
+# Required by inner/outer/left/right-joins; unwraps the data frame and calls the DataFrames method.
+function DataFrames.findrows(gd::RowGroupDict,
+                             sf::AbstractWrappedDataFrame,
+                             args...)
+    return DataFrames.findrows(gd, dataframe(sf), args...)
+end
+
+# Required by select and many others; the result is whatever DataFrames.manipulate returns for the wrapped data frame.
+function DataFrames.manipulate(sf::AbstractWrappedDataFrame, args...; kwargs...)
+    return DataFrames.manipulate(dataframe(sf), args...; kwargs...)
+end
+
+# Required for groupby; builds the SubDataFrame over the wrapped data frame rather than the wrapper.
+function DataFrames.SubDataFrame(sf::AbstractWrappedDataFrame, args...; kwargs...)
+    return DataFrames.SubDataFrame(dataframe(sf), args...; kwargs...)
+end
 
 end
diff --git a/test/runtests.jl b/test/runtests.jl
index bcc6df5..72f7ace 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,6 +1,80 @@
 using AbstractWrappedDataFrames
+using DataFrames
 using Test
 
+struct WDF <: AbstractWrappedDataFrame  # minimal wrapper: only the `df` field
+    df::DataFrame
+end
+
+struct NDF <: AbstractWrappedDataFrame  # wrapper with an extra `name` field next to `df`
+    df::DataFrame
+    name::String
+end
+
+# Single-argument constructor; this is required to support various DataFrames join operations
+NDF(df::DataFrame) = NDF(df, "No name")
+
 @testset "AbstractWrappedDataFrames.jl" begin
-    # Write your tests here.
+
+function test_operations(label::AbstractString, make_wdf::Function, args...)  # shared checks; make_wdf(df, args...) builds the wrapper under test
+
+    @testset "$label" begin
+
+        df1 = make_wdf(DataFrame(x = [1,2], y = [4,5]), args...)
+        df2 = make_wdf(DataFrame(x = [1,4], z = [7,8]), args...)  # shares column :x with df1 for the join tests
+
+        @testset "Metadata" begin
+            @test sort(names(df1)) == ["x", "y"]
+            @test sort(string.(propertynames(df1))) == ["x", "y"]
+            @test nrow(df1) == 2
+            @test ncol(df1) == 2
+        end
+
+        @testset "Filter" begin
+            @test first(df1, 1).x == [1]
+            @test last(df1, 1).x == [2]
+            @test filter(:x => ==(1), df1) |> nrow == 1
+        end
+
+        @testset "Add columns" begin
+            sf3 = copy(df1)  # column additions below go to the copy, not to df1
+            @test_nowarn sf3.z1 = 10
+            @test_nowarn sf3.z2 = [10,11]
+            @test_nowarn sf3.z3 = sf3.z2 .* 2
+        end
+
+        @testset "Summarize" begin
+            @test select(df1, :x) |> ncol == 1
+            @test transform(df1, :x => (x -> x .+ 1) => :q) |> ncol == 3
+            @test combine(df1, :x => sum) |> nrow == 1
+            @test groupby(df1, :x) |> keys |> length == 2
+        end
+
+        @testset "Joins" begin  # every join is expected to return a plain DataFrame
+            @test innerjoin(df1, df2, on = :x) isa DataFrame
+            @test leftjoin(df1, df2, on = :x) isa DataFrame
+            @test rightjoin(df1, df2, on = :x) isa DataFrame
+            @test outerjoin(df1, df2, on = :x) isa DataFrame
+            @test semijoin(df1, df2, on = :x) isa DataFrame
+            @test antijoin(df1, df2, on = :x) isa DataFrame
+        end
+
+        @testset "Return types" begin  # results are expected as DataFrame / GroupedDataFrame, not the wrapper type
+            @test select(df1, :x) isa DataFrame
+            @test transform(df1, :x => (x -> x .+ 1) => :q) isa DataFrame
+            @test combine(df1, :x => sum) isa DataFrame
+            @test groupby(df1, :x) isa GroupedDataFrame
+
+            sf3 = copy(df1)  # the in-place operations below act on the copy
+            @test select!(sf3, :x) isa DataFrame
+            @test transform!(sf3, :x => identity) isa DataFrame
+        end
+    end
+end
+
+@testset "Wrapped Data Frames" begin  # runs the shared checks once per wrapper type
+    test_operations("Simple", df -> WDF(df))
+    test_operations("Complex", df -> NDF(df, "MyName"))
+end
+
 end