|
"""
    asvector(x)

Return `x` as an `AbstractVector`.

An `AbstractVector` is returned as-is, without copying. Any other iterable is collected,
and the result is flattened with `vec` so that inputs which collect to an array that is
not one-dimensional (for example a matrix or a shaped generator) still yield a vector.
"""
asvector(x::AbstractVector) = x
asvector(x) = vec(collect(x))
| 3 | + |
"""
    pycolumntable([T=PyObject,] src) :: T
    pycolumntable([T=PyObject]; cols...) :: T

Build a Python "column table" from `src`: a Python `dict` keyed by column name, where each
value is the corresponding column passed through `asvector`.

`src` is read through `Tables.columns`. In the keyword form, the keyword arguments
themselves are passed along as `src`.
"""
function pycolumntable(::Type{T}, src) where {T}
    table = Tables.columns(src)
    colnames = Tables.columnnames(table)
    entries = (
        pystr(String(name)) => asvector(Tables.getcolumn(table, name)) for name in colnames
    )
    return pydict(T, entries)
end
function pycolumntable(::Type{T}; cols...) where {T}
    return pycolumntable(T, cols)
end
function pycolumntable(src)
    return pycolumntable(PyObject, src)
end
function pycolumntable(; opts...)
    return pycolumntable(PyObject, opts)
end
export pycolumntable
| 21 | + |
"""
    pyrowtable([T=PyObject,] src) :: T
    pyrowtable([T=PyObject]; cols...) :: T

Construct a "row table" from the `Tables.jl`-compatible table `src`, namely a Python `list`
of rows, each row being a Python `dict` mapping column names to values.

In the keyword form, the keyword arguments themselves are passed along as `src`.
"""
function pyrowtable(::Type{T}, src) where {T}
    rows = Tables.rows(src)
    # Column names are read from each row: `Tables.columnnames` belongs to the row and
    # columns interfaces and is not guaranteed to work on the iterator that `Tables.rows`
    # returns. The Python string for each name is cached so it is built once rather than
    # once per row.
    pynames = Dict{Symbol,Any}()
    pyname(n) = get!(() -> pystr(String(n)), pynames, Symbol(n))
    return pylist(
        T,
        pydict(pyname(n) => Tables.getcolumn(row, n) for n in Tables.columnnames(row)) for
        row in rows
    )
end
pyrowtable(::Type{T}; cols...) where {T} = pyrowtable(T, cols)
pyrowtable(src) = pyrowtable(PyObject, src)
pyrowtable(; opts...) = pyrowtable(PyObject, opts)
export pyrowtable
| 41 | + |
"""
    aspandasvector(x)

Convert the column `x` into a value to be used as a pandas dataframe column.

The default is `asvector(x)`. Once `CategoricalArrays` is loaded, a `CategoricalArray` is
instead passed to pandas' `Categorical.from_codes` with zero-based codes (`-1` standing
for `missing`), the array's levels, and its orderedness.
"""
aspandasvector(x) = asvector(x)

@init @require CategoricalArrays="324d7699-5711-5eae-9e2f-1d82baa6b597" @eval begin
    function aspandasvector(x::CategoricalArrays.CategoricalArray)
        # Julia level codes are one-based; shift to zero-based and send `missing` to -1.
        codes = map(v -> ismissing(v) ? -1 : Int(CategoricalArrays.levelcode(v)) - 1, x)
        cats = CategoricalArrays.levels(x)
        # Use the public accessor instead of reaching into the pool's fields.
        ordered = CategoricalArrays.isordered(x)
        return pypandasmodule().Categorical.from_codes(codes, cats, ordered=ordered)
    end
end
| 52 | + |
"""
    pypandasdataframe([T=PyObject,] [src]; ...) :: T

Construct a pandas dataframe from `src`.

Usually equivalent to `pyimport("pandas").DataFrame(src, ...)`, but `src` may also be a
`Tables.jl`-compatible table. In that case each column is converted with `aspandasvector`
and the columns are handed to `DataFrame` as a `dict` keyed by column name.

Keyword arguments are forwarded to `DataFrame`.
"""
pypandasdataframe(::Type{T}; opts...) where {T} =
    pycall(T, pypandasmodule().DataFrame; opts...)
function pypandasdataframe(::Type{T}, t; opts...) where {T}
    # Anything that is not a table is passed straight through to pandas.
    Tables.istable(t) || return pycall(T, pypandasmodule().DataFrame, t; opts...)
    cs = Tables.columns(t)
    data = pydict(
        pystr(String(n)) => aspandasvector(Tables.getcolumn(cs, n)) for
        n in Tables.columnnames(cs)
    )
    return pycall(T, pypandasmodule().DataFrame, data; opts...)
end
# Explicit zero- and one-argument fallbacks that default `T` to `PyObject`. A catch-all
# `args...` method would call itself without end when given two or more positional
# arguments, because no typed method accepts them; those calls now raise `MethodError`.
pypandasdataframe(; opts...) = pypandasdataframe(PyObject; opts...)
pypandasdataframe(t; opts...) = pypandasdataframe(PyObject, t; opts...)
export pypandasdataframe
| 71 | + |
# Names on the left of a `names => type` pair: a collection of names is used as-is, and a
# single name is wrapped so it can be iterated the same way.
_multidict_names(ks::Union{AbstractVector,Tuple}) = ks
_multidict_names(k) = (k,)

"""
    multidict(src)

Expand an iterable of `name => type` or `names => type` pairs into a `Dict{String,Type}`
with one entry per name.

`names` may be any `AbstractVector` or `Tuple` of names. Each name is converted with
`String`, so strings and `Symbol`s are both accepted. When a name occurs more than once,
the last pair wins.
"""
function multidict(src)
    result = Dict{String,Type}()
    for (ks, v) in src
        for k in _multidict_names(ks)
            result[String(k)] = v
        end
    end
    return result
end
| 73 | + |
"""
    PyPandasDataFrame(o; indexname="index", columntypes=(), copy=false)

A Julia table wrapper around the pandas dataframe `o`.

The wrapper is an `AbstractDict{String,AbstractVector}` from column name to column, and
satisfies the `Tables.jl` and `TableTraits.jl` interfaces.

# Keywords
- `indexname`: name given to the index column when converting to a table; pass `nothing` to leave the index out.
- `columntypes`: iterable of `columnname=>type` or `[columnnames...]=>type` pairs, used when converting to a table.
- `copy`: `true` to copy columns on conversion.
"""
mutable struct PyPandasDataFrame <: AbstractDict{String,AbstractVector}
    ptr::CPyPtr
    indexname::Union{String,Nothing}
    columntypes::Dict{String,Type}
    copy::Bool
    # The `Val(:new)` tag keeps this raw-pointer constructor apart from the public one.
    function PyPandasDataFrame(
        ::Val{:new},
        ptr::Ptr,
        indexname::Union{String,Nothing},
        columntypes::Dict{String,Type},
        copy::Bool,
    )
        df = new(CPyPtr(ptr), indexname, columntypes, copy)
        return finalizer(pyref_finalize!, df)
    end
end
function PyPandasDataFrame(
    o;
    indexname::Union{String,Nothing} = "index",
    columntypes = (),
    copy::Bool = false,
)
    ptr = checknull(C.PyObject_From(o))
    types = multidict(columntypes)
    return PyPandasDataFrame(Val(:new), ptr, indexname, types, copy)
end
export PyPandasDataFrame
| 98 | + |
# Methods hooking `PyPandasDataFrame` into the package's Python-reference machinery.
function ispyreftype(::Type{PyPandasDataFrame})
    return true
end

function pyptr(df::PyPandasDataFrame)
    return df.ptr
end

# The stored pointer is passed through `checknull` before being handed out.
function Base.unsafe_convert(::Type{CPyPtr}, df::PyPandasDataFrame)
    return checknull(pyptr(df))
end

function C.PyObject_TryConvert__initial(o, ::Type{PyPandasDataFrame})
    wrapped = PyPandasDataFrame(pyborrowedref(o))
    return C.putresult(wrapped)
end
| 104 | + |
# Two-argument `show` prints `pystr(String, x)`.
function Base.show(io::IO, x::PyPandasDataFrame)
    return print(io, pystr(String, x))
end

# MIME display is delegated to the shared helpers.
Base.show(io::IO, mime::MIME, o::PyPandasDataFrame) = _py_mime_show(io, mime, o)

# The same delegation, spelled out for these concrete MIME types.
# NOTE(review): presumably needed to avoid ambiguity with more specific methods defined
# elsewhere — confirm before removing.
for M in (MIME"text/plain", MIME"text/csv", MIME"text/tab-separated-values")
    @eval Base.show(io::IO, mime::$M, o::PyPandasDataFrame) = _py_mime_show(io, mime, o)
end

function Base.showable(mime::MIME, o::PyPandasDataFrame)
    return _py_mime_showable(mime, o)
end
| 111 | + |
# Iterate `name => column` pairs. The state is `(names, inner_state)`, where `names` is the
# vector of column names (with the index name first, if any) built on the first call.
function Base.iterate(x::PyPandasDataFrame, st=nothing)
    if st === nothing
        colnames = @pyv `$x.columns`::Vector{String}
        if x.indexname !== nothing
            if x.indexname ∈ colnames
                error("table already has a column called $(x.indexname), cannot use it for index")
            end
            pushfirst!(colnames, x.indexname)
        end
        step = iterate(colnames)
    else
        colnames = st[1]
        step = iterate(colnames, st[2])
    end
    step === nothing && return nothing
    key, cursor = step
    return (key => x[key], (colnames, cursor))
end
| 131 | + |
# Number of entries: the dataframe's columns, plus one when the index is exposed.
function Base.length(x::PyPandasDataFrame)
    ncols = @pyv `len($x.columns)`::Int
    return x.indexname === nothing ? ncols : ncols + 1
end

# A key is present when it is the index name or when Python's `in` finds it in the dataframe.
function Base.haskey(x::PyPandasDataFrame, c::AbstractString)
    if c == x.indexname
        return true
    end
    return @pyv `$c in $x`::Bool
end
| 135 | + |
# Look up the column called `c`, or the dataframe's index when `c` is the index name.
# The column is converted to `AbstractVector{S}` when `columntypes` has an entry `S` for
# `c`, and to `AbstractVector` otherwise. It is copied when `x.copy` is set.
function Base.getindex(x::PyPandasDataFrame, c::AbstractString)
    T = haskey(x.columntypes, c) ? AbstractVector{x.columntypes[c]} : AbstractVector
    # Compare by value, as `haskey` does: `===` is false for an equal name held in another
    # string type (e.g. a `SubString`), which would send the index name down the column
    # branch. When `indexname` is `nothing` this comparison is simply `false`.
    if c == x.indexname
        v = @pyv `$x.index`::T
    else
        v = @pyv `$x[$c]`::T
    end
    return x.copy ? copy(v) : v
end
| 145 | + |
# Return the column `c` when `haskey(x, c)` holds, otherwise the default `d`.
function Base.get(x::PyPandasDataFrame, c::AbstractString, d)
    if haskey(x, c)
        return x[c]
    else
        return d
    end
end
| 147 | + |
| 148 | +### Tables.jl / TableTraits.jl integration |
| 149 | + |
Tables.istable(::Type{PyPandasDataFrame}) = true
Tables.columnaccess(::Type{PyPandasDataFrame}) = true

# Return the columns as a `NamedTuple` from column name (as a `Symbol`) to column.
function Tables.columns(x::PyPandasDataFrame)
    # Iterate the dataframe once. Each iteration step already yields `name => column`, so
    # collecting the keys first and indexing again would fetch every column a second time
    # (assuming the default `keys` for `AbstractDict`, which iterates the pairs).
    entries = collect(x)
    names = Tuple(Symbol(first(e)) for e in entries)
    columns = Tuple(last(e) for e in entries)
    return NamedTuple{names}(columns)
end
| 157 | + |
# Iterable-table support: the iterator is the one obtained from `Tables.rows`.
IteratorInterfaceExtensions.isiterable(::PyPandasDataFrame) = true

function IteratorInterfaceExtensions.getiterator(x::PyPandasDataFrame)
    rows = Tables.rows(x)
    return IteratorInterfaceExtensions.getiterator(rows)
end

TableTraits.isiterabletable(::PyPandasDataFrame) = true
0 commit comments