Skip to content

Commit b291d89

Browse files
author
Christopher Doris
committed
merge master
2 parents c3e1299 + 9cf962e commit b291d89

File tree

2 files changed

+167
-2
lines changed

2 files changed

+167
-2
lines changed

README.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
![PythonCall.jl logo](https://raw.githubusercontent.com/cjdoris/PythonCall.jl/master/logo-text.svg)
22
---
3-
[![Stable Docs](https://img.shields.io/badge/docs-stable-blue.svg)](https://cjdoris.github.io/PythonCall.jl/stable)
4-
[![Dev Docs](https://img.shields.io/badge/docs-dev-blue.svg)](https://cjdoris.github.io/PythonCall.jl/dev)
3+
[![Project Status: WIP – Initial development is in progress, but there has not yet been a stable, usable release suitable for the public.](https://www.repostatus.org/badges/latest/wip.svg)](https://www.repostatus.org/#wip)
4+
[![Stable Documentation](https://img.shields.io/badge/docs-stable-blue.svg)](https://cjdoris.github.io/PythonCall.jl/stable)
5+
[![Dev Documentation](https://img.shields.io/badge/docs-dev-blue.svg)](https://cjdoris.github.io/PythonCall.jl/dev)
56
[![Test Status](https://github.com/cjdoris/PythonCall.jl/workflows/Tests/badge.svg)](https://github.com/cjdoris/PythonCall.jl/actions?query=workflow%3ATests)
67
[![Codecov](https://codecov.io/gh/cjdoris/PythonCall.jl/branch/master/graph/badge.svg?token=A813UUIHGS)](https://codecov.io/gh/cjdoris/PythonCall.jl)
78

9+
**Note: This package is currently being re-written from scratch. See the 'rewrite' branch. The API may change a little.**
10+
811
Bringing [**Python®**](https://www.python.org/) and [**Julia**](https://julialang.org/) together in seamless harmony:
912
- Call Python code from Julia and Julia code from Python via a symmetric interface.
1013
- Simple syntax, so the Python code looks like Python and the Julia code looks like Julia.

src/PyPandasDataFrame.jl

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
# A column that is already an `AbstractVector` is returned as-is (no copy is made).
function asvector(x::AbstractVector)
    return x
end
2+
# Any other value is materialised with `collect`.
function asvector(x)
    return collect(x)
end
3+
4+
"""
5+
pycolumntable([T=PyObject,] src) :: T
6+
7+
Construct a "column table" from the `Tables.jl`-compatible table `src`, namely a Python `dict` mapping column names to column vectors.
8+
"""
9+
function pycolumntable(::Type{T}, src) where {T}
    table = Tables.columns(src)
    # Lazily pair each column name (as a Python string) with its column as a vector.
    entries = (
        pystr(String(name)) => asvector(Tables.getcolumn(table, name)) for
        name in Tables.columnnames(table)
    )
    return pydict(T, entries)
end
17+
# Keyword form with an explicit result type: the keyword arguments themselves are
# forwarded as the table source.
function pycolumntable(::Type{T}; cols...) where {T}
    return pycolumntable(T, cols)
end

# When no result type is given, `PyObject` is used.
function pycolumntable(src)
    return pycolumntable(PyObject, src)
end

function pycolumntable(; opts...)
    return pycolumntable(PyObject, opts)
end
20+
export pycolumntable
21+
22+
"""
23+
pyrowtable([T=PyObject,] src) :: T
24+
25+
Construct a "row table" from the `Tables.jl`-compatible table `src`, namely a Python `list` of rows, each row being a Python `dict` mapping column names to values.
26+
"""
27+
function pyrowtable(::Type{T}, src) where {T}
    rowiter = Tables.rows(src)
    colnames = Tables.columnnames(rowiter)
    # Convert each column name to a Python string once and reuse it for every row.
    pycolnames = [pystr(String(name)) for name in colnames]
    # Build one Python dict per row, keyed by the pre-built Python names.
    rowdict(row) = pydict(
        pyname => Tables.getcolumn(row, name) for
        (name, pyname) in zip(colnames, pycolnames)
    )
    return pylist(T, rowdict(row) for row in rowiter)
end
37+
# Keyword form with an explicit result type: the keyword arguments themselves are
# forwarded as the table source.
function pyrowtable(::Type{T}; cols...) where {T}
    return pyrowtable(T, cols)
end

# When no result type is given, `PyObject` is used.
function pyrowtable(src)
    return pyrowtable(PyObject, src)
end

function pyrowtable(; opts...)
    return pyrowtable(PyObject, opts)
end
40+
export pyrowtable
41+
42+
# Generic fallback used when building pandas columns: identical to `asvector`.
function aspandasvector(x)
    return asvector(x)
end
43+
44+
# Only defined once CategoricalArrays is loaded: categorical columns are turned into a
# `pandas.Categorical` built from integer codes rather than a plain vector.
@init @require CategoricalArrays="324d7699-5711-5eae-9e2f-1d82baa6b597" @eval begin
    function aspandasvector(x::CategoricalArrays.CategoricalArray)
        # Julia level codes are shifted down by one; missing entries get the code -1.
        codes = map(x) do v
            ismissing(v) ? -1 : Int(CategoricalArrays.levelcode(v)) - 1
        end
        cats = CategoricalArrays.levels(x)
        # Use the public accessor instead of reaching into the private `pool` field.
        ordered = CategoricalArrays.isordered(x)
        return pypandasmodule().Categorical.from_codes(codes, cats, ordered=ordered)
    end
end
52+
53+
"""
54+
pypandasdataframe([T=PyObject,] [src]; ...) :: T
55+
56+
Construct a pandas dataframe from `src`.
57+
58+
Usually equivalent to `pyimport("pandas").DataFrame(src, ...)`, but `src` may also be a `Tables.jl`-compatible table.
59+
"""
60+
function pypandasdataframe(::Type{T}; opts...) where {T}
    # No source given: only the keyword options are forwarded to `DataFrame`.
    return pycall(T, pypandasmodule().DataFrame; opts...)
end

function pypandasdataframe(::Type{T}, t; opts...) where {T}
    # Anything that is not a Tables.jl table is handed to `DataFrame` unchanged.
    Tables.istable(t) || return pycall(T, pypandasmodule().DataFrame, t; opts...)
    cs = Tables.columns(t)
    dataframe = pypandasmodule().DataFrame
    # Tables are passed as a dict of column name => column, converted per column.
    data = pydict(
        pystr(String(n)) => aspandasvector(Tables.getcolumn(cs, n)) for
        n in Tables.columnnames(cs)
    )
    return pycall(T, dataframe, data; opts...)
end

# When no result type is given, `PyObject` is used.
function pypandasdataframe(args...; opts...)
    return pypandasdataframe(PyObject, args...; opts...)
end
70+
export pypandasdataframe
71+
72+
# Flatten `key => type` and `[keys...] => type` pairs into a `Dict{String,Type}` with
# one entry per key. Later pairs overwrite earlier ones for the same key.
function multidict(src)
    result = Dict{String,Type}()
    for (ks, v) in src
        # A `Vector` key stands for several names sharing one type; anything else is one name.
        group = ks isa Vector ? ks : [ks]
        for k in group
            result[k] = v
        end
    end
    return result
end
73+
74+
"""
75+
PyPandasDataFrame(o; indexname="index", columntypes=(), copy=false)
76+
77+
Wrap the Pandas dataframe `o` as a Julia table.
78+
79+
It is an `AbstractDict{String,AbstractVector}` mapping names to columns.
80+
81+
It satisfies the `Tables.jl` and `TableTraits.jl` interfaces.
82+
83+
- `indexname`: The name of the index column when converting this to a table. May be `nothing` to exclude the index.
84+
- `columntypes`: An iterable of `columnname=>type` or `[columnnames...]=>type` pairs, used when converting to a table.
85+
- `copy`: True to copy columns on conversion.
86+
"""
87+
mutable struct PyPandasDataFrame <: AbstractDict{String,AbstractVector}
    # Pointer to the wrapped Python object.
    ptr::CPyPtr
    # Name under which the index is exposed as a column, or `nothing` to leave it out.
    indexname::Union{String,Nothing}
    # Optional element type per column name, consulted when a column is fetched.
    columntypes::Dict{String,Type}
    # Whether fetched columns are copied before being returned.
    copy::Bool

    # Raw constructor, selected by the `Val(:new)` tag. It wraps `ptr` and registers
    # `pyref_finalize!` as the finalizer of the new object.
    function PyPandasDataFrame(
        ::Val{:new},
        ptr::Ptr,
        indexname::Union{String,Nothing},
        columntypes::Dict{String,Type},
        copy::Bool,
    )
        df = new(CPyPtr(ptr), indexname, columntypes, copy)
        return finalizer(pyref_finalize!, df)
    end
end

# Public constructor: converts `o` with `C.PyObject_From`, null-checks the result and
# normalises `columntypes` through `multidict`.
function PyPandasDataFrame(
    o;
    indexname::Union{String,Nothing} = "index",
    columntypes = (),
    copy::Bool = false,
)
    return PyPandasDataFrame(
        Val(:new),
        checknull(C.PyObject_From(o)),
        indexname,
        multidict(columntypes),
        copy,
    )
end
97+
export PyPandasDataFrame
98+
99+
# Hooks that register `PyPandasDataFrame` as a type holding a Python object pointer.
function ispyreftype(::Type{PyPandasDataFrame})
    return true
end

function pyptr(df::PyPandasDataFrame)
    return df.ptr
end

# The stored pointer is null-checked before being handed out as a `CPyPtr`.
function Base.unsafe_convert(::Type{CPyPtr}, df::PyPandasDataFrame)
    return checknull(pyptr(df))
end

# Conversion from a Python object: wrap a borrowed reference to `o` with the default options.
function C.PyObject_TryConvert__initial(o, ::Type{PyPandasDataFrame})
    wrapped = PyPandasDataFrame(pyborrowedref(o))
    return C.putresult(wrapped)
end
104+
105+
# Two-argument `show` prints the string returned by `pystr(String, x)`.
function Base.show(io::IO, x::PyPandasDataFrame)
    return print(io, pystr(String, x))
end

# MIME-specific display is delegated to `_py_mime_show`.
# NOTE(review): the explicit text/plain, text/csv and text/tab-separated-values methods
# look like they exist to resolve dispatch ambiguities with methods defined elsewhere.
# Confirm before removing any of them.
function Base.show(io::IO, mime::MIME, o::PyPandasDataFrame)
    return _py_mime_show(io, mime, o)
end

function Base.show(io::IO, mime::MIME"text/plain", o::PyPandasDataFrame)
    return _py_mime_show(io, mime, o)
end

function Base.show(io::IO, mime::MIME"text/csv", o::PyPandasDataFrame)
    return _py_mime_show(io, mime, o)
end

function Base.show(io::IO, mime::MIME"text/tab-separated-values", o::PyPandasDataFrame)
    return _py_mime_show(io, mime, o)
end

function Base.showable(mime::MIME, o::PyPandasDataFrame)
    return _py_mime_showable(mime, o)
end
111+
112+
# Iterate the dataframe as `name => column` pairs.
#
# The iteration state is `(names, namestate)`: the column names fetched on the first call
# and the state of iterating over them. When `x.indexname` is set, the index is yielded
# first under that name. An error is raised if a real column already has that name.
function Base.iterate(x::PyPandasDataFrame, st=nothing)
    if st === nothing
        # First call: fetch the column names once; they are carried along in the state.
        names = @pyv `$x.columns`::Vector{String}
        if x.indexname !== nothing
            x.indexname in names &&
                error("table already has a column called $(x.indexname), cannot use it for index")
            pushfirst!(names, x.indexname)
        end
        it = iterate(names)
    else
        names = st[1]
        it = iterate(names, st[2])
    end
    it === nothing && return nothing
    name, newst = it
    return (name => x[name], (names, newst))
end
131+
132+
# Number of entries: the pandas column count, plus one when the index is exposed as a column.
function Base.length(x::PyPandasDataFrame)
    ncols = @pyv `len($x.columns)`::Int
    return ncols + (x.indexname !== nothing)
end
133+
134+
# A key is present if it equals the index name or if Python reports `c in x`.
function Base.haskey(x::PyPandasDataFrame, c::AbstractString)
    c == x.indexname && return true
    return @pyv `$c in $x`::Bool
end
135+
136+
# Fetch the column named `c` as a Julia vector.
#
# If `x.columntypes` has an entry for `c`, the column is converted to an `AbstractVector`
# with that element type, otherwise to a plain `AbstractVector`. The name equal to
# `x.indexname` selects the dataframe's index instead of a column. The result is copied
# when `x.copy` is set.
function Base.getindex(x::PyPandasDataFrame, c::AbstractString)
    coltype = get(x.columntypes, c, nothing)
    T = coltype === nothing ? AbstractVector : AbstractVector{coltype}
    # Compare with `==` (as `haskey` does) so that any `AbstractString` equal to the index
    # name, e.g. a `SubString`, selects the index rather than being looked up as a column.
    if c == x.indexname
        v = @pyv `$x.index`::T
    else
        v = @pyv `$x[$c]`::T
    end
    return x.copy ? copy(v) : v
end
145+
146+
# Return the column named `c` if present, otherwise the default `d`.
function Base.get(x::PyPandasDataFrame, c::AbstractString, d)
    if haskey(x, c)
        return x[c]
    end
    return d
end
147+
148+
### Tables.jl / TableTraits.jl integration
149+
150+
# Declare the wrapper as a column-accessible Tables.jl table.
function Tables.istable(::Type{PyPandasDataFrame})
    return true
end

function Tables.columnaccess(::Type{PyPandasDataFrame})
    return true
end

# Materialise the table as a `NamedTuple` of columns, in key order.
function Tables.columns(x::PyPandasDataFrame)
    colnames = collect(keys(x))
    cols = [x[name] for name in colnames]
    syms = Tuple(Symbol(name) for name in colnames)
    return NamedTuple{syms}(Tuple(cols))
end
157+
158+
# TableTraits.jl / IteratorInterfaceExtensions.jl support: the wrapper is an iterable
# table whose iterator is the one obtained from its Tables.jl rows.
function IteratorInterfaceExtensions.isiterable(x::PyPandasDataFrame)
    return true
end

function IteratorInterfaceExtensions.getiterator(x::PyPandasDataFrame)
    rows = Tables.rows(x)
    return IteratorInterfaceExtensions.getiterator(rows)
end

function TableTraits.isiterabletable(x::PyPandasDataFrame)
    return true
end

0 commit comments

Comments
 (0)