Closed
Description
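Converting a pandas DataFrame to a Julia `DataFrame` through JuliaCall fails when any column has the `category` dtype. In the example below, a categorical integer column raises a `DimensionMismatch` error, while a categorical string column (or making every column categorical) raises a Python `SystemError`. The same DataFrame without categorical columns converts correctly: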
>>> import pandas as pd
>>> from juliacall import Main as jl
>>> jl.seval('using DataFrames')
>>> df = pd.DataFrame({'a': [1,2,3], 'b': ['x','y','z']})
>>> jl.DataFrame(df) # works fine
<jl 3×2 DataFrame
Row │ a b
│ Int64 String
─────┼───────────────
1 │ 1 x
2 │ 2 y
3 │ 3 z>
>>> jl.DataFrame(df.astype({'a': 'category'})) # gives DimensionMismatch error?!
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/nethome/kcni/mwainberg/.julia/packages/PythonCall/DqZCE/src/jlwrap/any.jl", line 201, in __call__
return self._jl_callmethod($(pyjl_methodnum(pyjlany_call)), args, kwargs)
juliacall.JuliaError: DimensionMismatch: column :a has length 16 and column :b has length 3
Stacktrace:
[1] DataFrame(columns::Vector{AbstractVector}, colindex::DataFrames.Index; copycols::Bool)
@ DataFrames ~/.julia/packages/DataFrames/a6np0/src/dataframe/dataframe.jl:192
[2] fromcolumns(x::Tables.DictColumnTable, names::Vector{Symbol}; copycols::Nothing)
@ DataFrames ~/.julia/packages/DataFrames/a6np0/src/other/tables.jl:36
[3] DataFrame(x::PythonCall.PyPandasDataFrame; copycols::Nothing)
@ DataFrames ~/.julia/packages/DataFrames/a6np0/src/other/tables.jl:60
[4] DataFrame(x::PythonCall.PyPandasDataFrame)
@ DataFrames ~/.julia/packages/DataFrames/a6np0/src/other/tables.jl:48
[5] pyjlany_call(self::Type{DataFrame}, args_::PythonCall.Py, kwargs_::PythonCall.Py)
@ PythonCall ~/.julia/packages/PythonCall/DqZCE/src/jlwrap/any.jl:31
[6] _pyjl_callmethod(f::Any, self_::Ptr{PythonCall.C.PyObject}, args_::Ptr{PythonCall.C.PyObject}, nargs::Int64)
@ PythonCall ~/.julia/packages/PythonCall/DqZCE/src/jlwrap/base.jl:69
[7] _pyjl_callmethod(o::Ptr{PythonCall.C.PyObject}, args::Ptr{PythonCall.C.PyObject})
@ PythonCall.C ~/.julia/packages/PythonCall/DqZCE/src/cpython/jlwrap.jl:47
>>> jl.DataFrame(df.astype({'b': 'category'})) # gives internal SystemError in Python
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/nethome/kcni/mwainberg/.julia/packages/PythonCall/DqZCE/src/jlwrap/any.jl", line 201, in __call__
return self._jl_callmethod($(pyjl_methodnum(pyjlany_call)), args, kwargs)
SystemError: null argument to internal routine
>>> jl.DataFrame(df.astype('category')) # ditto
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/nethome/kcni/mwainberg/.julia/packages/PythonCall/DqZCE/src/jlwrap/any.jl", line 201, in __call__
return self._jl_callmethod($(pyjl_methodnum(pyjlany_call)), args, kwargs)
SystemError: null argument to internal routine
Metadata
Metadata
Assignees
Labels
No labels