Skip to content

DataFusion error with mark_area and Polars categorical columns #502

Open

Description

When creating an area chart from a Polars DataFrame that has a column with a categorical datatype, a ValueError is thrown. This works fine when using mark_line instead of mark_area, or when the Polars DataFrame is converted to a pandas DataFrame (even though pandas still uses a categorical datatype).

import altair as alt
import polars as pl

import vegafusion as vf

vf.enable()

df = pl.DataFrame(
    {
        "time": [1, 2, 1, 2],
        "category": ["A", "A", "B", "B"],
        "value": [1, 1, 2, 2],
    },
    schema_overrides={'category': pl.Categorical}
)

alt.Chart(df).mark_area().encode(
    x="time:Q",
    y="value:Q",
    color="category:N",
)

Error output:

{
	"name": "ValueError",
	"message": "DataFusion error: Schema error: No field named py_table_dbb1aa9a_8b0b_4c95_8cf2_ba72da66b2ed_2.category. Valid fields are _key.time, _key._vf_order_key, _groups.category, _groups._vf_order_groups.
    Context[0]: Failed to get node value
",
	"stack": "---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
File .venv/lib/python3.11/site-packages/IPython/core/formatters.py:977, in MimeBundleFormatter.__call__(self, obj, include, exclude)
    974     method = get_real_method(obj, self.print_method)
    976     if method is not None:
--> 977         return method(include=include, exclude=exclude)
    978     return None
    979 else:

File .venv/lib/python3.11/site-packages/altair/vegalite/v5/api.py:2576, in TopLevelMixin._repr_mimebundle_(self, include, exclude)
   2574     return {}
   2575 else:
-> 2576     return renderers.get()(dct)

File .venv/lib/python3.11/site-packages/toolz/functoolz.py:304, in curry.__call__(self, *args, **kwargs)
    302 def __call__(self, *args, **kwargs):
    303     try:
--> 304         return self._partial(*args, **kwargs)
    305     except TypeError as exc:
    306         if self._should_curry(args, kwargs, exc):

File .venv/lib/python3.11/site-packages/vegafusion/renderer.py:16, in vegafusion_mime_renderer(spec, mimetype, row_limit, embed_options)
     15 def vegafusion_mime_renderer(spec, mimetype=\"html\", row_limit=None, embed_options=None):
---> 16     return spec_to_mime_bundle(
     17         spec,
     18         mimetype=mimetype,
     19         row_limit=row_limit,
     20         embed_options=embed_options
     21     )

File .venv/lib/python3.11/site-packages/vegafusion/renderer.py:38, in spec_to_mime_bundle(spec, mimetype, row_limit, embed_options, html_template, full_html, scale)
     35 vega_spec = vegalite_compilers.get()(spec)
     37 inline_datasets = transformer.get_inline_datasets_for_spec(vega_spec)
---> 38 tx_vega_spec, warnings = runtime.pre_transform_spec(
     39     vega_spec,
     40     local_tz.get_local_tz(),
     41     row_limit=row_limit,
     42     inline_datasets=inline_datasets
     43 )
     45 for warning in warnings:
     46     if warning.get(\"type\", \"\") == \"RowLimitExceeded\":

File .venv/lib/python3.11/site-packages/vegafusion/runtime.py:371, in VegaFusionRuntime.pre_transform_spec(self, spec, local_tz, default_input_tz, row_limit, preserve_interactivity, inline_datasets, keep_signals, keep_datasets, data_encoding_threshold, data_encoding_format)
    369 try:
    370     if data_encoding_threshold is None:
--> 371         new_spec, warnings = self.embedded_runtime.pre_transform_spec(
    372             spec,
    373             local_tz=local_tz,
    374             default_input_tz=default_input_tz,
    375             row_limit=row_limit,
    376             preserve_interactivity=preserve_interactivity,
    377             inline_datasets=imported_inline_dataset,
    378             keep_signals=keep_signals,
    379             keep_datasets=keep_datasets,
    380         )
    381     else:
    382         # Use pre_transform_extract to extract large datasets
    383         new_spec, datasets, warnings = self.embedded_runtime.pre_transform_extract(
    384             spec,
    385             local_tz=local_tz,
   (...)
    392             keep_datasets=keep_datasets,
    393         )

ValueError: DataFusion error: Schema error: No field named py_table_dbb1aa9a_8b0b_4c95_8cf2_ba72da66b2ed_2.category. Valid fields are _key.time, _key._vf_order_key, _groups.category, _groups._vf_order_groups.
    Context[0]: Failed to get node value
"
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Metadata

Assignees

No one assigned

    Labels

    bug (Something isn't working), enhancement (New feature or request)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions