Skip to content

Commit d305e7e

Browse files
committed
replace pandas usage with pyarrow
1 parent 8eadba1 commit d305e7e

File tree

6 files changed: +21 additions, -34 deletions (lines changed)

6 files changed: +21 additions, -34 deletions (lines changed)

dlt/helpers/dashboard/dlt_dashboard.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,10 @@
1313

1414
import marimo as mo
1515

16-
import pandas as pd
17-
1816
import dlt
19-
from dlt.common.json import json
17+
import pyarrow
2018
from dlt.helpers.dashboard import strings, utils, ui_elements as ui
2119
from dlt.helpers.dashboard.config import DashboardConfiguration
22-
from dlt.destinations.dataset.dataset import ReadableDBAPIDataset
23-
from dlt.destinations.dataset.relation import ReadableDBAPIRelation
2420

2521

2622
@app.cell(hide_code=True)
@@ -848,7 +844,7 @@ def utils_caches_and_state(
848844
"""
849845

850846
# some state variables
851-
dlt_get_last_query_result, dlt_set_last_query_result = mo.state(pd.DataFrame())
847+
dlt_get_last_query_result, dlt_set_last_query_result = mo.state(pyarrow.table({}))
852848
# a cache of query results in the form of {query: row_count}
853849
dlt_get_query_cache, dlt_set_query_cache = mo.state(cast(Dict[str, int], {}))
854850

dlt/helpers/dashboard/runner.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def _detect_dashboard_command() -> str:
2020
# keep this, will raise if user tries to run dashboard without dependencies
2121
try:
2222
import marimo
23-
import pandas
23+
import pyarrow
2424
import ibis
2525
except ModuleNotFoundError:
2626
raise MissingDependencyException(

dlt/helpers/dashboard/utils.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,7 @@
1010

1111
import dlt
1212
import marimo as mo
13-
import pandas as pd
14-
import yaml
13+
import pyarrow
1514
import traceback
1615

1716

@@ -381,7 +380,7 @@ def get_example_query_for_dataset(pipeline: dlt.Pipeline, schema_name: str) -> T
381380
return "", "Schema does not contain any tables.", None
382381

383382

384-
def get_query_result(pipeline: dlt.Pipeline, query: str) -> Tuple[pd.DataFrame, str, str]:
383+
def get_query_result(pipeline: dlt.Pipeline, query: str) -> Tuple[pyarrow.Table, str, str]:
385384
"""
386385
Get the result of a query. Parses the query to ensure it is a valid SQL query before sending it to the destination.
387386
"""
@@ -392,12 +391,12 @@ def get_query_result(pipeline: dlt.Pipeline, query: str) -> Tuple[pd.DataFrame,
392391
)
393392
return get_query_result_cached(pipeline, query), None, None
394393
except Exception as exc:
395-
return pd.DataFrame(), _exception_to_string(exc), traceback.format_exc()
394+
return pyarrow.table({}), _exception_to_string(exc), traceback.format_exc()
396395

397396

398397
@functools.cache
399-
def get_query_result_cached(pipeline: dlt.Pipeline, query: str) -> pd.DataFrame:
400-
return pipeline.dataset()(query, _execute_raw_query=True).df()
398+
def get_query_result_cached(pipeline: dlt.Pipeline, query: str) -> pyarrow.Table:
399+
return pipeline.dataset()(query, _execute_raw_query=True).arrow()
401400

402401

403402
def get_row_counts(
@@ -415,8 +414,8 @@ def get_row_counts(
415414
i["table_name"]: i["row_count"]
416415
for i in pipeline.dataset(schema=selected_schema_name)
417416
.row_counts(dlt_tables=True, load_id=load_id)
418-
.df()
419-
.to_dict(orient="records")
417+
.arrow()
418+
.to_pylist()
420419
}
421420
except (
422421
DatabaseUndefinedRelation,
@@ -451,7 +450,7 @@ def get_loads(
451450
if limit:
452451
loads = loads.limit(limit)
453452
loads = loads.order_by("inserted_at", "desc")
454-
loads_list = loads.df().to_dict(orient="records")
453+
loads_list = loads.arrow().to_pylist()
455454
loads_list = [_humanize_datetime_values(c, load) for load in loads_list]
456455
return loads_list, None, None
457456
except Exception as exc:

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -179,9 +179,9 @@ postgis = [
179179
]
180180
workspace = [
181181
"duckdb>=0.9",
182-
"ibis-framework[duckdb]>=10.5.0 ; python_version >= '3.10'",
182+
"ibis-framework>=10.5.0 ; python_version >= '3.10'",
183+
"duckdb",
183184
"pyarrow>=16.0.0",
184-
"pandas>=2.1.4",
185185
"marimo>=0.14.5",
186186
]
187187
dbml = [

tests/helpers/dashboard/test_utils.py

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,13 @@
22
import tempfile
33
from datetime import datetime
44
from pathlib import Path
5-
from this import d
65

7-
from ibis import pi
86
import marimo as mo
7+
import pyarrow
98
import pytest
10-
import pandas as pd
11-
12-
from dlt.sources._single_file_templates.fruitshop_pipeline import (
13-
fruitshop as fruitshop_source,
14-
)
15-
import pendulum
16-
179

1810
import dlt
11+
from dlt.common import pendulum
1912
from dlt.helpers.dashboard.config import DashboardConfiguration
2013
from dlt.helpers.dashboard.utils import (
2114
PICKLE_TRACE_FILE,
@@ -346,10 +339,10 @@ def test_get_query_result(pipeline: dlt.Pipeline):
346339
)
347340

348341
if pipeline.pipeline_name in PIPELINES_WITH_LOAD:
349-
assert isinstance(result, pd.DataFrame)
342+
assert isinstance(result, pyarrow.Table)
350343
assert len(result) == 1
351344
assert (
352-
result.iloc[0]["count"] == 100
345+
result[0][0].as_py() == 100
353346
if pipeline.pipeline_name == SUCCESS_PIPELINE_DUCKDB
354347
else 103
355348
) # merge does not work on filesystem
@@ -697,7 +690,7 @@ def test_integration_pipeline_workflow(pipeline, temp_pipelines_dir):
697690
)
698691
if pipeline.pipeline_name in PIPELINES_WITH_LOAD:
699692
assert len(query_result) == 13
700-
assert query_result.iloc[0]["name"] == "simon"
693+
assert query_result[0][0].as_py() == "simon"
701694
assert not error_message
702695
assert not traceback_string
703696
else:

uv.lock

Lines changed: 3 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments (0)