1
1
"""
2
2
Built-in datasets for demonstration, educational and test purposes.
3
3
"""
4
+ import os
5
+ from importlib import import_module
6
+
4
7
import narwhals .stable .v1 as nw
5
8
9
+ AVAILABLE_BACKENDS = {"pandas" , "polars" , "pyarrow" }
10
+
6
11
7
12
def gapminder (
8
13
datetimes = False ,
@@ -372,9 +377,10 @@ def _get_dataset(d, return_type):
372
377
"""
373
378
Loads the dataset using the specified backend.
374
379
375
- Notice that the available backends are 'pandas', 'polars', 'pyarrow' and they all
376
- have a `read_csv` function. Therefore we can dynamically load the library via
377
- `importlib.import_module` and then call `backend.read_csv(filepath)`.
380
+ Notice that the available backends are 'pandas', 'polars', 'pyarrow' and they all have
381
+ a `read_csv` function (pyarrow has it via pyarrow.csv). Therefore we can dynamically
382
+ load the library using `importlib.import_module` and then call
383
+ `backend.read_csv(filepath)`.
378
384
379
385
Parameters
380
386
----------
@@ -388,23 +394,20 @@ def _get_dataset(d, return_type):
388
394
-------
389
395
Dataframe of `return_type` type
390
396
"""
391
- import os
392
- from importlib import import_module
393
-
394
- AVAILABLE_BACKENDS = {"pandas" , "polars" , "pyarrow" }
395
-
396
397
filepath = os .path .join (
397
398
os .path .dirname (os .path .dirname (__file__ )),
398
399
"package_data" ,
399
400
"datasets" ,
400
401
d + ".csv.gz" ,
401
402
)
403
+
402
404
if return_type not in AVAILABLE_BACKENDS :
403
405
msg = f"Unsupported return_type. Found { return_type } , expected one of { AVAILABLE_BACKENDS } "
404
406
raise NotImplementedError (msg )
405
407
406
408
try :
407
- backend = import_module (return_type )
409
+ module_to_load = "pyarrow.csv" if return_type == "pyarrow" else return_type
410
+ backend = import_module (module_to_load )
408
411
except ModuleNotFoundError :
409
412
msg = f"return_type={ return_type } , but { return_type } is not installed"
410
413
raise ModuleNotFoundError (msg )
0 commit comments