Commit de8a653

CI-713 - Lazy load pandas when needed (#162)
* lazy load pandas
* lint
1 parent 5193e2e commit de8a653

File tree (3 files changed, +12 −7 lines):

- cirro/cli/controller.py
- cirro/helpers/preprocess_dataset.py
- cirro/sdk/file.py


cirro/cli/controller.py

Lines changed: 2 additions & 1 deletion
@@ -5,7 +5,6 @@
 import sys
 from pathlib import Path

-import pandas as pd
 import requests
 from cirro_api_client.v1.models import UploadDatasetRequest, Status, Executor

@@ -58,6 +57,8 @@ def run_list_datasets(input_params: ListArguments, interactive=False):
     datasets = cirro.datasets.list(input_params['project'])

     sorted_datasets = sorted(datasets, key=lambda d: d.created_at, reverse=True)
+
+    import pandas as pd
     df = pd.DataFrame.from_records([d.to_dict() for d in sorted_datasets])
     df = df[['id', 'name', 'description', 'processId', 'status', 'createdBy', 'createdAt']]
     print(df.to_string())
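
For context on what this hunk achieves: the import moves from module scope into the one function that builds a DataFrame, so loading cirro.cli.controller no longer pays the pandas import cost up front. A minimal sketch of the pattern, with a plain list of dicts standing in for the dataset objects (illustrative only, not the actual CLI code):

```python
def print_dataset_table(records: list[dict]) -> None:
    """Render records as a text table (simplified sketch of run_list_datasets)."""
    # Deferred import: pandas is loaded the first time this function runs,
    # so commands that never print a table keep a fast startup.
    import pandas as pd

    df = pd.DataFrame.from_records(records)
    print(df.to_string())


# Example usage with hypothetical records:
print_dataset_table([
    {"id": "ds-1", "name": "Example dataset", "status": "COMPLETED"},
])
```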

cirro/helpers/preprocess_dataset.py

Lines changed: 6 additions & 2 deletions
@@ -2,9 +2,12 @@
 import logging
 import os
 from pathlib import Path
+from typing import TYPE_CHECKING

 import boto3
-import pandas as pd
+
+if TYPE_CHECKING:
+    from pandas import DataFrame

 from cirro.models.s3_path import S3Path

@@ -66,11 +69,12 @@ def log(self):
         self.logger.info(f"Number of files in dataset: {self.files.shape[0]:,}")
         self.logger.info(f"Number of samples in dataset: {self.samplesheet.shape[0]:,}")

-    def _read_csv(self, suffix: str, required_columns=None) -> pd.DataFrame:
+    def _read_csv(self, suffix: str, required_columns=None) -> 'DataFrame':
         """Read a CSV from the dataset and check for any required columns."""
         if required_columns is None:
             required_columns = []

+        import pandas as pd
         df = pd.read_csv(f"{self.s3_dataset}/{suffix}")
         for col in required_columns:
             assert col in df.columns.values, f"Did not find expected columns {col} in {self.s3_dataset}/{suffix}"
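
This file needs one extra step because the pandas type still appears in a signature: the import is guarded by typing.TYPE_CHECKING (evaluated only by static type checkers, never at runtime) and the return annotation becomes the string 'DataFrame'. A self-contained sketch of that combination, using hypothetical class and attribute names rather than the real PreprocessDataset code:

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Seen by mypy/pyright for type checking, skipped at runtime.
    from pandas import DataFrame


class DatasetFiles:
    def __init__(self, base_uri: str):
        self.base_uri = base_uri

    def read_csv(self, suffix: str) -> 'DataFrame':
        # pandas is only imported when a CSV is actually read.
        import pandas as pd
        return pd.read_csv(f"{self.base_uri}/{suffix}")
```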

cirro/sdk/file.py

Lines changed: 4 additions & 4 deletions
@@ -2,11 +2,10 @@
 from io import BytesIO, StringIO
 from typing import List

-import pandas as pd
-
 from typing import TYPE_CHECKING
 if TYPE_CHECKING:
     import anndata
+    from pandas import DataFrame

 from cirro.cirro_client import CirroApi
 from cirro.models.file import File

@@ -88,7 +87,7 @@ def _get(self) -> bytes:

         return self._client.file.get_file(self._file)

-    def read_csv(self, compression='infer', encoding='utf-8', **kwargs) -> pd.DataFrame:
+    def read_csv(self, compression='infer', encoding='utf-8', **kwargs) -> 'DataFrame':
         """
         Parse the file as a Pandas DataFrame.

@@ -100,6 +99,7 @@ def read_csv(self, compression='infer', encoding='utf-8', **kwargs) -> pd.DataFr
         All other keyword arguments are passed to pandas.read_csv
         https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
         """
+        import pandas

         if compression == 'infer':
             # If the file appears to be compressed

@@ -119,7 +119,7 @@ def read_csv(self, compression='infer', encoding='utf-8', **kwargs) -> pd.DataFr
         else:
             handle = StringIO(self._get().decode(encoding))

-        df = pd.read_csv(
+        df = pandas.read_csv(
             handle,
             compression=compression,
             encoding=encoding,
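
One way to sanity-check a change like this, assuming cirro is installed and that no other imported module pulls in pandas first, is to confirm that pandas stays out of sys.modules until read_csv is actually called:

```python
import sys

import cirro.sdk.file  # noqa: F401

# With the lazy imports in place (and nothing else importing pandas first),
# pandas should not be loaded merely by importing the module.
print("pandas" in sys.modules)  # expected: False
```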
