Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[KED-1876] Allow GBQTableDataSet to optionally accept a sql query to load data #443

3 changes: 2 additions & 1 deletion RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,12 @@
## Major features and improvements

## Bug fixes and other changes
* Modified `GBQTableDataSet` to optionally load data from Google BigQuery tables using a custom SQL query instead of always selecting the whole table.

## Breaking changes to the API

## Thanks for supporting contributions

[Ajay Bisht](https://github.com/ajb7)

# Release 0.16.3

Expand Down
18 changes: 12 additions & 6 deletions kedro/extras/datasets/pandas/gbq_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,12 +137,18 @@ def _describe(self) -> Dict[str, Any]:

def _load(self) -> pd.DataFrame:
    """Load data from Google BigQuery into a pandas DataFrame.

    If ``self._load_args`` already contains a ``"query"`` entry, that query
    is executed. Otherwise a ``select *`` over ``<dataset>.<table_name>`` is
    used as the default.

    Returns:
        Whatever ``pd.read_gbq`` returns for the resolved query.

    Raises:
        ValueError: Propagated unchanged from ``pd.read_gbq``, so the
            original message and traceback are preserved instead of being
            replaced by an empty ``ValueError()``.
    """
    # Default query, used only when the caller did not supply one.
    sql = "select * from {}.{}".format(self._dataset, self._table_name)  # nosec

    # The resolved query is stored back on ``self._load_args`` (existing
    # behaviour), so it is visible to anything that inspects the load args
    # after a load.
    self._load_args.setdefault("query", sql)
    # Coerce to ``str`` so a non-string query object is passed as SQL text.
    self._load_args["query"] = str(self._load_args["query"])

    return pd.read_gbq(
        project_id=self._project_id,
        credentials=self._credentials,
        **self._load_args
    )
mzjp2 marked this conversation as resolved.
Show resolved Hide resolved

def _save(self, data: pd.DataFrame) -> None:
data.to_gbq(
Expand Down
3 changes: 1 addition & 2 deletions tests/extras/datasets/pandas/test_gbq_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,6 @@ def test_str_representation(self, gbq_dataset, save_args, load_args):

def test_save_load_data(self, gbq_dataset, dummy_dataframe, mocker):
mzjp2 marked this conversation as resolved.
Show resolved Hide resolved
"""Test saving and reloading the data set."""
sql = "select * from {}.{}".format(DATASET, TABLE_NAME)
table_id = "{}.{}".format(DATASET, TABLE_NAME)
mocked_read_gbq = mocker.patch(
"kedro.extras.datasets.pandas.gbq_dataset.pd.read_gbq"
Expand All @@ -150,7 +149,7 @@ def test_save_load_data(self, gbq_dataset, dummy_dataframe, mocker):
table_id, project_id=PROJECT, credentials=None, progress_bar=False
)
mocked_read_gbq.assert_called_once_with(
sql, project_id=PROJECT, credentials=None
project_id=PROJECT, credentials=None, **gbq_dataset._load_args
mzjp2 marked this conversation as resolved.
Show resolved Hide resolved
)
assert_frame_equal(dummy_dataframe, loaded_data)

Expand Down