Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BigQuery: Add dry_run option to BigQuery magic #9067

Merged
merged 20 commits into from
Aug 23, 2019
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
5dfcc10
added dry_run option to bigquery magics. when --dry_run flag is prese…
shubha-rajan Aug 21, 2019
7ce1ef4
formatted for PEP8 compliance
shubha-rajan Aug 21, 2019
7d3f4e9
Edited dry_run help string
shubha-rajan Aug 21, 2019
9580faa
fixed output for magic dry_run option
shubha-rajan Aug 21, 2019
7f31051
Merge branch 'bq-add-dryrun-to-magic' of https://github.com/shubha-ra…
shubha-rajan Aug 21, 2019
e3107f6
print estimated bytes instead of total bytes
shubha-rajan Aug 21, 2019
3336697
Revert "print estimated bytes instead of total bytes"
shubha-rajan Aug 22, 2019
6efa408
running the bigquery magic in a notebook with the dryrun option print…
shubha-rajan Aug 22, 2019
1a9f187
running the bigquery magic in a notebook with the dryrun option print…
shubha-rajan Aug 22, 2019
c301b0f
added SQL query to error output and messaging for failure to save to …
shubha-rajan Aug 22, 2019
50d5f72
fixed broken tests and coverage failure
shubha-rajan Aug 23, 2019
106f115
resolve merge
shubha-rajan Aug 23, 2019
617ea00
Update docstring for QueryJob._begin
shubha-rajan Aug 23, 2019
d6931b0
removed result variable from dryrun tests
shubha-rajan Aug 23, 2019
e7a0371
merged in docstring changes
shubha-rajan Aug 23, 2019
16e3127
updated docstring for _AsyncJob._begin
shubha-rajan Aug 23, 2019
dacbd2a
fixed bug in dryrun test
shubha-rajan Aug 23, 2019
f0eab53
removed trailing whitespace to make linter happy
shubha-rajan Aug 23, 2019
425671b
fix docstring format
tswast Aug 23, 2019
f5877c0
fix docstring
tswast Aug 23, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions bigquery/google/cloud/bigquery/magics.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,15 @@ def _run_query(client, query, job_config=None):
"the context default_query_job_config.maximum_bytes_billed."
),
)
@magic_arguments.argument(
"--dry_run",
action="store_true",
default=False,
help=(
"Sets query to be a dry run to estimate costs. "
"Defaults to executing the query instead of dry run if this argument is not used."
),
)
@magic_arguments.argument(
"--use_legacy_sql",
action="store_true",
Expand Down Expand Up @@ -410,6 +419,7 @@ def _cell_magic(line, query):
job_config = bigquery.job.QueryJobConfig()
job_config.query_parameters = params
job_config.use_legacy_sql = args.use_legacy_sql
job_config.dry_run = args.dry_run

if args.maximum_bytes_billed == "None":
job_config.maximum_bytes_billed = 0
Expand All @@ -430,6 +440,12 @@ def _cell_magic(line, query):
print("\nERROR:\n", error, file=sys.stderr)
return

if args.dry_run and args.destination_var:
IPython.get_ipython().push({args.destination_var: query_job})
return
elif args.dry_run:
return query_job

result = query_job.to_dataframe(bqstorage_client=bqstorage_client)
if args.destination_var:
IPython.get_ipython().push({args.destination_var: result})
Expand Down
72 changes: 72 additions & 0 deletions bigquery/tests/unit/test_magics.py
Original file line number Diff line number Diff line change
Expand Up @@ -626,6 +626,78 @@ def test_bigquery_magic_without_bqstorage(monkeypatch):
assert isinstance(return_value, pandas.DataFrame)


@pytest.mark.usefixtures("ipython_interactive")
def test_bigquery_magic_dryrun_option_sets_job_config():
    """Check that ``--dry_run`` sets ``dry_run`` on the job config.

    ``_run_query`` is patched out, so no query is executed; the test only
    inspects the job config the cell magic hands to it.
    """
    ip = IPython.get_ipython()
    ip.extension_manager.load_extension("google.cloud.bigquery")
    magics.context.credentials = mock.create_autospec(
        google.auth.credentials.Credentials, instance=True
    )

    run_query_patch = mock.patch(
        "google.cloud.bigquery.magics._run_query", autospec=True
    )

    sql = "SELECT 17 AS num"

    with run_query_patch as run_query_mock:
        ip.run_cell_magic("bigquery", "--dry_run", sql)

        # The job config is the last positional argument of the first call.
        job_config_used = run_query_mock.call_args_list[0][0][-1]
        assert job_config_used.dry_run is True


@pytest.mark.usefixtures("ipython_interactive")
def test_bigquery_magic_dryrun_option_returns_query_job():
    """Check that ``--dry_run`` without a destination returns the query job.

    ``_run_query`` is patched to return an autospec'd ``QueryJob``; the cell
    magic is expected to return that job object.
    """
    ip = IPython.get_ipython()
    ip.extension_manager.load_extension("google.cloud.bigquery")
    magics.context.credentials = mock.create_autospec(
        google.auth.credentials.Credentials, instance=True
    )
    query_job_mock = mock.create_autospec(
        google.cloud.bigquery.job.QueryJob, instance=True
    )
    run_query_patch = mock.patch(
        "google.cloud.bigquery.magics._run_query", autospec=True
    )

    sql = "SELECT 17 AS num"

    # No to_dataframe stub is needed: this test expects the job itself back.
    with run_query_patch as run_query_mock:
        run_query_mock.return_value = query_job_mock
        return_value = ip.run_cell_magic("bigquery", "--dry_run", sql)

        assert isinstance(return_value, job.QueryJob)


@pytest.mark.usefixtures("ipython_interactive")
def test_bigquery_magic_dryrun_option_saves_query_job_to_variable():
    """Check that ``--dry_run`` with a destination variable stores the job.

    When the magic line names a destination variable (``q_job``), the cell
    magic should return ``None`` and leave the query job in the IPython user
    namespace under that name.
    """
    ip = IPython.get_ipython()
    ip.extension_manager.load_extension("google.cloud.bigquery")
    magics.context.credentials = mock.create_autospec(
        google.auth.credentials.Credentials, instance=True
    )
    query_job_mock = mock.create_autospec(
        google.cloud.bigquery.job.QueryJob, instance=True
    )
    run_query_patch = mock.patch(
        "google.cloud.bigquery.magics._run_query", autospec=True
    )

    sql = "SELECT 17 AS num"

    # Precondition: the destination name must not already be defined,
    # otherwise the final membership check would prove nothing.
    assert "q_job" not in ip.user_ns

    with run_query_patch as run_query_mock:
        run_query_mock.return_value = query_job_mock
        return_value = ip.run_cell_magic("bigquery", "q_job --dry_run", sql)

    assert return_value is None
    assert "q_job" in ip.user_ns
    q_job = ip.user_ns["q_job"]
    assert isinstance(q_job, job.QueryJob)


@pytest.mark.usefixtures("ipython_interactive")
def test_bigquery_magic_w_maximum_bytes_billed_invalid():
ip = IPython.get_ipython()
Expand Down