Skip to content

Commit 3cbaf76

Browse files
bijanvakili and tswast authored
feat: Add '--use_geodataframe' argument to return a GeoDataFrame (#91)
* feat: Add '--use_geodataframe' argument to return a geopandas.GeoDataFrame. chore: Re-activate python v3.10 unit tests * Update bigquery_magics/bigquery.py * test against minimum geopandas --------- Co-authored-by: Tim Sweña (Swast) <tswast@gmail.com>
1 parent 60aa712 commit 3cbaf76

File tree

5 files changed

+99
-11
lines changed

5 files changed

+99
-11
lines changed

packages/bigquery-magics/bigquery_magics/bigquery.py

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,15 @@
5353
amount of time for the query to complete will not be cleared after the
5454
query is finished. By default, this information will be displayed but
5555
will be cleared after the query is finished.
56+
* ``--use_geodataframe <params>`` (Optional[line argument]):
57+
Return the query result as a geopandas.GeoDataFrame.
58+
If present, the argument that follows the ``--use_geodataframe`` flag
59+
must be a string naming the column to use as the active
60+
geometry.
61+
62+
See geopandas.GeoDataFrame for details.
63+
The Coordinate Reference System will be set to “EPSG:4326”.
64+
5665
* ``--params <params>`` (Optional[line argument]):
5766
If present, the argument following the ``--params`` flag must be
5867
either:
@@ -75,7 +84,8 @@
7584
7685
Returns:
7786
A :class:`pandas.DataFrame` or :class:`bigframes.pandas.DataFrame`
78-
with the query results, depending on the ``engine`` chosen.
87+
with the query results, depending on the ``engine`` chosen or if
88+
``--use_geodataframe`` was provided.
7989
8090
.. note::
8191
All queries run using this magic will run using the context
@@ -343,6 +353,16 @@ def _create_dataset_if_necessary(client, dataset_id):
343353
"name (ex. $my_dict_var)."
344354
),
345355
)
356+
@magic_arguments.argument(
357+
"--use_geodataframe",
358+
type=str,
359+
default=None,
360+
help=(
361+
"Return the query result as a geopandas.GeoDataFrame. If present, the "
362+
"--use_geodataframe flag should be followed by a string name of the "
363+
"column."
364+
),
365+
)
346366
@magic_arguments.argument(
347367
"--progress_bar_type",
348368
type=str,
@@ -574,6 +594,7 @@ def _make_bq_query(
574594
bqstorage_client: Any,
575595
):
576596
max_results = int(args.max_results) if args.max_results else None
597+
geography_column = args.use_geodataframe
577598

578599
# Any query that does not contain whitespace (aside from leading and trailing whitespace)
579600
# is assumed to be a table id
@@ -631,19 +652,24 @@ def _make_bq_query(
631652
return query_job
632653

633654
progress_bar = context.progress_bar_type or args.progress_bar_type
655+
dataframe_kwargs = {
656+
"bqstorage_client": bqstorage_client,
657+
"create_bqstorage_client": False,
658+
"progress_bar_type": progress_bar,
659+
}
660+
if max_results:
661+
dataframe_kwargs["bqstorage_client"] = None
634662

663+
result = query_job
635664
if max_results:
636-
result = query_job.result(max_results=max_results).to_dataframe(
637-
bqstorage_client=None,
638-
create_bqstorage_client=False,
639-
progress_bar_type=progress_bar,
665+
result = result.result(max_results=max_results)
666+
667+
if geography_column:
668+
result = result.to_geodataframe(
669+
geography_column=geography_column, **dataframe_kwargs
640670
)
641671
else:
642-
result = query_job.to_dataframe(
643-
bqstorage_client=bqstorage_client,
644-
create_bqstorage_client=False,
645-
progress_bar_type=progress_bar,
646-
)
672+
result = result.to_dataframe(**dataframe_kwargs)
647673

648674
return _handle_result(result, args)
649675

packages/bigquery-magics/noxfile.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,15 @@
3434

3535
DEFAULT_PYTHON_VERSION = "3.8"
3636

37-
UNIT_TEST_PYTHON_VERSIONS: List[str] = ["3.7", "3.8", "3.9", "3.11", "3.12", "3.13"]
37+
UNIT_TEST_PYTHON_VERSIONS: List[str] = [
38+
"3.7",
39+
"3.8",
40+
"3.9",
41+
"3.10",
42+
"3.11",
43+
"3.12",
44+
"3.13",
45+
]
3846
UNIT_TEST_STANDARD_DEPENDENCIES = [
3947
"mock",
4048
"asyncmock",
@@ -62,12 +70,14 @@
6270
"3.10": [
6371
"bqstorage",
6472
"bigframes",
73+
"geopandas",
6574
],
6675
"3.11": [],
6776
"3.12": [],
6877
"3.13": [
6978
"bqstorage",
7079
"bigframes",
80+
"geopandas",
7181
],
7282
}
7383

packages/bigquery-magics/setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
"grpcio >= 1.49.1, < 2.0dev; python_version>='3.11'",
5656
],
5757
"bigframes": ["bigframes >= 1.17.0"],
58+
"geopandas": ["geopandas >= 1.0.1"],
5859
}
5960

6061
all_extras = []
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
# IMPORTANT: When Python 3.9 support is dropped, update these to
22
# match the minimums in setup.py.
3+
geopandas==1.0.1

packages/bigquery-magics/tests/unit/test_bigquery.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,11 @@
4848
except ImportError:
4949
bpd = None
5050

51+
try:
52+
import geopandas as gpd
53+
except ImportError:
54+
gpd = None
55+
5156

5257
def make_connection(*args):
5358
# TODO(tswast): Remove this in favor of a mock google.cloud.bigquery.Client
@@ -737,6 +742,51 @@ def test_bigquery_magic_w_max_results_valid_calls_queryjob_result():
737742
)
738743

739744

745+
@pytest.mark.usefixtures("ipython_interactive")
746+
@pytest.mark.skipif(gpd is None, reason="Requires `geopandas`")
747+
def test_bigquery_magic_with_use_geodataframe():
748+
ip = IPython.get_ipython()
749+
ip.extension_manager.load_extension("bigquery_magics")
750+
bigquery_magics.context._project = None
751+
752+
credentials_mock = mock.create_autospec(
753+
google.auth.credentials.Credentials, instance=True
754+
)
755+
default_patch = mock.patch(
756+
"google.auth.default", return_value=(credentials_mock, "general-project")
757+
)
758+
client_query_patch = mock.patch(
759+
"google.cloud.bigquery.client.Client.query", autospec=True
760+
)
761+
762+
sql = """
763+
SELECT
764+
17 AS num,
765+
ST_GEOGFROMTEXT('POINT(-122.083855 37.386051)') AS my_geom
766+
"""
767+
result = gpd.GeoDataFrame(
768+
[[17, "POINT(-122.083855 37.386051)"]], columns=["num", "my_geom"]
769+
)
770+
771+
query_job_mock = mock.create_autospec(
772+
google.cloud.bigquery.job.QueryJob, instance=True
773+
)
774+
query_job_mock.to_geodataframe.return_value = result
775+
776+
with client_query_patch as client_query_mock, default_patch:
777+
client_query_mock.return_value = query_job_mock
778+
return_value = ip.run_cell_magic("bigquery", "--use_geodataframe my_geom", sql)
779+
780+
query_job_mock.to_dataframe.assert_not_called()
781+
query_job_mock.to_geodataframe.assert_called_once_with(
782+
geography_column="my_geom",
783+
bqstorage_client=mock.ANY,
784+
create_bqstorage_client=False,
785+
progress_bar_type="tqdm_notebook",
786+
)
787+
assert isinstance(return_value, gpd.GeoDataFrame)
788+
789+
740790
@pytest.mark.usefixtures("ipython_interactive")
741791
def test_bigquery_magic_w_max_results_query_job_results_fails():
742792
ip = IPython.get_ipython()

0 commit comments

Comments
 (0)