Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

deps: make pyarrow and BQ Storage optional dependencies #1282

Merged
merged 62 commits into main from py-extra
Dec 8, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
62 commits
Select commit Hold shift + click to select a range
b7e2cbb
update dependencies
steffnay Jun 30, 2022
951a43e
Merge branch 'main' of github.com:googleapis/python-bigquery into py-extra
steffnay Jul 8, 2022
e9c57d6
deps: pyarrow extras
steffnay Jul 8, 2022
47a489b
clean up comments
steffnay Jul 8, 2022
fd0c40c
add test pyarrow skips
steffnay Jul 8, 2022
85db3e5
Merge branch 'main' into py-extra
parthea Jul 9, 2022
1fac4d2
replace storage checks
steffnay Jul 11, 2022
eaada14
Merge branch 'main' of github.com:googleapis/python-bigquery into py-extra
steffnay Jul 11, 2022
61c69e9
update tests
steffnay Jul 11, 2022
95da5c7
Merge branch 'py-extra' of github.com:steffnay/python-bigquery into py-extra
steffnay Jul 11, 2022
e31e4ef
update tests
steffnay Jul 11, 2022
b4f7160
Update setup.py
steffnay Jul 11, 2022
2bb6461
update system tests
steffnay Jul 12, 2022
2602e4d
Merge branch 'py-extra' of github.com:steffnay/python-bigquery into py-extra
steffnay Jul 12, 2022
3a87275
update verify_pandas_imports
steffnay Jul 14, 2022
e0a9a2a
add pyarrow guards
steffnay Jul 14, 2022
f3dbaea
add datetime check
steffnay Jul 15, 2022
91fccef
change pyarrow import
steffnay Jul 15, 2022
ac78a33
update
steffnay Jul 15, 2022
0d89234
add pyarrow skips
steffnay Jul 21, 2022
b774b4b
merge
steffnay Jul 21, 2022
79dd4cc
fix types
steffnay Jul 21, 2022
37d7a25
lint
steffnay Jul 21, 2022
9dedf78
Update google/cloud/bigquery/client.py
steffnay Aug 1, 2022
933963e
update pyarrow version
steffnay Aug 1, 2022
93d7639
Merge branch 'py-extra' of github.com:steffnay/python-bigquery into py-extra
steffnay Aug 1, 2022
45eed33
update test
steffnay Aug 1, 2022
6ac7204
Merge branch 'main' into py-extra
steffnay Aug 1, 2022
af00605
lint
steffnay Aug 1, 2022
5bd1f30
Merge branch 'py-extra' of github.com:steffnay/python-bigquery into py-extra
steffnay Aug 1, 2022
ef20ab5
update pyarrow req
steffnay Aug 1, 2022
95aceca
update noxfile
steffnay Aug 1, 2022
12591b3
Merge branch 'main' into py-extra
steffnay Aug 5, 2022
d0e9045
remove bignum check
steffnay Aug 5, 2022
5045ead
remove comments
steffnay Aug 5, 2022
050af79
Merge branch 'main' into py-extra
steffnay Aug 18, 2022
01dd2b2
Merge branch 'main' of github.com:googleapis/python-bigquery into py-extra
steffnay Sep 23, 2022
1eb5fac
add test importorskip
steffnay Sep 23, 2022
f23657b
update test
steffnay Sep 24, 2022
7138f1e
update test
steffnay Sep 24, 2022
abb9b8c
update dependency
steffnay Sep 24, 2022
d69f8ad
change version
steffnay Sep 24, 2022
caa21cb
update imports
steffnay Sep 26, 2022
17d922a
Merge branch 'main' into py-extra
steffnay Oct 3, 2022
d52b301
Merge branch 'main' into py-extra
steffnay Dec 6, 2022
21ebf7d
adjust test expectations when google-cloud-bigquery-storage is not available
tswast Dec 8, 2022
39b173a
export pyarrow exception
tswast Dec 8, 2022
88fa115
whitespace in docstrings
tswast Dec 8, 2022
1b926aa
format minimum bqstorage version string
tswast Dec 8, 2022
d71141d
restore optional bqstorage_client
tswast Dec 8, 2022
51332d1
restore optional bqstorage_client (in table.py)
tswast Dec 8, 2022
4c296ae
synchronize constraints and setup.py
tswast Dec 8, 2022
6067f90
synchronize signatures
tswast Dec 8, 2022
6c2b8a5
remove unnecessary bignumeric_type extra
tswast Dec 8, 2022
8196a15
more constraints sync
tswast Dec 8, 2022
5bac083
remove unnecessary mock
tswast Dec 8, 2022
dafdb64
fix unittest skip
tswast Dec 8, 2022
805f5d3
synchronize constraints
tswast Dec 8, 2022
b85dcf3
adjust shapely
tswast Dec 8, 2022
bf4f218
simplify with importorskip
tswast Dec 8, 2022
794f70c
blacken
tswast Dec 8, 2022
bab28b5
Merge branch 'main' into py-extra
tswast Dec 8, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
remove comments
  • Loading branch information
steffnay committed Aug 5, 2022
commit 5045ead2cf13936e837df1abd7314b5d6a9bca3e
6 changes: 0 additions & 6 deletions google/cloud/bigquery/_pandas_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import logging
import queue
import warnings
from packaging import version

from google.cloud.bigquery import _helpers

Expand Down Expand Up @@ -204,19 +203,14 @@ def pyarrow_timestamp():
pyarrow.decimal128(38, scale=9).id: "NUMERIC",
}

# if version.parse(pyarrow.__version__) >= version.parse("3.0.0"):
BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric
# The exact decimal's scale and precision are not important, as only
# the type ID matters, and it's the same for all decimal256 instances.
ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC"
# # _BIGNUMERIC_SUPPORT = True
# else:
# _BIGNUMERIC_SUPPORT = False

else: # pragma: NO COVER
BQ_TO_ARROW_SCALARS = {} # pragma: NO COVER
ARROW_SCALAR_IDS_TO_BQ = {} # pragma: NO_COVER
# _BIGNUMERIC_SUPPORT = False # pragma: NO COVER


BQ_FIELD_TYPE_TO_ARROW_FIELD_METADATA = {
Expand Down
16 changes: 5 additions & 11 deletions tests/system/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,9 +380,8 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id):
bigquery.SchemaField("str_col", "STRING"),
bigquery.SchemaField("time_col", "TIME"),
bigquery.SchemaField("ts_col", "TIMESTAMP"),
bigquery.SchemaField("bignum_col", "BIGNUMERIC"),
)
# if _BIGNUMERIC_SUPPORT:
table_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),)

num_rows = 100
nulls = [None] * num_rows
Expand All @@ -398,9 +397,8 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id):
("str_col", nulls),
("time_col", nulls),
("ts_col", nulls),
("bignum_col", nulls),
]
# if _BIGNUMERIC_SUPPORT:
df_data.append(("bignum_col", nulls))
df_data = collections.OrderedDict(df_data)
dataframe = pandas.DataFrame(df_data, columns=df_data.keys())

Expand Down Expand Up @@ -478,9 +476,8 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id
bigquery.SchemaField("str_col", "STRING"),
bigquery.SchemaField("time_col", "TIME"),
bigquery.SchemaField("ts_col", "TIMESTAMP"),
bigquery.SchemaField("bignum_col", "BIGNUMERIC"),
)
# if _BIGNUMERIC_SUPPORT:
table_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),)

df_data = [
("row_num", [1, 2, 3]),
Expand Down Expand Up @@ -524,18 +521,15 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id
),
],
),
]
# if _BIGNUMERIC_SUPPORT:
df_data.append(
(
"bignum_col",
[
decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)),
None,
decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)),
],
)
)
),
]
df_data = collections.OrderedDict(df_data)
dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys())

Expand Down
46 changes: 8 additions & 38 deletions tests/unit/test__pandas_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
from google.cloud.bigquery import _helpers
from google.cloud.bigquery import schema

# from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT

pyarrow = _helpers.PYARROW_VERSIONS.try_import()
if pyarrow:
Expand All @@ -68,11 +67,6 @@
# Set to less than MIN version.
PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0")

# skip_if_no_bignumeric = pytest.mark.skipif(
# not _BIGNUMERIC_SUPPORT,
# reason="BIGNUMERIC support requires pyarrow>=3.0.0",
# )


@pytest.fixture
def module_under_test():
Expand Down Expand Up @@ -166,7 +160,6 @@ def test_all_():
"BIGNUMERIC",
"NULLABLE",
is_bignumeric,
# marks=skip_if_no_bignumeric,
),
("BOOLEAN", "NULLABLE", pyarrow.types.is_boolean),
("BOOL", "NULLABLE", pyarrow.types.is_boolean),
Expand Down Expand Up @@ -250,7 +243,6 @@ def test_all_():
"BIGNUMERIC",
"REPEATED",
all_(pyarrow.types.is_list, lambda type_: is_bignumeric(type_.value_type)),
# marks=skip_if_no_bignumeric,
),
(
"BOOLEAN",
Expand Down Expand Up @@ -372,7 +364,7 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type):
schema.SchemaField("field05", "FLOAT"),
schema.SchemaField("field06", "FLOAT64"),
schema.SchemaField("field07", "NUMERIC"),
# schema.SchemaField("field08", "BIGNUMERIC"),
schema.SchemaField("field08", "BIGNUMERIC"),
schema.SchemaField("field09", "BOOLEAN"),
schema.SchemaField("field10", "BOOL"),
schema.SchemaField("field11", "TIMESTAMP"),
Expand All @@ -381,8 +373,6 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type):
schema.SchemaField("field14", "DATETIME"),
schema.SchemaField("field15", "GEOGRAPHY"),
)
# if _BIGNUMERIC_SUPPORT:
fields += (schema.SchemaField("field08", "BIGNUMERIC"),)

field = schema.SchemaField("ignored_name", bq_type, mode="REPEATED", fields=fields)
actual = module_under_test.bq_to_arrow_data_type(field)
Expand All @@ -395,7 +385,7 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type):
pyarrow.field("field05", pyarrow.float64()),
pyarrow.field("field06", pyarrow.float64()),
pyarrow.field("field07", module_under_test.pyarrow_numeric()),
# pyarrow.field("field08", module_under_test.pyarrow_bignumeric()),
pyarrow.field("field08", module_under_test.pyarrow_bignumeric()),
pyarrow.field("field09", pyarrow.bool_()),
pyarrow.field("field10", pyarrow.bool_()),
pyarrow.field("field11", module_under_test.pyarrow_timestamp()),
Expand All @@ -404,8 +394,6 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type):
pyarrow.field("field14", module_under_test.pyarrow_datetime()),
pyarrow.field("field15", pyarrow.string()),
)
# if _BIGNUMERIC_SUPPORT:
expected += (pyarrow.field("field08", module_under_test.pyarrow_bignumeric()),)
expected_value_type = pyarrow.struct(expected)

assert pyarrow.types.is_list(actual)
Expand Down Expand Up @@ -459,7 +447,6 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test):
decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)),
decimal.Decimal("3.141592653589793238462643383279"),
],
# marks=skip_if_no_bignumeric,
),
("BOOLEAN", [True, None, False, None]),
("BOOL", [False, None, True, None]),
Expand Down Expand Up @@ -1036,7 +1023,7 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test):
schema.SchemaField("field05", "FLOAT", mode="REQUIRED"),
schema.SchemaField("field06", "FLOAT64", mode="REQUIRED"),
schema.SchemaField("field07", "NUMERIC", mode="REQUIRED"),
# schema.SchemaField("field08", "BIGNUMERIC", mode="REQUIRED"),
schema.SchemaField("field08", "BIGNUMERIC", mode="REQUIRED"),
schema.SchemaField("field09", "BOOLEAN", mode="REQUIRED"),
schema.SchemaField("field10", "BOOL", mode="REQUIRED"),
schema.SchemaField("field11", "TIMESTAMP", mode="REQUIRED"),
Expand All @@ -1045,8 +1032,6 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test):
schema.SchemaField("field14", "DATETIME", mode="REQUIRED"),
schema.SchemaField("field15", "GEOGRAPHY", mode="REQUIRED"),
)
# if _BIGNUMERIC_SUPPORT:
bq_schema += (schema.SchemaField("field08", "BIGNUMERIC", mode="REQUIRED"),)

data = {
"field01": ["hello", "world"],
Expand All @@ -1056,10 +1041,10 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test):
"field05": [1.25, 9.75],
"field06": [-1.75, -3.5],
"field07": [decimal.Decimal("1.2345"), decimal.Decimal("6.7891")],
# "field08": [
# decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)),
# decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)),
# ],
"field08": [
decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)),
decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)),
],
"field09": [True, False],
"field10": [False, True],
"field11": [
Expand All @@ -1074,11 +1059,6 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test):
],
"field15": ["POINT(30 10)", "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))"],
}
# if _BIGNUMERIC_SUPPORT:
data["field08"] = [
decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)),
decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)),
]
dataframe = pandas.DataFrame(data)

arrow_table = module_under_test.dataframe_to_arrow(dataframe, bq_schema)
Expand Down Expand Up @@ -1379,12 +1359,8 @@ def test_augment_schema_type_detection_succeeds(module_under_test):
schema.SchemaField("bytes_field", field_type=None, mode="NULLABLE"),
schema.SchemaField("string_field", field_type=None, mode="NULLABLE"),
schema.SchemaField("numeric_field", field_type=None, mode="NULLABLE"),
# schema.SchemaField("bignumeric_field", field_type=None, mode="NULLABLE"),
schema.SchemaField("bignumeric_field", field_type=None, mode="NULLABLE"),
)
# if _BIGNUMERIC_SUPPORT:
current_schema += ( # type: ignore
schema.SchemaField("bignumeric_field", field_type=None, mode="NULLABLE"), # type: ignore
) # type: ignore

with warnings.catch_warnings(record=True) as warned:
augmented_schema = module_under_test.augment_schema(dataframe, current_schema)
Expand All @@ -1406,12 +1382,6 @@ def test_augment_schema_type_detection_succeeds(module_under_test):
schema.SchemaField("bytes_field", field_type="BYTES", mode="NULLABLE"),
schema.SchemaField("string_field", field_type="STRING", mode="NULLABLE"),
schema.SchemaField("numeric_field", field_type="NUMERIC", mode="NULLABLE"),
# schema.SchemaField(
# "bignumeric_field", field_type="BIGNUMERIC", mode="NULLABLE"
# ),
)
# if _BIGNUMERIC_SUPPORT:
expected_schema += (
schema.SchemaField(
"bignumeric_field", field_type="BIGNUMERIC", mode="NULLABLE"
),
Expand Down