Skip to content

Commit cc3394f

Browse files
feat: add BIGNUMERIC support (#527)
* feat: add support of BIGNUMERIC * feat: add BIGNUMERIC support * Add bignumeric_type extra * Add additional BIGNUMERIC tests * Prevent import time error if no BIGNUMERIC support * Add/improve a few comments * Add feature flag for BIGNUMERIC support Co-authored-by: HemangChothani <hemang.chothani@qlogic.io>
1 parent 696c443 commit cc3394f

File tree

10 files changed

+305
-152
lines changed

10 files changed

+305
-152
lines changed

google/cloud/bigquery/_pandas_helpers.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import queue
2121
import warnings
2222

23+
from packaging import version
2324

2425
try:
2526
import pandas
@@ -80,6 +81,10 @@ def pyarrow_numeric():
8081
return pyarrow.decimal128(38, 9)
8182

8283

84+
def pyarrow_bignumeric():
85+
return pyarrow.decimal256(76, 38)
86+
87+
8388
def pyarrow_time():
8489
return pyarrow.time64("us")
8590

@@ -128,14 +133,23 @@ def pyarrow_timestamp():
128133
pyarrow.date64().id: "DATETIME", # because millisecond resolution
129134
pyarrow.binary().id: "BYTES",
130135
pyarrow.string().id: "STRING", # also alias for pyarrow.utf8()
136+
# The exact scale and precision don't matter, see below.
131137
pyarrow.decimal128(38, scale=9).id: "NUMERIC",
132-
# The exact decimal's scale and precision are not important, as only
133-
# the type ID matters, and it's the same for all decimal128 instances.
134138
}
135139

140+
if version.parse(pyarrow.__version__) >= version.parse("3.0.0"):
141+
BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric
142+
# The exact decimal's scale and precision are not important, as only
143+
# the type ID matters, and it's the same for all decimal256 instances.
144+
ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC"
145+
_BIGNUMERIC_SUPPORT = True
146+
else:
147+
_BIGNUMERIC_SUPPORT = False
148+
136149
else: # pragma: NO COVER
137150
BQ_TO_ARROW_SCALARS = {} # pragma: NO COVER
138151
ARROW_SCALAR_IDS_TO_BQ = {} # pragma: NO_COVER
152+
_BIGNUMERIC_SUPPORT = False # pragma: NO COVER
139153

140154

141155
def bq_to_arrow_struct_data_type(field):

google/cloud/bigquery/dbapi/_helpers.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@
1919
import functools
2020
import numbers
2121

22+
try:
23+
import pyarrow
24+
except ImportError: # pragma: NO COVER
25+
pyarrow = None
26+
2227
from google.cloud import bigquery
2328
from google.cloud.bigquery import table
2429
from google.cloud.bigquery.dbapi import exceptions
@@ -184,7 +189,12 @@ def bigquery_scalar_type(value):
184189
elif isinstance(value, numbers.Real):
185190
return "FLOAT64"
186191
elif isinstance(value, decimal.Decimal):
187-
return "NUMERIC"
192+
# We check for NUMERIC before BIGNUMERIC in order to support pyarrow < 3.0.
193+
scalar_object = pyarrow.scalar(value)
194+
if isinstance(scalar_object, pyarrow.Decimal128Scalar):
195+
return "NUMERIC"
196+
else:
197+
return "BIGNUMERIC"
188198
elif isinstance(value, str):
189199
return "STRING"
190200
elif isinstance(value, bytes):

google/cloud/bigquery/dbapi/types.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ def __eq__(self, other):
7878
STRING = "STRING"
7979
BINARY = _DBAPITypeObject("BYTES", "RECORD", "STRUCT")
8080
NUMBER = _DBAPITypeObject(
81-
"INTEGER", "INT64", "FLOAT", "FLOAT64", "NUMERIC", "BOOLEAN", "BOOL"
81+
"INTEGER", "INT64", "FLOAT", "FLOAT64", "NUMERIC", "BIGNUMERIC", "BOOLEAN", "BOOL"
8282
)
8383
DATETIME = _DBAPITypeObject("TIMESTAMP", "DATE", "TIME", "DATETIME")
8484
ROWID = "ROWID"

google/cloud/bigquery/query.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ class ScalarQueryParameter(_AbstractQueryParameter):
8383
8484
type_ (str):
8585
Name of parameter type. One of 'STRING', 'INT64',
86-
'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or
86+
'FLOAT64', 'NUMERIC', 'BIGNUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or
8787
'DATE'.
8888
8989
value (Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date]):
@@ -102,7 +102,7 @@ def positional(cls, type_, value):
102102
Args:
103103
type_ (str):
104104
Name of parameter type. One of 'STRING', 'INT64',
105-
'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or
105+
'FLOAT64', 'NUMERIC', 'BIGNUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or
106106
'DATE'.
107107
108108
value (Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date]):
@@ -186,7 +186,7 @@ class ArrayQueryParameter(_AbstractQueryParameter):
186186
187187
array_type (str):
188188
Name of type of array elements. One of `'STRING'`, `'INT64'`,
189-
`'FLOAT64'`, `'NUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`.
189+
`'FLOAT64'`, `'NUMERIC'`, `'BIGNUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`.
190190
191191
values (List[appropriate scalar type]): The parameter array values.
192192
"""
@@ -203,7 +203,7 @@ def positional(cls, array_type, values):
203203
Args:
204204
array_type (str):
205205
Name of type of array elements. One of `'STRING'`, `'INT64'`,
206-
`'FLOAT64'`, `'NUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`.
206+
`'FLOAT64'`, `'NUMERIC'`, `'BIGNUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`.
207207
208208
values (List[appropriate scalar type]): The parameter array values.
209209

google/cloud/bigquery/schema.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
"FLOAT": types.StandardSqlDataType.TypeKind.FLOAT64,
3333
"FLOAT64": types.StandardSqlDataType.TypeKind.FLOAT64,
3434
"NUMERIC": types.StandardSqlDataType.TypeKind.NUMERIC,
35+
"BIGNUMERIC": types.StandardSqlDataType.TypeKind.BIGNUMERIC,
3536
"BOOLEAN": types.StandardSqlDataType.TypeKind.BOOL,
3637
"BOOL": types.StandardSqlDataType.TypeKind.BOOL,
3738
"GEOGRAPHY": types.StandardSqlDataType.TypeKind.GEOGRAPHY,

setup.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
"proto-plus >= 1.10.0",
3434
"google-cloud-core >= 1.4.1, < 2.0dev",
3535
"google-resumable-media >= 0.6.0, < 2.0dev",
36+
"packaging >= 14.3",
3637
"protobuf >= 3.12.0",
3738
]
3839
extras = {
@@ -48,6 +49,7 @@
4849
"pyarrow >= 1.0.0, < 4.0dev",
4950
],
5051
"pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 4.0dev",],
52+
"bignumeric_type": ["pyarrow >= 3.0.0, < 4.0dev"],
5153
"tqdm": ["tqdm >= 4.7.4, <5.0.0dev"],
5254
"opentelemetry": [
5355
"opentelemetry-api==0.11b0",

tests/system/test_client.py

Lines changed: 100 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@
6565
from google.api_core.iam import Policy
6666
from google.cloud import bigquery
6767
from google.cloud import bigquery_v2
68+
from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT
6869
from google.cloud.bigquery.dataset import Dataset
6970
from google.cloud.bigquery.dataset import DatasetReference
7071
from google.cloud.bigquery.table import Table
@@ -891,6 +892,9 @@ def test_load_table_from_dataframe_w_nulls(self):
891892
bigquery.SchemaField("time_col", "TIME"),
892893
bigquery.SchemaField("ts_col", "TIMESTAMP"),
893894
)
895+
if _BIGNUMERIC_SUPPORT:
896+
scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),)
897+
894898
table_schema = scalars_schema + (
895899
# TODO: Array columns can't be read due to NULLABLE versus REPEATED
896900
# mode mismatch. See:
@@ -902,21 +906,22 @@ def test_load_table_from_dataframe_w_nulls(self):
902906
)
903907
num_rows = 100
904908
nulls = [None] * num_rows
905-
df_data = collections.OrderedDict(
906-
[
907-
("bool_col", nulls),
908-
("bytes_col", nulls),
909-
("date_col", nulls),
910-
("dt_col", nulls),
911-
("float_col", nulls),
912-
("geo_col", nulls),
913-
("int_col", nulls),
914-
("num_col", nulls),
915-
("str_col", nulls),
916-
("time_col", nulls),
917-
("ts_col", nulls),
918-
]
919-
)
909+
df_data = [
910+
("bool_col", nulls),
911+
("bytes_col", nulls),
912+
("date_col", nulls),
913+
("dt_col", nulls),
914+
("float_col", nulls),
915+
("geo_col", nulls),
916+
("int_col", nulls),
917+
("num_col", nulls),
918+
("str_col", nulls),
919+
("time_col", nulls),
920+
("ts_col", nulls),
921+
]
922+
if _BIGNUMERIC_SUPPORT:
923+
df_data.append(("bignum_col", nulls))
924+
df_data = collections.OrderedDict(df_data)
920925
dataframe = pandas.DataFrame(df_data, columns=df_data.keys())
921926

922927
dataset_id = _make_dataset_id("bq_load_test")
@@ -1003,6 +1008,9 @@ def test_load_table_from_dataframe_w_explicit_schema(self):
10031008
bigquery.SchemaField("time_col", "TIME"),
10041009
bigquery.SchemaField("ts_col", "TIMESTAMP"),
10051010
)
1011+
if _BIGNUMERIC_SUPPORT:
1012+
scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),)
1013+
10061014
table_schema = scalars_schema + (
10071015
# TODO: Array columns can't be read due to NULLABLE versus REPEATED
10081016
# mode mismatch. See:
@@ -1012,57 +1020,65 @@ def test_load_table_from_dataframe_w_explicit_schema(self):
10121020
# https://jira.apache.org/jira/browse/ARROW-2587
10131021
# bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema),
10141022
)
1015-
df_data = collections.OrderedDict(
1016-
[
1017-
("bool_col", [True, None, False]),
1018-
("bytes_col", [b"abc", None, b"def"]),
1019-
(
1020-
"date_col",
1021-
[datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)],
1022-
),
1023-
# (
1024-
# "dt_col",
1025-
# [
1026-
# datetime.datetime(1, 1, 1, 0, 0, 0),
1027-
# None,
1028-
# datetime.datetime(9999, 12, 31, 23, 59, 59, 999999),
1029-
# ],
1030-
# ),
1031-
("float_col", [float("-inf"), float("nan"), float("inf")]),
1032-
(
1033-
"geo_col",
1034-
[
1035-
"POINT(30 10)",
1036-
None,
1037-
"POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))",
1038-
],
1039-
),
1040-
("int_col", [-9223372036854775808, None, 9223372036854775807]),
1041-
(
1042-
"num_col",
1043-
[
1044-
decimal.Decimal("-99999999999999999999999999999.999999999"),
1045-
None,
1046-
decimal.Decimal("99999999999999999999999999999.999999999"),
1047-
],
1048-
),
1049-
("str_col", [u"abc", None, u"def"]),
1050-
(
1051-
"time_col",
1052-
[datetime.time(0, 0, 0), None, datetime.time(23, 59, 59, 999999)],
1053-
),
1023+
1024+
df_data = [
1025+
("bool_col", [True, None, False]),
1026+
("bytes_col", [b"abc", None, b"def"]),
1027+
("date_col", [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)]),
1028+
# (
1029+
# "dt_col",
1030+
# [
1031+
# datetime.datetime(1, 1, 1, 0, 0, 0),
1032+
# None,
1033+
# datetime.datetime(9999, 12, 31, 23, 59, 59, 999999),
1034+
# ],
1035+
# ),
1036+
("float_col", [float("-inf"), float("nan"), float("inf")]),
1037+
(
1038+
"geo_col",
1039+
[
1040+
"POINT(30 10)",
1041+
None,
1042+
"POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))",
1043+
],
1044+
),
1045+
("int_col", [-9223372036854775808, None, 9223372036854775807]),
1046+
(
1047+
"num_col",
1048+
[
1049+
decimal.Decimal("-99999999999999999999999999999.999999999"),
1050+
None,
1051+
decimal.Decimal("99999999999999999999999999999.999999999"),
1052+
],
1053+
),
1054+
("str_col", [u"abc", None, u"def"]),
1055+
(
1056+
"time_col",
1057+
[datetime.time(0, 0, 0), None, datetime.time(23, 59, 59, 999999)],
1058+
),
1059+
(
1060+
"ts_col",
1061+
[
1062+
datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc),
1063+
None,
1064+
datetime.datetime(
1065+
9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc
1066+
),
1067+
],
1068+
),
1069+
]
1070+
if _BIGNUMERIC_SUPPORT:
1071+
df_data.append(
10541072
(
1055-
"ts_col",
1073+
"bignum_col",
10561074
[
1057-
datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc),
1075+
decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)),
10581076
None,
1059-
datetime.datetime(
1060-
9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc
1061-
),
1077+
decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)),
10621078
],
1063-
),
1064-
]
1065-
)
1079+
)
1080+
)
1081+
df_data = collections.OrderedDict(df_data)
10661082
dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys())
10671083

10681084
dataset_id = _make_dataset_id("bq_load_test")
@@ -1172,6 +1188,7 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv(self):
11721188
bigquery.SchemaField("geo_col", "GEOGRAPHY"),
11731189
bigquery.SchemaField("int_col", "INTEGER"),
11741190
bigquery.SchemaField("num_col", "NUMERIC"),
1191+
bigquery.SchemaField("bignum_col", "BIGNUMERIC"),
11751192
bigquery.SchemaField("str_col", "STRING"),
11761193
bigquery.SchemaField("time_col", "TIME"),
11771194
bigquery.SchemaField("ts_col", "TIMESTAMP"),
@@ -1210,6 +1227,14 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv(self):
12101227
decimal.Decimal("99999999999999999999999999999.999999999"),
12111228
],
12121229
),
1230+
(
1231+
"bignum_col",
1232+
[
1233+
decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)),
1234+
None,
1235+
decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)),
1236+
],
1237+
),
12131238
("str_col", [u"abc", None, u"def"]),
12141239
(
12151240
"time_col",
@@ -2157,6 +2182,10 @@ def test_query_w_query_params(self):
21572182
pi_numeric_param = ScalarQueryParameter(
21582183
name="pi_numeric_param", type_="NUMERIC", value=pi_numeric
21592184
)
2185+
bignum = decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38))
2186+
bignum_param = ScalarQueryParameter(
2187+
name="bignum_param", type_="BIGNUMERIC", value=bignum
2188+
)
21602189
truthy = True
21612190
truthy_param = ScalarQueryParameter(name="truthy", type_="BOOL", value=truthy)
21622191
beef = b"DEADBEEF"
@@ -2302,6 +2331,15 @@ def test_query_w_query_params(self):
23022331
"query_parameters": [with_friends_param],
23032332
},
23042333
]
2334+
if _BIGNUMERIC_SUPPORT:
2335+
examples.append(
2336+
{
2337+
"sql": "SELECT @bignum_param",
2338+
"expected": bignum,
2339+
"query_parameters": [bignum_param],
2340+
}
2341+
)
2342+
23052343
for example in examples:
23062344
jconfig = QueryJobConfig()
23072345
jconfig.query_parameters = example["query_parameters"]

0 commit comments

Comments (0)