Skip to content

Commit aa343a0

Browse files
committed (author and date not captured in this page extract)
fix: load_table_from_dataframe for higher scale decimal
1 parent 40bc244 commit aa343a0

File tree

2 files changed

+60
-0
lines changed

2 files changed

+60
-0
lines changed

google/cloud/bigquery/_pandas_helpers.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -509,6 +509,10 @@ def augment_schema(dataframe, current_bq_schema):
509509
else:
510510
detected_mode = field.mode
511511
detected_type = _pyarrow_helpers.arrow_scalar_ids_to_bq(arrow_table.type.id)
512+
if detected_type == "NUMERIC" and (
513+
arrow_table.type.precision > 38 or arrow_table.type.scale > 9
514+
):
515+
detected_type = "BIGNUMERIC"
512516

513517
if detected_type is None:
514518
unknown_type_fields.append(field)

tests/unit/test_client.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8745,6 +8745,62 @@ def test_load_table_from_dataframe_with_csv_source_format(self):
87458745
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
87468746
assert sent_config.source_format == job.SourceFormat.CSV
87478747

8748+
@unittest.skipIf(
8749+
pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION,
8750+
"Only `pandas version >=1.0.0` supported",
8751+
)
8752+
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
8753+
def test_load_table_from_dataframe_w_higher_scale_decimal128_datatype(self):
8754+
from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
8755+
from google.cloud.bigquery import job
8756+
from google.cloud.bigquery.schema import SchemaField
8757+
from decimal import Decimal
8758+
8759+
client = self._make_client()
8760+
dataframe = pandas.DataFrame(
8761+
{
8762+
"x": [
8763+
Decimal("0.12345678901234560000000000000000000000"),
8764+
Decimal("01234567890123456789012345678901234567.1234567891"),
8765+
]
8766+
}
8767+
)
8768+
load_patch = mock.patch(
8769+
"google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
8770+
)
8771+
8772+
get_table_patch = mock.patch(
8773+
"google.cloud.bigquery.client.Client.get_table",
8774+
autospec=True,
8775+
return_value=mock.Mock(schema=[SchemaField("x", "BIGNUMERIC", "NULLABLE")]),
8776+
)
8777+
8778+
with load_patch as load_table_from_file, get_table_patch:
8779+
client.load_table_from_dataframe(
8780+
dataframe, self.TABLE_REF, location=self.LOCATION
8781+
)
8782+
8783+
load_table_from_file.assert_called_once_with(
8784+
client,
8785+
mock.ANY,
8786+
self.TABLE_REF,
8787+
num_retries=_DEFAULT_NUM_RETRIES,
8788+
rewind=True,
8789+
size=mock.ANY,
8790+
job_id=mock.ANY,
8791+
job_id_prefix=None,
8792+
location=self.LOCATION,
8793+
project=None,
8794+
job_config=mock.ANY,
8795+
timeout=DEFAULT_TIMEOUT,
8796+
)
8797+
8798+
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
8799+
assert sent_config.source_format == job.SourceFormat.PARQUET
8800+
assert tuple(sent_config.schema) == (
8801+
SchemaField("x", "BIGNUMERIC", "NULLABLE", None),
8802+
)
8803+
87488804
def test_load_table_from_json_basic_use(self):
87498805
from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
87508806
from google.cloud.bigquery import job

Comments: 0