Description
Environment details
- OS type and version: macOS Ventura 13.3.1
- Python version: 3.9.12
- pip version: pip 23.1.1
- google-cloud-bigquery version: 3.8.0, 3.9.0, 3.10.0
Steps to reproduce
See the code sample below. Loading a string longer than 186 bytes into a STRING column raises the following exception: google.api_core.exceptions.BadRequest: 400 Provided Schema does not match Table. Field longstring has changed type from STRING to BYTES
Code example
import json

from google.cloud import bigquery
from google.cloud.bigquery.schema import SchemaField
from google.oauth2.service_account import Credentials

from pocs.poc_settings import settings

service_acc = json.loads(settings.GCP_BQ_SERVICE_ACC)
credentials = Credentials.from_service_account_info(info=service_acc)
bq_client = bigquery.Client(credentials=credentials)


def add_data(user_id):
    dataset_ref = bigquery.DatasetReference(project="myproject", dataset_id="mydataset")
    table_ref = bigquery.TableReference(dataset_ref=dataset_ref, table_id=user_id)
    json_rows = [
        {"longstring": "a" * 138}  # 187 bytes
    ]
    job_config = bigquery.LoadJobConfig(
        autodetect=False,
        create_disposition="CREATE_NEVER",
        max_bad_records=0,
        write_disposition="WRITE_APPEND"
    )
    job = bq_client.load_table_from_json(
        json_rows=json_rows,
        destination=table_ref,
        num_retries=2,
        job_config=job_config
    )
    job.result()


def create_table(user_id):
    dataset_ref = bigquery.DatasetReference(project="myproject", dataset_id="mydataset")
    table_ref = bigquery.TableReference(dataset_ref=dataset_ref, table_id=user_id)
    schema = [
        SchemaField(name="longstring", field_type="STRING")
    ]
    table = bigquery.Table(table_ref=table_ref, schema=schema)
    table = bq_client.create_table(table)


create_table("test")
add_data("test")
Stack trace
google.api_core.exceptions.BadRequest: 400 Provided Schema does not match Table. Field longstring has changed type from STRING to BYTES
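For debugging, it may help to inspect the failed load job to see whether a schema was attached to it and what the server reported. A minimal sketch, assuming the same bq_client, table_ref, json_rows and job_config as in the repro above:

from google.api_core.exceptions import BadRequest

job = bq_client.load_table_from_json(
    json_rows=json_rows,
    destination=table_ref,
    job_config=job_config
)
try:
    job.result()
except BadRequest:
    # Schema carried by the load job configuration (if any) and the
    # server-side error details for the failed job.
    print(job.schema)
    print(job.errors)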
UPDATE: After some more debugging, I'm starting to believe this is a bug in the load_table_from_json() method. Replacing it with a call to insert_rows_json() did the trick for me:
def add_data(user_id):
    dataset_ref = bigquery.DatasetReference(project="myproject", dataset_id="mydataset")
    table_ref = bigquery.TableReference(dataset_ref=dataset_ref, table_id=user_id)
    json_rows = [
        {"longstring": "a" * 138}  # 187 bytes
    ]
    pot_errors = bq_client.insert_rows_json(table=table_ref, json_rows=json_rows, skip_invalid_rows=False)
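One caveat with this workaround: insert_rows_json() goes through the streaming insert API rather than a load job, so it has different quota and billing behavior and the rows sit in the streaming buffer for a while. If a load job is required, another thing that might be worth trying is passing the destination schema explicitly in LoadJobConfig, so the job does not have to detect field types from the data. A rough sketch, using the same placeholder names as above:

job_config = bigquery.LoadJobConfig(
    schema=[SchemaField(name="longstring", field_type="STRING")],
    autodetect=False,
    create_disposition="CREATE_NEVER",
    write_disposition="WRITE_APPEND"
)
job = bq_client.load_table_from_json(
    json_rows=json_rows,
    destination=table_ref,
    job_config=job_config
)
job.result()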