Skip to content

Commit

Permalink
Phase 1 for storing schemas for later use. (#7761)
Browse files Browse the repository at this point in the history
* Added functions to client for loading and saving schemas to a file.

* Tests for schema to/from json.
  • Loading branch information
lbristol88 authored and tswast committed Apr 25, 2019
1 parent 7755867 commit 55a8097
Show file tree
Hide file tree
Showing 2 changed files with 227 additions and 0 deletions.
47 changes: 47 additions & 0 deletions bigquery/google/cloud/bigquery/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@

import functools
import gzip
import io
import json
import os
import tempfile
import uuid
Expand Down Expand Up @@ -50,6 +52,7 @@
from google.cloud.bigquery.model import ModelReference
from google.cloud.bigquery.query import _QueryResults
from google.cloud.bigquery.retry import DEFAULT_RETRY
from google.cloud.bigquery.schema import SchemaField
from google.cloud.bigquery.table import _table_arg_to_table
from google.cloud.bigquery.table import _table_arg_to_table_ref
from google.cloud.bigquery.table import Table
Expand Down Expand Up @@ -1929,6 +1932,50 @@ def list_rows(
)
return row_iterator

def _schema_from_json_file_object(self, file_obj):
    """Read a table schema from an open file object containing JSON.

    Helper for :meth:`schema_from_json`.

    Returns:
        List of schema field objects built via
        ``SchemaField.from_api_repr``.
    """
    return [SchemaField.from_api_repr(field) for field in json.load(file_obj)]

def _schema_to_json_file_object(self, schema_list, file_obj):
"""Helper function for schema_to_json that takes a schema list and file
object and writes the schema list to the file object with json.dump
"""
json.dump(schema_list, file_obj, indent=2, sort_keys=True)

def schema_from_json(self, file_or_path):
    """Load a table schema from JSON.

    Args:
        file_or_path: An open file object, or a path to a file,
            containing a JSON description of a table schema.

    Returns:
        List of schema field objects.
    """
    # A path was given: open it ourselves and ensure it gets closed.
    if not isinstance(file_or_path, io.IOBase):
        with open(file_or_path) as file_obj:
            return self._schema_from_json_file_object(file_obj)

    # Already an open file object; the caller owns its lifetime.
    return self._schema_from_json_file_object(file_or_path)

def schema_to_json(self, schema_list, destination):
    """Serialize a list of schema field objects as JSON to a file.

    Args:
        schema_list: List of schema field objects to serialize.
        destination: An open file object, or a path to a file, to
            receive the JSON output.
    """
    api_repr = [field.to_api_repr() for field in schema_list]

    # A path was given: open it for writing and ensure it gets closed.
    if not isinstance(destination, io.IOBase):
        with open(destination, mode="w") as file_obj:
            return self._schema_to_json_file_object(api_repr, file_obj)

    # Already an open file object; the caller owns its lifetime.
    return self._schema_to_json_file_object(api_repr, destination)


# pylint: disable=unused-argument
def _item_to_project(iterator, resource):
Expand Down
180 changes: 180 additions & 0 deletions bigquery/tests/unit/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5161,3 +5161,183 @@ def test__do_multipart_upload_wrong_size(self):

with pytest.raises(ValueError):
client._do_multipart_upload(file_obj, {}, file_obj_len + 1, None)

def test_schema_from_json_with_file_path(self):
    from google.cloud.bigquery.schema import SchemaField

    # Schema as it would appear in a JSON schema file on disk; the
    # expected SchemaField objects are derived from the same data so
    # the two can never drift apart.
    fields = [
        {
            "description": "quarter",
            "mode": "REQUIRED",
            "name": "qtr",
            "type": "STRING",
        },
        {
            "description": "sales representative",
            "mode": "NULLABLE",
            "name": "rep",
            "type": "STRING",
        },
        {
            "description": "total sales",
            "mode": "NULLABLE",
            "name": "sales",
            "type": "FLOAT",
        },
    ]
    file_content = json.dumps(fields)

    expected = [
        SchemaField(f["name"], f["type"], f["mode"], f["description"])
        for f in fields
    ]

    client = self._make_client()
    mock_file_path = "/mocked/file.json"

    # ``open`` lives in a different module on Python 2 vs Python 3.
    open_target = "__builtin__.open" if six.PY2 else "builtins.open"
    open_patch = mock.patch(
        open_target, mock.mock_open(read_data=file_content)
    )

    with open_patch as _mock_file:
        actual = client.schema_from_json(mock_file_path)
        _mock_file.assert_called_once_with(mock_file_path)
        # Verify the context manager that opened the file was exited.
        _mock_file().__exit__.assert_called_once()

    assert expected == actual

def test_schema_from_json_with_file_object(self):
    from google.cloud.bigquery.schema import SchemaField

    # Schema data serialized once; expected fields are derived from the
    # same list so fixture and expectation stay in sync.
    fields = [
        {
            "description": "quarter",
            "mode": "REQUIRED",
            "name": "qtr",
            "type": "STRING",
        },
        {
            "description": "sales representative",
            "mode": "NULLABLE",
            "name": "rep",
            "type": "STRING",
        },
        {
            "description": "total sales",
            "mode": "NULLABLE",
            "name": "sales",
            "type": "FLOAT",
        },
    ]
    file_content = json.dumps(fields)

    expected = [
        SchemaField(f["name"], f["type"], f["mode"], f["description"])
        for f in fields
    ]

    client = self._make_client()

    # ``json.dumps`` yields ``str`` on both majors, so pick the buffer
    # type that accepts it: bytes on Python 2, text on Python 3.
    fake_file = (
        io.BytesIO(file_content) if six.PY2 else io.StringIO(file_content)
    )

    assert expected == client.schema_from_json(fake_file)

def test_schema_to_json_with_file_path(self):
    from google.cloud.bigquery.schema import SchemaField

    # Expected API representation written to the file; the input
    # SchemaField objects are built from the same data.
    file_content = [
        {
            "description": "quarter",
            "mode": "REQUIRED",
            "name": "qtr",
            "type": "STRING",
        },
        {
            "description": "sales representative",
            "mode": "NULLABLE",
            "name": "rep",
            "type": "STRING",
        },
        {
            "description": "total sales",
            "mode": "NULLABLE",
            "name": "sales",
            "type": "FLOAT",
        },
    ]
    schema_list = [
        SchemaField(f["name"], f["type"], f["mode"], f["description"])
        for f in file_content
    ]

    client = self._make_client()
    mock_file_path = "/mocked/file.json"

    # ``open`` lives in a different module on Python 2 vs Python 3.
    open_target = "__builtin__.open" if six.PY2 else "builtins.open"
    open_patch = mock.patch(open_target, mock.mock_open())

    with open_patch as mock_file, mock.patch("json.dump") as mock_dump:
        client.schema_to_json(schema_list, mock_file_path)
        mock_file.assert_called_once_with(mock_file_path, mode="w")
        # Verify the context manager that opened the file was exited.
        mock_file().__exit__.assert_called_once()
        mock_dump.assert_called_with(
            file_content, mock_file.return_value, indent=2, sort_keys=True
        )

def test_schema_to_json_with_file_object(self):
    from google.cloud.bigquery.schema import SchemaField

    # Expected JSON payload; input SchemaField objects are derived from
    # the same data so the round-trip is checked against one source.
    file_content = [
        {
            "description": "quarter",
            "mode": "REQUIRED",
            "name": "qtr",
            "type": "STRING",
        },
        {
            "description": "sales representative",
            "mode": "NULLABLE",
            "name": "rep",
            "type": "STRING",
        },
        {
            "description": "total sales",
            "mode": "NULLABLE",
            "name": "sales",
            "type": "FLOAT",
        },
    ]
    schema_list = [
        SchemaField(f["name"], f["type"], f["mode"], f["description"])
        for f in file_content
    ]

    # Bytes buffer on Python 2, text buffer on Python 3.
    fake_file = io.BytesIO() if six.PY2 else io.StringIO()

    client = self._make_client()
    client.schema_to_json(schema_list, fake_file)

    assert file_content == json.loads(fake_file.getvalue())

0 comments on commit 55a8097

Please sign in to comment.