Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bigquery: add table schemas #1022

Merged
merged 3 commits into from
Aug 4, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 57 additions & 1 deletion gcloud/bigquery/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,35 @@
from gcloud.bigquery._helpers import _prop_from_datetime


class SchemaField(object):
    """Describe a single field within a table schema.

    :type name: string
    :param name: the name of the field

    :type field_type: string
    :param field_type: the type of the field (one of 'STRING', 'INTEGER',
                       'FLOAT', 'BOOLEAN', 'TIMESTAMP' or 'RECORD')

    :type mode: string
    :param mode: the mode of the field (one of 'NULLABLE', 'REQUIRED',
                 or 'REPEATED'); defaults to 'NULLABLE'

    :type description: string
    :param description: optional description for the field

    :type fields: list of :class:`SchemaField`, or None
    :param fields: subfields (requires ``field_type`` of 'RECORD').
    """
    def __init__(self, name, field_type, mode='NULLABLE', description=None,
                 fields=None):
        # Arguments are stored exactly as given; nothing is validated or
        # copied here, so ``fields`` keeps referring to the caller's object.
        self.name = name
        self.field_type = field_type
        self.mode = mode
        self.description = description
        self.fields = fields

    def __repr__(self):
        """Return a constructor-style representation of the field.

        :rtype: string
        :returns: text showing every attribute, including any subfields
        """
        return '%s(%r, %r, mode=%r, description=%r, fields=%r)' % (
            type(self).__name__, self.name, self.field_type, self.mode,
            self.description, self.fields)


class Table(object):
"""Tables represent a set of rows whose values correspond to a schema.

Expand All @@ -33,12 +62,16 @@ class Table(object):

:type dataset: :class:`gcloud.bigquery.dataset.Dataset`
:param dataset: The dataset which contains the table.

:type schema: list of :class:`SchemaField`
:param schema: The table's schema
"""

def __init__(self, name, dataset):
def __init__(self, name, dataset, schema=()):
self.name = name
self._dataset = dataset
self._properties = {}
self.schema = schema

@property
def path(self):
Expand All @@ -49,6 +82,29 @@ def path(self):
"""
return '%s/tables/%s' % (self._dataset.path, self.name)

@property
def schema(self):
"""Table's schema.

:rtype: list of :class:`SchemaField`
:returns: fields describing the schema
"""
return list(self._schema)

@schema.setter
def schema(self, value):
    """Update table's schema

    :type value: iterable of :class:`SchemaField`
    :param value: fields describing the schema

    :raises: TypeError if 'value' is not iterable, or ValueError if
             any item in it is not a SchemaField
    """
    # Materialise exactly once before validating.  Checking ``value`` and
    # then calling ``tuple(value)`` would iterate it twice, so a one-shot
    # iterator (e.g. a generator) would validate fine and then be stored
    # as an empty schema.
    fields = tuple(value)
    if not all(isinstance(field, SchemaField) for field in fields):
        raise ValueError('Schema items must be fields')
    self._schema = fields

This comment was marked as spam.

This comment was marked as spam.

This comment was marked as spam.

This comment was marked as spam.


This comment was marked as spam.

This comment was marked as spam.

@property
def created(self):
"""Datetime at which the table was created.
Expand Down
3 changes: 2 additions & 1 deletion gcloud/bigquery/test__helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ def test_w_millis(self):
import datetime
import pytz
from gcloud.bigquery._helpers import _total_seconds
NOW = datetime.datetime.utcnow().replace(tzinfo=pytz.utc)
NOW = datetime.datetime(2015, 7, 29, 17, 45, 21, 123456,
tzinfo=pytz.utc)

This comment was marked as spam.

This comment was marked as spam.

EPOCH = datetime.datetime(1970, 1, 1, tzinfo=pytz.utc)
MILLIS = _total_seconds(NOW - EPOCH) * 1000
self.assertEqual(self._callFUT(MILLIS), NOW)
Expand Down
84 changes: 84 additions & 0 deletions gcloud/bigquery/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,53 @@
import unittest2


class TestSchemaField(unittest2.TestCase):
    # Constructor tests for ``SchemaField``: defaults, explicit keyword
    # arguments, and nested subfields.

    def _getTargetClass(self):
        # The class under test is imported inside the method.
        from gcloud.bigquery.table import SchemaField
        return SchemaField

    def _makeOne(self, *args, **kw):
        klass = self._getTargetClass()
        return klass(*args, **kw)

    def _verifyBasics(self, field, name, field_type, mode, description):
        # Shared checks for the four scalar attributes of a field.
        self.assertEqual(field.name, name)
        self.assertEqual(field.field_type, field_type)
        self.assertEqual(field.mode, mode)
        self.assertEqual(field.description, description)

    def test_ctor_defaults(self):
        field = self._makeOne('test', 'STRING')
        self._verifyBasics(field, 'test', 'STRING', 'NULLABLE', None)
        self.assertEqual(field.fields, None)

    def test_ctor_explicit(self):
        field = self._makeOne('test', 'STRING', mode='REQUIRED',
                              description='Testing')
        self._verifyBasics(field, 'test', 'STRING', 'REQUIRED', 'Testing')
        self.assertEqual(field.fields, None)

    def test_ctor_subfields(self):
        area_code = self._makeOne('area_code', 'STRING')
        local_number = self._makeOne('local_number', 'STRING')
        field = self._makeOne('phone_number', 'RECORD',
                              fields=[area_code, local_number])
        self._verifyBasics(field, 'phone_number', 'RECORD', 'NULLABLE', None)
        self.assertEqual(len(field.fields), 2)
        # Each subfield is a leaf with default mode and no description.
        expected_names = ('area_code', 'local_number')
        for index, sub_name in enumerate(expected_names):
            subfield = field.fields[index]
            self._verifyBasics(subfield, sub_name, 'STRING', 'NULLABLE', None)
            self.assertEqual(subfield.fields, None)


class TestTable(unittest2.TestCase):
PROJECT = 'project'
DS_NAME = 'dataset-name'
Expand All @@ -37,6 +84,7 @@ def test_ctor(self):
table.path,
'/projects/%s/datasets/%s/tables/%s' % (
self.PROJECT, self.DS_NAME, self.TABLE_NAME))
self.assertEqual(table.schema, [])

self.assertEqual(table.created, None)
self.assertEqual(table.etag, None)
Expand All @@ -53,6 +101,42 @@ def test_ctor(self):
self.assertEqual(table.location, None)
self.assertEqual(table.view_query, None)

def test_ctor_w_schema(self):
    # A schema handed to the constructor is readable back via ``schema``.
    from gcloud.bigquery.table import SchemaField
    dataset = _Dataset(_Client(self.PROJECT))
    full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
    age = SchemaField('age', 'INTEGER', mode='REQUIRED')
    supplied = [full_name, age]
    table = self._makeOne(self.TABLE_NAME, dataset, schema=supplied)
    expected = [full_name, age]
    self.assertEqual(table.schema, expected)

def test_schema_setter_non_list(self):
    # Assigning a bare ``object()`` as the schema must raise TypeError.
    dataset = _Dataset(_Client(self.PROJECT))
    table = self._makeOne(self.TABLE_NAME, dataset)
    self.assertRaises(TypeError, setattr, table, 'schema', object())

def test_schema_setter_invalid_field(self):
    # A list containing a non-SchemaField item must raise ValueError.
    from gcloud.bigquery.table import SchemaField
    dataset = _Dataset(_Client(self.PROJECT))
    table = self._makeOne(self.TABLE_NAME, dataset)
    full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
    bad_schema = [full_name, object()]
    self.assertRaises(ValueError, setattr, table, 'schema', bad_schema)

def test_schema_setter(self):
    # A valid list of fields assigned to ``schema`` is readable back.
    from gcloud.bigquery.table import SchemaField
    dataset = _Dataset(_Client(self.PROJECT))
    table = self._makeOne(self.TABLE_NAME, dataset)
    full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
    age = SchemaField('age', 'INTEGER', mode='REQUIRED')
    table.schema = [full_name, age]
    expected = [full_name, age]
    self.assertEqual(table.schema, expected)

def test_props_set_by_server(self):
import datetime
import pytz
Expand Down