Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add support for INTERVAL data type to list_rows #840

Merged
merged 39 commits into from
Oct 26, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
5704f1e
test: refactor `list_rows` tests and add test for scalars
tswast Jul 28, 2021
5eb3794
WIP: INTERVAL support
tswast Jul 28, 2021
9e54277
feat: add support for INTERVAL data type to `list_rows`
tswast Jul 28, 2021
a89c1c1
Merge remote-tracking branch 'upstream/master' into issue826-interval
tswast Jul 29, 2021
60d9ca7
fix relativedelta construction for non-microseconds
tswast Jul 29, 2021
da6ef5b
WIP: support INTERVAL query params
tswast Jul 29, 2021
b73a610
remove dead code
tswast Jul 29, 2021
52f2b7b
INTERVAL not supported in query parameters
tswast Jul 29, 2021
ca8872e
Merge remote-tracking branch 'upstream/master' into issue826-interval
tswast Jul 29, 2021
018a617
Merge branch 'master' into issue826-interval
tswast Aug 3, 2021
2872d85
Merge remote-tracking branch 'upstream/master' into issue826-interval
tswast Aug 3, 2021
31c3f92
revert query parameter changes
tswast Aug 3, 2021
e3a3a6a
Merge remote-tracking branch 'origin/issue826-interval' into issue826…
tswast Aug 3, 2021
5f78311
Merge branch 'master' into issue826-interval
tswast Aug 5, 2021
bb03618
add validation error for interval
tswast Aug 5, 2021
f3711e7
add unit tests for extreme intervals
tswast Aug 5, 2021
68035ba
add dateutil to intersphinx
tswast Aug 11, 2021
9a011e9
Merge remote-tracking branch 'upstream/master' into issue826-interval
tswast Aug 11, 2021
9f6b02d
use dictionary for intersphinx
tswast Aug 11, 2021
7cccbd2
🦉 Updates from OwlBot
gcf-owl-bot[bot] Aug 11, 2021
152e8c2
Merge remote-tracking branch 'upstream/master' into issue826-interval
tswast Aug 16, 2021
f0f3fbd
Merge remote-tracking branch 'origin/issue826-interval' into issue826…
tswast Aug 16, 2021
c4636fa
🦉 Updates from OwlBot
gcf-owl-bot[bot] Aug 16, 2021
18aae17
Merge branch 'master' into issue826-interval
tswast Aug 25, 2021
eccea82
Merge branch 'main' into issue826-interval
tswast Sep 10, 2021
5cdeffb
Merge remote-tracking branch 'upstream/main' into issue826-interval
tswast Sep 30, 2021
0497b19
add test case for trailing .
tswast Sep 30, 2021
92f41b9
Merge remote-tracking branch 'origin/issue826-interval' into issue826…
tswast Sep 30, 2021
0318f54
explicit none
tswast Sep 30, 2021
6b1f238
🦉 Updates from OwlBot
gcf-owl-bot[bot] Sep 30, 2021
54e47f7
truncate nanoseconds
tswast Oct 4, 2021
ced356b
Merge remote-tracking branch 'origin/issue826-interval' into issue826…
tswast Oct 4, 2021
dcc8b57
use \d group for digits
tswast Oct 4, 2021
5c31fe2
Merge branch 'main' into issue826-interval
tswast Oct 5, 2021
2091478
Merge branch 'main' into issue826-interval
tswast Oct 25, 2021
21a2975
Merge branch 'main' into issue826-interval
plamut Oct 26, 2021
87b0d81
Merge remote-tracking branch 'upstream/main' into issue826-interval
tswast Oct 26, 2021
7bd48be
Merge remote-tracking branch 'origin/issue826-interval' into issue826…
tswast Oct 26, 2021
d62b950
use \d for consistency
tswast Oct 26, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,8 +366,9 @@
"grpc": ("https://grpc.github.io/grpc/python/", None),
"proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None),
"protobuf": ("https://googleapis.dev/python/protobuf/latest/", None),
"pandas": ("http://pandas.pydata.org/pandas-docs/stable/", None),
"dateutil": ("https://dateutil.readthedocs.io/en/latest/", None),
"geopandas": ("https://geopandas.org/", None),
"pandas": ("https://pandas.pydata.org/pandas-docs/dev", None),
}


Expand Down
47 changes: 46 additions & 1 deletion google/cloud/bigquery/_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@
import decimal
import math
import re
from typing import Any, Union
from typing import Any, Optional, Union

from dateutil import relativedelta
from google.cloud._helpers import UTC
from google.cloud._helpers import _date_from_iso8601_date
from google.cloud._helpers import _datetime_from_microseconds
Expand All @@ -45,6 +46,14 @@
re.VERBOSE,
)

# BigQuery sends INTERVAL data in "canonical format"
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#interval_type
# Structure: "[-]Y-M [-]D [-]H:M:S[.F]". A single leading "-" in
# ``calendar_sign`` negates both years and months; ``days`` carries its own
# sign; a single "-" in ``time_sign`` negates hours, minutes, seconds, and
# the optional fractional-seconds digits captured by ``fraction``.
_INTERVAL_PATTERN = re.compile(
    r"(?P<calendar_sign>-?)(?P<years>\d+)-(?P<months>\d+) "
    r"(?P<days>-?\d+) "
    r"(?P<time_sign>-?)(?P<hours>\d+):(?P<minutes>\d+):(?P<seconds>\d+)\.?(?P<fraction>\d*)?$"
)

_MIN_PYARROW_VERSION = packaging.version.Version("3.0.0")
_MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0")
_BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0")
Expand Down Expand Up @@ -191,6 +200,41 @@ def _int_from_json(value, field):
return int(value)


def _interval_from_json(
    value: Optional[str], field
) -> Optional[relativedelta.relativedelta]:
    """Coerce 'value' to an interval, if set or not nullable.

    Args:
        value:
            An INTERVAL in BigQuery "canonical format", e.g.
            ``"1-2 3 4:5:6.789"``, or :data:`None`.
        field:
            The schema field the value belongs to; its ``mode`` determines
            whether a missing value is permitted.

    Returns:
        A :class:`dateutil.relativedelta.relativedelta`, or :data:`None` when
        the value is unset and the field is nullable.

    Raises:
        TypeError: If the field is REQUIRED but the value is missing.
        ValueError: If the value does not match the canonical format.
    """
    if not _not_null(value, field):
        return None
    if value is None:
        raise TypeError(f"got {value} for REQUIRED field: {repr(field)}")

    parsed = _INTERVAL_PATTERN.match(value)
    if parsed is None:
        raise ValueError(f"got interval: '{value}' with unexpected format")

    # One leading "-" negates both the years and months components.
    calendar_sign = -1 if parsed.group("calendar_sign") == "-" else 1
    years = calendar_sign * int(parsed.group("years"))
    months = calendar_sign * int(parsed.group("months"))
    days = int(parsed.group("days"))
    # Likewise, one "-" before the clock portion negates hours, minutes,
    # seconds, and the fractional part.
    time_sign = -1 if parsed.group("time_sign") == "-" else 1
    hours = time_sign * int(parsed.group("hours"))
    minutes = time_sign * int(parsed.group("minutes"))
    seconds = time_sign * int(parsed.group("seconds"))
    fraction = parsed.group("fraction")
    # Right-pad the fraction to microsecond precision; any digits beyond six
    # (nanoseconds) are truncated, since relativedelta cannot represent them.
    microseconds = time_sign * int(fraction.ljust(6, "0")[:6]) if fraction else 0

    return relativedelta.relativedelta(
        years=years,
        months=months,
        days=days,
        hours=hours,
        minutes=minutes,
        seconds=seconds,
        microseconds=microseconds,
    )


def _float_from_json(value, field):
"""Coerce 'value' to a float, if set or not nullable."""
if _not_null(value, field):
Expand Down Expand Up @@ -327,6 +371,7 @@ def _record_from_json(value, field):
_CELLDATA_FROM_JSON = {
"INTEGER": _int_from_json,
"INT64": _int_from_json,
"INTERVAL": _interval_from_json,
"FLOAT": _float_from_json,
"FLOAT64": _float_from_json,
"NUMERIC": _decimal_from_json,
Expand Down
1 change: 1 addition & 0 deletions google/cloud/bigquery/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,7 @@ class SqlTypeNames(str, enum.Enum):
DATE = "DATE"
TIME = "TIME"
DATETIME = "DATETIME"
INTERVAL = "INTERVAL" # NOTE: not available in legacy types


class SqlParameterScalarTypes:
Expand Down
7 changes: 2 additions & 5 deletions owlbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,9 @@
microgenerator=True,
split_system_tests=True,
intersphinx_dependencies={
"pandas": "http://pandas.pydata.org/pandas-docs/stable/",
"dateutil": "https://dateutil.readthedocs.io/en/latest/",
"geopandas": "https://geopandas.org/",
"pandas": "https://pandas.pydata.org/pandas-docs/dev",
},
)

Expand All @@ -115,10 +116,6 @@
# Include custom SNIPPETS_TESTS job for performance.
# https://github.com/googleapis/python-bigquery/issues/191
".kokoro/presubmit/presubmit.cfg",
# Group all renovate PRs together. If this works well, remove this and
# update the shared templates (possibly with configuration option to
# py_library.)
"renovate.json",
],
)

Expand Down
5 changes: 4 additions & 1 deletion renovate.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
{
"extends": [
"config:base", "group:all", ":preserveSemverRanges"
"config:base",
"group:all",
":preserveSemverRanges",
":disableDependencyDashboard"
],
"ignorePaths": [".pre-commit-config.yaml"],
"pip_requirements": {
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
"google-resumable-media >= 0.6.0, < 3.0dev",
"packaging >= 14.3",
"protobuf >= 3.12.0",
"python-dateutil >= 2.7.2, <3.0dev",
"requests >= 2.18.0, < 3.0.0dev",
]
extras = {
Expand Down
1 change: 1 addition & 0 deletions testing/constraints-3.6.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ pandas==0.24.2
proto-plus==1.10.0
protobuf==3.12.0
pyarrow==3.0.0
python-dateutil==2.7.2
requests==2.18.0
Shapely==1.6.0
six==1.13.0
Expand Down
5 changes: 0 additions & 5 deletions tests/system/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,6 @@
except ImportError: # pragma: NO COVER
bigquery_storage = None

try:
import fastavro # to parse BQ storage client results
except ImportError: # pragma: NO COVER
fastavro = None

try:
import pyarrow
import pyarrow.types
Expand Down
8 changes: 8 additions & 0 deletions tests/system/test_list_rows.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
import datetime
import decimal

from dateutil import relativedelta

from google.cloud import bigquery
from google.cloud.bigquery import enums

Expand Down Expand Up @@ -64,6 +66,9 @@ def test_list_rows_scalars(bigquery_client: bigquery.Client, scalars_table: str)
assert row["datetime_col"] == datetime.datetime(2021, 7, 21, 11, 39, 45)
assert row["geography_col"] == "POINT(-122.0838511 37.3860517)"
assert row["int64_col"] == 123456789
assert row["interval_col"] == relativedelta.relativedelta(
years=7, months=11, days=9, hours=4, minutes=15, seconds=37, microseconds=123456
)
assert row["numeric_col"] == decimal.Decimal("1.23456789")
assert row["bignumeric_col"] == decimal.Decimal("10.111213141516171819")
assert row["float64_col"] == 1.25
Expand Down Expand Up @@ -95,6 +100,9 @@ def test_list_rows_scalars_extreme(
assert row["datetime_col"] == datetime.datetime(9999, 12, 31, 23, 59, 59, 999999)
assert row["geography_col"] == "POINT(-135 90)"
assert row["int64_col"] == 9223372036854775807
assert row["interval_col"] == relativedelta.relativedelta(
years=-10000, days=-3660000, hours=-87840000
)
assert row["numeric_col"] == decimal.Decimal(f"9.{'9' * 37}E+28")
assert row["bignumeric_col"] == decimal.Decimal(f"9.{'9' * 75}E+37")
assert row["float64_col"] == float("Inf")
Expand Down
157 changes: 157 additions & 0 deletions tests/unit/helpers/test_from_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from dateutil.relativedelta import relativedelta
import pytest

from google.cloud.bigquery.schema import SchemaField


def create_field(mode="NULLABLE", type_="IGNORED"):
    """Build a single ``SchemaField`` named ``test_field`` for use in tests."""
    field = SchemaField("test_field", type_, mode=mode)
    return field


@pytest.fixture
def mut():
    """Return the module under test: ``google.cloud.bigquery._helpers``."""
    # Imported inside the fixture, presumably so an import failure surfaces
    # as a test error rather than breaking collection — confirm if relied on.
    from google.cloud.bigquery import _helpers

    return _helpers


def test_interval_from_json_w_none_nullable(mut):
    """A missing value in a NULLABLE field coerces to ``None``."""
    assert mut._interval_from_json(None, create_field()) is None


def test_interval_from_json_w_none_required(mut):
    """A missing value in a REQUIRED field raises ``TypeError``."""
    field = create_field(mode="REQUIRED")
    with pytest.raises(TypeError):
        mut._interval_from_json(None, field)


def test_interval_from_json_w_invalid_format(mut):
    """A malformed interval string raises ``ValueError`` naming the value."""
    bad_value = "NOT_AN_INTERVAL"
    with pytest.raises(ValueError, match=bad_value):
        mut._interval_from_json(bad_value, create_field())


# Table of (canonical-format string, expected relativedelta) pairs covering
# each interval component in isolation, mixed signs, multi-digit components,
# sub-microsecond fractions, and nanosecond truncation.
@pytest.mark.parametrize(
    ("value", "expected"),
    (
        ("0-0 0 0:0:0", relativedelta()),
        # SELECT INTERVAL X YEAR
        ("-10000-0 0 0:0:0", relativedelta(years=-10000)),
        ("-1-0 0 0:0:0", relativedelta(years=-1)),
        ("1-0 0 0:0:0", relativedelta(years=1)),
        ("10000-0 0 0:0:0", relativedelta(years=10000)),
        # SELECT INTERVAL X MONTH
        ("-0-11 0 0:0:0", relativedelta(months=-11)),
        ("-0-1 0 0:0:0", relativedelta(months=-1)),
        ("0-1 0 0:0:0", relativedelta(months=1)),
        ("0-11 0 0:0:0", relativedelta(months=11)),
        # SELECT INTERVAL X DAY
        ("0-0 -3660000 0:0:0", relativedelta(days=-3660000)),
        ("0-0 -1 0:0:0", relativedelta(days=-1)),
        ("0-0 1 0:0:0", relativedelta(days=1)),
        ("0-0 3660000 0:0:0", relativedelta(days=3660000)),
        # SELECT INTERVAL X HOUR
        ("0-0 0 -87840000:0:0", relativedelta(hours=-87840000)),
        ("0-0 0 -1:0:0", relativedelta(hours=-1)),
        ("0-0 0 1:0:0", relativedelta(hours=1)),
        ("0-0 0 87840000:0:0", relativedelta(hours=87840000)),
        # SELECT INTERVAL X MINUTE
        ("0-0 0 -0:59:0", relativedelta(minutes=-59)),
        ("0-0 0 -0:1:0", relativedelta(minutes=-1)),
        ("0-0 0 0:1:0", relativedelta(minutes=1)),
        ("0-0 0 0:59:0", relativedelta(minutes=59)),
        # SELECT INTERVAL X SECOND
        ("0-0 0 -0:0:59", relativedelta(seconds=-59)),
        ("0-0 0 -0:0:1", relativedelta(seconds=-1)),
        ("0-0 0 0:0:1", relativedelta(seconds=1)),
        ("0-0 0 0:0:59", relativedelta(seconds=59)),
        # SELECT (INTERVAL -1 SECOND) / 1000000
        ("0-0 0 -0:0:0.000001", relativedelta(microseconds=-1)),
        ("0-0 0 -0:0:59.999999", relativedelta(seconds=-59, microseconds=-999999)),
        ("0-0 0 -0:0:59.999", relativedelta(seconds=-59, microseconds=-999000)),
        ("0-0 0 0:0:59.999", relativedelta(seconds=59, microseconds=999000)),
        ("0-0 0 0:0:59.999999", relativedelta(seconds=59, microseconds=999999)),
        # Test with multiple digits in each section.
        (
            "32-11 45 67:16:23.987654",
            relativedelta(
                years=32,
                months=11,
                days=45,
                hours=67,
                minutes=16,
                seconds=23,
                microseconds=987654,
            ),
        ),
        (
            "-32-11 -45 -67:16:23.987654",
            relativedelta(
                years=-32,
                months=-11,
                days=-45,
                hours=-67,
                minutes=-16,
                seconds=-23,
                microseconds=-987654,
            ),
        ),
        # Test with mixed +/- sections.
        (
            "9999-9 -999999 9999999:59:59.999999",
            relativedelta(
                years=9999,
                months=9,
                days=-999999,
                hours=9999999,
                minutes=59,
                seconds=59,
                microseconds=999999,
            ),
        ),
        # Test with fraction that is not microseconds.
        ("0-0 0 0:0:42.", relativedelta(seconds=42)),
        ("0-0 0 0:0:59.1", relativedelta(seconds=59, microseconds=100000)),
        ("0-0 0 0:0:0.12", relativedelta(microseconds=120000)),
        ("0-0 0 0:0:0.123", relativedelta(microseconds=123000)),
        ("0-0 0 0:0:0.1234", relativedelta(microseconds=123400)),
        # Fractional seconds can cause rounding problems if cast to float. See:
        # https://github.com/googleapis/python-db-dtypes-pandas/issues/18
        ("0-0 0 0:0:59.876543", relativedelta(seconds=59, microseconds=876543)),
        (
            "0-0 0 01:01:01.010101",
            relativedelta(hours=1, minutes=1, seconds=1, microseconds=10101),
        ),
        (
            "0-0 0 09:09:09.090909",
            relativedelta(hours=9, minutes=9, seconds=9, microseconds=90909),
        ),
        (
            "0-0 0 11:11:11.111111",
            relativedelta(hours=11, minutes=11, seconds=11, microseconds=111111),
        ),
        (
            "0-0 0 19:16:23.987654",
            relativedelta(hours=19, minutes=16, seconds=23, microseconds=987654),
        ),
        # Nanoseconds are not expected, but should not cause error.
        ("0-0 0 0:0:00.123456789", relativedelta(microseconds=123456)),
        ("0-0 0 0:0:59.87654321", relativedelta(seconds=59, microseconds=876543)),
    ),
)
def test_w_string_values(mut, value, expected):
    """Canonical-format interval strings parse to the expected relativedelta."""
    got = mut._interval_from_json(value, create_field())
    assert got == expected