Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: guard imports against unsupported pyarrow versions #934

Merged
merged 4 commits into from
Sep 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 69 additions & 3 deletions google/cloud/bigquery/_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import decimal
import math
import re
from typing import Union
from typing import Any, Union

from google.cloud._helpers import UTC
from google.cloud._helpers import _date_from_iso8601_date
Expand All @@ -29,7 +29,10 @@
from google.cloud._helpers import _to_bytes
import packaging.version

from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError
from google.cloud.bigquery.exceptions import (
LegacyBigQueryStorageError,
LegacyPyarrowError,
)


_RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f"
Expand All @@ -42,6 +45,7 @@
re.VERBOSE,
)

_MIN_PYARROW_VERSION = packaging.version.Version("3.0.0")
_MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0")
_BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0")

Expand Down Expand Up @@ -95,12 +99,74 @@ def verify_version(self):
if self.installed_version < _MIN_BQ_STORAGE_VERSION:
msg = (
"Dependency google-cloud-bigquery-storage is outdated, please upgrade "
f"it to version >= 2.0.0 (version found: {self.installed_version})."
f"it to version >= {_MIN_BQ_STORAGE_VERSION} (version found: {self.installed_version})."
)
raise LegacyBigQueryStorageError(msg)


class PyarrowVersions:
    """Version comparisons for pyarrow package."""

    def __init__(self):
        # The parsed version is resolved lazily and memoized here.
        self._installed_version = None

    @property
    def installed_version(self) -> packaging.version.Version:
        """Return the parsed version of pyarrow."""
        if self._installed_version is None:
            import pyarrow

            # Fall back to 0.0.0, which sorts before any released version.
            # Legacy versions also have the same property, but creating a
            # LegacyVersion has been deprecated.
            # https://github.com/pypa/packaging/issues/321
            raw = getattr(pyarrow, "__version__", "0.0.0")
            self._installed_version = packaging.version.parse(raw)

        return self._installed_version

    def try_import(self, raise_if_error: bool = False) -> Any:
        """Import and return pyarrow if a supported version is installed.

        Because `pip` can install an outdated version of this extra despite
        the constraints in `setup.py`, the calling code can use this helper
        to verify the version compatibility at runtime.

        Args:
            raise_if_error:
                If ``True``, raise instead of returning ``None`` when pyarrow
                is missing or too old.

        Returns:
            The ``pyarrow`` module, or ``None`` when it is missing or
            outdated and ``raise_if_error`` is ``False``.

        Raises:
            LegacyPyarrowError:
                If ``raise_if_error`` is ``True`` and the pyarrow package is
                not installed or is older than the minimum supported version.
        """
        try:
            import pyarrow
        except ImportError as exc:  # pragma: NO COVER
            if not raise_if_error:
                return None
            raise LegacyPyarrowError(
                f"pyarrow package not found. Install pyarrow version >= {_MIN_PYARROW_VERSION}."
            ) from exc

        if self.installed_version >= _MIN_PYARROW_VERSION:
            return pyarrow

        if not raise_if_error:
            return None
        raise LegacyPyarrowError(
            "Dependency pyarrow is outdated, please upgrade "
            f"it to version >= {_MIN_PYARROW_VERSION} (version found: {self.installed_version})."
        )


BQ_STORAGE_VERSIONS = BQStorageVersions()
PYARROW_VERSIONS = PyarrowVersions()


def _not_null(value, field):
Expand Down
19 changes: 6 additions & 13 deletions google/cloud/bigquery/_pandas_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,6 @@ def _to_wkb(v):

_to_wkb = _to_wkb()

try:
import pyarrow
import pyarrow.parquet
except ImportError: # pragma: NO COVER
pyarrow = None

try:
from google.cloud.bigquery_storage import ArrowSerializationOptions
except ImportError:
Expand All @@ -73,12 +67,10 @@ def _to_wkb(v):
from google.cloud.bigquery import schema


_LOGGER = logging.getLogger(__name__)
pyarrow = _helpers.PYARROW_VERSIONS.try_import()

_NO_BQSTORAGE_ERROR = (
"The google-cloud-bigquery-storage library is not installed, "
"please install google-cloud-bigquery-storage to use bqstorage features."
)

_LOGGER = logging.getLogger(__name__)

_PROGRESS_INTERVAL = 0.2 # Maximum time between download status checks, in seconds.

Expand Down Expand Up @@ -548,8 +540,9 @@ def dataframe_to_parquet(dataframe, bq_schema, filepath, parquet_compression="SN
serializing method. Defaults to "SNAPPY".
https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html#pyarrow-parquet-write-table
"""
if pyarrow is None:
raise ValueError("pyarrow is required for BigQuery schema conversion.")
pyarrow = _helpers.PYARROW_VERSIONS.try_import(raise_if_error=True)

import pyarrow.parquet

bq_schema = schema._to_schema_fields(bq_schema)
arrow_table = dataframe_to_arrow(dataframe, bq_schema)
Expand Down
4 changes: 4 additions & 0 deletions google/cloud/bigquery/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,7 @@ class BigQueryError(Exception):

# Raised by BQStorageVersions.verify_version() in _helpers.py when the
# installed google-cloud-bigquery-storage package is older than the minimum
# supported version.
class LegacyBigQueryStorageError(BigQueryError):
    """Raised when too old a version of BigQuery Storage extra is detected at runtime."""


# Raised by PyarrowVersions.try_import(raise_if_error=True) in _helpers.py
# when pyarrow is missing or older than the minimum supported version.
class LegacyPyarrowError(BigQueryError):
    """Raised when too old a version of pyarrow package is detected at runtime."""
9 changes: 8 additions & 1 deletion noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,16 @@ def unit(session):
default(session)


@nox.session(python=UNIT_TEST_PYTHON_VERSIONS[-1])
@nox.session(python=[UNIT_TEST_PYTHON_VERSIONS[0], UNIT_TEST_PYTHON_VERSIONS[-1]])
def unit_noextras(session):
    """Run the unit test suite."""
    # Runs without the package extras installed; on the oldest supported
    # Python an intentionally outdated pyarrow is installed so the
    # legacy-version guards are exercised.

    # Install optional dependencies that are out-of-date.
    # https://github.com/googleapis/python-bigquery/issues/933
    # There is no pyarrow 1.0.0 package for Python 3.9.
    if session.python == UNIT_TEST_PYTHON_VERSIONS[0]:
        session.install("pyarrow==1.0.0")

    default(session, install_extras=False)


Expand Down
2 changes: 1 addition & 1 deletion testing/constraints-3.6.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,6 @@ proto-plus==1.10.0
protobuf==3.12.0
pyarrow==3.0.0
requests==2.18.0
shapely==1.6.0
Shapely==1.6.0
six==1.13.0
tqdm==4.7.4
8 changes: 4 additions & 4 deletions tests/unit/job/test_query_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,6 @@
import geopandas
except (ImportError, AttributeError): # pragma: NO COVER
geopandas = None
try:
import pyarrow
except (ImportError, AttributeError): # pragma: NO COVER
pyarrow = None
try:
from google.cloud import bigquery_storage
except (ImportError, AttributeError): # pragma: NO COVER
Expand All @@ -44,11 +40,15 @@
except (ImportError, AttributeError): # pragma: NO COVER
tqdm = None

from google.cloud.bigquery import _helpers
from .helpers import _make_client
from .helpers import _make_connection
from .helpers import _make_job_resource


pyarrow = _helpers.PYARROW_VERSIONS.try_import()


@pytest.fixture
def table_read_options_kwarg():
# Create a BigQuery Storage table read options object with pyarrow compression
Expand Down
68 changes: 68 additions & 0 deletions tests/unit/test__helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,20 @@
except ImportError: # pragma: NO COVER
bigquery_storage = None

try:
import pyarrow
except ImportError: # pragma: NO COVER
pyarrow = None


@unittest.skipIf(bigquery_storage is None, "Requires `google-cloud-bigquery-storage`")
class TestBQStorageVersions(unittest.TestCase):
    def tearDown(self):
        """Clear module-level cached version state after each test."""
        from google.cloud.bigquery import _helpers

        # Reset any cached versions since it may not match reality.
        _helpers.BQ_STORAGE_VERSIONS._installed_version = None

def _object_under_test(self):
from google.cloud.bigquery import _helpers

Expand Down Expand Up @@ -89,6 +100,63 @@ def test_is_read_session_optional_false(self):
assert not versions.is_read_session_optional


@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
class TestPyarrowVersions(unittest.TestCase):
    """Tests for the ``_helpers.PyarrowVersions`` runtime version guard."""

    def tearDown(self):
        from google.cloud.bigquery import _helpers

        # Reset any cached versions since it may not match reality.
        _helpers.PYARROW_VERSIONS._installed_version = None

    def _object_under_test(self):
        # A fresh instance whose version cache is still empty.
        from google.cloud.bigquery import _helpers

        return _helpers.PyarrowVersions()

    def _call_try_import(self, **kwargs):
        from google.cloud.bigquery import _helpers

        # Clear the cached version first so the (possibly mocked)
        # pyarrow.__version__ is parsed again by this call.
        _helpers.PYARROW_VERSIONS._installed_version = None
        return _helpers.PYARROW_VERSIONS.try_import(**kwargs)

    def test_try_import_raises_no_error_w_recent_pyarrow(self):
        from google.cloud.bigquery.exceptions import LegacyPyarrowError

        # A sufficiently new version must import cleanly even in strict mode.
        with mock.patch("pyarrow.__version__", new="5.0.0"):
            try:
                pyarrow = self._call_try_import(raise_if_error=True)
                self.assertIsNotNone(pyarrow)
            except LegacyPyarrowError:  # pragma: NO COVER
                self.fail("Legacy error raised with a non-legacy dependency version.")

    def test_try_import_returns_none_w_legacy_pyarrow(self):
        # Outdated version without raise_if_error: expect a silent None.
        with mock.patch("pyarrow.__version__", new="2.0.0"):
            pyarrow = self._call_try_import()
            self.assertIsNone(pyarrow)

    def test_try_import_raises_error_w_legacy_pyarrow(self):
        from google.cloud.bigquery.exceptions import LegacyPyarrowError

        # Outdated version with raise_if_error=True: expect the legacy error.
        with mock.patch("pyarrow.__version__", new="2.0.0"):
            with self.assertRaises(LegacyPyarrowError):
                self._call_try_import(raise_if_error=True)

    def test_installed_version_returns_cached(self):
        # A pre-populated cache is returned as-is (no re-import / re-parse).
        versions = self._object_under_test()
        versions._installed_version = object()
        assert versions.installed_version is versions._installed_version

    def test_installed_version_returns_parsed_version(self):
        versions = self._object_under_test()

        with mock.patch("pyarrow.__version__", new="1.2.3"):
            version = versions.installed_version

        assert version.major == 1
        assert version.minor == 2
        assert version.micro == 3


class Test_not_null(unittest.TestCase):
def _call_fut(self, value, field):
from google.cloud.bigquery._helpers import _not_null
Expand Down
29 changes: 18 additions & 11 deletions tests/unit/test__pandas_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,6 @@
import pandas.testing
except ImportError: # pragma: NO COVER
pandas = None
try:
import pyarrow
import pyarrow.types
except ImportError: # pragma: NO COVER
# Mock out pyarrow when missing, because methods from pyarrow.types are
# used in test parameterization.
pyarrow = mock.Mock()
try:
import geopandas
except ImportError: # pragma: NO COVER
Expand All @@ -44,9 +37,19 @@
import pytest

from google import api_core
from google.cloud.bigquery import exceptions
from google.cloud.bigquery import _helpers
from google.cloud.bigquery import schema


pyarrow = _helpers.PYARROW_VERSIONS.try_import()
if pyarrow:
import pyarrow.types
else: # pragma: NO COVER
# Mock out pyarrow when missing, because methods from pyarrow.types are
# used in test parameterization.
pyarrow = mock.Mock()

try:
from google.cloud import bigquery_storage

Expand Down Expand Up @@ -1120,15 +1123,19 @@ def test_dataframe_to_arrow_dict_sequence_schema(module_under_test):

@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
def test_dataframe_to_parquet_without_pyarrow(module_under_test, monkeypatch):
monkeypatch.setattr(module_under_test, "pyarrow", None)
with pytest.raises(ValueError) as exc_context:
mock_pyarrow_import = mock.Mock()
mock_pyarrow_import.side_effect = exceptions.LegacyPyarrowError(
"pyarrow not installed"
)
monkeypatch.setattr(_helpers.PYARROW_VERSIONS, "try_import", mock_pyarrow_import)

with pytest.raises(exceptions.LegacyPyarrowError):
module_under_test.dataframe_to_parquet(pandas.DataFrame(), (), None)
assert "pyarrow is required" in str(exc_context.value)


@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
def test_dataframe_to_parquet_w_extra_fields(module_under_test, monkeypatch):
def test_dataframe_to_parquet_w_extra_fields(module_under_test):
with pytest.raises(ValueError) as exc_context:
module_under_test.dataframe_to_parquet(
pandas.DataFrame(), (schema.SchemaField("not_in_df", "STRING"),), None
Expand Down
12 changes: 6 additions & 6 deletions tests/unit/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,18 +45,18 @@
except (ImportError, AttributeError): # pragma: NO COVER
geopandas = None

try:
import pyarrow
import pyarrow.types
except ImportError: # pragma: NO COVER
pyarrow = None

try:
from tqdm import tqdm
except (ImportError, AttributeError): # pragma: NO COVER
tqdm = None

from google.cloud.bigquery.dataset import DatasetReference
from google.cloud.bigquery import _helpers


pyarrow = _helpers.PYARROW_VERSIONS.try_import()
if pyarrow:
import pyarrow.types


def _mock_client():
Expand Down