-
Notifications
You must be signed in to change notification settings - Fork 1.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
BigQuery: Add support to Dataset for project_ids with org prefix. #8877
Changes from 7 commits
2526dc7
533f959
009ef57
452adf8
3e04273
7bc877f
4c13b06
07dde13
e75e0fc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,7 @@ | |
|
||
import six | ||
import copy | ||
import re | ||
|
||
import google.cloud._helpers | ||
from google.cloud.bigquery import _helpers | ||
|
@@ -26,6 +27,14 @@ | |
from google.cloud.bigquery.table import TableReference | ||
|
||
|
||
_PROJECT_PREFIX_PATTERN = re.compile( | ||
r""" | ||
(?P<prefix>\S+\:\S+)\.+(?P<remaining>\S*) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Now that we're matching this way, prefix isn't the right term. Should be Also, instead of We want to match the whole string, so we should probably end this pattern with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I renamed parts of the pattern, but the second comment about |
||
""", | ||
re.VERBOSE, | ||
) | ||
|
||
|
||
def _get_table_reference(self, table_id): | ||
"""Constructs a TableReference. | ||
|
||
|
@@ -269,7 +278,7 @@ def from_string(cls, dataset_id, default_project=None): | |
Args: | ||
dataset_id (str): | ||
A dataset ID in standard SQL format. If ``default_project`` | ||
is not specified, this must included both the project ID and | ||
is not specified, this must include both the project ID and | ||
the dataset ID, separated by ``.``. | ||
default_project (str): | ||
Optional. The project ID to use when ``dataset_id`` does not | ||
|
@@ -290,13 +299,19 @@ def from_string(cls, dataset_id, default_project=None): | |
""" | ||
output_dataset_id = dataset_id | ||
output_project_id = default_project | ||
parts = dataset_id.split(".") | ||
with_prefix = _PROJECT_PREFIX_PATTERN.match(dataset_id) | ||
if with_prefix is None: | ||
parts = dataset_id.split(".") | ||
else: | ||
prefix = with_prefix.group("prefix") | ||
remaining = with_prefix.group("remaining") | ||
parts = [prefix, remaining] | ||
|
||
if len(parts) == 1 and not default_project: | ||
raise ValueError( | ||
"When default_project is not set, dataset_id must be a " | ||
"fully-qualified dataset ID in standard SQL format. " | ||
'e.g. "project.dataset_id", got {}'.format(dataset_id) | ||
"fully-qualified dataset ID in standard SQL format, " | ||
emar-kar marked this conversation as resolved.
Show resolved
Hide resolved
|
||
'e.g., "project.dataset_id" got {}'.format(dataset_id) | ||
) | ||
elif len(parts) == 2: | ||
output_project_id, output_dataset_id = parts | ||
|
@@ -554,7 +569,7 @@ def from_string(cls, full_dataset_id): | |
Args: | ||
full_dataset_id (str): | ||
A fully-qualified dataset ID in standard SQL format. Must | ||
included both the project ID and the dataset ID, separated by | ||
include both the project ID and the dataset ID, separated by | ||
``.``. | ||
|
||
Returns: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -186,11 +186,22 @@ def test_from_string(self): | |
self.assertEqual(got.project, "string-project") | ||
self.assertEqual(got.dataset_id, "string_dataset") | ||
|
||
def test_from_string_w_prefix(self): | ||
cls = self._get_target_class() | ||
got = cls.from_string("prefix:string-project.string_dataset") | ||
self.assertEqual(got.project, "prefix:string-project") | ||
self.assertEqual(got.dataset_id, "string_dataset") | ||
|
||
def test_from_string_legacy_string(self): | ||
cls = self._get_target_class() | ||
with self.assertRaises(ValueError): | ||
cls.from_string("string-project:string_dataset") | ||
|
||
def test_from_string_w_incorrect_prefix(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's add an additional test where the project ID / dataset ID contains an illegal |
||
cls = self._get_target_class() | ||
with self.assertRaises(ValueError): | ||
cls.from_string("google.com.string-project.dataset_id") | ||
|
||
def test_from_string_not_fully_qualified(self): | ||
cls = self._get_target_class() | ||
with self.assertRaises(ValueError): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why are we using a multi-line string here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just for the readability. I think I’ll switch this to the single line, after correcting the pattern implementation.