-
Notifications
You must be signed in to change notification settings - Fork 1.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Custom names for generic tests #4898
Changes from 7 commits
82b3740
705e6ed
ed3a1a5
d0e1fde
444c79d
6674932
75b2f43
2117072
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
kind: Features | ||
body: Support custom names for generic tests | ||
time: 2022-03-18T08:57:56.05584+01:00 | ||
custom: | ||
Author: jtcohen6 | ||
Issue: "3348" | ||
PR: "4898" |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -838,31 +838,46 @@ def raise_duplicate_macro_name(node_1, node_2, namespace) -> NoReturn: | |
|
||
def raise_duplicate_resource_name(node_1, node_2): | ||
duped_name = node_1.name | ||
node_type = NodeType(node_1.resource_type) | ||
pluralized = ( | ||
node_type.pluralize() | ||
if node_1.resource_type == node_2.resource_type | ||
else "resources" # still raise if ref() collision, e.g. model + seed | ||
) | ||
|
||
if node_1.resource_type in NodeType.refable(): | ||
get_func = 'ref("{}")'.format(duped_name) | ||
elif node_1.resource_type == NodeType.Source: | ||
action = "looking for" | ||
# duplicate 'ref' targets | ||
if node_type in NodeType.refable(): | ||
formatted_name = f'ref("{duped_name}")' | ||
# duplicate sources | ||
elif node_type == NodeType.Source: | ||
duped_name = node_1.get_full_source_name() | ||
get_func = node_1.get_source_representation() | ||
elif node_1.resource_type == NodeType.Documentation: | ||
get_func = 'doc("{}")'.format(duped_name) | ||
elif node_1.resource_type == NodeType.Test and "schema" in node_1.tags: | ||
return | ||
formatted_name = node_1.get_source_representation() | ||
# duplicate docs blocks | ||
elif node_type == NodeType.Documentation: | ||
formatted_name = f'doc("{duped_name}")' | ||
# duplicate generic tests | ||
elif node_type == NodeType.Test and hasattr(node_1, "test_metadata"): | ||
column_name = f'column "{node_1.column_name}" in ' if node_1.column_name else "" | ||
# TODO: we need a better way of storing the target model/source/etc in generic tests | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's log a tech debt ticket and put the github issue number in the comment here, unless @gshank's suggestion is the solution. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Gerda's suggestion is definitely better than what I'm doing here, and I think it's enough for now—but I agree that this is a limitation in generic tests, which also prevents us from being able to do things like #4723 |
||
model_name = ".".join((node_1.refs or node_1.sources)[0]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure if this would help, but I did add 'file_key_name' to ParsedGenericTestNode because I was having the same problem in partial parsing. It stores things like 'models.my_model' and 'sources.my_source'. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Great call! This is much less hacky, so I think it's the right way to go. I do wish the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, I was thinking you might want that. It might be possible to update it to do that... |
||
duped_name = f'{node_1.name}" defined on {column_name}"{model_name}' | ||
action = "running" | ||
formatted_name = "tests" | ||
# all other resource types | ||
else: | ||
get_func = '"{}"'.format(duped_name) | ||
formatted_name = duped_name | ||
|
||
# should this be raise_parsing_error instead? | ||
raise_compiler_error( | ||
'dbt found two resources with the name "{}". Since these resources ' | ||
"have the same name,\ndbt will be unable to find the correct resource " | ||
"when {} is used. To fix this,\nchange the name of one of " | ||
"these resources:\n- {} ({})\n- {} ({})".format( | ||
duped_name, | ||
get_func, | ||
node_1.unique_id, | ||
node_1.original_file_path, | ||
node_2.unique_id, | ||
node_2.original_file_path, | ||
) | ||
f'dbt found two {pluralized} with the name "{duped_name}". ' | ||
f"\n" | ||
f"\nSince these resources have the same name, dbt will be unable to find the correct resource " | ||
f"\nwhen {action} {formatted_name}. " | ||
f"\n" | ||
f"\nTo fix this, change the name of one of these resources: " | ||
f"\n- {node_1.unique_id} ({node_1.original_file_path}) " | ||
f"\n- {node_2.unique_id} ({node_2.original_file_path})" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [nit] for readability, I'd rather see a multiline f-string instead of relying on c-style string concatenation with explicit newline characters. For example:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The downside of a multiline f-string is that indentation gets messy:
There is a way to get the indentation right, but then it looks a bit odd in the source code: raise_compiler_error(f"""
dbt found two {pluralized} with the name "{duped_name}".
Since these resources have the same name, dbt will be unable to find the correct resource
when {action} {formatted_name}.
To fix this, change the name of one of these resources:
- {node_1.unique_id} ({node_1.original_file_path})
- {node_2.unique_id} ({node_2.original_file_path})
""".strip()) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. flake + black seem to be happy with the latter, so I think that's what I'll go with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yup. this is exactly the trade off, but I most commonly see the latter across languages that support "raw strings" so I'm inclined to do it here as well. |
||
) | ||
|
||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,9 +25,13 @@ | |
from dbt.parser.search import FileBlock | ||
|
||
|
||
def get_nice_generic_test_name( | ||
def synthesize_generic_test_names( | ||
test_type: str, test_name: str, args: Dict[str, Any] | ||
) -> Tuple[str, str]: | ||
# Using the type, name, and arguments to this generic test, synthesize a (hopefully) unique name | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [nit] If we're going to use the word "hopefully" in this comment let's lay out exactly when this would not be unique. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. thanks for adding that! |
||
# Will not be unique if multiple tests have same name + arguments, and only configs differ | ||
# Returns a shorter version (hashed/truncated, for the compiled file) | ||
# as well as the full name (for the unique_id + FQN) | ||
flat_args = [] | ||
for arg_name in sorted(args): | ||
# the model is already embedded in the name, so skip it | ||
|
@@ -263,13 +267,25 @@ def __init__( | |
if self.namespace is not None: | ||
self.package_name = self.namespace | ||
|
||
compiled_name, fqn_name = self.get_test_name() | ||
self.compiled_name: str = compiled_name | ||
self.fqn_name: str = fqn_name | ||
|
||
# use hashed name as alias if too long | ||
if compiled_name != fqn_name and "alias" not in self.config: | ||
self.config["alias"] = compiled_name | ||
# If the user has provided a custom name for this generic test, use it | ||
# Then delete the "name" argument to avoid passing it into the test macro | ||
# Otherwise, use an auto-generated name synthesized from test inputs | ||
self.compiled_name: str = "" | ||
self.fqn_name: str = "" | ||
|
||
if "name" in self.args: | ||
# Trust the user to have a unique name for this model + column combo | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [nit] "trust the user" makes it sounds like we're relying on them to enforce uniqueness themselves. We aren't because we raise an error. I'd change this comment to something more like "assign the user-defined name here, but it will be checked for uniqueness later" |
||
# otherwise, raise_duplicate_resource_name later on | ||
self.compiled_name = self.args["name"] | ||
self.fqn_name = self.args["name"] | ||
del self.args["name"] | ||
else: | ||
short_name, full_name = self.get_synthetic_test_names() | ||
self.compiled_name = short_name | ||
self.fqn_name = full_name | ||
# use hashed name as alias if full name is too long | ||
if short_name != full_name and "alias" not in self.config: | ||
self.config["alias"] = short_name | ||
|
||
def _bad_type(self) -> TypeError: | ||
return TypeError('invalid target type "{}"'.format(type(self.target))) | ||
|
@@ -281,13 +297,23 @@ def extract_test_args(test, name=None) -> Tuple[str, Dict[str, Any]]: | |
"test must be dict or str, got {} (value {})".format(type(test), test) | ||
) | ||
|
||
test = list(test.items()) | ||
if len(test) != 1: | ||
raise_parsing_error( | ||
"test definition dictionary must have exactly one key, got" | ||
" {} instead ({} keys)".format(test, len(test)) | ||
) | ||
test_name, test_args = test[0] | ||
# If the test is a dictionary with top-level keys, the test name is "test_name" | ||
# and the rest are arguments | ||
# {'name': 'my_favorite_test', 'test_name': 'unique', 'config': {'where': '1=1'}} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
if "test_name" in test.keys(): | ||
test_name = test.pop("test_name") | ||
test_args = test | ||
# If the test is a nested dictionary with one top-level key, the test name | ||
# is the dict name, and nested keys are arguments | ||
# {'unique': {'name': 'my_favorite_test', 'config': {'where': '1=1'}}} | ||
else: | ||
test = list(test.items()) | ||
if len(test) != 1: | ||
raise_parsing_error( | ||
"test definition dictionary must have exactly one key, got" | ||
" {} instead ({} keys)".format(test, len(test)) | ||
) | ||
test_name, test_args = test[0] | ||
|
||
if not isinstance(test_args, dict): | ||
raise_parsing_error( | ||
|
@@ -401,7 +427,8 @@ def macro_name(self) -> str: | |
macro_name = "{}.{}".format(self.namespace, macro_name) | ||
return macro_name | ||
|
||
def get_test_name(self) -> Tuple[str, str]: | ||
def get_synthetic_test_names(self) -> Tuple[str, str]: | ||
# Returns two names: shorter (for the compiled file), full (for the unique_id + FQN) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This type signature is a very easy-to-misuse one. good call adding a comment. |
||
if isinstance(self.target, UnparsedNodeUpdate): | ||
name = self.name | ||
elif isinstance(self.target, UnpatchedSourceDefinition): | ||
|
@@ -410,7 +437,7 @@ def get_test_name(self) -> Tuple[str, str]: | |
raise self._bad_type() | ||
if self.namespace is not None: | ||
name = "{}_{}".format(self.namespace, name) | ||
return get_nice_generic_test_name(name, self.target.name, self.args) | ||
return synthesize_generic_test_names(name, self.target.name, self.args) | ||
|
||
def construct_config(self) -> str: | ||
configs = ",".join( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,9 @@ | |
seeds, | ||
test_context_models, | ||
name_collision, | ||
dupe_tests_collide, | ||
custom_generic_test_names, | ||
custom_generic_test_names_alt_format, | ||
test_context_where_subq_macros, | ||
invalid_schema_models, | ||
all_models, | ||
|
@@ -25,7 +28,7 @@ | |
project_files, | ||
case_sensitive_models__uppercase_SQL, | ||
) | ||
from dbt.exceptions import ParsingException | ||
from dbt.exceptions import ParsingException, CompilationException | ||
from dbt.contracts.results import TestStatus | ||
|
||
|
||
|
@@ -658,6 +661,60 @@ def test_collision_test_names_get_hash( | |
assert test_results[1].node.unique_id in expected_unique_ids | ||
|
||
|
||
class TestGenericTestsCollide: | ||
@pytest.fixture(scope="class") | ||
def models(self, dupe_tests_collide): # noqa: F811 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [nit] There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I copied this from the other test definitions, it looks like this is a
Indeed because There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. interesting. that seems like a flake8 bug counting a reference as a redefinition. thanks for double checking! |
||
return dupe_tests_collide | ||
|
||
def test_generic_test_collision( | ||
self, | ||
project, | ||
): | ||
"""These tests collide, since only the configs differ""" | ||
with pytest.raises(CompilationException) as exc: | ||
run_dbt() | ||
assert "dbt found two tests with the name" in str(exc) | ||
|
||
|
||
class TestGenericTestsCustomNames: | ||
@pytest.fixture(scope="class") | ||
def models(self, custom_generic_test_names): # noqa: F811 | ||
return custom_generic_test_names | ||
|
||
def test_collision_test_names_get_hash( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [nit] let's add a comment to all the test methods describing why we need this test. (imagine the reader is considering deleting it) |
||
self, | ||
project, | ||
): | ||
"""These tests don't collide, since they have user-provided custom names""" | ||
results = run_dbt() | ||
test_results = run_dbt(["test"]) | ||
|
||
# model + both tests run | ||
assert len(results) == 1 | ||
assert len(test_results) == 2 | ||
|
||
# custom names propagate to the unique_id | ||
expected_unique_ids = [ | ||
"test.test.not_null_where_1_equals_1.7b96089006", | ||
"test.test.not_null_where_1_equals_2.8ae586e17f", | ||
] | ||
assert test_results[0].node.unique_id in expected_unique_ids | ||
assert test_results[1].node.unique_id in expected_unique_ids | ||
|
||
|
||
class TestGenericTestsCustomNamesAltFormat(TestGenericTestsCustomNames): | ||
@pytest.fixture(scope="class") | ||
def models(self, custom_generic_test_names_alt_format): # noqa: F811 | ||
return custom_generic_test_names_alt_format | ||
|
||
# exactly as above, just alternative format for yaml definition | ||
def test_collision_test_names_get_hash( | ||
self, | ||
project, | ||
): | ||
super().test_collision_test_names_get_hash(project) | ||
|
||
|
||
class TestInvalidSchema: | ||
@pytest.fixture(scope="class") | ||
def models(self, invalid_schema_models): # noqa: F811 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it just generic tests or all tests?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Singular tests take their name from the file in which they're defined (
tests/my_custom_test.sql
), just like models — so they've always required custom namesThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Even more evidence that this PR is a good idea!