Skip to content

Commit b2ed341

Browse files
anuunchin and rudolfix authored
QoL: accept destination name as shorthand form of destination (#3122)
* Destination type lookup fallback in from_reference
* Test test_import_destination_type_config
* Docs adjusted, e2e test added
* e2e test with pipeline load
* first named des with type, then dest as type
* Tests fixed, exception improved, docs adjusted
* fixes plugin module list
* Code simplified
* exception just sets attrs, from_name removed

---------

Co-authored-by: Marcin Rudolf <rudolfix@rudolfix.org>
1 parent 352b053 commit b2ed341

File tree

14 files changed

+284
-58
lines changed

14 files changed

+284
-58
lines changed

dlt/common/configuration/exceptions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ def _build_config_error_message(
113113

114114
def __str__(self) -> str:
115115
msg = (
116-
f"Missing {len(self.fields)} field(s) in configuration`{self.spec_name}`:"
116+
f"Missing {len(self.fields)} field(s) in configuration `{self.spec_name}`:"
117117
f" {', '.join(f'`{f}`' for f in self.fields)}\n"
118118
)
119119
msg += self._build_config_error_message(

dlt/common/destination/exceptions.py

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from typing import Any, Iterable, List, Sequence
2+
import textwrap
23

34
from dlt.common.exceptions import DltException, TerminalException, TransientException
45
from dlt.common.reflection.exceptions import ReferenceImportError
@@ -11,17 +12,58 @@ class DestinationException(DltException):
1112

1213
class UnknownDestinationModule(ReferenceImportError, DestinationException, KeyError):
1314
def __init__(
14-
self, ref: str, qualified_refs: Sequence[str], traces: Sequence[ImportTrace]
15+
self,
16+
ref: str,
17+
qualified_refs: Sequence[str],
18+
traces: Sequence[ImportTrace],
19+
destination_type: str = None,
20+
named_dest_attempted: bool = False,
1521
) -> None:
1622
self.ref = ref
1723
self.qualified_refs = qualified_refs
24+
self.destination_type = destination_type
25+
self.named_dest_attempted = named_dest_attempted
1826
super().__init__(traces=traces)
1927

2028
def __str__(self) -> str:
29+
msg = ""
2130
if "." in self.ref:
22-
msg = f"Destination module `{self.ref}` is not registered."
31+
msg += f"Destination module `{self.ref}` is not registered."
2332
else:
24-
msg = f"Destination `{self.ref}` is not one of the standard dlt destinations."
33+
if self.named_dest_attempted:
34+
msg += (
35+
f"Destination '{self.ref}' was first attempted to be resolved as a named"
36+
" destination with a configured type. "
37+
)
38+
if self.destination_type:
39+
msg += (
40+
f"However, the configured destination type '{self.destination_type}' is not"
41+
" valid. Set a valid destination type. "
42+
)
43+
else:
44+
msg += (
45+
"However, no destination type was configured. "
46+
"If your destination is a named destination, "
47+
"set a valid destination type either as an environment variable:\n\n"
48+
)
49+
msg += textwrap.indent(
50+
f"DESTINATION__{self.ref.upper()}__DESTINATION_TYPE=duckdb\n", " "
51+
)
52+
msg += "\nor in your configuration files:\n\n"
53+
msg += textwrap.indent(
54+
f'[destination.{self.ref}]\ndestination_type="duckdb"\n\n', " "
55+
)
56+
57+
msg += (
58+
f"Since no{' valid' if self.destination_type else ''} destination type was"
59+
f" found, dlt also tried to resolve '{self.ref}' as a standard destination."
60+
" However, "
61+
)
62+
63+
msg += (
64+
f"{'d' if self.named_dest_attempted else 'D'}estination `{self.ref}` is not one of"
65+
" the standard dlt destinations."
66+
)
2567

2668
if len(self.qualified_refs) == 1 and self.qualified_refs[0] == self.ref:
2769
pass

dlt/common/destination/reference.py

Lines changed: 64 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from typing_extensions import TypeAlias
1717
import inspect
1818

19+
import dlt
1920
from dlt.common import logger
2021
from dlt.common.configuration.specs.base_configuration import BaseConfiguration
2122
from dlt.common.normalizers.naming import NamingConvention
@@ -267,9 +268,36 @@ def from_reference(
267268
)
268269
return ref
269270

270-
return DestinationReference.from_reference(
271-
ref, credentials, destination_name, environment, **kwargs
272-
)
271+
# If destination name is provided or ref is a module ref
272+
# don't attempt to resolve as named destination
273+
if destination_name or "." in ref:
274+
return DestinationReference.from_reference(
275+
ref, credentials, destination_name, environment, **kwargs
276+
)
277+
278+
# First, try to resolve as a named destination with configured type
279+
destination_type: str = dlt.config.get(f"destination.{ref}.destination_type")
280+
if destination_type:
281+
try:
282+
return DestinationReference.from_reference(
283+
ref=destination_type,
284+
destination_name=ref,
285+
credentials=credentials,
286+
environment=environment,
287+
**kwargs,
288+
)
289+
except Exception:
290+
pass
291+
292+
# Then, try to resolve as a shorthand destination ref
293+
try:
294+
return DestinationReference.from_reference(
295+
ref, credentials, destination_name, environment, **kwargs
296+
)
297+
except UnknownDestinationModule as e:
298+
e.destination_type = destination_type
299+
e.named_dest_attempted = True
300+
raise e
273301

274302

275303
class DestinationReference:
@@ -278,17 +306,6 @@ class DestinationReference:
278306
DESTINATIONS: ClassVar[Dict[str, Type[AnyDestination]]] = {}
279307
"""A registry of all the destination factories"""
280308

281-
@staticmethod
282-
def normalize_type(destination_type: str) -> str:
283-
"""Normalizes destination type string into a canonical form. Assumes that type names without dots correspond to built in destinations."""
284-
if "." not in destination_type:
285-
destination_type = "dlt.destinations." + destination_type
286-
# the next two lines shorten the dlt internal destination paths to dlt.destinations.<destination_type>
287-
pattern = r"\.destinations\.impl\.[a-zA-Z_][.a-zA-Z0-9_]*\."
288-
replacement = ".destinations."
289-
destination_type = re.sub(pattern, replacement, destination_type)
290-
return destination_type
291-
292309
@classmethod
293310
def register(cls, factory: Type[AnyDestination_CO], ref: str) -> None:
294311
"""Registers `factory` class under `ref`. `ref`"""
@@ -299,28 +316,6 @@ def register(cls, factory: Type[AnyDestination_CO], ref: str) -> None:
299316
)
300317
cls.DESTINATIONS[ref] = factory
301318

302-
@staticmethod
303-
def to_fully_qualified_refs(ref: str) -> List[str]:
304-
"""Converts ref into fully qualified form, return one or more alternatives for shorthand notations.
305-
Run context is injected if needed. Following formats are recognized
306-
- name
307-
NOTE: the last component of destination type serves as destination name if not explicitly specified
308-
"""
309-
ref_split = ref.split(".")
310-
ref_parts = len(ref_split)
311-
if ref_parts < 2:
312-
# context name is needed
313-
refs = []
314-
for ref_prefix in get_plugin_modules():
315-
if ref_prefix:
316-
ref_prefix = f"{ref_prefix}.{known_sections.DESTINATIONS}"
317-
else:
318-
ref_prefix = f"{known_sections.DESTINATIONS}"
319-
refs.append(f"{ref_prefix}.{ref}")
320-
return refs
321-
322-
return []
323-
324319
@classmethod
325320
def find(
326321
cls,
@@ -411,3 +406,36 @@ def from_reference(
411406
if environment:
412407
kwargs["environment"] = environment
413408
return factory(**kwargs)
409+
410+
@staticmethod
411+
def normalize_type(destination_type: str) -> str:
412+
"""Normalizes destination type string into a canonical form. Assumes that type names without dots correspond to built in destinations."""
413+
if "." not in destination_type:
414+
destination_type = "dlt.destinations." + destination_type
415+
# the next two lines shorten the dlt internal destination paths to dlt.destinations.<destination_type>
416+
pattern = r"\.destinations\.impl\.[a-zA-Z_][.a-zA-Z0-9_]*\."
417+
replacement = ".destinations."
418+
destination_type = re.sub(pattern, replacement, destination_type)
419+
return destination_type
420+
421+
@staticmethod
422+
def to_fully_qualified_refs(ref: str) -> List[str]:
423+
"""Converts ref into fully qualified form, return one or more alternatives for shorthand notations.
424+
Run context is injected if needed. Following formats are recognized
425+
- name
426+
NOTE: the last component of destination type serves as destination name if not explicitly specified
427+
"""
428+
ref_split = ref.split(".")
429+
ref_parts = len(ref_split)
430+
if ref_parts < 2:
431+
# context name is needed
432+
refs = []
433+
for ref_prefix in get_plugin_modules():
434+
if ref_prefix:
435+
ref_prefix = f"{ref_prefix}.{known_sections.DESTINATIONS}"
436+
else:
437+
ref_prefix = f"{known_sections.DESTINATIONS}"
438+
refs.append(f"{ref_prefix}.{ref}")
439+
return refs
440+
441+
return []

dlt/common/runtime/run_context.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from contextlib import contextmanager
1+
from contextlib import contextmanager, suppress
22
import os
33
import tempfile
44
import warnings
@@ -204,7 +204,11 @@ def get_plugin_modules() -> List[str]:
204204
ctx_module = active().module
205205
run_module_name = ctx_module.__name__ if ctx_module else ""
206206

207-
return [run_module_name] + [p for p in Container()[PluginContext].plugin_modules]
207+
plugin_modules = Container()[PluginContext].plugin_modules.copy()
208+
with suppress(ValueError):
209+
plugin_modules.remove(run_module_name)
210+
plugin_modules.insert(0, run_module_name)
211+
return plugin_modules
208212

209213

210214
def context_uri(name: str, run_dir: str, runtime_kwargs: Optional[Dict[str, Any]]) -> str:

docs/website/docs/general-usage/destination.md

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,21 @@ We recommend that you declare the destination type when creating a pipeline inst
1818

1919
Above, we want to use the **filesystem** built-in destination. You can use shorthand types only for built-ins.
2020

21+
* Use a **custom destination name** with a configured type
22+
<!--@@@DLT_SNIPPET ./snippets/destination-snippets.py::custom_destination_name-->
23+
24+
Above, we use a custom destination name and configure the destination type to **filesystem** using an environment variable.
25+
26+
:::note
27+
When resolving non-module destination references (e.g., `"filesystem"` or `"my_destination"`, not `"dlt.destinations.filesystem"`), dlt first attempts to resolve the reference as a named destination with a valid destination type configured, then falls back to shorthand type resolution.
28+
29+
This means that, in the previous example, if the destination type was not properly configured or was not a valid destination type, dlt would have attempted to resolve `"my_destination"` as a shorthand for a built-in type and would have eventually failed.
30+
31+
As another example, the following:
32+
<!--@@@DLT_SNIPPET ./snippets/destination-snippets.py::avoid_example-->
33+
will be resolved as a BigQuery destination that is named `"filesystem"`!
34+
:::
35+
2136
* Use full **destination factory type**
2237
<!--@@@DLT_SNIPPET ./snippets/destination-snippets.py::class_type-->
2338

@@ -30,32 +45,34 @@ Above, we import the destination factory for **filesystem** and pass it to the p
3045

3146
All examples above will create the same destination class with default parameters and pull required config and secret values from [configuration](credentials/index.md) - they are equivalent.
3247

33-
34-
### Pass explicit parameters and a name to a destination
48+
### Pass explicit parameters and a name to a destination factory
3549
You can instantiate the **destination factory** yourself to configure it explicitly. When doing this, you work with destinations the same way you work with [sources](source.md)
3650
<!--@@@DLT_SNIPPET ./snippets/destination-snippets.py::instance-->
3751

3852
Above, we import and instantiate the `filesystem` destination factory. We pass the explicit URL of the bucket and name the destination `production_az_bucket`.
3953

40-
If a destination is not named, its shorthand type (the Python factory name) serves as a destination name. Name your destination explicitly if you need several separate configurations of destinations of the same type (i.e., you wish to maintain credentials for development, staging, and production storage buckets in the same config file). The destination name is also stored in the [load info](../running-in-production/running.md#inspect-and-save-the-load-info-and-trace) and pipeline traces, so use them also when you need more descriptive names (other than, for example, `filesystem`).
54+
If a destination is not named, its shorthand type (the Python factory name) serves as the destination name. Name your destination explicitly if you need several separate configurations for destinations of the same type (e.g., when you wish to maintain credentials for development, staging, and production storage buckets in the same config file). The destination name is also stored in the [load info](../running-in-production/running.md#inspect-and-save-the-load-info-and-trace) and pipeline traces, so use explicit names when you need more descriptive identifiers (rather than generic names like `filesystem`).
4155

4256

4357
## Configure a destination
4458
We recommend passing the credentials and other required parameters to configuration via TOML files, environment variables, or other [config providers](credentials/setup). This allows you, for example, to easily switch to production destinations after deployment.
4559

46-
We recommend using the [default config section layout](credentials/advanced#organize-configuration-and-secrets-with-sections) as below:
60+
Use the [default config section layout](credentials/advanced#organize-configuration-and-secrets-with-sections) as shown below:
4761
<!--@@@DLT_SNIPPET ./snippets/destination-toml.toml::default_layout-->
4862

49-
or via environment variables:
63+
Alternatively, you can use environment variables:
5064
```sh
5165
DESTINATION__FILESYSTEM__BUCKET_URL=az://dlt-azure-bucket
5266
DESTINATION__FILESYSTEM__CREDENTIALS__AZURE_STORAGE_ACCOUNT_NAME=dltdata
5367
DESTINATION__FILESYSTEM__CREDENTIALS__AZURE_STORAGE_ACCOUNT_KEY="storage key"
5468
```
5569

56-
For named destinations, you use their names in the config section
70+
When using named destination factories, use the destination name in the config section:
5771
<!--@@@DLT_SNIPPET ./snippets/destination-toml.toml::name_layout-->
5872

73+
For custom destination names passed to your pipeline (e.g., `destination="my_destination"`), dlt resolves the destination type from configuration. Add `destination_type` to specify which destination type to use:
74+
<!--@@@DLT_SNIPPET ./snippets/destination-toml.toml::custom_name_layout-->
75+
5976

6077
Note that when you use the [`dlt init` command](../walkthroughs/add-a-verified-source.md) to create or add a data source, `dlt` creates a sample configuration for the selected destination.
6178

docs/website/docs/general-usage/pipeline.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ You instantiate a pipeline by calling the `dlt.pipeline` function with the follo
3131
events and to restore its state and data schemas on subsequent runs. If not provided, `dlt` will
3232
create a pipeline name from the file name of the currently executing Python module.
3333
- `destination`: a name of the [destination](../dlt-ecosystem/destinations) to which dlt
34-
will load the data. It may also be provided to the `run` method of the `pipeline`.
34+
will load the data. It may also be provided to the `run` method of the `pipeline` and can be declared in [various ways](destination.md).
3535
- `dataset_name`: a name of the dataset to which the data will be loaded. A dataset is a logical
3636
group of tables, i.e., `schema` in relational databases or a folder grouping many files. It may also be
3737
provided later to the `run` or `load` methods of the pipeline. If not provided, then

docs/website/docs/general-usage/snippets/destination-snippets.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,30 @@ def destination_instantiation_snippet() -> None:
3030

3131
assert pipeline.destination.destination_name == "filesystem"
3232

33+
# @@@DLT_SNIPPET_START custom_destination_name
34+
import os
35+
import dlt
36+
37+
os.environ["DESTINATION__MY_DESTINATION__DESTINATION_TYPE"] = "filesystem"
38+
39+
pipeline = dlt.pipeline("pipeline", destination="my_destination")
40+
# @@@DLT_SNIPPET_END custom_destination_name
41+
42+
assert pipeline.destination.destination_type == "dlt.destinations.filesystem"
43+
assert pipeline.destination.destination_name == "my_destination"
44+
45+
# @@@DLT_SNIPPET_START avoid_example
46+
import os
47+
import dlt
48+
49+
os.environ["DESTINATION__FILESYSTEM__DESTINATION_TYPE"] = "bigquery"
50+
51+
pipeline = dlt.pipeline("pipeline", destination="filesystem")
52+
# @@@DLT_SNIPPET_END avoid_example
53+
54+
assert pipeline.destination.destination_type == "dlt.destinations.bigquery"
55+
assert pipeline.destination.destination_name == "filesystem"
56+
3357
# @@@DLT_SNIPPET_START instance
3458
import dlt
3559

docs/website/docs/general-usage/snippets/destination-toml.toml

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,13 @@ bucket_url="az://dlt-azure-bucket"
1212
[destination.production_az_bucket.credentials]
1313
azure_storage_account_name="dltdata"
1414
azure_storage_account_key="storage key"
15-
# @@@DLT_SNIPPET_END name_layout
15+
# @@@DLT_SNIPPET_END name_layout
16+
17+
# @@@DLT_SNIPPET_START custom_name_layout
18+
[destination.my_destination]
19+
destination_type="filesystem"
20+
bucket_url="az://dlt-azure-bucket"
21+
[destination.my_destination.credentials]
22+
azure_storage_account_name="dltdata"
23+
azure_storage_account_key="storage key"
24+
# @@@DLT_SNIPPET_END custom_name_layout

tests/.dlt/config.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,7 @@ bucket_url_r2="s3://dlt-ci-test-bucket"
1212
# use "/" as root path
1313
bucket_url_gdrive="gdrive://15eC3e5MNew2XAIefWNlG8VlEa0ISnnaG"
1414
bucket_url_sftp="sftp://localhost/data"
15-
memory="memory:///m"
15+
memory="memory:///m"
16+
17+
[destination.custom_name]
18+
destination_type = "duckdb"

0 commit comments

Comments
 (0)