Skip to content

Commit 0019c61

Browse files
Merge remote-tracking branch 'origin/devel' into docs/dlthub/init
2 parents dfa81af + a94f5c7 commit 0019c61

File tree

60 files changed

+2311
-279
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

60 files changed

+2311
-279
lines changed

.github/workflows/test_common.yml

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,9 @@ jobs:
4343
python-version: "3.13"
4444
shell: bash
4545

46-
# - os: ubuntu-latest
47-
# python-version: "3.14.0-beta.4"
48-
# shell: bash
46+
- os: "macos-14"
47+
python-version: "3.14"
48+
shell: bash
4949

5050
# linux test with minimal dependencies
5151
- os: ubuntu-latest
@@ -114,40 +114,40 @@ jobs:
114114
115115
- name: Install pyarrow
116116
run: uv sync ${{ matrix.uv_sync_args }} --extra duckdb --extra cli --extra parquet --group sentry-sdk
117-
if: matrix.python-version != '3.14.0-beta.4'
117+
if: matrix.python-version != '3.14'
118118

119119
- name: Run pipeline tests with pyarrow but no pandas installed
120120
run: |
121121
pytest tests/pipeline/test_pipeline_extra.py -k arrow ${{ matrix.pytest_args }}
122-
if: matrix.python-version != '3.14.0-beta.4'
122+
if: matrix.python-version != '3.14'
123123

124124
- name: Install workspace dependencies
125125
run: uv sync ${{ matrix.uv_sync_args }} --extra workspace --extra cli --group sentry-sdk
126-
if: matrix.python-version != '3.14.0-beta.4'
126+
if: matrix.python-version != '3.14'
127127

128128
- name: Run workspace tests
129129
run: |
130130
pytest tests/workspace ${{ matrix.pytest_args }}
131-
if: matrix.python-version != '3.14.0-beta.4'
131+
if: matrix.python-version != '3.14'
132132

133133
- name: Install pipeline and sources dependencies
134-
run: uv sync ${{ matrix.uv_sync_args }} --extra duckdb --extra cli --extra parquet --extra deltalake --extra sql_database --group sentry-sdk --group pipeline --group sources --group ibis
135-
if: matrix.python-version != '3.14.0-beta.4'
134+
run: uv sync ${{ matrix.uv_sync_args }} --extra http --extra duckdb --extra cli --extra parquet --extra deltalake --extra sql_database --group sentry-sdk --group pipeline --group sources --group ibis
135+
if: matrix.python-version != '3.14'
136136

137137
- name: Run extract and pipeline tests
138138
run: |
139139
pytest tests/extract tests/pipeline tests/libs tests/destinations tests/sources ${{ matrix.pytest_args }}
140-
if: matrix.python-version != '3.14.0-beta.4'
140+
if: matrix.python-version != '3.14'
141141

142142
# here we upgrade sql alchemy to 2 an run the sql_database tests again
143143
- name: Upgrade sql alchemy
144144
run: uv run pip install sqlalchemy==2.0.32
145-
if: matrix.python-version != '3.14.0-beta.4'
145+
if: matrix.python-version != '3.14'
146146

147147
- name: Run extract and pipeline tests
148148
run: |
149149
pytest tests/sources/sql_database
150-
if: matrix.python-version != '3.14.0-beta.4'
150+
if: matrix.python-version != '3.14'
151151
matrix_job_required_check:
152152
name: common | common tests
153153
needs: run_common

.github/workflows/test_tools_dashboard.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,16 +104,17 @@ jobs:
104104
run: |
105105
pytest tests/workspace/helpers/dashboard
106106
107-
# Run pipeline dashboard e2e tests (does not pass with python 3.9, does not pass on windows (playwright does not work somehow), does not pass on python 3.13 (ibis not available))
108-
# Mac is also disabled for the time being
107+
# Run pipeline dashboard e2e tests (does not pass with python 3.9
109108
- name: Run dashboard e2e
110109
run: |
111110
marimo run --headless dlt/_workspace/helpers/dashboard/dlt_dashboard.py -- -- --pipelines-dir _storage/.dlt/pipelines/ --with_test_identifiers true & pytest --browser chromium tests/e2e
112111
if: matrix.python-version != '3.9' && matrix.python-version != '3.14.0-beta.4' && matrix.os != 'windows-latest'
113112

113+
# note that this test will pass only when running from cmd shell (_storage\.dlt\pipelines\ must stay)
114114
- name: Run dashboard e2e windows
115115
run: |
116116
start marimo run --headless dlt/_workspace/helpers/dashboard/dlt_dashboard.py -- -- --pipelines-dir _storage\.dlt\pipelines\ --with_test_identifiers true
117+
timeout /t 2 /nobreak >NUL
117118
pytest --browser chromium tests/e2e
118119
if: matrix.python-version != '3.9' && matrix.python-version != '3.14.0-beta.4' && matrix.os == 'windows-latest'
119120

Makefile

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -125,11 +125,6 @@ test-load-local-postgres:
125125
test-common:
126126
uv run pytest tests/common tests/normalize tests/extract tests/pipeline tests/reflection tests/sources tests/workspace tests/load/test_dummy_client.py tests/libs tests/destinations
127127

128-
reset-test-storage:
129-
-rm -r _storage
130-
mkdir _storage
131-
python3 tests/tools/create_storages.py
132-
133128
build-library: dev
134129
uv version
135130
uv build

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ Be it a Google Colab notebook, AWS Lambda function, an Airflow DAG, your local l
3131

3232
## Installation
3333

34-
dlt supports Python 3.9 through Python 3.14 (beta 4). Note that some optional extras are not yet available for Python 3.14, so support for this version is considered experimental.
34+
dlt supports Python 3.9 through Python 3.14. Note that some optional extras are not yet available for Python 3.14, so support for this version is considered experimental.
3535

3636
```sh
3737
pip install dlt

dlt/_workspace/cli/_deploy_command.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ def __init__(
8080
run_on_push: bool = False,
8181
run_manually: bool = False,
8282
branch: Optional[str] = None,
83+
**kwargs: Any,
8384
):
8485
super().__init__(pipeline_script_path, location, branch)
8586
self.schedule = schedule
@@ -264,6 +265,7 @@ def __init__(
264265
location: str,
265266
branch: Optional[str] = None,
266267
secrets_format: Optional[str] = None,
268+
**kwargs: Any,
267269
):
268270
super().__init__(pipeline_script_path, location, branch)
269271
self.secrets_format = secrets_format

dlt/_workspace/cli/_dlt.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1+
import sys
12
from typing import Any, Sequence, Type, cast, List, Dict, Tuple
23
import argparse
3-
import click
44
import rich_argparse
55
from rich.markdown import Markdown
66

@@ -134,6 +134,15 @@ def _create_parser() -> Tuple[argparse.ArgumentParser, Dict[str, SupportsCliComm
134134
" clear enough."
135135
),
136136
)
137+
parser.add_argument(
138+
"--no-pwd",
139+
default=False,
140+
action="store_true",
141+
help=(
142+
"Do not add current working directory to sys.path. By default $pwd is added to "
143+
"reproduce Python behavior when running scripts."
144+
),
145+
)
137146
subparsers = parser.add_subparsers(title="Available subcommands", dest="command")
138147

139148
# load plugins
@@ -190,6 +199,9 @@ def main() -> int:
190199
# switch to non-interactive if tty not connected
191200
with maybe_no_stdin():
192201
display_run_context_info()
202+
if not args.no_pwd:
203+
if "" not in sys.path:
204+
sys.path.insert(0, "")
193205
cmd.execute(args)
194206
except Exception as ex:
195207
docs_url = cmd.docs_url if hasattr(cmd, "docs_url") else DEFAULT_DOCS_URL
@@ -204,7 +216,7 @@ def main() -> int:
204216

205217
# print exception if available
206218
if raiseable_exception:
207-
click.secho(str(ex), err=True, fg="red")
219+
fmt.secho(str(ex), err=True, fg="red")
208220

209221
fmt.note("Please refer to our docs at '%s' for further assistance." % docs_url)
210222
if _debug.is_debug_enabled() and raiseable_exception:

dlt/_workspace/cli/utils.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,8 @@ def display_run_context_info() -> None:
3737
if run_context.default_profile != run_context.profile:
3838
# print warning
3939
fmt.echo(
40-
"Profile %s activated on %s"
41-
% (
42-
fmt.style(run_context.profile, fg="yellow", reset=True),
43-
fmt.bold(run_context.name),
44-
),
40+
"Profile `%s` is active."
41+
% (fmt.style(run_context.profile, fg="yellow", reset=True),),
4542
err=True,
4643
)
4744

dlt/common/runners/pool_runner.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,10 +201,12 @@ def _run_func() -> bool:
201201
try:
202202
logger.debug("Running pool")
203203
while _run_func():
204-
# for next run
204+
# raise on signal: safe to do that out of _run_func()
205205
signals.raise_if_signalled()
206206
runs_count += 1
207207
sleep(config.run_sleep)
208+
# signal could come
209+
signals.raise_if_signalled()
208210
return runs_count
209211
except SignalReceivedException as sigex:
210212
# sleep this may raise SignalReceivedException

dlt/common/runtime/collector.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ class DictCollector(Collector):
5353
"""A collector that just counts"""
5454

5555
def __init__(self) -> None:
56+
self.step = None
5657
self.counters: DefaultDict[str, int] = None
5758

5859
def update(
@@ -101,6 +102,7 @@ def __init__(
101102
log_level (str, optional): Log level for the logger. Defaults to INFO level
102103
dump_system_stats (bool, optional): Log memory and cpu usage. Defaults to True
103104
"""
105+
self.step = None
104106
self.log_period = log_period
105107
self.logger = logger
106108
self.log_level = log_level
@@ -260,6 +262,7 @@ def __init__(self, single_bar: bool = False, **tqdm_kwargs: Any) -> None:
260262
raise MissingDependencyException(
261263
"TqdmCollector", ["tqdm"], "We need tqdm to display progress bars."
262264
)
265+
self.step = None
263266
self.single_bar = single_bar
264267
self._bars: Dict[str, tqdm[None]] = {}
265268
self.tqdm_kwargs = tqdm_kwargs or {}
@@ -321,6 +324,7 @@ def __init__(self, single_bar: bool = True, **alive_kwargs: Any) -> None:
321324
["alive-progress"],
322325
"We need alive-progress to display progress bars.",
323326
)
327+
self.step = None
324328
self.single_bar = single_bar
325329
self._bars: Dict[str, Any] = {}
326330
self._bars_counts: Dict[str, int] = {}
@@ -399,6 +403,7 @@ def __init__(self, single_bar: bool = False, **enlighten_kwargs: Any) -> None:
399403
["enlighten"],
400404
"We need enlighten to display progress bars with a space for log messages.",
401405
)
406+
self.step = None
402407
self.single_bar = single_bar
403408
self.enlighten_kwargs = enlighten_kwargs
404409

dlt/common/runtime/signals.py

Lines changed: 88 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,74 @@
1+
import sys
12
import threading
23
import signal
34
from contextlib import contextmanager
45
from threading import Event
5-
from typing import Any, Iterator
6+
from types import FrameType
7+
from typing import Any, Callable, Dict, Iterator, Optional, Union
68

79
from dlt.common import logger
810
from dlt.common.exceptions import SignalReceivedException
911

1012
_received_signal: int = 0
1113
exit_event = Event()
14+
_signal_counts: Dict[int, int] = {}
15+
_original_handlers: Dict[int, Union[int, Callable[[int, Optional[FrameType]], Any]]] = {}
1216

1317

14-
def signal_receiver(sig: int, frame: Any) -> None:
15-
global _received_signal
18+
def signal_receiver(sig: int, frame: FrameType) -> None:
19+
"""Handle POSIX signals with two-stage escalation.
1620
17-
logger.info(f"Signal {sig} received")
21+
This handler is installed by delayed_signals(). On the first occurrence of a
22+
supported signal (eg. SIGINT, SIGTERM) it requests a graceful shutdown by
23+
setting a process-wide flag and waking sleeping threads via exit_event.
24+
A second occurrence of the same signal escalates by delegating to the
25+
original handler or the system default, which typically results in an
26+
immediate process termination (eg. KeyboardInterrupt for SIGINT).
1827
19-
if _received_signal > 0:
20-
logger.info(f"Another signal received after {_received_signal}")
21-
return
28+
Args:
29+
sig: Signal number (for example, signal.SIGINT or signal.SIGTERM).
30+
frame: The current stack frame when the signal was received.
2231
23-
_received_signal = sig
24-
# awake all threads sleeping on event
25-
exit_event.set()
32+
Notes:
33+
- The CPython runtime delivers signal handlers in the main thread only.
34+
Worker threads must cooperatively observe shutdown via raise_if_signalled()
35+
or the signal-aware sleep().
36+
"""
37+
global _received_signal
2638

27-
logger.info("Sleeping threads signalled")
39+
# track how many times this signal type has been received
40+
_signal_counts[sig] = _signal_counts.get(sig, 0) + 1
41+
42+
if _signal_counts[sig] == 1:
43+
# first signal of this type: set flag and wake threads
44+
_received_signal = sig
45+
if sig == signal.SIGINT:
46+
sig_desc = "CTRL-C"
47+
else:
48+
sig_desc = f"Signal {sig}"
49+
msg = (
50+
f"{sig_desc} received. Trying to shut down gracefully. It may take time to drain job"
51+
f" pools. Send {sig_desc} again to force stop."
52+
)
53+
if sys.stdin.isatty():
54+
# log to console
55+
sys.stderr.write(msg)
56+
sys.stderr.flush()
57+
else:
58+
logger.warning(msg)
59+
elif _signal_counts[sig] >= 2:
60+
# Second signal of this type: call original handler
61+
logger.debug(f"Second signal {sig} received, calling default handler")
62+
original_handler = _original_handlers.get(sig, signal.SIG_DFL)
63+
if callable(original_handler):
64+
original_handler(sig, frame)
65+
elif original_handler == signal.SIG_DFL:
66+
# Restore default and re-raise to trigger default behavior
67+
signal.signal(sig, signal.SIG_DFL)
68+
signal.raise_signal(sig)
69+
70+
exit_event.set()
71+
logger.debug("Sleeping threads signalled")
2872

2973

3074
def raise_if_signalled() -> None:
@@ -38,38 +82,61 @@ def signal_received() -> bool:
3882

3983

4084
def sleep(sleep_seconds: float) -> None:
41-
"""A signal-aware version of sleep function. Will raise SignalReceivedException if signal was received during sleep period."""
42-
# do not allow sleeping if signal was received
43-
raise_if_signalled()
85+
"""A signal-aware version of sleep function. Will wake up if signal is received but will not raise exception."""
4486
# sleep or wait for signal
4587
exit_event.clear()
4688
exit_event.wait(sleep_seconds)
47-
# if signal then raise
48-
raise_if_signalled()
4989

5090

5191
def wake_all() -> None:
5292
"""Wakes all threads sleeping on event"""
5393
exit_event.set()
5494

5595

96+
def _clear_signals() -> None:
97+
global _received_signal
98+
99+
_received_signal = 0
100+
_signal_counts.clear()
101+
_original_handlers.clear()
102+
103+
56104
@contextmanager
57105
def delayed_signals() -> Iterator[None]:
58-
"""Will delay signalling until `raise_if_signalled` is used or signalled `sleep`"""
106+
"""Will delay signalling until `raise_if_signalled` is explicitly used or when
107+
a second signal with the same int value arrives.
108+
109+
A no-op when not called on main thread.
110+
111+
Can be nested - nested calls are no-ops.
112+
"""
59113

60114
if threading.current_thread() is threading.main_thread():
61-
original_sigint_handler = signal.getsignal(signal.SIGINT)
115+
# check if handlers are already installed (nested call)
116+
current_sigint_handler = signal.getsignal(signal.SIGINT)
117+
118+
if current_sigint_handler is signal_receiver:
119+
# already installed, this is a nested call - just yield
120+
yield
121+
return
122+
123+
# First call - install handlers
124+
original_sigint_handler = current_sigint_handler
62125
original_sigterm_handler = signal.getsignal(signal.SIGTERM)
126+
127+
# store original handlers for signal_receiver to use
128+
_original_handlers[signal.SIGINT] = original_sigint_handler
129+
_original_handlers[signal.SIGTERM] = original_sigterm_handler
130+
63131
try:
64132
signal.signal(signal.SIGINT, signal_receiver)
65133
signal.signal(signal.SIGTERM, signal_receiver)
66134
yield
67135
finally:
68-
global _received_signal
69-
70-
_received_signal = 0
71136
signal.signal(signal.SIGINT, original_sigint_handler)
72137
signal.signal(signal.SIGTERM, original_sigterm_handler)
138+
_clear_signals()
139+
73140
else:
74141
logger.info("Running in daemon thread, signals not enabled")
75142
yield

0 commit comments

Comments
 (0)