Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
e6172a7
fix: enable parallel test execution with pytest-xdist in CI workflow
deependujha Jun 11, 2025
1ee614e
temporary fix to handle parallelly running tests in ci
deependujha Jun 12, 2025
cf5fb70
update
deependujha Jun 12, 2025
71eafa9
update
deependujha Jun 12, 2025
17eed41
update
deependujha Jun 12, 2025
311beae
update
deependujha Jun 12, 2025
46b5843
7 pm
deependujha Jun 12, 2025
93305ba
pytest-xdist ==3.4.0
Borda Jun 12, 2025
cbf1ca5
fix tmp path on windows
deependujha Jun 12, 2025
ecad3d8
add fixture for unique HF URL to support parallel test runs
deependujha Jun 12, 2025
5bad28f
Merge branch 'main' into feat/run-tests-parallely
deependujha Jun 12, 2025
4d730a5
update
deependujha Jun 12, 2025
c6ad3f9
Merge branch 'main' into feat/run-tests-parallely
Borda Jun 18, 2025
70f7501
update
deependujha Jun 13, 2025
f3bdcf8
increase timeout of 60s to 90s
deependujha Jun 18, 2025
c35ac31
bump pytest & pytest-xdist
deependujha Jun 19, 2025
1956b7f
rerun failing tests twice
deependujha Jun 19, 2025
f890dd6
refactor: update pytest command and adjust fixture scopes for better …
deependujha Jun 20, 2025
7b09b9f
update
deependujha Jun 20, 2025
43450e3
update
deependujha Jun 20, 2025
6f47a5b
update
deependujha Jun 20, 2025
6c47e98
update
deependujha Jun 20, 2025
e52dd98
update
deependujha Jun 20, 2025
51aea13
update
deependujha Jun 20, 2025
14c0e79
update
deependujha Jun 20, 2025
140350a
update
deependujha Jun 20, 2025
613309c
update
deependujha Jun 24, 2025
59056f0
let's just wait
deependujha Jun 24, 2025
f5cb8f6
Merge branch 'main' into feat/run-tests-parallely
deependujha Aug 4, 2025
9a8028f
Update tests/streaming/test_dataloader.py
deependujha Aug 4, 2025
2203738
update
deependujha Aug 4, 2025
c03d154
update
deependujha Aug 4, 2025
f5718d7
Merge branch 'main' into feat/run-tests-parallely
deependujha Aug 6, 2025
ca211c8
Apply suggestions from code review
Borda Aug 6, 2025
38c9671
Update src/litdata/streaming/resolver.py
deependujha Aug 7, 2025
3d9a504
update
deependujha Aug 7, 2025
204c3e8
Merge branch 'main' into feat/run-tests-parallely
deependujha Aug 7, 2025
24224e7
update
deependujha Aug 7, 2025
00d5a38
Update .github/workflows/ci-testing.yml
deependujha Aug 7, 2025
41c1f1e
Merge branch 'main' into feat/run-tests-parallely
deependujha Aug 7, 2025
8bf6699
update
deependujha Aug 7, 2025
02c475b
let's try running all tests in parallel
deependujha Aug 7, 2025
1ba1412
update
deependujha Aug 7, 2025
51db91a
update
deependujha Aug 7, 2025
fe292ef
let's run tests in groups
deependujha Aug 7, 2025
62b4cfb
update
deependujha Aug 7, 2025
b61b7d7
tests pass
deependujha Aug 7, 2025
1918a9d
update
deependujha Aug 7, 2025
f59b9ed
Merge branch 'main' into feat/run-tests-parallely
deependujha Aug 7, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/workflows/ci-testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,9 @@ jobs:
pip list

- name: Tests
run: coverage run --source litdata -m pytest tests -v --durations=100
run: |
pip install pytest-xdist
coverage run --source litdata -m pytest tests -v --durations=100 -n auto

- name: Statistics
run: |
Expand Down
5 changes: 5 additions & 0 deletions src/litdata/processing/data_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,8 @@ def _terminate(self) -> None:
if self.remover and self.remover.is_alive():
self.remover.join()

sleep(5) # Give some buffer time for file creation/deletion

def _loop(self) -> None:
"""The main loop of the worker.

Expand Down Expand Up @@ -1339,6 +1341,9 @@ def run(self, data_recipe: DataRecipe) -> None:
pbar.close()

print("Workers are finished.")

sleep(5) # Give some buffer time for file creation/deletion

size = len(workers_user_items) if workers_user_items is not None else None
result = data_recipe._done(size, self.delete_cached_files, self.output_dir)

Expand Down
6 changes: 5 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import shutil
import sys
import threading
import uuid
from collections import OrderedDict
from types import ModuleType
from unittest.mock import Mock
Expand All @@ -25,9 +26,12 @@ def teardown_process_group():


@pytest.fixture(autouse=True)
def set_env():
def set_env(monkeypatch):
# Set environment variable before each test to configure BaseWorker's maximum wait time
os.environ["DATA_OPTIMIZER_TIMEOUT"] = "20"
uuid_str = uuid.uuid4().hex
monkeypatch.setenv("DATA_OPTIMIZER_DATA_CACHE_FOLDER", f"/tmp/{uuid_str}/") # noqa: S108
monkeypatch.setenv("DATA_OPTIMIZER_CACHE_FOLDER", f"/tmp/{uuid_str}/") # noqa: S108


@pytest.fixture
Expand Down
4 changes: 3 additions & 1 deletion tests/streaming/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import sys
import tempfile
import uuid
from contextlib import nullcontext
from fnmatch import fnmatch
from unittest.mock import Mock, patch
Expand Down Expand Up @@ -135,7 +136,8 @@ def test_get_parquet_indexer_cls(pq_url, tmp_path, cls, expectation, monkeypatch
@pytest.mark.parametrize(("pre_load_chunk"), [False, True])
@pytest.mark.parametrize(("low_memory"), [False, True])
def test_stream_hf_parquet_dataset(monkeypatch, huggingface_hub_fs_mock, pq_data, pre_load_chunk, low_memory):
hf_url = "hf://datasets/some_org/some_repo/some_path"
uuid_str = uuid.uuid4().hex # Unique identifier for the dataset
hf_url = f"hf://datasets/some_org/some_repo/{uuid_str}"

# Test case 1: Invalid item_loader
with pytest.raises(ValueError, match="Invalid item_loader for hf://datasets."):
Expand Down
Loading