[pre-commit.ci] pre-commit suggestions #542

Merged
merged 5 commits on Apr 8, 2025
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
@@ -40,7 +40,7 @@ repos:
- id: detect-private-key

- repo: https://github.com/codespell-project/codespell
- rev: v2.3.0
+ rev: v2.4.1
hooks:
- id: codespell
additional_dependencies: [tomli]
@@ -51,14 +51,14 @@ repos:
#args: ["--write-changes"] # uncomment if you want to get automatic fixing

- repo: https://github.com/astral-sh/ruff-pre-commit
- rev: v0.8.6
+ rev: v0.11.4
hooks:
- id: ruff
args: ["--fix"]
- id: ruff-format

- repo: https://github.com/executablebooks/mdformat
- rev: 0.7.21
+ rev: 0.7.22
hooks:
- id: mdformat
additional_dependencies:
@@ -77,11 +77,11 @@ repos:
args: ["--print-width=120"]

- repo: https://github.com/tox-dev/pyproject-fmt
- rev: v2.5.0
+ rev: v2.5.1
hooks:
- id: pyproject-fmt
additional_dependencies: [tox]
- repo: https://github.com/abravalheri/validate-pyproject
- rev: v0.23
+ rev: v0.24.1
hooks:
- id: validate-pyproject
2 changes: 1 addition & 1 deletion src/litdata/processing/data_processor.py
@@ -1075,7 +1075,7 @@ def __init__(
self.item_loader = item_loader
self.storage_options = storage_options

- self.state_dict = state_dict or {rank: 0 for rank in range(self.num_workers)}
+ self.state_dict = state_dict or dict.fromkeys(range(self.num_workers), 0)

if self.reader is not None and self.weights is not None:
raise ValueError("Either the reader or the weights needs to be defined.")
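For context, a minimal standalone sketch (not part of this diff) of why the `dict.fromkeys` rewrite is a drop-in replacement for the comprehension: both build the same zero-initialized mapping, and sharing one value object across keys is harmless here because the default is an immutable int.

    # Illustration only: both expressions build {0: 0, 1: 0, ..., n-1: 0}.
    num_workers = 4
    via_comprehension = {rank: 0 for rank in range(num_workers)}
    via_fromkeys = dict.fromkeys(range(num_workers), 0)
    assert via_comprehension == via_fromkeys
    # dict.fromkeys reuses the same value object for every key, so it is only
    # a safe swap when the default is immutable, as the 0 here is.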
2 changes: 1 addition & 1 deletion src/litdata/processing/functions.py
@@ -473,7 +473,7 @@ def optimize(
num_workers = 1

num_workers = num_workers or _get_default_num_workers()
- state_dict = {rank: 0 for rank in range(num_workers)}
+ state_dict = dict.fromkeys(range(num_workers), 0)

existing_index_file_content = (
read_index_file_content(_output_dir, storage_options) if mode == "append" else None
2 changes: 1 addition & 1 deletion src/litdata/streaming/fs_provider.py
@@ -264,5 +264,5 @@ def _get_fs_provider(remote_filepath: str, storage_options: Optional[Dict[str, A

def not_supported_provider(remote_filepath: str) -> bool:
raise ValueError(
f"URL should start with one of {[el + '://' for el in _SUPPORTED_PROVIDERS]}." f"Found {remote_filepath}."
f"URL should start with one of {[el + '://' for el in _SUPPORTED_PROVIDERS]}.Found {remote_filepath}."
)
2 changes: 1 addition & 1 deletion src/litdata/utilities/format.py
@@ -32,7 +32,7 @@ def _convert_bytes_to_int(bytes_str: str) -> int:
except ValueError:
raise ValueError(
f"Unsupported value/suffix {bytes_str}. Supported suffix are "
f'{["b"] + list(_FORMAT_TO_RATIO.keys())}.'
f"{['b'] + list(_FORMAT_TO_RATIO.keys())}."
)
raise ValueError(f"The supported units are {_FORMAT_TO_RATIO.keys()}")

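As a side note, here is a small self-contained sketch of the kind of suffix parsing this error message lives in; the suffix table, ratios, and helper name below are illustrative assumptions, not litdata's exact implementation.

    # Illustrative only: parse a human-readable size string such as "10GB" into bytes.
    # The real _convert_bytes_to_int in litdata may use different suffixes/ratios.
    _FORMAT_TO_RATIO = {"kb": 1000, "mb": 1000**2, "gb": 1000**3, "tb": 1000**4}

    def convert_bytes_to_int(bytes_str: str) -> int:
        s = bytes_str.strip().lower()
        for suffix, ratio in _FORMAT_TO_RATIO.items():
            if s.endswith(suffix):
                try:
                    return int(float(s[: -len(suffix)]) * ratio)
                except ValueError:
                    raise ValueError(
                        f"Unsupported value/suffix {bytes_str}. Supported suffix are "
                        f"{['b'] + list(_FORMAT_TO_RATIO.keys())}."
                    )
        raise ValueError(f"The supported units are {list(_FORMAT_TO_RATIO.keys())}")

    assert convert_bytes_to_int("10GB") == 10_000_000_000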
2 changes: 1 addition & 1 deletion src/litdata/utilities/packing.py
@@ -40,7 +40,7 @@ def _pack_greedily(items: List[Any], weights: List[int], num_bins: int) -> Tuple

sorted_items_and_weights = sorted(zip(items, weights), key=lambda x: x[1], reverse=True)
bin_contents = defaultdict(list)
- bin_weights = {i: 0 for i in range(num_bins)}
+ bin_weights = dict.fromkeys(range(num_bins), 0)

for item, weight in sorted_items_and_weights:
min_bin_id = min(bin_weights, key=(lambda x: bin_weights[x]), default=0)
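For readers skimming the hunk above, a self-contained sketch of the greedy packing loop it touches; the function name and return shape are assumptions for illustration, not litdata's exact API.

    # Sketch of greedy bin packing: heaviest items first, each into the currently lightest bin.
    from collections import defaultdict
    from typing import Any, Dict, List, Tuple

    def pack_greedily(items: List[Any], weights: List[int], num_bins: int) -> Tuple[Dict[int, List[Any]], Dict[int, int]]:
        sorted_pairs = sorted(zip(items, weights), key=lambda x: x[1], reverse=True)
        bin_contents: Dict[int, List[Any]] = defaultdict(list)
        bin_weights = dict.fromkeys(range(num_bins), 0)  # same mapping as {i: 0 for i in range(num_bins)}
        for item, weight in sorted_pairs:
            min_bin_id = min(bin_weights, key=bin_weights.get, default=0)
            bin_contents[min_bin_id].append(item)
            bin_weights[min_bin_id] += weight
        return bin_contents, bin_weights

    # Example: 4 items split across 2 bins end up balanced at weight 8 each.
    contents, weights = pack_greedily(["a", "b", "c", "d"], [7, 5, 3, 1], num_bins=2)
    assert sorted(weights.values()) == [8, 8]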
4 changes: 2 additions & 2 deletions tests/processing/test_functions.py
@@ -511,11 +511,11 @@ def test_optimize_race_condition(tmpdir):
]

for i, url in enumerate(urls):
print(f"downloading {i+1} file")
print(f"downloading {i + 1} file")
with requests.get(url, stream=True, timeout=10) as r:
r.raise_for_status() # Raise an exception for bad status codes

with open(f"{tmpdir}/custom_texts/book{i+1}.txt", "wb") as f:
with open(f"{tmpdir}/custom_texts/book{i + 1}.txt", "wb") as f:
for chunk in r.iter_content(chunk_size=8192):
f.write(chunk)

2 changes: 1 addition & 1 deletion tests/streaming/test_cache.py
@@ -228,7 +228,7 @@ def __len__(self) -> int:
os.makedirs(os.path.join(tmpdir, "cache_2"), exist_ok=True)
dataset = RandomDatasetAtRuntime(64, 64)
dataloader = CacheDataLoader(dataset, cache_dir=os.path.join(tmpdir, "cache_2"), chunk_bytes=2 << 12)
- with pytest.raises(ValueError, match="Your dataset items aren't deterministic"): # noqa: PT012
+ with pytest.raises(ValueError, match="Your dataset items aren't deterministic"):
for _ in dataloader:
pass

12 changes: 6 additions & 6 deletions tests/streaming/test_combined.py
@@ -552,9 +552,9 @@ def test_combined_dataset_dataloader_states_partial_iterations(combined_dataset,
if batch_idx == break_at:
break

- assert (
-     not dataloader.restore
- ), "Dataloader should not be in restore state after partial iteration, before loading state."
+ assert not dataloader.restore, (
+     "Dataloader should not be in restore state after partial iteration, before loading state."
+ )
dataloader.load_state_dict(dataloader.state_dict())
assert dataloader.restore, "Dataloader should be in restore state after loading the state from a partial iteration."

Expand All @@ -564,9 +564,9 @@ def test_combined_dataset_dataloader_states_partial_iterations(combined_dataset,
assert dataloader.current_epoch == 1, "Current epoch should be 1 during restore"
count += 1
expected_batches = total_batches - break_at - 1
- assert (
-     count >= expected_batches
- ), f"There should be at least{expected_batches} remaining batches in the first epoch."
+ assert count >= expected_batches, (
+     f"There should be at least{expected_batches} remaining batches in the first epoch."
+ )
assert not dataloader.restore, "Dataloader should not be in restore state after completing first epoch."

# Verify batches in the second epoch
12 changes: 6 additions & 6 deletions tests/streaming/test_dataset.py
@@ -150,9 +150,9 @@ def test_streaming_dataset_max_cache_dir(tmpdir, caplog):
StreamingDataset(input_dir=str(tmpdir), max_cache_size="10GB")
StreamingDataset(input_dir=str(tmpdir), max_cache_size="20GB")
assert len(caplog.messages) == 4
- assert all(
-     "The provided `max_cache_size` is less than 25GB." in record.message for record in caplog.records
- ), "Expected warning about the `max_cache_size` being less than 25GB was not logged"
+ assert all("The provided `max_cache_size` is less than 25GB." in record.message for record in caplog.records), (
+     "Expected warning about the `max_cache_size` being less than 25GB was not logged"
+ )


@pytest.mark.parametrize("drop_last", [False, True])
@@ -1540,9 +1540,9 @@ def test_dataset_as_iterator_and_non_iterator(tmpdir, local, shuffle):
assert data is not None
if local and i < dataset_length - 1:
# In iterator mode with local or remote data, _chunks_queued_for_download should be enabled
- assert (
-     dataset.cache._reader._chunks_queued_for_download is True
- ), "_chunks_queued_for_download should be enabled during iteration"
+ assert dataset.cache._reader._chunks_queued_for_download is True, (
+     "_chunks_queued_for_download should be enabled during iteration"
+ )
else:
assert dataset.cache._reader._chunks_queued_for_download is False, (
"_chunks_queued_for_download should be disabled when used as local dir without `local:` prefix"