Ran tox -e style

TomerG711 · TomerG711 · commit b6146c921bc0 · 2025-06-01T17:59:31.000+03:00
diff --git a/pyproject.toml b/pyproject.toml
@@ -21,7 +21,8 @@ name = "guidellm"
 description = "Guidance platform for deploying and managing large language models."
 readme = { file = "README.md", content-type = "text/markdown" }
 requires-python = ">=3.9.0,<4.0"
-license = { text = "Apache-2.0" }
+license = "Apache-2.0"
+license-files = ["LICENSE"]
 authors = [ { name = "Red Hat" } ]
 keywords = [
     "ai",
diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py
@@ -281,6 +281,7 @@ def benchmark(
         )
     )
 
+
 def decode_escaped_str(_ctx, _param, value):
     """
     Click auto adds characters. For example, when using --pad-char "\n",
@@ -294,6 +295,7 @@ def decode_escaped_str(_ctx, _param, value):
     except Exception as e:
         raise click.BadParameter(f"Could not decode escape sequences: {e}") from e
 
+
 @cli.command(
     help=(
         "Print out the available configuration settings that can be set "
@@ -373,7 +375,7 @@ def preprocess():
     help=(
         "The delimiter to use when concatenating prompts that are too short."
         " Used when strategy is 'concatenate'."
-    )
+    ),
 )
 @click.option(
     "--prompt-tokens",
diff --git a/src/guidellm/benchmark/benchmark.py b/src/guidellm/benchmark/benchmark.py
@@ -815,10 +815,7 @@ def from_stats(
                         req.first_token_time or req.start_time
                         for req in total_with_output_first
                     ],
-                    iter_counts=[
-                        req.output_tokens
-                        for req in total_with_output_first
-                    ],
+                    iter_counts=[req.output_tokens for req in total_with_output_first],
                     first_iter_counts=[
                         req.prompt_tokens for req in total_with_output_first
                     ],
diff --git a/src/guidellm/preprocess/dataset.py b/src/guidellm/preprocess/dataset.py
@@ -220,6 +220,7 @@ def parse_config_file(data: Union[str, Path]) -> "TokensConfig":
 
         return TokensConfig(**config_dict)
 
+
 def save_dataset_to_file(dataset: Dataset, output_path: Union[str, Path]) -> None:
     """
     Saves a HuggingFace Dataset to file in a supported format.
@@ -291,8 +292,7 @@ def process_dataset(
 
     _validate_output_suffix(output_path)
     logger.info(
-        f"Starting dataset conversion | Input: {data} | "
-        f"Output directory: {output_path}"
+        f"Starting dataset conversion | Input: {data} | Output directory: {output_path}"
     )
 
     dataset, column_mappings = guidellm_load_dataset(
@@ -378,7 +378,8 @@ def process_dataset(
 
 
 def push_dataset_to_hub(
-    hub_dataset_id: Optional[str], processed_dataset: Dataset,
+    hub_dataset_id: Optional[str],
+    processed_dataset: Dataset,
 ) -> None:
     """
     Pushes the processed dataset to Hugging Face Hub using HF_TOKEN.
diff --git a/tests/unit/preprocess/test_dataset.py b/tests/unit/preprocess/test_dataset.py
@@ -33,17 +33,18 @@ def tokenizer_mock():
     )
     return tokenizer
 
+
 @pytest.mark.smoke
 @patch(f"{process_dataset.__module__}.guidellm_load_dataset")
 @patch(f"{process_dataset.__module__}.check_load_processor")
 @patch(f"{process_dataset.__module__}.Dataset")
 @patch(f"{process_dataset.__module__}.IntegerRangeSampler")
 def test_strategy_handler_called(
-        mock_sampler,
-        mock_dataset_class,
-        mock_check_processor,
-        mock_load_dataset,
-        tokenizer_mock,
+    mock_sampler,
+    mock_dataset_class,
+    mock_check_processor,
+    mock_load_dataset,
+    tokenizer_mock,
 ):
     mock_handler = MagicMock(return_value="processed_prompt")
     with patch.dict(STRATEGY_HANDLERS, {ShortPromptStrategy.IGNORE: mock_handler}):
@@ -68,18 +69,21 @@ def test_strategy_handler_called(
         mock_load_dataset.assert_called_once()
         mock_check_processor.assert_called_once()
 
+
 @pytest.mark.sanity
 def test_handle_ignore_strategy_too_short(tokenizer_mock):
     result = handle_ignore_strategy("short", 10, tokenizer_mock)
     assert result is None
     tokenizer_mock.encode.assert_called_with("short")
 
+
 @pytest.mark.sanity
 def test_handle_ignore_strategy_sufficient_length(tokenizer_mock):
     result = handle_ignore_strategy("long prompt", 5, tokenizer_mock)
     assert result == "long prompt"
     tokenizer_mock.encode.assert_called_with("long prompt")
 
+
 @pytest.mark.sanity
 def test_handle_concatenate_strategy_enough_prompts(tokenizer_mock):
     dataset_iter = iter([{"prompt": "longer"}])
@@ -88,6 +92,7 @@ def test_handle_concatenate_strategy_enough_prompts(tokenizer_mock):
     )
     assert result == "short\nlonger"
 
+
 @pytest.mark.sanity
 def test_handle_concatenate_strategy_not_enough_prompts(tokenizer_mock):
     dataset_iter: Iterator = iter([])
@@ -96,35 +101,39 @@ def test_handle_concatenate_strategy_not_enough_prompts(tokenizer_mock):
     )
     assert result is None
 
+
 @pytest.mark.sanity
 def test_handle_pad_strategy(tokenizer_mock):
     result = handle_pad_strategy("short", 10, tokenizer_mock, "p")
     assert result == "shortppppp"
 
+
 @pytest.mark.sanity
 def test_handle_error_strategy_valid_prompt(tokenizer_mock):
     result = handle_error_strategy("valid prompt", 5, tokenizer_mock)
     assert result == "valid prompt"
     tokenizer_mock.encode.assert_called_with("valid prompt")
 
+
 @pytest.mark.sanity
 def test_handle_error_strategy_too_short_prompt(tokenizer_mock):
     with pytest.raises(PromptTooShortError):
         handle_error_strategy("short", 10, tokenizer_mock)
 
+
 @pytest.mark.smoke
 @patch("guidellm.preprocess.dataset.save_dataset_to_file")
 @patch("guidellm.preprocess.dataset.Dataset")
 @patch("guidellm.preprocess.dataset.guidellm_load_dataset")
 @patch("guidellm.preprocess.dataset.check_load_processor")
 @patch("guidellm.preprocess.dataset.IntegerRangeSampler")
 def test_process_dataset_non_empty(
-        mock_sampler,
-        mock_check_processor,
-        mock_load_dataset,
-        mock_dataset_class,
-        mock_save_to_file,
-        tokenizer_mock,
+    mock_sampler,
+    mock_check_processor,
+    mock_load_dataset,
+    mock_dataset_class,
+    mock_save_to_file,
+    tokenizer_mock,
 ):
     from guidellm.preprocess.dataset import process_dataset
 
@@ -159,17 +168,18 @@ def test_process_dataset_non_empty(
         assert "output_tokens_count" in item
         assert len(tokenizer_mock.encode(item["prompt"])) <= 3
 
+
 @pytest.mark.sanity
 @patch(f"{process_dataset.__module__}.Dataset")
 @patch(f"{process_dataset.__module__}.guidellm_load_dataset")
 @patch(f"{process_dataset.__module__}.check_load_processor")
 @patch(f"{process_dataset.__module__}.IntegerRangeSampler")
 def test_process_dataset_empty_after_processing(
-        mock_sampler,
-        mock_check_processor,
-        mock_load_dataset,
-        mock_dataset_class,
-        tokenizer_mock,
+    mock_sampler,
+    mock_check_processor,
+    mock_load_dataset,
+    mock_dataset_class,
+    tokenizer_mock,
 ):
     mock_dataset = [{"prompt": ""}]
     mock_load_dataset.return_value = (mock_dataset, {"prompt_column": "prompt"})
@@ -188,19 +198,20 @@ def test_process_dataset_empty_after_processing(
     mock_check_processor.assert_called_once()
     mock_dataset_class.from_list.assert_not_called()
 
+
 @pytest.mark.smoke
 @patch(f"{process_dataset.__module__}.push_dataset_to_hub")
 @patch(f"{process_dataset.__module__}.Dataset")
 @patch(f"{process_dataset.__module__}.guidellm_load_dataset")
 @patch(f"{process_dataset.__module__}.check_load_processor")
 @patch(f"{process_dataset.__module__}.IntegerRangeSampler")
 def test_process_dataset_push_to_hub_called(
-        mock_sampler,
-        mock_check_processor,
-        mock_load_dataset,
-        mock_dataset_class,
-        mock_push,
-        tokenizer_mock,
+    mock_sampler,
+    mock_check_processor,
+    mock_load_dataset,
+    mock_dataset_class,
+    mock_push,
+    tokenizer_mock,
 ):
     mock_dataset = [{"prompt": "abc"}]
     mock_load_dataset.return_value = (mock_dataset, {"prompt_column": "prompt"})
@@ -221,19 +232,20 @@ def test_process_dataset_push_to_hub_called(
     )
     mock_push.assert_called_once_with("id123", mock_dataset_obj)
 
+
 @pytest.mark.sanity
 @patch(f"{process_dataset.__module__}.push_dataset_to_hub")
 @patch(f"{process_dataset.__module__}.Dataset")
 @patch(f"{process_dataset.__module__}.guidellm_load_dataset")
 @patch(f"{process_dataset.__module__}.check_load_processor")
 @patch(f"{process_dataset.__module__}.IntegerRangeSampler")
 def test_process_dataset_push_to_hub_not_called(
-        mock_sampler,
-        mock_check_processor,
-        mock_load_dataset,
-        mock_dataset_class,
-        mock_push,
-        tokenizer_mock,
+    mock_sampler,
+    mock_check_processor,
+    mock_load_dataset,
+    mock_dataset_class,
+    mock_push,
+    tokenizer_mock,
 ):
     mock_dataset = [{"prompt": "abc"}]
     mock_load_dataset.return_value = (mock_dataset, {"prompt_column": "prompt"})
@@ -253,13 +265,15 @@ def test_process_dataset_push_to_hub_not_called(
     )
     mock_push.assert_not_called()
 
+
 @pytest.mark.regression
 def test_push_dataset_to_hub_success():
     os.environ["HF_TOKEN"] = "token"
     mock_dataset = MagicMock(spec=Dataset)
     push_dataset_to_hub("dataset_id", mock_dataset)
     mock_dataset.push_to_hub.assert_called_once_with("dataset_id", token="token")
 
+
 @pytest.mark.regression
 def test_push_dataset_to_hub_error_no_env():
     if "HF_TOKEN" in os.environ:
@@ -268,13 +282,15 @@ def test_push_dataset_to_hub_error_no_env():
     with pytest.raises(ValueError, match="hub_dataset_id and HF_TOKEN"):
         push_dataset_to_hub("dataset_id", mock_dataset)
 
+
 @pytest.mark.regression
 def test_push_dataset_to_hub_error_no_id():
     os.environ["HF_TOKEN"] = "token"
     mock_dataset = MagicMock(spec=Dataset)
     with pytest.raises(ValueError, match="hub_dataset_id and HF_TOKEN"):
         push_dataset_to_hub(None, mock_dataset)
 
+
 @pytest.mark.regression
 @patch.object(Path, "mkdir")
 def test_save_dataset_to_file_csv(mock_mkdir):
@@ -284,6 +300,7 @@ def test_save_dataset_to_file_csv(mock_mkdir):
     mock_dataset.to_csv.assert_called_once_with(output_path)
     mock_mkdir.assert_called_once_with(parents=True, exist_ok=True)
 
+
 @pytest.mark.regression
 @patch.object(Path, "mkdir")
 def test_save_dataset_to_file_csv_capitalized(mock_mkdir):
@@ -293,6 +310,7 @@ def test_save_dataset_to_file_csv_capitalized(mock_mkdir):
     mock_dataset.to_csv.assert_called_once_with(output_path)
     mock_mkdir.assert_called_once_with(parents=True, exist_ok=True)
 
+
 @pytest.mark.regression
 @patch.object(Path, "mkdir")
 def test_save_dataset_to_file_json(mock_mkdir):
@@ -302,6 +320,7 @@ def test_save_dataset_to_file_json(mock_mkdir):
     mock_dataset.to_json.assert_called_once_with(output_path)
     mock_mkdir.assert_called_once_with(parents=True, exist_ok=True)
 
+
 @pytest.mark.regression
 @patch.object(Path, "mkdir")
 def test_save_dataset_to_file_json_capitalized(mock_mkdir):
@@ -311,6 +330,7 @@ def test_save_dataset_to_file_json_capitalized(mock_mkdir):
     mock_dataset.to_json.assert_called_once_with(output_path)
     mock_mkdir.assert_called_once_with(parents=True, exist_ok=True)
 
+
 @pytest.mark.regression
 @patch.object(Path, "mkdir")
 def test_save_dataset_to_file_jsonl(mock_mkdir):
@@ -320,6 +340,7 @@ def test_save_dataset_to_file_jsonl(mock_mkdir):
     mock_dataset.to_json.assert_called_once_with(output_path)
     mock_mkdir.assert_called_once_with(parents=True, exist_ok=True)
 
+
 @pytest.mark.regression
 @patch.object(Path, "mkdir")
 def test_save_dataset_to_file_jsonl_capitalized(mock_mkdir):
@@ -329,6 +350,7 @@ def test_save_dataset_to_file_jsonl_capitalized(mock_mkdir):
     mock_dataset.to_json.assert_called_once_with(output_path)
     mock_mkdir.assert_called_once_with(parents=True, exist_ok=True)
 
+
 @pytest.mark.regression
 @patch.object(Path, "mkdir")
 def test_save_dataset_to_file_parquet(mock_mkdir):
@@ -338,6 +360,7 @@ def test_save_dataset_to_file_parquet(mock_mkdir):
     mock_dataset.to_parquet.assert_called_once_with(output_path)
     mock_mkdir.assert_called_once_with(parents=True, exist_ok=True)
 
+
 @pytest.mark.regression
 @patch.object(Path, "mkdir")
 def test_save_dataset_to_file_unsupported_type(mock_mkdir):