Skip to content

Commit b6146c9

Browse files
committed
Ran tox -e style
1 parent f61b7f0 commit b6146c9

File tree

5 files changed

+61
-37
lines changed

5 files changed

+61
-37
lines changed

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,8 @@ name = "guidellm"
2121
description = "Guidance platform for deploying and managing large language models."
2222
readme = { file = "README.md", content-type = "text/markdown" }
2323
requires-python = ">=3.9.0,<4.0"
24-
license = { text = "Apache-2.0" }
24+
license = "Apache-2.0"
25+
license-files = ["LICENSE"]
2526
authors = [ { name = "Red Hat" } ]
2627
keywords = [
2728
"ai",

src/guidellm/__main__.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -281,6 +281,7 @@ def benchmark(
281281
)
282282
)
283283

284+
284285
def decode_escaped_str(_ctx, _param, value):
285286
"""
286287
Click auto adds characters. For example, when using --pad-char "\n",
@@ -294,6 +295,7 @@ def decode_escaped_str(_ctx, _param, value):
294295
except Exception as e:
295296
raise click.BadParameter(f"Could not decode escape sequences: {e}") from e
296297

298+
297299
@cli.command(
298300
help=(
299301
"Print out the available configuration settings that can be set "
@@ -373,7 +375,7 @@ def preprocess():
373375
help=(
374376
"The delimiter to use when concatenating prompts that are too short."
375377
" Used when strategy is 'concatenate'."
376-
)
378+
),
377379
)
378380
@click.option(
379381
"--prompt-tokens",

src/guidellm/benchmark/benchmark.py

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -815,10 +815,7 @@ def from_stats(
815815
req.first_token_time or req.start_time
816816
for req in total_with_output_first
817817
],
818-
iter_counts=[
819-
req.output_tokens
820-
for req in total_with_output_first
821-
],
818+
iter_counts=[req.output_tokens for req in total_with_output_first],
822819
first_iter_counts=[
823820
req.prompt_tokens for req in total_with_output_first
824821
],

src/guidellm/preprocess/dataset.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -220,6 +220,7 @@ def parse_config_file(data: Union[str, Path]) -> "TokensConfig":
220220

221221
return TokensConfig(**config_dict)
222222

223+
223224
def save_dataset_to_file(dataset: Dataset, output_path: Union[str, Path]) -> None:
224225
"""
225226
Saves a HuggingFace Dataset to file in a supported format.
@@ -291,8 +292,7 @@ def process_dataset(
291292

292293
_validate_output_suffix(output_path)
293294
logger.info(
294-
f"Starting dataset conversion | Input: {data} | "
295-
f"Output directory: {output_path}"
295+
f"Starting dataset conversion | Input: {data} | Output directory: {output_path}"
296296
)
297297

298298
dataset, column_mappings = guidellm_load_dataset(
@@ -378,7 +378,8 @@ def process_dataset(
378378

379379

380380
def push_dataset_to_hub(
381-
hub_dataset_id: Optional[str], processed_dataset: Dataset,
381+
hub_dataset_id: Optional[str],
382+
processed_dataset: Dataset,
382383
) -> None:
383384
"""
384385
Pushes the processed dataset to Hugging Face Hub using HF_TOKEN.

tests/unit/preprocess/test_dataset.py

Lines changed: 51 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -33,17 +33,18 @@ def tokenizer_mock():
3333
)
3434
return tokenizer
3535

36+
3637
@pytest.mark.smoke
3738
@patch(f"{process_dataset.__module__}.guidellm_load_dataset")
3839
@patch(f"{process_dataset.__module__}.check_load_processor")
3940
@patch(f"{process_dataset.__module__}.Dataset")
4041
@patch(f"{process_dataset.__module__}.IntegerRangeSampler")
4142
def test_strategy_handler_called(
42-
mock_sampler,
43-
mock_dataset_class,
44-
mock_check_processor,
45-
mock_load_dataset,
46-
tokenizer_mock,
43+
mock_sampler,
44+
mock_dataset_class,
45+
mock_check_processor,
46+
mock_load_dataset,
47+
tokenizer_mock,
4748
):
4849
mock_handler = MagicMock(return_value="processed_prompt")
4950
with patch.dict(STRATEGY_HANDLERS, {ShortPromptStrategy.IGNORE: mock_handler}):
@@ -68,18 +69,21 @@ def test_strategy_handler_called(
6869
mock_load_dataset.assert_called_once()
6970
mock_check_processor.assert_called_once()
7071

72+
7173
@pytest.mark.sanity
7274
def test_handle_ignore_strategy_too_short(tokenizer_mock):
7375
result = handle_ignore_strategy("short", 10, tokenizer_mock)
7476
assert result is None
7577
tokenizer_mock.encode.assert_called_with("short")
7678

79+
7780
@pytest.mark.sanity
7881
def test_handle_ignore_strategy_sufficient_length(tokenizer_mock):
7982
result = handle_ignore_strategy("long prompt", 5, tokenizer_mock)
8083
assert result == "long prompt"
8184
tokenizer_mock.encode.assert_called_with("long prompt")
8285

86+
8387
@pytest.mark.sanity
8488
def test_handle_concatenate_strategy_enough_prompts(tokenizer_mock):
8589
dataset_iter = iter([{"prompt": "longer"}])
@@ -88,6 +92,7 @@ def test_handle_concatenate_strategy_enough_prompts(tokenizer_mock):
8892
)
8993
assert result == "short\nlonger"
9094

95+
9196
@pytest.mark.sanity
9297
def test_handle_concatenate_strategy_not_enough_prompts(tokenizer_mock):
9398
dataset_iter: Iterator = iter([])
@@ -96,35 +101,39 @@ def test_handle_concatenate_strategy_not_enough_prompts(tokenizer_mock):
96101
)
97102
assert result is None
98103

104+
99105
@pytest.mark.sanity
100106
def test_handle_pad_strategy(tokenizer_mock):
101107
result = handle_pad_strategy("short", 10, tokenizer_mock, "p")
102108
assert result == "shortppppp"
103109

110+
104111
@pytest.mark.sanity
105112
def test_handle_error_strategy_valid_prompt(tokenizer_mock):
106113
result = handle_error_strategy("valid prompt", 5, tokenizer_mock)
107114
assert result == "valid prompt"
108115
tokenizer_mock.encode.assert_called_with("valid prompt")
109116

117+
110118
@pytest.mark.sanity
111119
def test_handle_error_strategy_too_short_prompt(tokenizer_mock):
112120
with pytest.raises(PromptTooShortError):
113121
handle_error_strategy("short", 10, tokenizer_mock)
114122

123+
115124
@pytest.mark.smoke
116125
@patch("guidellm.preprocess.dataset.save_dataset_to_file")
117126
@patch("guidellm.preprocess.dataset.Dataset")
118127
@patch("guidellm.preprocess.dataset.guidellm_load_dataset")
119128
@patch("guidellm.preprocess.dataset.check_load_processor")
120129
@patch("guidellm.preprocess.dataset.IntegerRangeSampler")
121130
def test_process_dataset_non_empty(
122-
mock_sampler,
123-
mock_check_processor,
124-
mock_load_dataset,
125-
mock_dataset_class,
126-
mock_save_to_file,
127-
tokenizer_mock,
131+
mock_sampler,
132+
mock_check_processor,
133+
mock_load_dataset,
134+
mock_dataset_class,
135+
mock_save_to_file,
136+
tokenizer_mock,
128137
):
129138
from guidellm.preprocess.dataset import process_dataset
130139

@@ -159,17 +168,18 @@ def test_process_dataset_non_empty(
159168
assert "output_tokens_count" in item
160169
assert len(tokenizer_mock.encode(item["prompt"])) <= 3
161170

171+
162172
@pytest.mark.sanity
163173
@patch(f"{process_dataset.__module__}.Dataset")
164174
@patch(f"{process_dataset.__module__}.guidellm_load_dataset")
165175
@patch(f"{process_dataset.__module__}.check_load_processor")
166176
@patch(f"{process_dataset.__module__}.IntegerRangeSampler")
167177
def test_process_dataset_empty_after_processing(
168-
mock_sampler,
169-
mock_check_processor,
170-
mock_load_dataset,
171-
mock_dataset_class,
172-
tokenizer_mock,
178+
mock_sampler,
179+
mock_check_processor,
180+
mock_load_dataset,
181+
mock_dataset_class,
182+
tokenizer_mock,
173183
):
174184
mock_dataset = [{"prompt": ""}]
175185
mock_load_dataset.return_value = (mock_dataset, {"prompt_column": "prompt"})
@@ -188,19 +198,20 @@ def test_process_dataset_empty_after_processing(
188198
mock_check_processor.assert_called_once()
189199
mock_dataset_class.from_list.assert_not_called()
190200

201+
191202
@pytest.mark.smoke
192203
@patch(f"{process_dataset.__module__}.push_dataset_to_hub")
193204
@patch(f"{process_dataset.__module__}.Dataset")
194205
@patch(f"{process_dataset.__module__}.guidellm_load_dataset")
195206
@patch(f"{process_dataset.__module__}.check_load_processor")
196207
@patch(f"{process_dataset.__module__}.IntegerRangeSampler")
197208
def test_process_dataset_push_to_hub_called(
198-
mock_sampler,
199-
mock_check_processor,
200-
mock_load_dataset,
201-
mock_dataset_class,
202-
mock_push,
203-
tokenizer_mock,
209+
mock_sampler,
210+
mock_check_processor,
211+
mock_load_dataset,
212+
mock_dataset_class,
213+
mock_push,
214+
tokenizer_mock,
204215
):
205216
mock_dataset = [{"prompt": "abc"}]
206217
mock_load_dataset.return_value = (mock_dataset, {"prompt_column": "prompt"})
@@ -221,19 +232,20 @@ def test_process_dataset_push_to_hub_called(
221232
)
222233
mock_push.assert_called_once_with("id123", mock_dataset_obj)
223234

235+
224236
@pytest.mark.sanity
225237
@patch(f"{process_dataset.__module__}.push_dataset_to_hub")
226238
@patch(f"{process_dataset.__module__}.Dataset")
227239
@patch(f"{process_dataset.__module__}.guidellm_load_dataset")
228240
@patch(f"{process_dataset.__module__}.check_load_processor")
229241
@patch(f"{process_dataset.__module__}.IntegerRangeSampler")
230242
def test_process_dataset_push_to_hub_not_called(
231-
mock_sampler,
232-
mock_check_processor,
233-
mock_load_dataset,
234-
mock_dataset_class,
235-
mock_push,
236-
tokenizer_mock,
243+
mock_sampler,
244+
mock_check_processor,
245+
mock_load_dataset,
246+
mock_dataset_class,
247+
mock_push,
248+
tokenizer_mock,
237249
):
238250
mock_dataset = [{"prompt": "abc"}]
239251
mock_load_dataset.return_value = (mock_dataset, {"prompt_column": "prompt"})
@@ -253,13 +265,15 @@ def test_process_dataset_push_to_hub_not_called(
253265
)
254266
mock_push.assert_not_called()
255267

268+
256269
@pytest.mark.regression
257270
def test_push_dataset_to_hub_success():
258271
os.environ["HF_TOKEN"] = "token"
259272
mock_dataset = MagicMock(spec=Dataset)
260273
push_dataset_to_hub("dataset_id", mock_dataset)
261274
mock_dataset.push_to_hub.assert_called_once_with("dataset_id", token="token")
262275

276+
263277
@pytest.mark.regression
264278
def test_push_dataset_to_hub_error_no_env():
265279
if "HF_TOKEN" in os.environ:
@@ -268,13 +282,15 @@ def test_push_dataset_to_hub_error_no_env():
268282
with pytest.raises(ValueError, match="hub_dataset_id and HF_TOKEN"):
269283
push_dataset_to_hub("dataset_id", mock_dataset)
270284

285+
271286
@pytest.mark.regression
272287
def test_push_dataset_to_hub_error_no_id():
273288
os.environ["HF_TOKEN"] = "token"
274289
mock_dataset = MagicMock(spec=Dataset)
275290
with pytest.raises(ValueError, match="hub_dataset_id and HF_TOKEN"):
276291
push_dataset_to_hub(None, mock_dataset)
277292

293+
278294
@pytest.mark.regression
279295
@patch.object(Path, "mkdir")
280296
def test_save_dataset_to_file_csv(mock_mkdir):
@@ -284,6 +300,7 @@ def test_save_dataset_to_file_csv(mock_mkdir):
284300
mock_dataset.to_csv.assert_called_once_with(output_path)
285301
mock_mkdir.assert_called_once_with(parents=True, exist_ok=True)
286302

303+
287304
@pytest.mark.regression
288305
@patch.object(Path, "mkdir")
289306
def test_save_dataset_to_file_csv_capitalized(mock_mkdir):
@@ -293,6 +310,7 @@ def test_save_dataset_to_file_csv_capitalized(mock_mkdir):
293310
mock_dataset.to_csv.assert_called_once_with(output_path)
294311
mock_mkdir.assert_called_once_with(parents=True, exist_ok=True)
295312

313+
296314
@pytest.mark.regression
297315
@patch.object(Path, "mkdir")
298316
def test_save_dataset_to_file_json(mock_mkdir):
@@ -302,6 +320,7 @@ def test_save_dataset_to_file_json(mock_mkdir):
302320
mock_dataset.to_json.assert_called_once_with(output_path)
303321
mock_mkdir.assert_called_once_with(parents=True, exist_ok=True)
304322

323+
305324
@pytest.mark.regression
306325
@patch.object(Path, "mkdir")
307326
def test_save_dataset_to_file_json_capitalized(mock_mkdir):
@@ -311,6 +330,7 @@ def test_save_dataset_to_file_json_capitalized(mock_mkdir):
311330
mock_dataset.to_json.assert_called_once_with(output_path)
312331
mock_mkdir.assert_called_once_with(parents=True, exist_ok=True)
313332

333+
314334
@pytest.mark.regression
315335
@patch.object(Path, "mkdir")
316336
def test_save_dataset_to_file_jsonl(mock_mkdir):
@@ -320,6 +340,7 @@ def test_save_dataset_to_file_jsonl(mock_mkdir):
320340
mock_dataset.to_json.assert_called_once_with(output_path)
321341
mock_mkdir.assert_called_once_with(parents=True, exist_ok=True)
322342

343+
323344
@pytest.mark.regression
324345
@patch.object(Path, "mkdir")
325346
def test_save_dataset_to_file_jsonl_capitalized(mock_mkdir):
@@ -329,6 +350,7 @@ def test_save_dataset_to_file_jsonl_capitalized(mock_mkdir):
329350
mock_dataset.to_json.assert_called_once_with(output_path)
330351
mock_mkdir.assert_called_once_with(parents=True, exist_ok=True)
331352

353+
332354
@pytest.mark.regression
333355
@patch.object(Path, "mkdir")
334356
def test_save_dataset_to_file_parquet(mock_mkdir):
@@ -338,6 +360,7 @@ def test_save_dataset_to_file_parquet(mock_mkdir):
338360
mock_dataset.to_parquet.assert_called_once_with(output_path)
339361
mock_mkdir.assert_called_once_with(parents=True, exist_ok=True)
340362

363+
341364
@pytest.mark.regression
342365
@patch.object(Path, "mkdir")
343366
def test_save_dataset_to_file_unsupported_type(mock_mkdir):

0 commit comments

Comments (0)