
Commit b6816a8

NathanHB and Copilot authored
Adds template for custom path saving results (#755)
## Pull Request Overview

This PR adds support for using a custom template to determine where evaluation results are saved. It adds a new `results_path_template` parameter across multiple main modules and updates the `EvaluationTracker` to honor this template when saving results; the associated tests and documentation have been updated accordingly.

- Added a new test for the custom results template.
- Extended CLI options in several main modules to accept `results_path_template`.
- Updated `EvaluationTracker` logic and documentation to reflect the new functionality.

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent ce1dbb5 · commit b6816a8

10 files changed (+108 −4 lines)

docs/source/saving-and-reading-results.mdx

Lines changed: 5 additions & 0 deletions
```diff
@@ -13,6 +13,11 @@ To save the details of the evaluation, you can use the `--save-details`
 option. The details will be saved in a parquet file
 `{output_dir}/details/{model_name}/{timestamp}/details_{task}_{timestamp}.parquet`.
 
+If you want results to be saved in a custom path, you can set the `results-path-template` option.
+This allows you to set a string template for the path. The template needs to contain the following
+variables: `output_dir`, `org`, and `model`. For example:
+`{output_dir}/{org}_{model}`. The template will be used to create the path for the results file.
+
 ## Pushing results to the HuggingFace hub
 
 You can push the results and evaluation details to the HuggingFace hub. To do
```
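For reference, the programmatic equivalent of the new option is to pass the template straight to the tracker. A minimal sketch, assuming the import path follows the `src/` layout and leaving all other constructor arguments at their defaults:

```python
# Hedged sketch: supplying the template directly to EvaluationTracker,
# mirroring the constructor arguments added in this commit.
from lighteval.logging.evaluation_tracker import EvaluationTracker  # import path assumed

tracker = EvaluationTracker(
    output_dir="results",
    results_path_template="{output_dir}/{org}_{model}",  # new optional parameter
    save_details=True,
)
```

With a model name of the form `org/model`, results would then be written under `results/org_model` rather than the default `{output_dir}/results/{model_name}` layout.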

src/lighteval/config/lighteval_config.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -60,6 +60,7 @@ class LightEvalLoggingArgs:
     """Arguments related to logging for LightEval"""
 
     output_dir: str
+    results_path_template: str | None = None
     save_details: bool = True
     push_to_hub: bool = False
     push_to_tensorboard: bool = False
```
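For the nanotron path, the same template travels through the logging config. A minimal sketch, assuming `LightEvalLoggingArgs` can be constructed directly with keyword arguments and using only the fields visible above:

```python
# Hedged sketch: setting the new field on the logging config that
# main_nanotron.py forwards to EvaluationTracker (see the diff further down).
from lighteval.config.lighteval_config import LightEvalLoggingArgs  # import path assumed

logging_args = LightEvalLoggingArgs(
    output_dir="results",
    results_path_template="{output_dir}/{org}_{model}",  # optional, defaults to None
    save_details=True,
)
```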

src/lighteval/logging/evaluation_tracker.py

Lines changed: 13 additions & 1 deletion
```diff
@@ -97,6 +97,9 @@ class EvaluationTracker:
 
     Args:
         output_dir (`str`): Local folder path where you want results to be saved.
+        results_path_template (`str`, *optional*): template to use for the results output directory. for example,
+            `"{output_dir}/results_this_time_it_will_work/{org}_{model}"` will create a folder named `results` in the output directory
+            with the model name and the organization name.
         save_details (`bool`, defaults to True): If True, details are saved to the `output_dir`.
         push_to_hub (`bool`, defaults to False): If True, details are pushed to the hub.
             Results are pushed to `{hub_results_org}/details__{sanitized model_name}` for the model `model_name`, a public dataset,
@@ -119,6 +122,7 @@ class EvaluationTracker:
     def __init__(
         self,
         output_dir: str,
+        results_path_template: str | None = None,
         save_details: bool = True,
         push_to_hub: bool = False,
         push_to_tensorboard: bool = False,
@@ -152,6 +156,7 @@ def __init__(
         self.tensorboard_repo = f"{hub_results_org}/tensorboard_logs"
         self.tensorboard_metric_prefix = tensorboard_metric_prefix
         self.nanotron_run_info = nanotron_run_info
+        self.results_path_template = results_path_template
 
         self.public = public
 
@@ -259,7 +264,14 @@ def push_to_wandb(self, results_dict: dict, details_datasets: dict) -> None:
             self.wandb_run.finish()
 
     def save_results(self, date_id: str, results_dict: dict):
-        output_dir_results = Path(self.output_dir) / "results" / self.general_config_logger.model_name
+        if self.results_path_template is not None:
+            org_model_parts = self.general_config_logger.model_name.split("/")
+            org = org_model_parts[0] if len(org_model_parts) >= 2 else ""
+            model = org_model_parts[1] if len(org_model_parts) >= 2 else org_model_parts[0]
+            output_dir = self.output_dir
+            output_dir_results = Path(self.results_path_template.format(output_dir=output_dir, org=org, model=model))
+        else:
+            output_dir_results = Path(self.output_dir) / "results" / self.general_config_logger.model_name
         self.fs.mkdirs(output_dir_results, exist_ok=True)
         output_results_file = output_dir_results / f"results_{date_id}.json"
         logger.info(f"Saving results to {output_results_file}")
```

src/lighteval/main_accelerate.py

Lines changed: 8 additions & 3 deletions
```diff
@@ -60,9 +60,6 @@ def accelerate( # noqa C901
     custom_tasks: Annotated[
         Optional[str], Option(help="Path to custom tasks directory.", rich_help_panel=HELP_PANEL_NAME_1)
     ] = None,
-    cache_dir: Annotated[
-        Optional[str], Option(help="Cache directory for datasets and models.", rich_help_panel=HELP_PANEL_NAME_1)
-    ] = None,
     num_fewshot_seeds: Annotated[
         int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANEL_NAME_1)
     ] = 1,
@@ -73,6 +70,13 @@ def accelerate( # noqa C901
     output_dir: Annotated[
         str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = "results",
+    results_path_template: Annotated[
+        str | None,
+        Option(
+            help="Template path for where to save the results, you have access to 3 variables, `output_dir`, `org` and `model`. for example a template can be `'{output_dir}/1234/{org}+{model}'`",
+            rich_help_panel=HELP_PANEL_NAME_2,
+        ),
+    ] = None,
     push_to_hub: Annotated[
         bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = False,
@@ -118,6 +122,7 @@ def accelerate( # noqa C901
 
     evaluation_tracker = EvaluationTracker(
         output_dir=output_dir,
+        results_path_template=results_path_template,
         save_details=save_details,
         push_to_hub=push_to_hub,
         push_to_tensorboard=push_to_tensorboard,
```
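The option's help text names the three available variables; a quick sketch (with made-up values) of what its example template expands to:

```python
# Hedged sketch: expanding the example template quoted in the help text above.
template = "{output_dir}/1234/{org}+{model}"
print(template.format(output_dir="results", org="my-org", model="my-model"))
# -> results/1234/my-org+my-model
```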

src/lighteval/main_custom.py

Lines changed: 8 additions & 0 deletions
```diff
@@ -70,6 +70,13 @@ def custom(
     output_dir: Annotated[
         str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANNEL_NAME_2)
     ] = "results",
+    results_path_template: Annotated[
+        str | None,
+        Option(
+            help="Template path for where to save the results, you have access to 3 variables, `output_dir`, `org` and `model`. for example a template can be `'{output_dir}/1234/{org}+{model}'`",
+            rich_help_panel=HELP_PANNEL_NAME_2,
+        ),
+    ] = None,
     push_to_hub: Annotated[
         bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANNEL_NAME_2)
     ] = False,
@@ -101,6 +108,7 @@ def custom(
 
     evaluation_tracker = EvaluationTracker(
         output_dir=output_dir,
+        results_path_template=results_path_template,
         save_details=save_details,
         push_to_hub=push_to_hub,
         push_to_tensorboard=push_to_tensorboard,
```

src/lighteval/main_endpoint.py

Lines changed: 32 additions & 0 deletions
```diff
@@ -72,6 +72,13 @@ def inference_endpoint(
     output_dir: Annotated[
         str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = "results",
+    results_path_template: Annotated[
+        str | None,
+        Option(
+            help="Template path for where to save the results, you have access to 3 variables, `output_dir`, `org` and `model`. for example a template can be `'{output_dir}/1234/{org}+{model}'`",
+            rich_help_panel=HELP_PANEL_NAME_2,
+        ),
+    ] = None,
     push_to_hub: Annotated[
         bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = False,
@@ -111,6 +118,7 @@ def inference_endpoint(
 
     evaluation_tracker = EvaluationTracker(
         output_dir=output_dir,
+        results_path_template=results_path_template,
         save_details=save_details,
         push_to_hub=push_to_hub,
         push_to_tensorboard=push_to_tensorboard,
@@ -185,6 +193,13 @@ def tgi(
     output_dir: Annotated[
         str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = "results",
+    results_path_template: Annotated[
+        str | None,
+        Option(
+            help="Template path for where to save the results, you have access to 3 variables, `output_dir`, `org` and `model`. for example a template can be `'{output_dir}/1234/{org}+{model}'`",
+            rich_help_panel=HELP_PANEL_NAME_2,
+        ),
+    ] = None,
     push_to_hub: Annotated[
         bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = False,
@@ -227,6 +242,7 @@ def tgi(
 
     evaluation_tracker = EvaluationTracker(
         output_dir=output_dir,
+        results_path_template=results_path_template,
         save_details=save_details,
         push_to_hub=push_to_hub,
         push_to_tensorboard=push_to_tensorboard,
@@ -302,6 +318,13 @@ def litellm(
     output_dir: Annotated[
         str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = "results",
+    results_path_template: Annotated[
+        str | None,
+        Option(
+            help="Template path for where to save the results, you have access to 3 variables, `output_dir`, `org` and `model`. for example a template can be `'{output_dir}/1234/{org}+{model}'`",
+            rich_help_panel=HELP_PANEL_NAME_2,
+        ),
+    ] = None,
     push_to_hub: Annotated[
         bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = False,
@@ -344,6 +367,7 @@ def litellm(
 
     evaluation_tracker = EvaluationTracker(
         output_dir=output_dir,
+        results_path_template=results_path_template,
         save_details=save_details,
         push_to_hub=push_to_hub,
         push_to_tensorboard=push_to_tensorboard,
@@ -420,6 +444,13 @@ def inference_providers(
     output_dir: Annotated[
         str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = "results",
+    results_path_template: Annotated[
+        str | None,
+        Option(
+            help="Template path for where to save the results, you have access to 3 variables, `output_dir`, `org` and `model`. for example a template can be `'{output_dir}/1234/{org}+{model}'`",
+            rich_help_panel=HELP_PANEL_NAME_2,
+        ),
+    ] = None,
     push_to_hub: Annotated[
         bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = False,
@@ -462,6 +493,7 @@ def inference_providers(
 
     evaluation_tracker = EvaluationTracker(
         output_dir=output_dir,
+        results_path_template=results_path_template,
         save_details=save_details,
         push_to_hub=push_to_hub,
         push_to_tensorboard=push_to_tensorboard,
```

src/lighteval/main_nanotron.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -81,6 +81,7 @@ def nanotron(
 
     evaluation_tracker = EvaluationTracker(
         output_dir=lighteval_config.logging.output_dir,
+        results_path_template=lighteval_config.logging.results_path_template,
         hub_results_org=lighteval_config.logging.results_org,
         public=lighteval_config.logging.public_run,
         push_to_hub=lighteval_config.logging.push_to_hub,
```

src/lighteval/main_sglang.py

Lines changed: 8 additions & 0 deletions
```diff
@@ -63,6 +63,13 @@ def sglang(
     output_dir: Annotated[
         str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = "results",
+    results_path_template: Annotated[
+        str | None,
+        Option(
+            help="Template path for where to save the results, you have access to 3 variables, `output_dir`, `org` and `model`. for example a template can be `'{output_dir}/1234/{org}+{model}'`",
+            rich_help_panel=HELP_PANEL_NAME_2,
+        ),
+    ] = None,
     push_to_hub: Annotated[
         bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = False,
@@ -104,6 +111,7 @@ def sglang(
 
     evaluation_tracker = EvaluationTracker(
         output_dir=output_dir,
+        results_path_template=results_path_template,
         save_details=save_details,
         push_to_hub=push_to_hub,
         push_to_tensorboard=push_to_tensorboard,
```

src/lighteval/main_vllm.py

Lines changed: 8 additions & 0 deletions
```diff
@@ -66,6 +66,13 @@ def vllm(
     output_dir: Annotated[
         str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = "results",
+    results_path_template: Annotated[
+        str | None,
+        Option(
+            help="Template path for where to save the results, you have access to 3 variables, `output_dir`, `org` and `model`. for example a template can be `'{output_dir}/1234/{org}+{model}'`",
+            rich_help_panel=HELP_PANEL_NAME_2,
+        ),
+    ] = None,
     push_to_hub: Annotated[
         bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = False,
@@ -107,6 +114,7 @@ def vllm(
 
     evaluation_tracker = EvaluationTracker(
         output_dir=output_dir,
+        results_path_template=results_path_template,
         save_details=save_details,
         push_to_hub=push_to_hub,
         push_to_tensorboard=push_to_tensorboard,
```

tests/logging/test_evaluation_tracker.py

Lines changed: 24 additions & 0 deletions
```diff
@@ -96,6 +96,30 @@ def test_results_logging(mock_evaluation_tracker: EvaluationTracker):
     assert saved_results["config_general"]["model_name"] == "test_model"
 
 
+def test_results_logging_template(mock_evaluation_tracker: EvaluationTracker):
+    task_metrics = {
+        "task1": {"accuracy": 0.8, "f1": 0.75},
+        "task2": {"precision": 0.9, "recall": 0.85},
+    }
+    mock_evaluation_tracker.metrics_logger.metric_aggregated = task_metrics
+    mock_evaluation_tracker.results_path_template = "{output_dir}/{org}_{model}"
+
+    mock_evaluation_tracker.save()
+
+    results_dir = Path(mock_evaluation_tracker.output_dir) / "_test_model"
+    assert results_dir.exists()
+
+    result_files = list(results_dir.glob("results_*.json"))
+    assert len(result_files) == 1
+
+    with open(result_files[0], "r") as f:
+        saved_results = json.load(f)
+
+    assert "results" in saved_results
+    assert saved_results["results"] == task_metrics
+    assert saved_results["config_general"]["model_name"] == "test_model"
+
+
 @pytest.mark.evaluation_tracker(save_details=True)
 def test_details_logging(mock_evaluation_tracker, mock_datetime):
     task_details = {
```
