Adds template for custom path saving results #755

Merged

merged 10 commits on May 21, 2025

5 changes: 5 additions & 0 deletions docs/source/saving-and-reading-results.mdx
@@ -13,6 +13,11 @@ To save the details of the evaluation, you can use the `--save-details`
option. The details will be saved in a parquet file
`{output_dir}/details/{model_name}/{timestamp}/details_{task}_{timestamp}.parquet`.

If you want results to be saved in a custom path, you can set the `results-path-template` option.
This lets you provide a string template for the path. The template can use the following
variables: `output_dir`, `org`, and `model`, for example
`{output_dir}/{org}_{model}`. The template is used to build the path of the directory in which the results file is saved.
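
A minimal sketch of how such a template expands (this mirrors the tracker's internal `str.format` call; the model name below is illustrative):

```python
template = "{output_dir}/{org}_{model}"

# Hub-style model names are split on "/" to fill `org` and `model`
model_name = "my-org/my-model"  # hypothetical model name
org, model = model_name.split("/")

print(template.format(output_dir="results", org=org, model=model))
# results/my-org_my-model
```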

## Pushing results to the HuggingFace hub

You can push the results and evaluation details to the HuggingFace hub. To do
1 change: 1 addition & 0 deletions src/lighteval/config/lighteval_config.py
@@ -60,6 +60,7 @@ class LightEvalLoggingArgs:
"""Arguments related to logging for LightEval"""

output_dir: str
results_path_template: str | None = None
save_details: bool = True
push_to_hub: bool = False
push_to_tensorboard: bool = False
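
For illustration, a hedged sketch of setting the new field programmatically (assuming `output_dir` is the only required field of this dataclass; the values are placeholders):

```python
from lighteval.config.lighteval_config import LightEvalLoggingArgs

logging_args = LightEvalLoggingArgs(
    output_dir="results",
    results_path_template="{output_dir}/{org}_{model}",  # optional, defaults to None
)
```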
14 changes: 13 additions & 1 deletion src/lighteval/logging/evaluation_tracker.py
@@ -97,6 +97,9 @@ class EvaluationTracker:

Args:
output_dir (`str`): Local folder path where you want results to be saved.
        results_path_template (`str`, *optional*): Template to use for the results output directory. For example,
            `"{output_dir}/results_this_time_it_will_work/{org}_{model}"` creates the results folder under the output
            directory, in a subfolder named after the organization and the model.
save_details (`bool`, defaults to True): If True, details are saved to the `output_dir`.
push_to_hub (`bool`, defaults to False): If True, details are pushed to the hub.
Results are pushed to `{hub_results_org}/details__{sanitized model_name}` for the model `model_name`, a public dataset,
@@ -119,6 +122,7 @@ class EvaluationTracker:
def __init__(
self,
output_dir: str,
results_path_template: str | None = None,
save_details: bool = True,
push_to_hub: bool = False,
push_to_tensorboard: bool = False,
@@ -152,6 +156,7 @@ def __init__(
self.tensorboard_repo = f"{hub_results_org}/tensorboard_logs"
self.tensorboard_metric_prefix = tensorboard_metric_prefix
self.nanotron_run_info = nanotron_run_info
self.results_path_template = results_path_template

self.public = public
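
Based on the signature above, a minimal usage sketch (only the arguments relevant to this PR; every other parameter keeps its default):

```python
from lighteval.logging.evaluation_tracker import EvaluationTracker

tracker = EvaluationTracker(
    output_dir="results",
    results_path_template="{output_dir}/{org}_{model}",  # custom results layout
)
```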

@@ -259,7 +264,14 @@ def push_to_wandb(self, results_dict: dict, details_datasets: dict) -> None:
self.wandb_run.finish()

def save_results(self, date_id: str, results_dict: dict):
output_dir_results = Path(self.output_dir) / "results" / self.general_config_logger.model_name
if self.results_path_template is not None:
            # Split a hub-style "org/model" name; a bare model name yields an empty org
            org_model_parts = self.general_config_logger.model_name.split("/")
            org = org_model_parts[0] if len(org_model_parts) >= 2 else ""
            model = org_model_parts[1] if len(org_model_parts) >= 2 else org_model_parts[0]
output_dir = self.output_dir
output_dir_results = Path(self.results_path_template.format(output_dir=output_dir, org=org, model=model))
else:
output_dir_results = Path(self.output_dir) / "results" / self.general_config_logger.model_name
self.fs.mkdirs(output_dir_results, exist_ok=True)
output_results_file = output_dir_results / f"results_{date_id}.json"
logger.info(f"Saving results to {output_results_file}")
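
The org/model split above degrades gracefully for bare model names; a short sketch of the resulting paths (the helper name and model names are illustrative):

```python
def render_results_dir(template: str, output_dir: str, model_name: str) -> str:
    # Mirrors the split in `save_results`: "org/model" -> (org, model),
    # while a bare name yields an empty org
    parts = model_name.split("/")
    org = parts[0] if len(parts) >= 2 else ""
    model = parts[1] if len(parts) >= 2 else parts[0]
    return template.format(output_dir=output_dir, org=org, model=model)

print(render_results_dir("{output_dir}/{org}_{model}", "results", "my-org/my-model"))
# results/my-org_my-model
print(render_results_dir("{output_dir}/{org}_{model}", "results", "test_model"))
# results/_test_model  (empty org, as exercised by the test further down)
```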
11 changes: 8 additions & 3 deletions src/lighteval/main_accelerate.py
@@ -60,9 +60,6 @@ def accelerate( # noqa C901
custom_tasks: Annotated[
Optional[str], Option(help="Path to custom tasks directory.", rich_help_panel=HELP_PANEL_NAME_1)
] = None,
cache_dir: Annotated[
Optional[str], Option(help="Cache directory for datasets and models.", rich_help_panel=HELP_PANEL_NAME_1)
] = None,
num_fewshot_seeds: Annotated[
int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANEL_NAME_1)
] = 1,
@@ -73,6 +70,13 @@ def accelerate( # noqa C901
output_dir: Annotated[
str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
] = "results",
results_path_template: Annotated[
str | None,
Option(
help="Template path for where to save the results, you have access to 3 variables, `output_dir`, `org` and `model`. for example a template can be `'{output_dir}/1234/{org}+{model}'`",
rich_help_panel=HELP_PANEL_NAME_2,
),
] = None,
push_to_hub: Annotated[
bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANEL_NAME_2)
] = False,
@@ -118,6 +122,7 @@ def accelerate( # noqa C901

evaluation_tracker = EvaluationTracker(
output_dir=output_dir,
results_path_template=results_path_template,
save_details=save_details,
push_to_hub=push_to_hub,
push_to_tensorboard=push_to_tensorboard,
8 changes: 8 additions & 0 deletions src/lighteval/main_custom.py
@@ -70,6 +70,13 @@ def custom(
output_dir: Annotated[
str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANNEL_NAME_2)
] = "results",
results_path_template: Annotated[
str | None,
Option(
help="Template path for where to save the results, you have access to 3 variables, `output_dir`, `org` and `model`. for example a template can be `'{output_dir}/1234/{org}+{model}'`",
rich_help_panel=HELP_PANNEL_NAME_2,
),
] = None,
push_to_hub: Annotated[
bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANNEL_NAME_2)
] = False,
@@ -101,6 +108,7 @@ def custom(

evaluation_tracker = EvaluationTracker(
output_dir=output_dir,
results_path_template=results_path_template,
save_details=save_details,
push_to_hub=push_to_hub,
push_to_tensorboard=push_to_tensorboard,
32 changes: 32 additions & 0 deletions src/lighteval/main_endpoint.py
@@ -72,6 +72,13 @@ def inference_endpoint(
output_dir: Annotated[
str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
] = "results",
results_path_template: Annotated[
str | None,
Option(
help="Template path for where to save the results, you have access to 3 variables, `output_dir`, `org` and `model`. for example a template can be `'{output_dir}/1234/{org}+{model}'`",
rich_help_panel=HELP_PANEL_NAME_2,
),
] = None,
push_to_hub: Annotated[
bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANEL_NAME_2)
] = False,
@@ -111,6 +118,7 @@ def inference_endpoint(

evaluation_tracker = EvaluationTracker(
output_dir=output_dir,
results_path_template=results_path_template,
save_details=save_details,
push_to_hub=push_to_hub,
push_to_tensorboard=push_to_tensorboard,
@@ -185,6 +193,13 @@ def tgi(
output_dir: Annotated[
str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
] = "results",
results_path_template: Annotated[
str | None,
Option(
help="Template path for where to save the results, you have access to 3 variables, `output_dir`, `org` and `model`. for example a template can be `'{output_dir}/1234/{org}+{model}'`",
rich_help_panel=HELP_PANEL_NAME_2,
),
] = None,
push_to_hub: Annotated[
bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANEL_NAME_2)
] = False,
@@ -227,6 +242,7 @@ def tgi(

evaluation_tracker = EvaluationTracker(
output_dir=output_dir,
results_path_template=results_path_template,
save_details=save_details,
push_to_hub=push_to_hub,
push_to_tensorboard=push_to_tensorboard,
@@ -302,6 +318,13 @@ def litellm(
output_dir: Annotated[
str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
] = "results",
results_path_template: Annotated[
str | None,
Option(
help="Template path for where to save the results, you have access to 3 variables, `output_dir`, `org` and `model`. for example a template can be `'{output_dir}/1234/{org}+{model}'`",
rich_help_panel=HELP_PANEL_NAME_2,
),
] = None,
push_to_hub: Annotated[
bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANEL_NAME_2)
] = False,
@@ -344,6 +367,7 @@ def litellm(

evaluation_tracker = EvaluationTracker(
output_dir=output_dir,
results_path_template=results_path_template,
save_details=save_details,
push_to_hub=push_to_hub,
push_to_tensorboard=push_to_tensorboard,
@@ -420,6 +444,13 @@ def inference_providers(
output_dir: Annotated[
str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
] = "results",
results_path_template: Annotated[
str | None,
Option(
help="Template path for where to save the results, you have access to 3 variables, `output_dir`, `org` and `model`. for example a template can be `'{output_dir}/1234/{org}+{model}'`",
rich_help_panel=HELP_PANEL_NAME_2,
),
] = None,
push_to_hub: Annotated[
bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANEL_NAME_2)
] = False,
@@ -462,6 +493,7 @@ def inference_providers(

evaluation_tracker = EvaluationTracker(
output_dir=output_dir,
results_path_template=results_path_template,
save_details=save_details,
push_to_hub=push_to_hub,
push_to_tensorboard=push_to_tensorboard,
1 change: 1 addition & 0 deletions src/lighteval/main_nanotron.py
@@ -81,6 +81,7 @@ def nanotron(

evaluation_tracker = EvaluationTracker(
output_dir=lighteval_config.logging.output_dir,
results_path_template=lighteval_config.logging.results_path_template,
hub_results_org=lighteval_config.logging.results_org,
public=lighteval_config.logging.public_run,
push_to_hub=lighteval_config.logging.push_to_hub,
8 changes: 8 additions & 0 deletions src/lighteval/main_sglang.py
@@ -63,6 +63,13 @@ def sglang(
output_dir: Annotated[
str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
] = "results",
results_path_template: Annotated[
str | None,
Option(
help="Template path for where to save the results, you have access to 3 variables, `output_dir`, `org` and `model`. for example a template can be `'{output_dir}/1234/{org}+{model}'`",
rich_help_panel=HELP_PANEL_NAME_2,
),
] = None,
push_to_hub: Annotated[
bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANEL_NAME_2)
] = False,
@@ -104,6 +111,7 @@ def sglang(

evaluation_tracker = EvaluationTracker(
output_dir=output_dir,
results_path_template=results_path_template,
save_details=save_details,
push_to_hub=push_to_hub,
push_to_tensorboard=push_to_tensorboard,
8 changes: 8 additions & 0 deletions src/lighteval/main_vllm.py
@@ -66,6 +66,13 @@ def vllm(
output_dir: Annotated[
str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
] = "results",
results_path_template: Annotated[
str | None,
Option(
help="Template path for where to save the results, you have access to 3 variables, `output_dir`, `org` and `model`. for example a template can be `'{output_dir}/1234/{org}+{model}'`",
rich_help_panel=HELP_PANEL_NAME_2,
),
] = None,
push_to_hub: Annotated[
bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANEL_NAME_2)
] = False,
@@ -107,6 +114,7 @@ def vllm(

evaluation_tracker = EvaluationTracker(
output_dir=output_dir,
results_path_template=results_path_template,
save_details=save_details,
push_to_hub=push_to_hub,
push_to_tensorboard=push_to_tensorboard,
24 changes: 24 additions & 0 deletions tests/logging/test_evaluation_tracker.py
@@ -96,6 +96,30 @@ def test_results_logging(mock_evaluation_tracker: EvaluationTracker):
assert saved_results["config_general"]["model_name"] == "test_model"


def test_results_logging_template(mock_evaluation_tracker: EvaluationTracker):
task_metrics = {
"task1": {"accuracy": 0.8, "f1": 0.75},
"task2": {"precision": 0.9, "recall": 0.85},
}
mock_evaluation_tracker.metrics_logger.metric_aggregated = task_metrics
mock_evaluation_tracker.results_path_template = "{output_dir}/{org}_{model}"

mock_evaluation_tracker.save()

    # "test_model" has no org part, so org == "" and "{output_dir}/{org}_{model}" renders ".../_test_model"
    results_dir = Path(mock_evaluation_tracker.output_dir) / "_test_model"
assert results_dir.exists()

result_files = list(results_dir.glob("results_*.json"))
assert len(result_files) == 1

with open(result_files[0], "r") as f:
saved_results = json.load(f)

assert "results" in saved_results
assert saved_results["results"] == task_metrics
assert saved_results["config_general"]["model_name"] == "test_model"


@pytest.mark.evaluation_tracker(save_details=True)
def test_details_logging(mock_evaluation_tracker, mock_datetime):
task_details = {