
Commit 5ffe391

committed
change error message and make output prettier
1 parent f4b50b4 commit 5ffe391

File tree

1 file changed (+19, -14 lines)

scripts/hf_eval.py

Lines changed: 19 additions & 14 deletions
@@ -1,4 +1,5 @@
 import torch
+from tabulate import tabulate
 
 from transformers import AutoModelForCausalLM, AutoTokenizer
 try:
@@ -9,17 +10,7 @@
     print("""
 Error: The 'lm_eval' module was not found.
 To install, follow these steps:
-
-1. Clone the repository:
-git clone https://github.com/EleutherAI/lm-evaluation-harness
-
-2. Change to the cloned directory:
-cd lm-evaluation-harness
-
-3. Install the package in editable mode:
-pip install -e .
-
-After installation, re-run this script to use the LM Evaluation Harness.
+pip install git+https://github.com/EleutherAI/lm-evaluation-harness.git
 """)
     raise # Re-raise the ImportError
 
@@ -33,6 +24,21 @@
 torch._inductor.config.force_fuse_int_mm_with_mul = True
 torch._inductor.config.fx_graph_cache = True
 
+def pretty_print_nested_results(results, precision: int = 6):
+    def format_value(value):
+        if isinstance(value, float):
+            return f"{value:.{precision}f}"
+        return value
+
+    main_table = []
+    for task, metrics in results["results"].items():
+        subtable = [[k, format_value(v)] for k, v in metrics.items() if k != 'alias']
+        subtable.sort(key=lambda x: x[0]) # Sort metrics alphabetically
+        formatted_subtable = tabulate(subtable, tablefmt='grid')
+        main_table.append([task, formatted_subtable])
+
+    print(tabulate(main_table, headers=['Task', 'Metrics'], tablefmt='grid'))
+
 def run_evaluation(repo_id, tasks, limit, device, precision, quantization, compile, batch_size, max_length):
 
     tokenizer = AutoTokenizer.from_pretrained(repo_id)
@@ -50,7 +56,6 @@ def run_evaluation(repo_id, tasks, limit, device, precision, quantization, compile, batch_size, max_length):
         change_linear_weights_to_int4_woqtensors(model.to(device=device))
     elif quantization == "autoquant":
         model = autoquant(model.to(device=device))
-
     with torch.no_grad():
         result = evaluate(
             HFLM(
@@ -61,8 +66,8 @@ def run_evaluation(repo_id, tasks, limit, device, precision, quantization, compile, batch_size, max_length):
             get_task_dict(tasks),
             limit = limit,
         )
-    for task, res in result["results"].items():
-        print(f"{task}: {res}")
+
+    pretty_print_nested_results(result)
 
 
 if __name__ == '__main__':
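
For context, the sketch below exercises the new pretty-printer on its own, outside the script. It is a minimal, standalone example assuming only that tabulate is installed (pip install tabulate); the sample payload mimics the {"results": {task: metrics}} shape that lm_eval's evaluate() returns, but the task and metric names in it are made up for illustration and are not part of this commit.

from tabulate import tabulate

# Same helper this commit adds to scripts/hf_eval.py.
def pretty_print_nested_results(results, precision: int = 6):
    def format_value(value):
        if isinstance(value, float):
            return f"{value:.{precision}f}"
        return value

    main_table = []
    for task, metrics in results["results"].items():
        subtable = [[k, format_value(v)] for k, v in metrics.items() if k != 'alias']
        subtable.sort(key=lambda x: x[0]) # Sort metrics alphabetically
        formatted_subtable = tabulate(subtable, tablefmt='grid')
        main_table.append([task, formatted_subtable])

    print(tabulate(main_table, headers=['Task', 'Metrics'], tablefmt='grid'))

# Hypothetical payload for demonstration only; real ones come from
# lm_eval's evaluate().
sample = {
    "results": {
        "wikitext": {
            "alias": "wikitext",  # filtered out by the k != 'alias' check
            "word_perplexity,none": 9.87654321,
            "byte_perplexity,none": 1.23456789,
        },
    },
}
pretty_print_nested_results(sample)

Because tabulate treats a multi-line string as a single cell, each task's inner metrics grid nests cleanly inside the outer Task/Metrics grid, which is what replaces the old one-line-per-task print of the raw results dict.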
