Skip to content

Commit 57cff91

Browse files
authored
Add files via upload
1 parent a8de198 commit 57cff91

File tree

2 files changed

+119
-0
lines changed

2 files changed

+119
-0
lines changed

eval_human_eval.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
import os
2+
import re
3+
import argparse
4+
from tqdm import tqdm
5+
6+
from transformers import pipeline, set_seed
7+
from transformers import AutoTokenizer, AutoModelForCausalLM
8+
from transformers.pipelines.base import Pipeline
9+
10+
from human_eval.data import write_jsonl, read_problems
11+
12+
def load_generation_pipe(model_name_or_path: str, gpu_device: int=0):
    """Build a HuggingFace text-generation pipeline from a pretrained causal LM.

    Args:
        model_name_or_path: Hub model id or path to a local checkpoint.
        gpu_device: CUDA device index handed to the pipeline.

    Returns:
        A transformers text-generation ``Pipeline`` wrapping the model and
        its matching tokenizer.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    model = AutoModelForCausalLM.from_pretrained(model_name_or_path)

    generation_pipe = pipeline(
        'text-generation',
        model=model,
        tokenizer=tokenizer,
        device=gpu_device,
    )

    print("load generation pipeline from {} over, vocab size = {}, eos id = {}, gpu device = {}.".format(
        model_name_or_path, len(tokenizer), tokenizer.eos_token_id, gpu_device)
    )

    return generation_pipe
28+
29+
def extract_function_block(string):
    """Truncate generated code at the first new top-level statement.

    Splits on markers that indicate the model moved past the function body
    (a new ``class``/``def``, a top-level comment/decorator, ``print`` or
    ``if``) and returns the leading chunk with trailing whitespace removed.
    """
    stop_markers = "\nclass|\ndef|\n#|\n@|\nprint|\nif"
    head = re.split(stop_markers, string)[0]
    return head.rstrip()
31+
32+
def run_code_generation(pipe, prompt, num_completions=1, **gen_kwargs):
    """Sample completions for one prompt and reduce each to a function body.

    Args:
        pipe: A transformers text-generation pipeline.
        prompt: Prompt string fed to the model (includes BOS, see caller).
        num_completions: Number of sequences to sample for this prompt.
        **gen_kwargs: Extra sampling arguments forwarded to the pipeline.

    Returns:
        List of generated completions with the prompt prefix stripped and
        each truncated via ``extract_function_block``.
    """
    # Fixed seed so repeated runs of the harness are reproducible.
    set_seed(123)

    generations = pipe(
        prompt,
        num_return_sequences=num_completions,
        **gen_kwargs
    )

    prompt_len = len(prompt)
    return [
        extract_function_block(gen["generated_text"][prompt_len:])
        for gen in generations
    ]
41+
42+
def evaluate_on_human_eval(
    model_name_or_path: str,
    temperature: float,
    top_p: float,
    num_samples_per_task: int,
    max_new_tokens: int,
    gpu_device: int,
    output_dir: str,
) -> str:
    """Generate HumanEval samples with a causal LM and write them to JSONL.

    For every HumanEval problem, samples ``num_samples_per_task``
    completions (in batches of at most 50) with nucleus sampling and saves
    them in the format expected by the human-eval evaluator.

    Args:
        model_name_or_path: Hub model id or path to a local checkpoint.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        num_samples_per_task: Completions to generate per problem.
        max_new_tokens: Generation length cap per completion.
        gpu_device: CUDA device index for the pipeline.
        output_dir: Where to write samples; defaults to the model directory
            when the model is a local path.

    Returns:
        Path of the written ``.samples.jsonl`` file.

    Raises:
        ValueError: If ``output_dir`` is None and the model is not local.
    """
    pipe: Pipeline = load_generation_pipe(model_name_or_path, gpu_device=gpu_device)
    eval_name = f"human_eval.t{temperature}.p{top_p}.l{max_new_tokens}.n{num_samples_per_task}"

    if output_dir is None:
        if os.path.exists(model_name_or_path):
            output_dir = model_name_or_path
        else:
            # FIX: grammar in the error message ("evaluation" -> "evaluating").
            raise ValueError("Output dir can't be null if you are not evaluating a local model.")

    os.makedirs(output_dir, exist_ok=True)
    saved_path = os.path.join(output_dir, f"{eval_name}.samples.jsonl")

    gen_kwargs = {
        "do_sample": True,
        "temperature": temperature,
        "max_new_tokens": max_new_tokens,
        "top_p": top_p,
        "top_k": 0,
        # Some tokenizers (e.g. GPT-2 family) define no pad token; fall back to EOS.
        "pad_token_id": pipe.tokenizer.pad_token_id if pipe.tokenizer.pad_token_id else pipe.tokenizer.eos_token_id,
        "eos_token_id": pipe.tokenizer.eos_token_id
    }

    problems = read_problems()
    samples = []
    generate_batch_size = min(50, num_samples_per_task)

    bos_token = pipe.tokenizer.bos_token if pipe.tokenizer.bos_token else pipe.tokenizer.eos_token

    for task_id in tqdm(problems):
        # Strip operation is important as new tokenizer will not treat '\n' as a independent token
        prompt = problems[task_id]["prompt"].strip()
        input_prompt = bos_token + prompt

        # FIX: the original used `range(num_samples_per_task // generate_batch_size)`,
        # which floor-divides away the remainder (e.g. 75 requested -> only 50
        # generated) and raises ZeroDivisionError when num_samples_per_task == 0.
        # Generate until exactly num_samples_per_task completions exist per task.
        remaining = num_samples_per_task
        while remaining > 0:
            batch_size = min(generate_batch_size, remaining)
            gen_results = run_code_generation(pipe, input_prompt, num_completions=batch_size, **gen_kwargs)
            for gen_result in gen_results:
                samples.append(dict(task_id=task_id, completion=gen_result))
            remaining -= batch_size

    write_jsonl(saved_path, samples)
    print("Run generation over, save {} samples to {}.".format(len(samples), saved_path))

    # FIX: the signature promises `-> str` but the original returned None;
    # return the output path so callers can locate the samples file.
    return saved_path
92+
93+
if __name__ == '__main__':
    # CLI entry point: parse sampling hyper-parameters and run HumanEval generation.
    parser = argparse.ArgumentParser(description='Run evaluation for code generation model on human-eval.')

    parser.add_argument('-model', '--model_name_or_path', type=str, required=True)
    parser.add_argument('-o', '--output_dir', type=str, default=None)
    parser.add_argument('-n', '--num_completions', type=int, default=100)
    parser.add_argument('-t', '--temperature', type=float, default=0.2)
    parser.add_argument('-p', '--top_p', type=float, default=0.95)
    parser.add_argument('-l', '--max_new_tokens', type=int, default=100)
    parser.add_argument('-gpu', "--gpu_device", type=int, default=0)

    args = parser.parse_args()

    evaluate_on_human_eval(
        model_name_or_path=args.model_name_or_path,
        temperature=args.temperature,
        top_p=args.top_p,
        num_samples_per_task=args.num_completions,
        max_new_tokens=args.max_new_tokens,
        gpu_device=args.gpu_device,
        output_dir=args.output_dir,
    )
    # FIX: removed a stray dead `pass` statement that followed the call.

requirements.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
torch
transformers
sentencepiece
protobuf
tqdm
human-eval

0 commit comments

Comments
 (0)