
Commit 81fc8fd

Improve the current chat template system (#38)
- adds a system prompt
- formats the choices too with the assistant template
1 parent fb57ffc commit 81fc8fd

5 files changed: +28 additions, −9 deletions

run_evals_accelerate.py

Lines changed: 1 addition & 0 deletions
@@ -43,6 +43,7 @@ def get_parser():
         help="Whether to force multiple choice continuations to not start with a space",
     )
     parser.add_argument("--use_chat_template", default=False, action="store_true")
+    parser.add_argument("--system_prompt", type=str, default=None)
     # Model type 2) TGI
     task_type_group.add_argument("--inference_server_address", type=str)
     parser.add_argument("--inference_server_auth", type=str, default=None)

src/lighteval/few_shot_manager.py

Lines changed: 11 additions & 2 deletions
@@ -163,10 +163,12 @@ def get_examples_with_chat_template(
         example: str,
         instruction: str,
         fewshot_ex: list[str],
+        system_prompt: str,
     ):
         examples = []
+        if system_prompt is not None:
+            examples.append({"role": "system", "content": system_prompt})
         for ex in fewshot_ex:
-            # many places to put these "\n" though
             examples.append({"role": "user", "content": task.doc_to_text_without_instructions(ex)})
             examples.append({"role": "assistant", "content": task.doc_to_target(ex)})
         # We add the actual example
@@ -202,6 +204,7 @@ def fewshot_context(
         max_model_length: Optional[int] = None,
         tokenizer: Optional[AutoTokenizer] = None,
         use_chat_template=False,
+        system_prompt: str = None,
     ):
         """Returns a fewshot context string that is made up of a prepended description
         (if provided), the `num_fewshot` number of examples, and an appended prompt example.
@@ -230,7 +233,12 @@

         if use_chat_template:
             output = self.get_examples_with_chat_template(
-                task=task, tokenizer=tokenizer, example=example, instruction=instruction, fewshot_ex=fewshot_ex
+                task=task,
+                tokenizer=tokenizer,
+                example=example,
+                instruction=instruction,
+                fewshot_ex=fewshot_ex,
+                system_prompt=system_prompt,
             )
             toks = tokenizer(output)["input_ids"]
         else:
@@ -254,6 +262,7 @@ def fewshot_context(
                 example=example,
                 instruction=instruction,
                 fewshot_ex=fewshot_ex[:num_effective_fewshots],
+                system_prompt=system_prompt,
             )
             toks = tokenizer(output)["input_ids"]
         else:
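The net effect on the prompt: an optional system turn now precedes the alternating user/assistant few-shot turns, and the target example comes last. A self-contained sketch of the message list the patched get_examples_with_chat_template builds, with plain strings standing in for task.doc_to_text_without_instructions and task.doc_to_target:

def build_chat_examples(fewshot_ex, example, system_prompt=None):
    # Mirrors the patched logic: optional system turn first, then few-shot turns.
    examples = []
    if system_prompt is not None:
        examples.append({"role": "system", "content": system_prompt})
    for question, answer in fewshot_ex:
        examples.append({"role": "user", "content": question})
        examples.append({"role": "assistant", "content": answer})
    # We add the actual example
    examples.append({"role": "user", "content": example})
    return examples

messages = build_chat_examples(
    fewshot_ex=[("Q: 2+2?", "4"), ("Q: 10-3?", "7")],
    example="Q: 3+5?",
    system_prompt="Answer with a single number.",
)
# Roles, in order: system, user, assistant, user, assistant, user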

src/lighteval/main_accelerate.py

Lines changed: 8 additions & 7 deletions
@@ -69,13 +69,14 @@ def main(args):

     hlog("Loading documents, and requests")
     requests, docs = create_requests_from_tasks(
-        task_dict,
-        few_shots_dict,
-        args.num_fewshot_seeds,
-        model,
-        args.max_samples,
-        evaluation_tracker,
-        args.use_chat_template,
+        task_dict=task_dict,
+        fewshot_dict=few_shots_dict,
+        num_fewshot_seeds=args.num_fewshot_seeds,
+        lm=model,
+        max_samples=args.max_samples,
+        evaluation_tracker=evaluation_tracker,
+        use_chat_template=args.use_chat_template,
+        system_prompt=args.system_prompt,
     )

     with htrack_block("Setting seeds and waiting for all processes"):
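Beyond threading system_prompt through, this hunk switches the call to keyword arguments, which guards against a classic failure mode when a signature grows: a positional call can silently bind values to the wrong parameters. A toy illustration of the hazard (not code from this repo):

def f(a, b, new_param=None, c=0):
    # 'new_param' was inserted before 'c' in an existing signature
    return a, b, new_param, c

# The old positional call f(1, 2, 4) meant c=4; after the insertion it
# silently binds new_param=4 and leaves c at its default:
print(f(1, 2, 4))                        # (1, 2, 4, 0) -- wrong, no error
# Keyword arguments keep the call correct wherever new_param lands:
print(f(a=1, b=2, c=4, new_param=None))  # (1, 2, None, 4)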

src/lighteval/main_nanotron.py

Lines changed: 1 addition & 0 deletions
@@ -129,6 +129,7 @@ def main(
         max_samples=lighteval_config.tasks.max_samples,
         evaluation_tracker=evaluation_tracker,
         use_chat_template=False,
+        system_prompt=None,
     )

     with htrack_block("Setting seeds and waiting for all processes"):

src/lighteval/tasks/lighteval_task.py

Lines changed: 7 additions & 0 deletions
@@ -529,6 +529,7 @@ def create_requests_from_tasks(  # noqa: C901
     max_samples: int,
     evaluation_tracker: "EvaluationTracker",
     use_chat_template: bool,
+    system_prompt: str,
 ) -> Tuple[dict[RequestType, list[Request]], dict[TaskExampleId, Doc]]:
     """
     Takes a task dict and a fewshot dict and returns a dict of requests, a dict
@@ -598,10 +599,16 @@ def create_requests_from_tasks(  # noqa: C901
                 sampler=rnd,
                 tokenizer=lm.tokenizer,
                 use_chat_template=use_chat_template,
+                system_prompt=system_prompt,
             )
             doc.num_effective_few_shots = num_effective_few_shots
             doc.num_asked_few_shots = num_fewshot
             doc.ctx = ctx
+            if use_chat_template:
+                doc.choices = [
+                    lm.tokenizer.apply_chat_template([{"role": "assistant", "content": choice}])
+                    for choice in doc.choices
+                ]

             # Constructing the requests
             docs[TaskExampleId(cur_task_name, doc_id_seed)] = doc
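This last hunk implements the second half of the commit message: when a chat template is in use, each multiple-choice continuation is wrapped in the assistant template before scoring, so the choices match how the model would actually emit an answer. An illustrative sketch of what the call does; the model name is an arbitrary example, transformers' apply_chat_template tokenizes by default (so each formatted choice comes back as a list of token ids), and some templates reject a conversation that opens with a lone assistant turn:

from transformers import AutoTokenizer

# Any tokenizer whose chat template accepts a standalone assistant message.
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")

choices = [" Paris", " London", " Berlin"]
formatted = [
    tokenizer.apply_chat_template([{"role": "assistant", "content": choice}])
    for choice in choices
]
# Each entry is now the token ids of something like "<|assistant|>\n Paris</s>"
# rather than the raw continuation string.
print(formatted[0])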
