Skip to content

Commit 6075c2c

Browse files
committed
update
Signed-off-by: minmingzhu <minming.zhu@intel.com>
1 parent f0d94d1 commit 6075c2c

File tree

2 files changed

+43
-36
lines changed

2 files changed

+43
-36
lines changed

llm_on_ray/common/dataprocesser/general_processer.py

Lines changed: 23 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -132,15 +132,19 @@ def create_data(self, examples):
132132
)
133133
else:
134134
new_messages = [
135+
{
136+
"role": "system",
137+
"content": INTRO_BLURB + "\n",
138+
},
135139
{
136140
"role": "user",
137141
"content": examples["instruction"]
138-
+ "\n\n"
142+
+ "\n"
139143
+ INPUT_KEY
140144
+ examples["context"]
141-
+ "\n\n",
145+
+ "\n",
142146
},
143-
{"role": "assistant", "content": examples["response"] + "\n\n"},
147+
{"role": "assistant", "content": examples["response"] + "\n"},
144148
]
145149

146150
return new_messages
@@ -162,7 +166,6 @@ def tokenize_func(self, tokenizer, message):
162166
message,
163167
tokenize=False,
164168
)
165-
print(new_tokenizer)
166169
return tokenizer(
167170
new_tokenizer, add_special_tokens=False, max_length=self.config.get("max_length")
168171
)
@@ -251,21 +254,9 @@ def prepare_dataloader(self, tokenizer, dataset):
251254

252255

253256
class SlimOrcaDataPreprocess(ChatDataPreprocess):
254-
chat_template = (
255-
"{% for message in messages %}"
256-
"{% if message['role'] == 'system' %}"
257-
"{{ '### System: ' + message['content'] }}"
258-
"{% elif message['role'] == 'user' %}"
259-
"{{ '### User: ' + message['content'] }}"
260-
"{% elif message['role'] == 'assistant' %}"
261-
"{{ '### Assistant: ' + message['content'] }}"
262-
"{% endif %}"
263-
"{% endfor %}"
264-
)
265257

266258
def __init__(self, config):
267259
super().__init__(config)
268-
self.config["chat_template"] = self.chat_template
269260
self.default_system = "You are a helpful, respectful and honest assistant."
270261

271262
def create_data(self, data):
@@ -286,22 +277,26 @@ def create_data(self, data):
286277
examples[conv[j]["from"]] = conv[j]["value"]
287278
examples[conv[j + 1]["from"]] = conv[j + 1]["value"]
288279

289-
new_messages = [
290-
{"role": "system", "content": examples["system"] + "\n"},
291-
{
292-
"role": "user",
293-
"content": examples["human"] + "\n",
294-
},
295-
{"role": "assistant", "content": examples["gpt"] + "\n"},
296-
]
297280
if self.config.get("gpt_base_model"):
298281
if examples["human"]:
299-
return SLIMORCA_PROMPT_DICT["prompt_with_input"].format(
300-
system=examples["system"], user=examples["human"], gpt=examples["gpt"]
282+
return PROMPT_WITH_INPUT_FORMAT.format(
283+
instruction=examples["system"], response=examples["gpt"], input=examples["human"]
301284
)
302285
else:
303-
return SLIMORCA_PROMPT_DICT["prompt_with_input"].format(
304-
system=examples["human"], gpt=examples["gpt"]
286+
return PROMPT_NO_INPUT_FORMAT.format(
287+
instruction=examples["system"], response=examples["gpt"]
305288
)
306289
else:
290+
new_messages = [
291+
{"role": "system", "content": INTRO_BLURB + "\n"},
292+
{
293+
"role": "user",
294+
"content": examples["system"]
295+
+ "\n"
296+
+ INPUT_KEY
297+
+ examples["human"]
298+
+ "\n",
299+
},
300+
{"role": "assistant", "content": examples["gpt"] + "\n"},
301+
]
307302
return new_messages

tests/finetune/test_chat_template.py

Lines changed: 20 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -27,14 +27,26 @@ def setUp(self):
2727
"gpt_base_model": True,
2828
"max_length": 512,
2929
"trust_remote_code": False,
30-
"chat_template": "Below is an instruction that describes a task. Write a response that appropriately "
31-
"completes the request\n {% if messages[0]['role'] == 'system' %}{{ raise_exception("
32-
"'System role not supported') }}{% endif %}{% for message in messages %}{% if (message["
33-
"'role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles "
34-
"must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] "
35-
"== 'user' %}{{ '### Instruction: ' + message['content'] }}{% elif message['role'] == "
36-
"'assistant' %}{{ '### Response: ' + message['content'] }}{% endif %}{% endfor %}{{'### "
37-
"End \n'}}",
30+
"chat_template": "{% if messages[0]['role'] == 'system' %}"
31+
"{% set loop_messages = messages[1:] %}"
32+
"{% set system_message = messages[0]['content'] %}"
33+
"{% else %}"
34+
"{% set loop_messages = messages %}"
35+
"{% set system_message = false %}"
36+
"{% endif %}"
37+
"{% for message in loop_messages %}"
38+
"{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}"
39+
"{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}"
40+
"{% endif %}"
41+
"{% if loop.index0 == 0 and system_message %}"
42+
"{{ system_message }}"
43+
"{% endif %}"
44+
"{% if message['role'] == 'user' %}"
45+
"{{ '### Instruction: ' + message['content'] + eos_token }}"
46+
"{% elif message['role'] == 'assistant' %}"
47+
"{{ '### Response:' + message['content'] + eos_token }}"
48+
"{% endif %}{% endfor %}"
49+
"{{'### End \n'}}",
3850
}
3951
self.processer = ChatDataPreprocess(self.config)
4052

0 commit comments

Comments
 (0)