
Commit 63ac012

Add tool learning evaluation code and examples
1 parent 0baa886 commit 63ac012

File tree

4 files changed (+369, -3 lines)


README_zh.md

Lines changed: 3 additions & 3 deletions
@@ -253,9 +253,9 @@ answer: D
 explanation: According to the analysis, the value 265 at 12:00 in the time series is clearly larger than the surrounding data, i.e. there is a sudden spike, so D is the correct answer.
 ```
 #### 🔧 ToolLearning sample example
-The data format of the tool-learning samples is compatible with OpenAI's function-calling format.<br>
-For details, see [tool_learning_info_zh.md](resources/tool_learning_info_zh.md).<br>
-For details of the tool-learning evaluation procedure, see [tool_learning_evalution.md](resources/tool_learning_evalution.md).<br>
+The data format of the tool-learning samples is compatible with OpenAI's function-calling format.
+For details, see [tool_learning_info_zh.md](resources/tool_learning_info_zh.md).
+For details of the tool-learning evaluation procedure, see [tool_learning_evalution.md](resources/tool_learning_evalution.md).
 <br>
 
 ## 🚀 How to run the evaluation

resources/tool_learning_evalution.md

Lines changed: 224 additions & 0 deletions
@@ -0,0 +1,224 @@
## Tool learning dataset evaluation tutorial

### chatml integration

To run the evaluation on your own model in huggingface format, the overall procedure consists of the following steps:
1. Write the create_prompts function of ~/evals/FuncCallEvalution
2. Write the relevant functions of ~/models/base_model
3. Register the model and the evaluation function
4. Run the test script

If the model needs no special handling after loading, and its input does not need to be converted into a specific format (e.g. the chatml format or another human-bot format), jump straight to step 4 and launch the test directly.

#### 1. Write the create_prompts function

Step 1 customizes how prompts are built from the tool-learning samples: inherit the `ToolEvalution` class in `~/evals/FuncCallEvalution` and override its `create_prompts` function, for example:
```python
import json


class FuncCallEvalution(ToolEvalution):

    def create_prompts(self, func_call_datas):
        '''
        datas: [
            {
                "instruction": history[his_idx],
                "input": "",
                "output": output,
                "history": [(human_content, ai_content), (), ()],
                "functions": tools
            }
        ]
        '''
        system_content = '''CodeFuse是一个面向研发领域的智能助手,旨在中立的、无害的帮助用户解决开发相关的问题,所有的回答均使用Markdown格式返回。
你能利用许多工具和功能来完成给定的任务,在每一步中,你需要分析当前状态,并通过执行函数调用来确定下一步的行动方向。你可以进行多次尝试。如果你计划连续尝试不同的条件,请每次尝试一种条件。若给定了Finish函数,则以Finish调用结束,若没提供Finish函数,则以不带function_call的对话结束。'''
        function_format = '''You are ToolGPT, you have access to the following APIs:\n{tools}'''

        func_call_train_datas = []
        history_error_cnt = 0
        funccall_error_cnt = 0

        for data in func_call_datas:
            tools = data["functions"]
            chatrounds = data["chatrounds"]

            # build the tool description that is prepended to the first user turn
            function_content = ""
            if len(tools) > 0:
                function_content = function_format.format(tools=json.dumps(tools, ensure_ascii=False, sort_keys=True))

            # flatten the chat rounds into an alternating user/assistant history
            history = []
            for i in chatrounds:
                if i["role"] == "system":
                    continue

                if i["role"] == "user":
                    history.append(("user", i["content"]))

                if i["role"] == "assistant":
                    if "function_call" in i:
                        if not isinstance(i["function_call"], dict):
                            funccall_error_cnt += 1
                            continue
                        content = "#function" + json.dumps({**{"content": i["content"]}, **i["function_call"]}, ensure_ascii=False)
                    else:
                        content = i["content"]
                    history.append(("assistant", content))

                if i["role"] == "function":
                    content = json.dumps({**{"content": i["content"]}, **{"name": i["name"]}}, ensure_ascii=False)
                    history.append(("user", content))

            history = [i[1] for i in history]
            history[0] = "\n".join([system_content, function_content, history[0]])

            # turn every assistant turn into one evaluation sample
            for his_idx in range(0, len(history), 2):
                output = history[his_idx + 1]

                if "#function" in output:
                    output = output.split("#function")[-1]

                try:
                    output = json.loads(output)
                except:
                    output = {"content": output}

                func_call_train_datas.append(
                    {
                        "instruction": history[his_idx],
                        "input": "",
                        "output": output,
                        "history": [history[:his_idx+2][i:i+2] for i in range(0, len(history[:his_idx]), 2)],
                        "functions": tools
                    },
                )
        return func_call_train_datas
```
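
For reference, a single entry of `func_call_datas` might look like the sample below. Only the `functions` / `chatrounds` structure is taken from the code above; the concrete function, arguments, and answers are invented for illustration:

```python
# A hypothetical tool-learning sample; the field layout mirrors what create_prompts reads,
# while the function name and values are made up.
sample = {
    "functions": [
        {
            "name": "get_fudan_university_scoreline",
            "description": "查询复旦大学某一年的录取分数线",
            "parameters": {
                "type": "object",
                "properties": {"year": {"type": "string"}},
                "required": ["year"]
            }
        }
    ],
    "chatrounds": [
        {"role": "user", "content": "帮我查一下2020年复旦大学的分数线"},
        {
            "role": "assistant",
            "content": None,
            "function_call": {"name": "get_fudan_university_scoreline", "arguments": "{\"year\": \"2020\"}"}
        },
        {"role": "function", "name": "get_fudan_university_scoreline", "content": "{\"文科一批\": 630, \"理科一批\": 650}"},
        {"role": "assistant", "content": "2020年复旦大学文科一批分数线为630分,理科一批为650分。"}
    ]
}
```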

#### 2. Write the model functions

If the model needs extra handling when it is loaded, or its input has to be converted into a specific format (e.g. the chatml format or another human-bot format), implement the relevant functions of `ToolModel` in `~/models/base_model`, in particular `load_model` and `generate`. An example:
```python
from transformers import AutoModelForCausalLM, AutoTokenizer


class ToolModel:
    def __init__(self, model_path: str, template: str, trust_remote_code=True, tensor_parallel_size=1, gpu_memory_utilization=0.25):
        self.model_path = model_path
        self.trust_remote_code = trust_remote_code
        self.tensor_parallel_size = tensor_parallel_size
        self.gpu_memory_utilization = gpu_memory_utilization
        self.load_model(self.model_path, self.trust_remote_code, self.tensor_parallel_size, self.gpu_memory_utilization)

    def generate(self, prompts: str, template: str = None, generate_configs: GenerateConfigs = None) -> list:
        '''produce the model's answers for the given prompts'''
        pass

    def generate_params(
        self, generate_configs: GenerateConfigs,
    ):
        '''turn the generation configs into keyword arguments'''
        kargs = generate_configs.dict()
        return kargs

    def load_model(self, model_path, trust_remote_code=True, tensor_parallel_size=1, gpu_memory_utilization=0.25):
        '''load the model and tokenizer'''
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_path, trust_remote_code=trust_remote_code)
        self.model = AutoModelForCausalLM.from_pretrained(self.model_path, device_map="auto", trust_remote_code=trust_remote_code).eval()

        # self.model = LLM(model=model_path, trust_remote_code=trust_remote_code, tensor_parallel_size=tensor_parallel_size, gpu_memory_utilization=gpu_memory_utilization)
```
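
The `generate` method above is only a stub. Below is a minimal sketch of what an override could look like for a plain HuggingFace causal LM; treating `max_new_tokens` as a field of `GenerateConfigs` is an assumption here, and the project's actual `QwenModel` / `InternlmModel` classes may build prompts and decode differently:

```python
class MyToolModel(ToolModel):
    def generate(self, prompts: str, template: str = None, generate_configs=None) -> list:
        # Hypothetical override: plain decoding on the causal LM loaded by load_model above.
        max_new_tokens = 512
        if generate_configs is not None:
            # max_new_tokens is assumed to be one of the GenerateConfigs fields
            max_new_tokens = self.generate_params(generate_configs).get("max_new_tokens", 512)
        inputs = self.tokenizer(prompts, return_tensors="pt").to(self.model.device)
        output_ids = self.model.generate(inputs.input_ids, max_new_tokens=max_new_tokens)
        # return only the newly generated text, as a list to match the declared return type
        return [self.tokenizer.decode(output_ids[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)]
```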

#### 3. Register the model and the eval function

Register the model in ~/models/__init__.py:
```python
from .base_model import ToolModel

__all__ = [
    "ToolModel",
]
```
Register the evaluation class in ~/evals/__init__.py:
```python
from .base_evalution import ToolEvalution
from .toolfill_evalution import ToolFillEvalution
from .toolparser_evalution import ToolParserEvalution
from .toolsummary_evalution import ToolSummaryEvalution
from .func_call_evalution import FuncCallEvalution


__all__ = [
    "ToolEvalution", "ToolFillEvalution", "ToolParserEvalution", "ToolSummaryEvalution", "FuncCallEvalution"
]
```

#### 4. Run the test script

Modify `datainfos` and `model_infos` in ~/src/qwen_eval_main.py:
```python
model_infos = [
    {"model_name": "", "template": "chatml", "model_path": "",
     "peft_path": "", "model_class": QwenModel}]

datainfos = [
    {"dataset_path": "~/fcdata_luban_zh_test.jsonl", "dataset_name": "fcdata_luban_zh", "tool_task": "func_call"},
    {"dataset_path": "~/test_datas/fcdata_zh_test_v1.jsonl", "dataset_name": "fcdata_zh", "tool_task": "func_call"},
]
```

Then run:
```Bash
python qwen_eval_main.py
```

<br>

### Non-chatml integration

To run the evaluation on your own model in huggingface format, the overall procedure consists of the following steps:
1. Write the relevant code in ~/getAssistantAns.py
2. Run the test script


#### 1. Write getAssistantAns (an example)
```python
class GetAssistantAns():
    # Adapt this code to your own inference needs

    def __init__(self, gpu_num=1):
        # "<your_huggingface_model_dir>" is a placeholder: point it at your own model
        model_name = "<your_huggingface_model_dir>"
        model = AutoModelForCausalLM.from_pretrained(model_name)
        device_list = []
        for gpu_idx in range(gpu_num):
            device_list.append(torch.device(f"cuda:{gpu_idx}"))

        # move the model to the chosen GPU device
        model.to(device_list[0])

    def gen_answer(self, chat_dict, gpu_index):
        # Run your own inference logic here and convert the result into the standard format.
        # Everything below is only a dummy example.
        import time
        print(os.environ["CUDA_VISIBLE_DEVICES"])
        time.sleep(1)
        rtn_dict1 = {
            "role": "assistant",
            "content": None,
            "function_call":
                {
                    "name": "get_fudan_university_scoreline",
                    "arguments": "{\n \"year\": \"2020\"\n}"
                }
        }

        rtn_dict2 = {
            "role": "assistant",
            "content": "2020年复旦大学的分数线如下:\n\n- 文科一批:630分\n- 文科二批:610分\n- 理科一批:650分\n- 理科二批:630分"
        }

        return random.choice([rtn_dict1, rtn_dict2])
```
#### 2. Run the test script

Modify `test_ans_file_list` in ~/src/opensource_functioncall_evalution.py:
```python
test_ans_file_list = [
    "fcdata_zh_test.jsonl"
]
```

Then run:
```Bash
python opensource_functioncall_evalution.py
```

src/getAssistantAns.py

Lines changed: 141 additions & 0 deletions
@@ -0,0 +1,141 @@
import os
import json
import random
import torch
from transformers import AutoModelForCausalLM


class GetAssistantAns():
    # Adapt this code to your own inference needs

    def __init__(self, gpu_num=1):
        # "<your_huggingface_model_dir>" is a placeholder: point it at your own model
        model_name = "<your_huggingface_model_dir>"
        model = AutoModelForCausalLM.from_pretrained(model_name)
        device_list = []
        for gpu_idx in range(gpu_num):
            device_list.append(torch.device(f"cuda:{gpu_idx}"))

        # move the model to the chosen GPU device
        model.to(device_list[0])

    def gen_answer(self, chat_dict, gpu_index):
        # Run your own inference logic here and convert the result into the standard format.
        # Everything below is only a dummy example.
        import time
        print(os.environ["CUDA_VISIBLE_DEVICES"])
        time.sleep(1)
        rtn_dict1 = {
            "role": "assistant",
            "content": None,
            "function_call":
                {
                    "name": "get_fudan_university_scoreline",
                    "arguments": "{\n \"year\": \"2020\"\n}"
                }
        }

        rtn_dict2 = {
            "role": "assistant",
            "content": "2020年复旦大学的分数线如下:\n\n- 文科一批:630分\n- 文科二批:610分\n- 理科一批:650分\n- 理科二批:630分"
        }

        return random.choice([rtn_dict1, rtn_dict2])

# ======================================================================
# The commented-out code below is a multi-GPU huggingface inference demo.
# Note: two threads, i.e. two GPUs, give the best throughput; adding more GPUs
# does not speed things up because the threads contend for resources.
# For higher throughput, deploy the model as a multi-GPU service and call that service instead.

# import os
# import re
# import json
# import random
# import torch
# import copy
# import transformers
# from transformers import AutoModelForCausalLM, CodeLlamaTokenizer, TextStreamer
# end_token_id = 2
# checkpoint = "<your_huggingface_model_dir>"
# tokenizer = CodeLlamaTokenizer.from_pretrained(checkpoint)
# torch.manual_seed(random.randint(0, 100000))
# temperature = 0.2
# top_p = 0.95
# top_k = 40
# repetition_penalty = 1.1
# output_len = 2048
# role_start = "[START]"
# role_end = "[END]"


# def change2traindata(fc_dict):
#     chatrounds_list = []
#     # insert your code
#     # eg
#     # chatrounds_list = [
#     #     {
#     #         "role": "system",
#     #         "content": "你好,我是小助手,我能帮你做什么呢?"
#     #     },
#     #     {
#     #         "role": "functionapis",
#     #         "content": "You are tool gpt, you can using following apis []"
#     #     },
#     #     {
#     #         "role": "user",
#     #         "content": "我想知道复旦大学的分数线"
#     #     },
#     #     {
#     #         "role": "bot",
#     #         "content": "#function{*****}"
#     #     },
#     #     {
#     #         "role": "function",
#     #         "content": ******
#     #     },
#     #     {
#     #         "role": "bot",
#     #         "content": "复旦大学分数线640"
#     #     }
#     # ]
#     return chatrounds_list


# def get_chatrounds_ids(chatrounds_list):
#     input_ids = []
#     for chatround in chatrounds_list:
#         input_ids += tokenizer.encode(role_start + chatround["role"] + role_end) + tokenizer.encode(chatround["content"], add_special_tokens=False) + [tokenizer.eos_token_id]
#     input_ids += tokenizer.encode(role_start + "bot" + role_end)
#     return input_ids


# class GetAssistantAns():
#     # Adapt this code to your own inference needs

#     def __init__(self, gpu_num=1):
#         print(checkpoint)
#         print("Loading model")
#         model = AutoModelForCausalLM.from_pretrained(checkpoint).half().eval()
#         device_list = [torch.device(f"cuda:{i}") for i in range(gpu_num)]
#         self.model_list = [copy.deepcopy(model.to(device)) for device in device_list]
#         print("Loading finish")

#     def gen_answer(self, chat_dict, gpu_index=0):
#         chatrounds_list = change2traindata(chat_dict)
#         input_ids = get_chatrounds_ids(chatrounds_list)
#         output_ids = self.model_list[gpu_index].generate(torch.tensor([input_ids]).to(self.model_list[gpu_index].device), max_new_tokens=output_len, num_beams=1, num_return_sequences=1, do_sample=True, temperature=temperature, top_p=top_p, eos_token_id=end_token_id, top_k=top_k, streamer=None, repetition_penalty=repetition_penalty, pad_token_id=10000)[0]
#         res = tokenizer.decode(output_ids[len(input_ids):-1])
#         save_dict = {"role": "assistant"}
#         if res.startswith("#function"):
#             try:
#                 res_dict = json.loads(re.sub("^#function", "", res))
#                 save_dict["content"] = res_dict["content"]
#                 save_dict["function_call"] = {}
#                 save_dict["function_call"]["name"] = res_dict["name"]
#                 save_dict["function_call"]["arguments"] = res_dict["arguments"]
#             except Exception as e:
#                 print(e)
#                 save_dict = {"role": "assistant"}
#                 save_dict["content"] = res
#         else:
#             save_dict["content"] = res
#         # print(save_dict)
#         return save_dict

src/qwen_eval_main.py

Lines changed: 1 addition & 0 deletions
@@ -5,6 +5,7 @@
 from src.models import QwenModel, ToolModel, InternlmModel
 from src.models.generate_configs import GenerateConfigs
 from src.prompts.base_prompts_config import TOOL_FILL_BASE_PROMPT, TOOL_PARSER_BASE_PROMPT, TOOL_SUMMARY_BASE_PROMPT
+from src.utils.jsonl_utils import save_to_jsonl_file
 
 import warnings
 import re

0 commit comments
