GPT生成数据集_11_27 copy.py
import os
import json

from tqdm import tqdm
import openai

# Earlier version ran a local ChatGLM-v2 model instead of the OpenAI API:
# model_path = '/raid/sjl/model/ChatGLM-v2'
# os.environ['CUDA_VISIBLE_DEVICES'] = '1'
# tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
# model = AutoModel.from_pretrained(model_path, trust_remote_code=True).half().cuda()
# model = model.eval()

# Equivalent request via the client-style OpenAI SDK, kept for reference:
# from openai import OpenAI
# client = OpenAI()
# response = client.chat.completions.create(
#     model="gpt-4-1106-preview",
#     messages=[
#         {"role": "system", "content": "You are a helpful assistant."},
#         {"role": "user", "content": "Who won the world series in 2020?"},
#         {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."},
#         {"role": "user", "content": "Where was it played?"}
#     ]
# )
def chat_with_model(templ):
    """Send a single-turn prompt to the OpenAI chat endpoint and return the reply text."""
    # Credentials: the SDK reads the OPENAI_API_KEY environment variable.
    # max_length = 2048  # unused
    response = openai.chat.completions.create(
        model="gpt-3.5-turbo-1106",
        # stream=True,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": templ}
        ],
    )
    # Streaming variant, kept for reference:
    # out = ''
    # for i in response:
    #     try:
    #         out += i['choices'][0]['delta']['content']
    #     except Exception:
    #         # The last chunk's delta dict is empty, so skip it.
    #         break
    # print(out)
    # return out
    return response.choices[0].message.content
# Example usage
templ = "Hello, how are you?"
response = chat_with_model(templ)
print(response)
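
# Optional: a minimal retry sketch. Long generation runs often hit transient
# API errors, so a wrapper like this could stand in for direct
# chat_with_model calls. Hypothetical helper, not used by the loop below.
import time

def chat_with_model_retry(templ, retries=3, delay=5):
    """Retry chat_with_model a few times with a fixed backoff (sketch)."""
    for attempt in range(retries):
        try:
            return chat_with_model(templ)
        except Exception:
            if attempt == retries - 1:
                raise  # give up after the last attempt
            time.sleep(delay)  # wait before retrying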
def load_json_dataset(jsonl_file_path):
    """Load a JSON-lines file into a list of dicts (one object per line)."""
    data = []
    with open(jsonl_file_path, 'r', encoding='utf-8') as jsonl_file:
        for line in jsonl_file:
            data.append(json.loads(line))
    return data
idx = 0
# response_list = []  # unused; records are written to disk incrementally instead
data = {}
# Raw strings keep '\' in Windows paths from being read as escape sequences.
data_path = r'C:\project\socialIQa_v1.4_trn.jsonl'
file_name = r'C:\project\socialIQa_v1.4_trn_CQAF_11_27-3.jsonl'  # the .jsonl extension marks the output as JSON-lines
dataset = load_json_dataset(data_path)

for example in tqdm(dataset, desc="Testing Progress"):
    idx += 1
    # Resume after the first 6068 examples and stop at example 10000.
    if idx <= 6068:
        continue
    if idx == 10000:
        break
templ = f"""
I have a logic reasoning question that requires evaluating three possible answers based on a given scenario.
Please provide a rationality score for each answer, ranging from 0 to 1, based on their relevance to the scenario.
A score of 0 indicates complete irrelevance, while 1 indicates complete relevance.
Importantly, ensure that the marked correct answer (label answer) receives the highest score.
Avoid using extreme values (i.e., 0 or 1) unless an answer is completely irrelevant or completely relevant to the question.
Below are the scenario, question, and label answer: {example}
Please explain the likelihood of what will happen and provide a probability score.
Once you have derived the explanation and the score, you must output in the following format:
'''
["context:":"{example["context"]}","question:":"{example["question"]}","A":"{example["answerA"]}","Explanation_A":"$The explanation you provided.","Scores_A":"$The score you assigned.","B":"{example["answerB"]}","Explanation_B":"$The explanation you provided.","Scores_B":"$The score you assigned.","C":"{example["answerC"]}","Explanation_C":"$The explanation you provided.","Scores_C":"$The score you assigned.","correct": "{example["correct"]}"]
'''
"""
    response = str(chat_with_model(templ))
    data = {"id": idx, "response": response}
    # response_list.append(data)
    # Append each record as one line of JSON so the file stays valid JSONL
    # (indent=4 would spread a record across multiple lines).
    with open(file_name, 'a', encoding='utf-8') as f:
        # for data_dict in response_list:
        json.dump(data, f, ensure_ascii=False)
        f.write('\n')
    # json.dump(response_list, file_object, indent=4, ensure_ascii=False)
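
# Sketch: reading the generated records back for downstream use (assumes the
# output holds one JSON object per line, as written above). Kept commented so
# the script's behavior is unchanged.
# with open(file_name, 'r', encoding='utf-8') as f:
#     records = [json.loads(line) for line in f]
# print(f"{len(records)} responses generated")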