Skip to content

Commit 8c34b87

Browse files
committed
add example: HF -> OpenAI -> HF -> analyze
1 parent e9282bb commit 8c34b87

File tree

1 file changed

+65
-0
lines changed

1 file changed

+65
-0
lines changed
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
import os
2+
3+
from dotenv import load_dotenv
4+
from openai import OpenAI
5+
6+
from datachain import C, DataChain, DataModel
7+
8+
# Instruction text that eval_dialog places ahead of each dialog in its request.
# It asks for a one-word verdict plus a short reason, mirroring the two fields
# of DialogEval. The literal begins and ends with a newline.
PROMPT = """
Was this dialog successful? Put result as a single word: Success or Failure.
Explain the reason in a few words.
"""
12+
13+
# Load variables from the local ".env.test" file into the process environment.
# NOTE(review): presumably this is where HF_API_TOKEN (read below via os.environ)
# and the OpenAI credentials come from — confirm against the actual .env.test.
load_dotenv(".env.test")
14+
15+
16+
# Structured verdict for a single dialog. eval_dialog hands this class to the
# OpenAI client as the response format and returns one instance per call.
# Documented with comments rather than a docstring so the class definition
# itself stays exactly as it was.
class DialogEval(DataModel):
    # One-word outcome: the prompt asks for "Success" or "Failure", and
    # eval_dialog fills in "Error" when no parsed reply is available.
    result: str
    # Short explanation of the verdict.
    reason: str
19+
20+
21+
def eval_dialog(user_input: str, bot_response: str) -> DialogEval:
    """Ask OpenAI whether one user/bot exchange was successful.

    Args:
        user_input: The user's side of the exchange.
        bot_response: The bot's reply to that input.

    Returns:
        The DialogEval parsed from the model reply, or a DialogEval with
        result "Error" when the reply carries no parsed content.
    """
    api = OpenAI()

    # A single user message: the shared prompt followed by the dialog itself.
    user_message = {
        "role": "user",
        "content": f"{PROMPT}\n\nUser: {user_input}\nBot: {bot_response}",
    }
    response = api.beta.chat.completions.parse(
        model="gpt-4o-2024-08-06",
        messages=[user_message],
        response_format=DialogEval,
    )

    reply = response.choices[0].message
    if not reply.parsed:
        # Nothing parsed: return a placeholder row instead of raising.
        return DialogEval(result="Error", reason="Failed to parse response.")
    return reply.parsed
40+
41+
42+
# Score every dialog with OpenAI, running eval_dialog in parallel.
# eval_dialog returns a Pydantic model that DataChain can understand and serialize.
# The scored rows are then saved to Hugging Face as CSV.
scored_dialogs = (
    DataChain.from_csv(
        "hf://datasets/infinite-dataset-hub/MobilePlanAssistant/data.csv"
    )
    .settings(parallel=10)
    .map(response=eval_dialog)
)
scored_dialogs.to_csv(
    "hf://datasets/dvcorg/test-datachain-llm-eval/data.csv",
    fs_kwargs={"token": os.environ["HF_API_TOKEN"]},
)
56+
57+
# Load the saved evaluations again, keep only the failed dialogs, and show three.
saved_evals = DataChain.from_csv(
    "hf://datasets/dvcorg/test-datachain-llm-eval/data.csv",
    column_types={"source.file.location": "str"},
)
failed_dialogs = saved_evals.filter(C("response_result") == "Failure")
failed_dialogs.show(3)

0 commit comments

Comments
 (0)