|
| 1 | +import os |
| 2 | + |
| 3 | +from dotenv import load_dotenv |
| 4 | +from openai import OpenAI |
| 5 | + |
| 6 | +from datachain import C, DataChain, DataModel |
| 7 | + |
# Instruction text placed ahead of each dialog in the message that
# eval_dialog sends to the model. It asks for a one-word verdict
# (Success or Failure) plus a short reason.
PROMPT = """
Was this dialog successful? Put result as a single word: Success or Failure.
Explain the reason in a few words.
"""

# Load environment variables from the local ".env.test" file before anything
# below reads them (HF_API_TOKEN is read from os.environ further down).
# NOTE(review): presumably this file also supplies the OpenAI credentials
# used by OpenAI() — confirm.
load_dotenv(".env.test")
| 14 | + |
| 15 | + |
# Structured verdict for one dialog. The class is passed to OpenAI as
# `response_format` and is also the return type of eval_dialog.
# NOTE(review): documented with comments rather than a class docstring on
# purpose — Pydantic may copy a docstring into the JSON schema that is sent
# to the model; confirm before adding one.
class DialogEval(DataModel):
    # Verdict word: the prompt asks for "Success" or "Failure"; eval_dialog
    # uses "Error" when no parsed reply is available.
    result: str
    # Short explanation accompanying the verdict.
    reason: str
| 19 | + |
| 20 | + |
def eval_dialog(user_input: str, bot_response: str) -> DialogEval:
    """Ask an OpenAI model whether a single user/bot exchange succeeded.

    ``PROMPT`` followed by the exchange is sent as one user message, and the
    reply is parsed into a ``DialogEval`` via ``response_format``.

    Args:
        user_input: The user's side of the exchange.
        bot_response: The bot's reply to that input.

    Returns:
        The parsed ``DialogEval`` when the reply carries one. Otherwise a
        placeholder with ``result="Error"`` whose ``reason`` is the model's
        refusal text when it supplied one, or "Failed to parse response."
        when it did not.
    """
    # No arguments are passed, so the client relies on the SDK's defaults.
    # NOTE(review): presumably credentials come from the environment loaded
    # at import time — confirm.
    client = OpenAI()

    completion = client.beta.chat.completions.parse(
        model="gpt-4o-2024-08-06",
        messages=[
            {
                "role": "user",
                "content": f"{PROMPT}\n\nUser: {user_input}\nBot: {bot_response}",
            },
        ],
        response_format=DialogEval,
    )

    message = completion.choices[0].message
    # Explicit None check: "no parsed payload" is the condition of interest,
    # not the truthiness of the model instance.
    if message.parsed is not None:
        return message.parsed

    # Keep the model's own refusal text when there is one, so the saved row
    # explains why no verdict was produced instead of a generic message.
    refusal = getattr(message, "refusal", None)
    return DialogEval(
        result="Error",
        reason=refusal or "Failed to parse response.",
    )
| 40 | + |
| 41 | + |
# Score every dialog with OpenAI, running the calls in parallel (parallel=10).
# eval_dialog returns a Pydantic model, which DataChain can understand and
# serialize.
# The scored rows are then saved to Hugging Face as CSV.
dialogs = DataChain.from_csv(
    "hf://datasets/infinite-dataset-hub/MobilePlanAssistant/data.csv"
)
scored = dialogs.settings(parallel=10).map(response=eval_dialog)
scored.to_csv(
    "hf://datasets/dvcorg/test-datachain-llm-eval/data.csv",
    # The token is read from the environment at this point; a missing
    # HF_API_TOKEN raises KeyError.
    fs_kwargs={"token": os.environ["HF_API_TOKEN"]},
)
| 56 | + |
# Load the CSV written above, keep only the rows whose verdict is "Failure",
# and display three of them.
saved = DataChain.from_csv(
    "hf://datasets/dvcorg/test-datachain-llm-eval/data.csv",
    # Read this column as a string.
    # NOTE(review): presumably a workaround for type inference on this
    # column — confirm.
    column_types={"source.file.location": "str"},
)
failures = saved.filter(C("response_result") == "Failure")
failures.show(3)
0 commit comments