Skip to content

Commit ca6f1b9

Browse files
committed
Evals
1 parent 7bb3b8d commit ca6f1b9

File tree

3 files changed

+58
-0
lines changed

3 files changed

+58
-0
lines changed

evals/highlights.ts

+52
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import { type Example, Run } from "langsmith";
2+
import { graph } from "../src/agent/index";
3+
import { evaluate, EvaluationResult } from "langsmith/evaluation";
4+
import "dotenv/config";
5+
6+
/**
 * Target function handed to the evaluation harness: runs the agent graph on
 * one dataset input and resolves with whatever the graph invocation returns.
 *
 * @param input - Input record forwarded unchanged to `graph.invoke`.
 * @returns The value resolved by `graph.invoke`.
 */
const runGraph = async (input: Record<string, any>): Promise<Record<string, any>> => {
  // Configure the graph to interrupt once the "updateArtifact" node has run.
  graph.interruptAfter = ["updateArtifact"];
  const graphOutput = await graph.invoke(input);
  return graphOutput;
};
11+
12+
/**
 * Evaluator that checks whether a run replaced exactly the highlighted span
 * of the first input artifact with the expected generation.
 *
 * The expected artifact is rebuilt as: original text before the highlight +
 * `expectedGeneration` + original text after the highlight. It is then
 * compared for strict equality with the content of the run's first artifact.
 *
 * @param run - The run whose `outputs.artifacts[0].content` is scored.
 * @param example - Dataset example providing `inputs.highlighted`,
 *   `inputs.artifacts` and `outputs.expectedGeneration`.
 * @returns A result keyed `"correct_generation"` whose score is `true` only
 *   when the generated artifact matches the expected artifact exactly.
 * @throws Error when the example, its outputs, its first artifact's content,
 *   its highlighted character range, the run outputs, or the run's first
 *   artifact is missing.
 */
const evaluateHighlights = (run: Run, example?: Example): EvaluationResult => {
  if (!example) {
    throw new Error("No example provided");
  }
  if (!example.outputs) {
    throw new Error("No example outputs provided");
  }
  if (!run.outputs) {
    throw new Error("No run outputs provided");
  }

  const { expectedGeneration } = example.outputs;
  const { highlighted, artifacts } = example.inputs ?? {};

  // Fail with a descriptive message instead of an opaque TypeError when the
  // dataset example is malformed.
  const originalContent = artifacts?.[0]?.content;
  if (typeof originalContent !== "string") {
    throw new Error("No example artifact content provided");
  }
  // `slice` treats an undefined index as "no bound", which would silently
  // build the wrong expected text, so require both indices to be numbers.
  if (
    typeof highlighted?.startCharIndex !== "number" ||
    typeof highlighted?.endCharIndex !== "number"
  ) {
    throw new Error("No highlighted character range provided");
  }

  const generatedArtifactEntry = run.outputs.artifacts?.[0];
  if (generatedArtifactEntry == null) {
    throw new Error("No run artifacts provided");
  }

  const expectedGenerationStart = originalContent.slice(0, highlighted.startCharIndex);
  const expectedGenerationEnd = originalContent.slice(highlighted.endCharIndex);
  const fullExpectedArtifact = `${expectedGenerationStart}${expectedGeneration}${expectedGenerationEnd}`;

  return {
    key: "correct_generation",
    score: generatedArtifactEntry.content === fullExpectedArtifact,
  };
};
42+
43+
/**
 * Runs the highlight evaluation: executes `runGraph` over the
 * "open-canvas-deterministic-highlights" dataset and scores every run with
 * `evaluateHighlights`, under the experiment prefix "Highlight generation".
 *
 * @returns A promise that settles once `evaluate` has finished.
 */
async function runHighlightEval(): Promise<void> {
  const datasetName = "open-canvas-deterministic-highlights";
  await evaluate(runGraph, {
    data: datasetName,
    evaluators: [evaluateHighlights],
    experimentPrefix: "Highlight generation",
  });
}

// Script entry point. Handle rejection explicitly rather than leaving a
// floating promise, so a failed evaluation is logged and the process exits
// with a non-zero status.
runHighlightEval().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});

package.json

+1
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
"@vercel/kv": "^2.0.0",
4141
"class-variance-authority": "^0.7.0",
4242
"clsx": "^2.1.1",
43+
"dotenv": "^16.4.5",
4344
"js-cookie": "^3.0.5",
4445
"langsmith": "^0.1.61",
4546
"lucide-react": "^0.441.0",

yarn.lock

+5
Original file line numberDiff line numberDiff line change
@@ -2066,6 +2066,11 @@ doctrine@^3.0.0:
20662066
dependencies:
20672067
esutils "^2.0.2"
20682068

2069+
dotenv@^16.4.5:
2070+
version "16.4.5"
2071+
resolved "https://registry.yarnpkg.com/dotenv/-/dotenv-16.4.5.tgz#cdd3b3b604cb327e286b4762e13502f717cb099f"
2072+
integrity sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==
2073+
20692074
double-ended-queue@^2.1.0-0:
20702075
version "2.1.0-0"
20712076
resolved "https://registry.yarnpkg.com/double-ended-queue/-/double-ended-queue-2.1.0-0.tgz#103d3527fd31528f40188130c841efdd78264e5c"

0 commit comments

Comments
 (0)