|
| 1 | +import { type Example, Run } from "langsmith"; |
| 2 | +import { graph } from "../src/agent/index"; |
| 3 | +import { evaluate, EvaluationResult } from "langsmith/evaluation"; |
| 4 | +import "dotenv/config"; |
| 5 | + |
/**
 * Evaluation target: runs the agent graph once for a single dataset input.
 *
 * Before invoking, the shared `graph` is configured to interrupt after the
 * "updateArtifact" node.
 *
 * @param input - The example inputs passed straight through to `graph.invoke`.
 * @returns Whatever `graph.invoke` resolves to for this input.
 */
const runGraph = async (input: Record<string, any>): Promise<Record<string, any>> => {
  // Interrupt after updating the artifact.
  graph.interruptAfter = ["updateArtifact"];
  const graphOutput = await graph.invoke(input);
  return graphOutput;
};
| 11 | + |
/**
 * Deterministic evaluator for highlighted-text regeneration.
 *
 * The expected artifact is rebuilt from the example: the content of the first
 * input artifact, with the span between `highlighted.startCharIndex` and
 * `highlighted.endCharIndex` replaced by `expectedGeneration`. The run passes
 * only if the content of its first output artifact equals that string exactly.
 *
 * @param run - The run whose `outputs.artifacts[0].content` is being scored.
 * @param example - The dataset example supplying `inputs.highlighted`,
 *   `inputs.artifacts` and `outputs.expectedGeneration`.
 * @returns A result keyed "correct_generation" with a boolean score.
 * @throws Error if the example, its outputs, the run outputs, the highlight
 *   range, or either side's first artifact is missing.
 */
const evaluateHighlights = (run: Run, example?: Example): EvaluationResult => {
  if (!example) {
    throw new Error("No example provided");
  }
  if (!example.outputs) {
    throw new Error("No example outputs provided");
  }
  if (!run.outputs) {
    throw new Error("No run outputs provided");
  }

  const { expectedGeneration } = example.outputs;
  const { highlighted, artifacts } = example.inputs;

  // Fail with a descriptive message instead of an opaque TypeError when the
  // dataset row is malformed.
  if (!highlighted) {
    throw new Error("No highlighted range provided in example inputs");
  }
  const originalContent = artifacts?.[0]?.content;
  if (typeof originalContent !== "string") {
    throw new Error("No artifact content provided in example inputs");
  }

  // Everything outside the highlighted span must be left untouched.
  const expectedGenerationStart = originalContent.slice(0, highlighted.startCharIndex);
  const expectedGenerationEnd = originalContent.slice(highlighted.endCharIndex);
  const fullExpectedArtifact = `${expectedGenerationStart}${expectedGeneration}${expectedGenerationEnd}`;

  const generatedArtifact = run.outputs.artifacts?.[0];
  if (!generatedArtifact) {
    throw new Error("No artifacts provided in run outputs");
  }

  return {
    key: "correct_generation",
    score: generatedArtifact.content === fullExpectedArtifact,
  };
};
| 42 | + |
/**
 * Runs the highlight evaluation: every example in the
 * "open-canvas-deterministic-highlights" dataset is fed through `runGraph`
 * and scored by `evaluateHighlights`, under the "Highlight generation"
 * experiment prefix.
 */
async function runHighlightEval() {
  const evaluationOptions = {
    data: "open-canvas-deterministic-highlights",
    evaluators: [evaluateHighlights],
    experimentPrefix: "Highlight generation",
  };
  await evaluate(runGraph, evaluationOptions);
}
| 51 | + |
// Script entry point. The promise is handled explicitly so that a failed
// evaluation is logged and reflected in the process exit code rather than
// surfacing as an unhandled rejection.
runHighlightEval().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
0 commit comments