github · sgoedecke · Jun 5, 2025 · Jun 5, 2025 · Jun 5, 2025
@@ -80,6 +80,8 @@ gh models eval my_prompt.prompt.yml --json
 
 The JSON output includes detailed test results, evaluation scores, and summary statistics that can be processed by other tools or CI/CD pipelines.
 
+Here's a sample GitHub Action that uses the `eval` command to automatically run the evals in any PR that updates a prompt file: [evals_action.yml](/examples/evals_action.yml).
+
 Learn more about `.prompt.yml` files here: [Storing prompts in GitHub repositories](https://docs.github.com/github-models/use-github-models/storing-prompts-in-github-repositories).
 
 ## Notice

@@ -0,0 +1,92 @@
+# This is a sample GitHub Actions workflow file that runs prompt evaluations
+# on pull requests when prompt files are changed. It uses the `gh-models` CLI to evaluate prompts
+# and comments the results back on the pull request.
+# The workflow is triggered by pull requests that modify any `.prompt.yml` files.
+
+
+name: Run evaluations for changed prompts
+
+permissions:
+  models: read
+  contents: read
+  pull-requests: write
+
+on:
+  pull_request:
+    paths:
+      - '**/*.prompt.yml'
+
+jobs:
+  evaluate-model:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Setup gh-models
+        run: gh extension install github/gh-models
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Find changed prompt files
+        id: find-prompts
+        run: |
+          # Get the list of changed files that match *.prompt.yml pattern
+          changed_prompts=$(git diff --name-only origin/${{ github.base_ref }}..HEAD | grep '\.prompt\.yml$' | head -1)
+
+          if [[ -z "$changed_prompts" ]]; then
+            echo "No prompt files found in the changes"
+            echo "skip_evaluation=true" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+
+          echo "first_prompt=$changed_prompts" >> "$GITHUB_OUTPUT"
+          echo "Found changed prompt file: $changed_prompts"
+
+      - name: Run model evaluation
+        id: eval
+        run: |
+          set -e
+          PROMPT_FILE="${{ steps.find-prompts.outputs.first_prompt }}"
+          echo "## Model Evaluation Results" >> "$GITHUB_STEP_SUMMARY"
+          echo "Evaluating: $PROMPT_FILE" >> "$GITHUB_STEP_SUMMARY"
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+
+          if gh models eval "$PROMPT_FILE" > eval_output.txt 2>&1; then
+            echo "✅ All evaluations passed!"   >> "$GITHUB_STEP_SUMMARY"
+            cat  eval_output.txt               >> "$GITHUB_STEP_SUMMARY"
+            echo "eval_status=success"         >> "$GITHUB_OUTPUT"
+          else
+            echo "❌ Some evaluations failed!" >> "$GITHUB_STEP_SUMMARY"
+            cat  eval_output.txt               >> "$GITHUB_STEP_SUMMARY"
+            echo "eval_status=failure"         >> "$GITHUB_OUTPUT"
+            exit 1
+          fi
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Comment on PR with evaluation results
+        if: github.event_name == 'pull_request'
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const fs = require('fs');
+            const output = fs.readFileSync('eval_output.txt', 'utf8');
+            const evalStatus = '${{ steps.eval.outputs.eval_status }}';
+            const statusMessage = evalStatus === 'success' 
+              ? '✅ Evaluation passed' 
+              : '❌ Evaluation failed';
+
+            github.rest.issues.createComment({
+              issue_number: context.issue.number,
+              owner:        context.repo.owner,
+              repo:         context.repo.repo,
+              body: `## ${statusMessage}
+
+              \`\`\`
+              ${output}
+              \`\`\`
+
+              Review the evaluation results above for more details.`
+            });