Skip to content

Update LLM benchmarks #14

Update LLM benchmarks

Update LLM benchmarks #14

# Workflow name.
name: Update LLM benchmarks

# Triggers:
#   - issue_comment: a comment was created on an issue or pull request.
#   - workflow_dispatch: manual run; the caller supplies the PR number.
on:
  issue_comment:
    # only run when the comment is first created
    types:
      - created
  workflow_dispatch:
    inputs:
      pr_number:
        description: "Pull Request Number"
        required: true
# Scopes granted to the workflow's GITHUB_TOKEN. The job reads PR metadata and
# posts comments through the issues API with this token; pushing results back
# to the PR branch uses a separate bot PAT secret instead.
# NOTE(review): commenting on a pull request may additionally require
# `pull-requests: write` - confirm against GitHub's token permission docs.
permissions:
  issues: write
  pull-requests: read
  contents: read
# Group benchmark runs per pull request; a run that is already in progress is
# never cancelled by a newer one.
# The PR number comes from the commented issue on `issue_comment` events and
# from the `pr_number` input on manual dispatch. Without the fallback, every
# manually dispatched run would land in the same group ("llm-benchmark-").
concurrency:
  group: llm-benchmark-${{ github.event.issue.number || github.event.inputs.pr_number }}
  cancel-in-progress: false
jobs:
  # Runs the LLM benchmark tool against a pull request and pushes the refreshed
  # results back to the PR branch.
  #
  # Runs when a comment on a pull request starts with `/run-llm-benchmark`, or
  # on manual dispatch with a `pr_number` input.
  run-llm-benchmark:
    if: |
      (github.event_name == 'issue_comment' && github.event.issue.pull_request && startsWith(github.event.comment.body, '/run-llm-benchmark')) ||
      (github.event_name == 'workflow_dispatch')
    runs-on: ubuntu-latest
    steps:
      # Authorize first so nothing is downloaded or executed on behalf of a
      # commenter who is not allowed to trigger benchmarks.
      - name: Check commenter permission
        if: github.event_name == 'issue_comment'
        uses: actions/github-script@v7
        with:
          script: |
            const user = context.payload.comment.user.login;
            const { data } = await github.rest.repos.getCollaboratorPermissionLevel({
              owner: context.repo.owner,
              repo: context.repo.repo,
              username: user,
            });
            // NOTE(review): `permission` may only ever report the coarse
            // levels (admin/write/read/none), in which case 'maintain' and
            // 'triage' never match here - confirm against the REST API docs.
            const allowed = new Set(['admin', 'maintain', 'write', 'triage']);
            if (!allowed.has(data.permission)) {
              core.setFailed(`User ${user} has permission '${data.permission}', not allowed to run benchmarks.`);
            }

      # Resolve the PR number from the triggering event and expose the PR head
      # details as step outputs for the later steps.
      - name: Load PR info
        id: pr
        uses: actions/github-script@v7
        with:
          script: |
            let prNumber;
            if (context.eventName === 'issue_comment') {
              prNumber = context.payload.issue.number;
            } else if (context.eventName === 'workflow_dispatch') {
              const raw = context.payload.inputs?.pr_number;
              // Accept plain decimal digits only.
              if (!raw || !/^\d+$/.test(raw)) {
                core.setFailed(`Invalid pr_number input: '${raw}'.`);
                return;
              }
              prNumber = Number(raw);
            } else {
              core.setFailed(`Unsupported event: ${context.eventName}`);
              return;
            }
            const { data: pr } = await github.rest.pulls.get({
              owner: context.repo.owner,
              repo: context.repo.repo,
              pull_number: prNumber,
            });
            // `head.repo` is null when the source repository no longer exists
            // (e.g. a deleted fork); fail clearly instead of with a TypeError.
            if (!pr.head.repo) {
              core.setFailed(`PR #${prNumber} has no head repository (was the fork deleted?).`);
              return;
            }
            core.setOutput('number', String(prNumber));
            core.setOutput('head_ref', pr.head.ref);
            core.setOutput('head_sha', pr.head.sha);
            core.setOutput('head_repo_full_name', pr.head.repo.full_name);
            core.setOutput('head_owner_type', pr.head.repo.owner.type); // "User"|"Organization"
            core.setOutput('maintainer_can_modify', String(pr.maintainer_can_modify));

      # Only for PRs whose head lives in another repository: explain on the PR
      # why results cannot be pushed, then fail the job.
      - name: Check fork pushability (and comment if not)
        if: steps.pr.outputs.head_repo_full_name != github.repository
        uses: actions/github-script@v7
        env:
          PR_NUMBER: ${{ steps.pr.outputs.number }}
          HEAD_OWNER_TYPE: ${{ steps.pr.outputs.head_owner_type }}
          MAINTAINER_CAN_MODIFY: ${{ steps.pr.outputs.maintainer_can_modify }}
        with:
          script: |
            const issue_number = Number(process.env.PR_NUMBER);
            const headOwnerType = process.env.HEAD_OWNER_TYPE;
            const canModify = process.env.MAINTAINER_CAN_MODIFY === 'true';
            if (headOwnerType === 'Organization') {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number,
                body: [
                  "I can’t push benchmark updates to this PR because it comes from an **organization-owned fork**.",
                  "GitHub doesn’t allow granting upstream maintainers push permissions to org-owned forks.",
                  "",
                  "Options:",
                  "- Reopen the PR from a **personal fork** with **Allow edits from maintainers** enabled, or",
                  "- A maintainer can apply the benchmark update on an internal branch."
                ].join("\n"),
              });
              core.setFailed("Org-owned fork PR is not pushable by maintainers.");
              return;
            }
            if (!canModify) {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number,
                body: [
                  "I can’t push benchmark updates to this PR branch until you enable **Allow edits from maintainers**.",
                  "Please check the box on the PR page, then re-comment `/run-llm-benchmark`.",
                  "See https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/allowing-changes-to-a-pull-request-branch-created-from-a-fork"
                ].join("\n"),
              });
              core.setFailed("maintainer_can_modify is false; author must enable 'Allow edits from maintainers'.");
            }

      - name: Install spacetime CLI
        run: curl -sSf https://install.spacetimedb.com | sh -s -- -y

      # The benchmark tool is built from a trusted ref, not from the PR.
      # TODO: set `ref` to master before merging.
      - name: Checkout master (build/install tool from trusted code)
        uses: actions/checkout@v4
        with:
          ref: bradley/llm-benchmark
          fetch-depth: 0
          persist-credentials: false

      - uses: dtolnay/rust-toolchain@stable

      - uses: Swatinem/rust-cache@v2

      - name: Install llm-benchmark tool from master
        run: |
          cargo install --path tools/xtask-llm-benchmark --locked
          command -v llm_benchmark

      # Replace the checkout with the PR head at the exact SHA read earlier.
      - name: Checkout PR head (branch)
        uses: actions/checkout@v4
        with:
          repository: ${{ steps.pr.outputs.head_repo_full_name }}
          ref: ${{ steps.pr.outputs.head_sha }}
          fetch-depth: 0
          persist-credentials: false

      - name: Run benchmark (with provider keys)
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: |
          llm_benchmark ci-quickfix
          llm_benchmark ci-check

      - name: Commit changes
        run: |
          git config user.name "spacetimedb-bot"
          git config user.email "spacetimedb-bot@users.noreply.github.com"
          # Prefer staging only the benchmark output area (adjust as needed)
          git add docs/llms
          # Nothing staged means nothing to commit; finish the step successfully.
          if git diff --cached --quiet; then
            exit 0
          fi
          git commit -m "Update LLM benchmark results"

      # The repository and branch names come from the pull request, so the PR
      # author controls them. They are passed through environment variables
      # rather than expanded into the script text, so the shell never parses
      # them as code.
      - name: Push back to PR branch (same repo or fork)
        env:
          GH_TOKEN: ${{ secrets.CLOCKWORK_LABS_BOT_PAT }}
          HEAD_REPO: ${{ steps.pr.outputs.head_repo_full_name }}
          HEAD_REF: ${{ steps.pr.outputs.head_ref }}
        run: |
          git remote set-url origin "https://x-access-token:${GH_TOKEN}@github.com/${HEAD_REPO}.git"
          # Name the branch ref explicitly so the destination cannot be
          # confused with a tag of the same name.
          git push origin "HEAD:refs/heads/${HEAD_REF}"