Update LLM benchmarks #14

Workflow file for this run

.github/workflows/llm-benchmark-update.yml at 6cb3c62

	name: Update LLM benchmarks

	# Triggers:
	#   - workflow_dispatch: started manually with the number of the PR to update.
	#   - issue_comment: fires for every newly created comment; the job's own `if`
	#     decides whether the comment is a benchmark request on a pull request.
	on:
	  workflow_dispatch:
	    inputs:
	      pr_number:
	        description: 'Pull Request Number'
	        required: true
	  issue_comment:
	    types: [created] # only run when the comment is first created

	# Scopes of the default GITHUB_TOKEN. The workflow posts explanatory comments
	# on pull requests, which needs write access to pull requests (the issues
	# scope alone only covers plain issues). Repository contents stay read-only
	# for this token.
	permissions:
	  contents: read
	  pull-requests: write
	  issues: write

	# One run at a time per PR. `github.event.issue.number` only exists for
	# issue_comment events, so fall back to the manual input; otherwise every
	# workflow_dispatch run would land in the same `llm-benchmark-` group.
	concurrency:
	  group: llm-benchmark-${{ github.event.issue.number || inputs.pr_number }}
	  cancel-in-progress: false

	jobs:
	  # Runs the LLM benchmark against a pull request and pushes the refreshed
	  # results back to the PR branch.
	  #
	  # Flow: authorize the requester -> resolve the PR -> verify the branch is
	  # pushable -> build the benchmark tool from the trusted branch -> check out
	  # the PR head -> run the benchmark -> commit and push any changes.
	  run-llm-benchmark:
	    # Run for `/run-llm-benchmark` comments on pull requests, or for manual
	    # dispatches.
	    if: |
	      (github.event_name == 'issue_comment' && github.event.issue.pull_request && startsWith(github.event.comment.body, '/run-llm-benchmark')) ||
	      (github.event_name == 'workflow_dispatch')
	    runs-on: ubuntu-latest
	    steps:
	      # Authorization comes first so that an unauthorized comment cannot
	      # trigger any further work (downloads, API calls, builds).
	      - name: Check commenter permission
	        if: github.event_name == 'issue_comment'
	        uses: actions/github-script@v7
	        with:
	          script: |
	            const user = context.payload.comment.user.login;
	            const { data } = await github.rest.repos.getCollaboratorPermissionLevel({
	              owner: context.repo.owner,
	              repo: context.repo.repo,
	              username: user,
	            });

	            // `permission` only reports the legacy base roles
	            // (admin/write/read/none): maintain is reported as write and
	            // triage as read. `role_name` carries the actual role, so both
	            // are checked against the allow-list.
	            const allowed = new Set(['admin', 'maintain', 'write', 'triage']);
	            if (!allowed.has(data.permission) && !allowed.has(data.role_name)) {
	              core.setFailed(`User ${user} has permission '${data.permission}', not allowed to run benchmarks.`);
	            }

	      # Resolves the PR number from the triggering event and exposes the PR
	      # head details as step outputs for the following steps.
	      - name: Load PR info
	        id: pr
	        uses: actions/github-script@v7
	        with:
	          script: |
	            let prNumber;
	            if (context.eventName === 'issue_comment') {
	              prNumber = context.payload.issue.number;
	            } else if (context.eventName === 'workflow_dispatch') {
	              const raw = context.payload.inputs?.pr_number;
	              // Accept plain decimal digits only.
	              if (!raw || !/^\d+$/.test(raw)) {
	                core.setFailed(`Invalid pr_number input: '${raw}'.`);
	                return;
	              }
	              prNumber = Number(raw);
	            } else {
	              core.setFailed(`Unsupported event: ${context.eventName}`);
	              return;
	            }

	            const { data: pr } = await github.rest.pulls.get({
	              owner: context.repo.owner,
	              repo: context.repo.repo,
	              pull_number: prNumber,
	            });

	            // The head repository is null when the fork has been deleted;
	            // fail with a clear message instead of a TypeError below.
	            if (!pr.head.repo) {
	              core.setFailed(`PR #${prNumber} has no head repository (the fork may have been deleted).`);
	              return;
	            }

	            core.setOutput('number', String(prNumber));
	            core.setOutput('head_ref', pr.head.ref);
	            core.setOutput('head_sha', pr.head.sha);
	            core.setOutput('head_repo_full_name', pr.head.repo.full_name);
	            core.setOutput('head_owner_type', pr.head.repo.owner.type); // "User"|"Organization"
	            core.setOutput('maintainer_can_modify', String(pr.maintainer_can_modify));

	      # Only relevant for PRs whose head lives in another repository (forks).
	      # Explains on the PR why results cannot be pushed, then fails the job.
	      - name: Check fork pushability (and comment if not)
	        if: steps.pr.outputs.head_repo_full_name != github.repository
	        uses: actions/github-script@v7
	        env:
	          PR_NUMBER: ${{ steps.pr.outputs.number }}
	          HEAD_OWNER_TYPE: ${{ steps.pr.outputs.head_owner_type }}
	          MAINTAINER_CAN_MODIFY: ${{ steps.pr.outputs.maintainer_can_modify }}
	        with:
	          script: |
	            const issue_number = Number(process.env.PR_NUMBER);
	            const headOwnerType = process.env.HEAD_OWNER_TYPE;
	            const canModify = process.env.MAINTAINER_CAN_MODIFY === 'true';

	            if (headOwnerType === 'Organization') {
	              await github.rest.issues.createComment({
	                owner: context.repo.owner,
	                repo: context.repo.repo,
	                issue_number,
	                body: [
	                  "I can’t push benchmark updates to this PR because it comes from an organization-owned fork.",
	                  "GitHub doesn’t allow granting upstream maintainers push permissions to org-owned forks.",
	                  "",
	                  "Options:",
	                  "- Reopen the PR from a personal fork with Allow edits from maintainers enabled, or",
	                  "- A maintainer can apply the benchmark update on an internal branch."
	                ].join("\n"),
	              });
	              core.setFailed("Org-owned fork PR is not pushable by maintainers.");
	              return;
	            }

	            if (!canModify) {
	              await github.rest.issues.createComment({
	                owner: context.repo.owner,
	                repo: context.repo.repo,
	                issue_number,
	                body: [
	                  "I can’t push benchmark updates to this PR branch until you enable Allow edits from maintainers.",
	                  "Please check the box on the PR page, then re-comment `/run-llm-benchmark`.",
	                  "See https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/allowing-changes-to-a-pull-request-branch-created-from-a-fork"
	                ].join("\n"),
	              });
	              core.setFailed("maintainer_can_modify is false; author must enable 'Allow edits from maintainers'.");
	            }

	      - name: Install spacetime CLI
	        # pipefail makes a failed download fail the step instead of feeding
	        # an empty script to `sh`.
	        run: |
	          set -euo pipefail
	          curl -sSf https://install.spacetimedb.com | sh -s -- -y

	      # The benchmark tool is built from a branch of this repository, before
	      # the PR head is checked out.
	      - name: Checkout master (build/install tool from trusted code)
	        uses: actions/checkout@v4
	        with:
	          ref: bradley/llm-benchmark # master <- Set to master before merging
	          fetch-depth: 0
	          persist-credentials: false

	      - uses: dtolnay/rust-toolchain@stable
	      - uses: Swatinem/rust-cache@v2

	      - name: Install llm-benchmark tool from master
	        run: |
	          cargo install --path tools/xtask-llm-benchmark --locked
	          command -v llm_benchmark

	      # Replaces the working tree with the exact PR head commit recorded by
	      # "Load PR info"; no credentials are left in the git config.
	      - name: Checkout PR head (branch)
	        uses: actions/checkout@v4
	        with:
	          repository: ${{ steps.pr.outputs.head_repo_full_name }}
	          ref: ${{ steps.pr.outputs.head_sha }}
	          fetch-depth: 0
	          persist-credentials: false

	      - name: Run benchmark (with provider keys)
	        env:
	          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
	        run: |
	          llm_benchmark ci-quickfix
	          llm_benchmark ci-check

	      # Commits the regenerated results, if any, and reports through the
	      # `changed` output whether there is something to push.
	      - name: Commit changes
	        id: commit
	        run: |
	          git config user.name "spacetimedb-bot"
	          git config user.email "spacetimedb-bot@users.noreply.github.com"

	          # Prefer staging only the benchmark output area (adjust as needed)
	          git add docs/llms

	          if git diff --cached --quiet; then
	            echo "No benchmark changes to commit."
	            echo "changed=false" >> "$GITHUB_OUTPUT"
	            exit 0
	          fi

	          git commit -m "Update LLM benchmark results"
	          echo "changed=true" >> "$GITHUB_OUTPUT"

	      # The branch and repository names come from the pull request and are
	      # therefore attacker-controlled. They are passed through environment
	      # variables and expanded by the shell as quoted data, never
	      # interpolated into the script text, so a crafted branch name cannot
	      # inject commands next to the bot token.
	      - name: Push back to PR branch (same repo or fork)
	        if: steps.commit.outputs.changed == 'true'
	        env:
	          GH_TOKEN: ${{ secrets.CLOCKWORK_LABS_BOT_PAT }}
	          HEAD_REPO: ${{ steps.pr.outputs.head_repo_full_name }}
	          HEAD_REF: ${{ steps.pr.outputs.head_ref }}
	        run: |
	          git remote set-url origin "https://x-access-token:${GH_TOKEN}@github.com/${HEAD_REPO}.git"
	          git push origin "HEAD:refs/heads/${HEAD_REF}"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Update LLM benchmarks #14

Workflow file

Update LLM benchmarks #14

Uh oh!

Workflow file for this run