Skip to content

Commit ebd3048

Browse files
committed
Trigger buffbench remotely
1 parent e3ef8e5 commit ebd3048

File tree

4 files changed

+131
-2
lines changed

4 files changed

+131
-2
lines changed

.github/workflows/buffbench.yml

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
name: BuffBench

# Runs the BuffBench eval suite on demand. There is no push or schedule
# trigger; the workflow only starts through a workflow_dispatch event.
on:
  workflow_dispatch: # Manual triggering only

jobs:
  run-buffbench:
    runs-on: ubuntu-latest
    # 360 minutes = 6 hours.
    timeout-minutes: 360
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Bun
        uses: oven-sh/setup-bun@v2
        with:
          bun-version: '1.3.5'

      - name: Cache dependencies
        uses: actions/cache@v4
        with:
          path: |
            node_modules
            */node_modules
            packages/*/node_modules
          key: ${{ runner.os }}-deps-${{ hashFiles('**/bun.lock*') }}
          restore-keys: |
            ${{ runner.os }}-deps-

      - name: Install dependencies
        run: bun install --frozen-lockfile

      - name: Set environment variables
        env:
          SECRETS_CONTEXT: ${{ toJSON(secrets) }}
          # Passed through the step environment rather than expanded into the
          # script text, so the secret value is never parsed as shell source.
          CODEBUFF_GITHUB_TOKEN: ${{ secrets.CODEBUFF_GITHUB_TOKEN }}
        run: |
          # NOTE(review): generate-ci-env.ts presumably prints a JSON array of
          # variable names; it is handed to jq via --argjson and searched with
          # index() below. Confirm against the script.
          VAR_NAMES=$(bun scripts/generate-ci-env.ts)
          # Export only the secrets whose key appears in VAR_NAMES, as KEY=value lines.
          echo "$SECRETS_CONTEXT" | jq -r --argjson vars "$VAR_NAMES" '
            to_entries | .[] | select(.key as $k | $vars | index($k)) | .key + "=" + .value
          ' >> "$GITHUB_ENV"
          echo "CODEBUFF_GITHUB_ACTIONS=true" >> "$GITHUB_ENV"
          echo "NEXT_PUBLIC_CB_ENVIRONMENT=test" >> "$GITHUB_ENV"
          echo "NEXT_PUBLIC_INFISICAL_UP=true" >> "$GITHUB_ENV"
          echo "CODEBUFF_GITHUB_TOKEN=$CODEBUFF_GITHUB_TOKEN" >> "$GITHUB_ENV"

      - name: Run buffbench
        run: cd evals && bun run-buffbench

      - name: Workflow completed
        run: echo "BuffBench workflow completed successfully"

evals/buffbench/main.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@ async function main() {
88
// Use 'external:codex' for OpenAI Codex CLI
99
await runBuffBench({
1010
evalDataPaths: [path.join(__dirname, 'eval-codebuff.json')],
11-
agents: ['base2', 'external:claude', 'external:codex'],
12-
taskConcurrency: 1,
11+
agents: ['base2'],
12+
taskConcurrency: 5,
1313
})
1414

1515
process.exit(0)

evals/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
"run-eval-set": "bun run git-evals/run-eval-set.ts",
2424
"run-buffbench": "bun run buffbench/main.ts",
2525
"run-buffbench-nightly": "bun run buffbench/main-nightly.ts",
26+
"trigger-buffbench": "bun run scripts/trigger-buffbench.ts",
2627
"setup-codebuff-repo": "bun run setup-codebuff-repo.ts"
2728
},
2829
"sideEffects": false,

evals/scripts/trigger-buffbench.ts

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
#!/usr/bin/env node
2+
3+
const { execSync } = require('child_process')
4+
5+
/**
 * Writes one progress line to stdout.
 *
 * @param message Text to print.
 */
function log(message: string) {
  const line = `${message}`
  console.log(line)
}
8+
9+
/**
 * Prints a failure message to stderr (prefixed with ❌) and terminates the
 * process with exit code 1.
 *
 * The return type is `never` because the function always exits; this lets
 * TypeScript treat code after a call as unreachable and narrow accordingly.
 *
 * @param message Description of the fatal condition.
 */
function error(message: string): never {
  console.error(`❌ ${message}`)
  process.exit(1)
}
13+
14+
/**
 * Reads the GitHub token from the CODEBUFF_GITHUB_TOKEN environment variable.
 *
 * When the variable is unset or empty, `error` is called with setup
 * instructions before the (falsy) value is returned.
 *
 * @returns The value of CODEBUFF_GITHUB_TOKEN.
 */
function checkGitHubToken() {
  const { CODEBUFF_GITHUB_TOKEN: token } = process.env
  if (token) {
    return token
  }
  error(
    'CODEBUFF_GITHUB_TOKEN environment variable is required but not set.\n' +
      'Please set it with your GitHub personal access token or use the infisical setup.'
  )
  return token
}
24+
25+
/**
 * Determines which branch to dispatch the workflow against.
 *
 * Asks git for the abbreviated name of HEAD. Falls back to 'main' when git
 * fails (e.g. not a repository), prints nothing, or reports a detached HEAD.
 *
 * @returns The current branch name, or 'main' when it cannot be determined.
 */
function getCurrentBranch(): string {
  try {
    const branch = execSync('git rev-parse --abbrev-ref HEAD', { encoding: 'utf8' }).trim()
    // On a detached HEAD git prints the literal "HEAD", which is not a branch
    // or tag name and so cannot be used as the dispatch ref.
    if (!branch || branch === 'HEAD') {
      return 'main'
    }
    return branch
  } catch {
    return 'main'
  }
}
32+
33+
/**
 * Sends a workflow_dispatch event for buffbench.yml in CodebuffAI/codebuff.
 *
 * The request is made with the global `fetch` (available in Bun and in
 * Node 18+) instead of a shell-built `curl` command, so that:
 *  - the token and branch are never interpolated into a shell string
 *    (no shell/JSON injection via a crafted branch name);
 *  - the token cannot appear in a "Command failed: ..." error message;
 *  - success is decided by the HTTP status rather than by searching the
 *    response body for a substring.
 *
 * Failures are reported through `log` and never thrown, so the caller keeps
 * running.
 *
 * @param token  GitHub token sent in the Authorization header.
 * @param branch Git ref to run the workflow on.
 */
async function triggerWorkflow(token: string, branch: string): Promise<void> {
  try {
    const response = await fetch(
      'https://api.github.com/repos/CodebuffAI/codebuff/actions/workflows/buffbench.yml/dispatches',
      {
        method: 'POST',
        headers: {
          Accept: 'application/vnd.github.v3+json',
          Authorization: `token ${token}`,
          'Content-Type': 'application/json',
          // GitHub's API rejects requests without a User-Agent; set one explicitly
          // rather than relying on the runtime's default.
          'User-Agent': 'codebuff-trigger-buffbench',
        },
        // JSON.stringify escapes quotes/backslashes in the branch name.
        body: JSON.stringify({ ref: branch }),
      },
    )

    if (response.ok) {
      log('🎉 BuffBench workflow triggered!')
    } else {
      const detail = await response.text()
      log(`⚠️ Workflow dispatch failed: ${detail}HTTP Status: ${response.status}`)
      log(
        'Please manually trigger the workflow at: https://github.com/CodebuffAI/codebuff/actions/workflows/buffbench.yml',
      )
    }
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err)
    log(`⚠️ Failed to trigger workflow automatically: ${reason}`)
    log(
      'You may need to trigger it manually at: https://github.com/CodebuffAI/codebuff/actions/workflows/buffbench.yml',
    )
  }
}
59+
60+
/**
 * Script entry point.
 *
 * Uses the first CLI argument as the branch, or the current git branch when
 * none is given, then dispatches the workflow with CODEBUFF_GITHUB_TOKEN and
 * prints where to monitor the run.
 */
async function main() {
  const requestedBranch = process.argv[2]
  const branch = requestedBranch ? requestedBranch : getCurrentBranch()

  log('🧪 Triggering BuffBench workflow...')
  log(`Branch: ${branch}`)

  const token = checkGitHubToken()
  if (token) {
    log('✅ Using CODEBUFF_GITHUB_TOKEN')

    await triggerWorkflow(token, branch)

    log('')
    log('Monitor progress at: https://github.com/CodebuffAI/codebuff/actions/workflows/buffbench.yml')
  }
}
75+
76+
// Run the script; any rejection from main() is reported via error().
main().catch((failure) => {
  const text = `Failed to trigger BuffBench: ${failure.message}`
  error(text)
})

0 commit comments

Comments
 (0)