Skip to content

Commit 9d7ef6e

Browse files
LoCoBench Bot and claude committed
fix: harden ensure_fresh_token_all + add --timeout-multiplier flag
- _common.sh: ensure_fresh_token_all now auto-discovers accounts if called before setup_multi_accounts (prevents silent no-op refresh)
- k8s_docs_2config.sh, linuxflbench_2config.sh, swebenchpro_2config.sh: add --timeout-multiplier CLI flag and make TIMEOUT_MULTIPLIER overridable via env var

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 859fa63 commit 9d7ef6e

File tree

4 files changed

+37
-5
lines changed

4 files changed

+37
-5
lines changed

configs/_common.sh

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -392,6 +392,17 @@ setup_dual_accounts() { setup_multi_accounts; }
392392

393393
# Refresh tokens for all registered accounts.
394394
ensure_fresh_token_all() {
395+
# Some scripts call this before setup_multi_accounts/setup_dual_accounts.
396+
# Ensure account homes are discovered so refresh is never silently skipped.
397+
if [ ${#CLAUDE_HOMES[@]} -eq 0 ]; then
398+
setup_multi_accounts
399+
fi
400+
401+
# Safety fallback in case setup_multi_accounts did not populate for any reason.
402+
if [ ${#CLAUDE_HOMES[@]} -eq 0 ]; then
403+
CLAUDE_HOMES=("${HOME}")
404+
fi
405+
395406
for home_dir in "${CLAUDE_HOMES[@]}"; do
396407
echo "Refreshing token for HOME=$home_dir ..."
397408
HOME="$home_dir" ensure_fresh_token

configs/k8s_docs_2config.sh

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
# --model MODEL Override model (default: claude-opus-4-6)
1515
# --category CATEGORY Run category (default: official)
1616
# --parallel N Number of parallel task subshells (default: 1)
17+
# --timeout-multiplier N Override Harbor timeout multiplier (default: 3)
1718
#
1819
# Prerequisites:
1920
# - ~/evals/.env.local with USE_SUBSCRIPTION=true (default: 2-account Max subscription)
@@ -60,7 +61,7 @@ TASKS_DIR="/home/stephanie_jarmak/CodeContextBench/benchmarks/ccb_k8sdocs"
6061
AGENT_PATH="agents.claude_baseline_agent:BaselineClaudeCodeAgent"
6162
MODEL="${MODEL:-anthropic/claude-opus-4-6}"
6263
CONCURRENCY=2
63-
TIMEOUT_MULTIPLIER=3 # 3x default for 900s task timeout
64+
TIMEOUT_MULTIPLIER="${TIMEOUT_MULTIPLIER:-3}" # 3x default for 900s task timeout
6465
RUN_BASELINE=true
6566
RUN_FULL=true
6667
CATEGORY="${CATEGORY:-official}"
@@ -88,6 +89,10 @@ while [[ $# -gt 0 ]]; do
8889
PARALLEL_JOBS="$2"
8990
shift 2
9091
;;
92+
--timeout-multiplier)
93+
TIMEOUT_MULTIPLIER="$2"
94+
shift 2
95+
;;
9196
*)
9297
echo "Unknown option: $1"
9398
exit 1

configs/linuxflbench_2config.sh

Lines changed: 14 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
# --full-only Run only MCP-Full (sourcegraph_full)
1818
# --model MODEL Override model (default: claude-opus-4-6)
1919
# --category CATEGORY Run category (default: official)
20+
# --timeout-multiplier N Override Harbor timeout multiplier (default: 10)
2021
#
2122
# Prerequisites:
2223
# - ~/evals/.env.local with USE_SUBSCRIPTION=true (default: 2-account Max subscription)
@@ -67,7 +68,7 @@ TASKS_DIR="/home/stephanie_jarmak/CodeContextBench/benchmarks/${SUITE}"
6768
AGENT_PATH="agents.claude_baseline_agent:BaselineClaudeCodeAgent"
6869
MODEL="${MODEL:-anthropic/claude-opus-4-6}"
6970
CONCURRENCY=1
70-
TIMEOUT_MULTIPLIER=10
71+
TIMEOUT_MULTIPLIER="${TIMEOUT_MULTIPLIER:-10}"
7172
RUN_BASELINE=true
7273
RUN_FULL=true
7374
CATEGORY="${CATEGORY:-official}"
@@ -95,6 +96,10 @@ while [[ $# -gt 0 ]]; do
9596
PARALLEL_JOBS="$2"
9697
shift 2
9798
;;
99+
--timeout-multiplier)
100+
TIMEOUT_MULTIPLIER="$2"
101+
shift 2
102+
;;
98103
*)
99104
echo "Unknown option: $1"
100105
exit 1
@@ -133,6 +138,11 @@ declare -A TASK_SG_REPO_NAMES=(
133138
["lfl-sound-53441"]="github.com/sg-benchmarks/linux--07c4ee00"
134139
)
135140

141+
# Per-task timeout multiplier overrides for known long-running tasks.
142+
declare -A TASK_TIMEOUT_MULTIPLIERS=(
143+
["lfl-nfs-117651"]="15"
144+
)
145+
136146
# Derive short model name for run directory
137147
_model_lower=$(echo "$MODEL" | awk -F/ '{print $NF}' | tr '[:upper:]' '[:lower:]')
138148
case "$_model_lower" in
@@ -197,6 +207,7 @@ _linuxflbench_run_single() {
197207
local jobs_base=${5:-$JOBS_BASE}
198208
local jobs_subdir="${jobs_base}/${config}"
199209
local task_path="${TASKS_DIR}/${task_id}"
210+
local timeout_mult="${TASK_TIMEOUT_MULTIPLIERS[$task_id]:-$TIMEOUT_MULTIPLIER}"
200211

201212
mkdir -p "$jobs_subdir"
202213

@@ -205,7 +216,7 @@ _linuxflbench_run_single() {
205216
return 1
206217
fi
207218

208-
echo "Running task: $task_id ($config) [HOME=$task_home]"
219+
echo "Running task: $task_id ($config) [HOME=$task_home] [timeout x${timeout_mult}]"
209220

210221
local sg_repo="${TASK_SG_REPO_NAMES[$task_id]:-}"
211222
if [ -n "$sg_repo" ]; then
@@ -220,7 +231,7 @@ _linuxflbench_run_single() {
220231
--model "$MODEL" \
221232
--jobs-dir "$jobs_subdir" \
222233
-n $CONCURRENCY \
223-
--timeout-multiplier $TIMEOUT_MULTIPLIER \
234+
--timeout-multiplier "$timeout_mult" \
224235
2>&1 | tee "${jobs_subdir}/${task_id}.log" \
225236
|| {
226237
echo "WARNING: Task $task_id ($config) failed (exit code: $?)"

configs/swebenchpro_2config.sh

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515
# --concurrency N Number of concurrent tasks (default: 2)
1616
# --category CATEGORY Run category (default: official)
1717
# --parallel N Number of parallel task subshells (default: 1)
18+
# --timeout-multiplier N Override Harbor timeout multiplier (default: 10)
1819
#
1920
# Prerequisites:
2021
# - ~/evals/.env.local with USE_SUBSCRIPTION=true (default: 2-account Max subscription)
@@ -60,7 +61,7 @@ ensure_fresh_token
6061
AGENT_PATH="agents.claude_baseline_agent:BaselineClaudeCodeAgent"
6162
MODEL="${MODEL:-anthropic/claude-opus-4-6}"
6263
CONCURRENCY=2
63-
TIMEOUT_MULTIPLIER=10
64+
TIMEOUT_MULTIPLIER="${TIMEOUT_MULTIPLIER:-10}"
6465
RUN_BASELINE=true
6566
RUN_FULL=true
6667
CATEGORY="${CATEGORY:-official}"
@@ -92,6 +93,10 @@ while [[ $# -gt 0 ]]; do
9293
PARALLEL_JOBS="$2"
9394
shift 2
9495
;;
96+
--timeout-multiplier)
97+
TIMEOUT_MULTIPLIER="$2"
98+
shift 2
99+
;;
95100
*)
96101
echo "Unknown option: $1"
97102
exit 1

0 commit comments

Comments
 (0)