|
25 | 25 | # --category CATEGORY Run category (default: staging) |
26 | 26 | # --skip-completed Skip tasks that already have result.json + task_metrics.json |
27 | 27 | # --dry-run Print tasks without running |
28 | | -# --yes Skip confirmation prompt (non-interactive mode) |
| 28 | +# --skip-prebuild Skip Docker image pre-build (use when images already cached) |
29 | 29 | # |
30 | 30 | # Prerequisites: |
31 | 31 | # - configs/selected_benchmark_tasks.json in repo (or --selection-file path) |
@@ -61,7 +61,7 @@ CATEGORY="${CATEGORY:-staging}" |
61 | 61 | FULL_CONFIG="${FULL_CONFIG:-mcp-remote-direct}" |
62 | 62 | DRY_RUN=false |
63 | 63 | SKIP_COMPLETED=false |
64 | | -YES=false |
| 64 | +SKIP_PREBUILD=false |
65 | 65 | AGENT_PATH="agents.claude_baseline_agent:BaselineClaudeCodeAgent" |
66 | 66 |
|
67 | 67 | while [[ $# -gt 0 ]]; do |
@@ -114,8 +114,8 @@ while [[ $# -gt 0 ]]; do |
114 | 114 | DRY_RUN=true |
115 | 115 | shift |
116 | 116 | ;; |
117 | | - --yes) |
118 | | - YES=true |
| 117 | + --skip-prebuild) |
| 118 | + SKIP_PREBUILD=true |
119 | 119 | shift |
120 | 120 | ;; |
121 | 121 | *) |
@@ -151,6 +151,10 @@ BASELINE_CONFIG=$(baseline_config_for "$FULL_CONFIG") |
151 | 151 | BL_MCP_TYPE=$(config_to_mcp_type "$BASELINE_CONFIG") |
152 | 152 | FULL_MCP_TYPE=$(config_to_mcp_type "$FULL_CONFIG") |
153 | 153 |
|
| 154 | +# Strict validation — exit immediately on unknown config names |
| 155 | +validate_config_name "$BASELINE_CONFIG" |
| 156 | +validate_config_name "$FULL_CONFIG" |
| 157 | + |
154 | 158 | # ============================================ |
155 | 159 | # EXTRACT TASKS FROM SELECTION FILE |
156 | 160 | # ============================================ |
@@ -250,17 +254,164 @@ if [ "$DRY_RUN" = true ]; then |
250 | 254 | echo " ... and $(( count - 5 )) more" |
251 | 255 | fi |
252 | 256 | done |
| 257 | + if [ "$SKIP_PREBUILD" = false ]; then |
| 258 | + echo "" |
| 259 | + echo "[DRY RUN] Would pre-build Docker images for: ${!BENCHMARK_COUNTS[*]}" |
| 260 | + fi |
253 | 261 | exit 0 |
254 | 262 | fi |
255 | 263 |
|
256 | 264 | # ============================================ |
257 | | -# CONFIRMATION GATE |
| 265 | +# DOCKERFILE VARIANT CHECK |
| 266 | +# ============================================ |
| 267 | +# Verify all tasks have the required Dockerfile variant for the chosen config |
| 268 | +# BEFORE asking the user to confirm. |
#######################################
# Count, for every selected task, whether the Dockerfiles needed by the
# enabled run modes exist under <REPO_ROOT>/<task path>/environment/.
#
# Globals read:
#   BENCHMARK_TASK_DIRS  associative array: benchmark name -> newline-separated
#                        task paths, relative to REPO_ROOT
#   REPO_ROOT            root directory the task paths are resolved against
#   RUN_BASELINE         "true" to require environment/Dockerfile
#   RUN_FULL             "true" to require the variant Dockerfile
#   FULL_CONFIG          config name; one containing "artifact" selects
#                        Dockerfile.artifact_only, anything else
#                        Dockerfile.sg_only
# Globals written:
#   DOCKERFILE_MISSING_COUNT  number of required files that were not found
#   DOCKERFILE_READY_COUNT    number of tasks whose variant Dockerfile exists
#                             (only incremented when RUN_FULL is "true")
#   DOCKERFILE_WARNINGS       one " MISSING: ..." line per missing file
# Arguments: none
#######################################
check_dockerfile_variants() {
  DOCKERFILE_MISSING_COUNT=0
  DOCKERFILE_READY_COUNT=0
  DOCKERFILE_WARNINGS=""

  # Pick the variant file name once instead of branching per task.
  local variant_file="Dockerfile.sg_only"
  if [[ "$FULL_CONFIG" == *artifact* ]]; then
    variant_file="Dockerfile.artifact_only"
  fi

  # All loop variables are local so the caller's globals (notably `bm`, which
  # the script also uses as a top-level loop variable) are left untouched.
  local bm task_path env_dir task_id

  # Keys are fed one per line through `sort`; reading them with `read -r`
  # avoids the word-splitting and glob expansion of an unquoted $(...).
  while IFS= read -r bm; do
    # printf emits a single empty line when the array has no keys.
    [[ -n "$bm" ]] || continue

    while IFS= read -r task_path; do
      [[ -n "$task_path" ]] || continue
      env_dir="${REPO_ROOT}/${task_path}/environment"
      task_id=$(basename -- "$task_path")

      # Baseline: needs environment/Dockerfile
      if [[ "$RUN_BASELINE" == true && ! -f "${env_dir}/Dockerfile" ]]; then
        DOCKERFILE_WARNINGS+=" MISSING: ${task_id} — Dockerfile (baseline)"$'\n'
        DOCKERFILE_MISSING_COUNT=$(( DOCKERFILE_MISSING_COUNT + 1 ))
      fi

      # Full/MCP: needs the variant Dockerfile
      if [[ "$RUN_FULL" == true ]]; then
        if [[ -f "${env_dir}/${variant_file}" ]]; then
          DOCKERFILE_READY_COUNT=$(( DOCKERFILE_READY_COUNT + 1 ))
        else
          DOCKERFILE_WARNINGS+=" MISSING: ${task_id} — ${variant_file}"$'\n'
          DOCKERFILE_MISSING_COUNT=$(( DOCKERFILE_MISSING_COUNT + 1 ))
        fi
      fi
    done <<< "${BENCHMARK_TASK_DIRS[$bm]}"
  done < <(printf '%s\n' "${!BENCHMARK_TASK_DIRS[@]}" | sort)
}
| 311 | + |
| 312 | +# ============================================ |
| 313 | +# PRE-FLIGHT VERIFICATION |
258 | 314 | # ============================================ |
259 | | -if [ "$YES" != true ]; then |
260 | | - echo "----------------------------------------------" |
261 | | - echo "Ready to launch $TOTAL_AGENT_RUNS agent runs ($PARALLEL_TASKS parallel)." |
| 315 | +echo "----------------------------------------------" |
| 316 | +echo "PRE-FLIGHT VERIFICATION" |
| 317 | +echo "----------------------------------------------" |
| 318 | +echo "" |
| 319 | + |
| 320 | +# 1. Config pair |
| 321 | +echo "Config pair:" |
| 322 | +if [ "$RUN_BASELINE" = true ]; then |
| 323 | + echo " Baseline: $BASELINE_CONFIG (mcp_type=$BL_MCP_TYPE)" |
| 324 | +fi |
| 325 | +if [ "$RUN_FULL" = true ]; then |
| 326 | + echo " Full: $FULL_CONFIG (mcp_type=$FULL_MCP_TYPE)" |
| 327 | +fi |
| 328 | +echo "" |
| 329 | + |
| 330 | +# 2. Dockerfile variant readiness |
| 331 | +check_dockerfile_variants |
| 332 | +if [ "$RUN_FULL" = true ]; then |
| 333 | + _variant_name="Dockerfile.sg_only" |
| 334 | + [[ "$FULL_CONFIG" == *artifact* ]] && _variant_name="Dockerfile.artifact_only" |
| 335 | + echo "Dockerfile variants ($_variant_name):" |
| 336 | + echo " Ready: $DOCKERFILE_READY_COUNT / $TOTAL_TASKS" |
| 337 | + if [ "$DOCKERFILE_MISSING_COUNT" -gt 0 ]; then |
| 338 | + echo " MISSING: $DOCKERFILE_MISSING_COUNT" |
| 339 | + echo -e "$DOCKERFILE_WARNINGS" |
| 340 | + fi |
262 | 341 | echo "" |
263 | | - read -r -p "Press Enter to proceed, Ctrl+C to abort... " _ |
| 342 | +fi |
| 343 | + |
| 344 | +# 3. Docker daemon |
| 345 | +if timeout 10 docker info >/dev/null 2>&1; then |
| 346 | + echo "Docker: OK" |
| 347 | +else |
| 348 | + echo "Docker: FAIL — daemon not responding" |
| 349 | + exit 1 |
| 350 | +fi |
| 351 | + |
# 4. Account token freshness
# For each configured account home, print how many minutes remain before the
# OAuth token recorded in .claude/.credentials.json expires. This is purely
# informational: nothing here stops the run.
if [ "${#CLAUDE_HOMES[@]}" -gt 0 ]; then
  echo "Accounts: ${#CLAUDE_HOMES[@]} active"
  for _home_dir in "${CLAUDE_HOMES[@]}"; do
    _creds="${_home_dir}/.claude/.credentials.json"
    if [ -f "$_creds" ]; then
      # expiresAt is compared against time.time()*1000, i.e. it is treated as
      # epoch milliseconds. A missing or non-numeric value is reported as
      # "unknown" rather than as a huge negative number of minutes.
      # `|| _remaining=""` keeps a missing python3 from aborting the script
      # if errexit is active; the echo below then falls back to "unknown".
      _remaining=$(python3 -c "
import json, sys, time
try:
    with open(sys.argv[1]) as fh:
        data = json.load(fh)
    exp = data.get('claudeAiOauth', {}).get('expiresAt')
    if isinstance(exp, bool) or not isinstance(exp, (int, float)) or exp <= 0:
        print('unknown')
    else:
        rem = int((exp - time.time() * 1000) / 60000)
        print(f'{rem} min remaining')
except Exception:
    print('unknown')
" "$_creds" 2>/dev/null) || _remaining=""
      echo " $(basename -- "$_home_dir"): ${_remaining:-unknown}"
    fi
  done
else
  echo "Accounts: default (single account)"
fi
| 373 | + |
# 5. Disk space
# Free space (whole GB) on the filesystem holding REPO_ROOT.
# Below 5 GB aborts the run; below 20 GB only warns.
# Try the GNU df options first; `|| _disk_free=""` keeps a df failure from
# aborting the script if errexit/pipefail are active.
_disk_free=$(df -BG --output=avail "$REPO_ROOT" 2>/dev/null | tail -1 | tr -d ' G') || _disk_free=""
if ! [[ "$_disk_free" =~ ^[0-9]+$ ]]; then
  # df implementations without -B/--output: fall back to the POSIX format,
  # whose 4th column is the available space in 1024-byte blocks.
  _disk_free=$(df -Pk "$REPO_ROOT" 2>/dev/null | awk 'NR == 2 { print int($4 / 1048576) }') || _disk_free=""
fi
# Anything that is still not a plain integer is treated as "unknown" so the
# numeric comparisons below never see a malformed operand.
[[ "$_disk_free" =~ ^[0-9]+$ ]] || _disk_free=""

if [ -n "$_disk_free" ] && [ "$_disk_free" -lt 5 ]; then
  echo "Disk space: FAIL — only ${_disk_free}GB free"
  exit 1
elif [ -n "$_disk_free" ] && [ "$_disk_free" -lt 20 ]; then
  echo "Disk space: WARN — ${_disk_free}GB free (may run low)"
else
  echo "Disk space: OK (${_disk_free:-?}GB free)"
fi
| 384 | + |
| 385 | +# 6. Prebuild status |
| 386 | +if [ "$SKIP_PREBUILD" = false ]; then |
| 387 | + echo "Prebuild: enabled (${!BENCHMARK_COUNTS[*]})" |
| 388 | +else |
| 389 | + echo "Prebuild: SKIPPED (--skip-prebuild)" |
| 390 | +fi |
| 391 | +echo "" |
| 392 | + |
# 7. Critical blockers — exit before confirmation
# DOCKERFILE_MISSING_COUNT and DOCKERFILE_WARNINGS are filled in by
# check_dockerfile_variants.
if [ "$DOCKERFILE_MISSING_COUNT" -gt 0 ]; then
  echo "BLOCKED: $DOCKERFILE_MISSING_COUNT task(s) missing required Dockerfile variant."
  # The readiness report prints the list of missing files only when the full
  # config is being run. For baseline-only runs, print it here so the user
  # can see which tasks are affected.
  if [ "$RUN_FULL" != true ]; then
    printf '%s' "$DOCKERFILE_WARNINGS"
  fi
  echo "Fix: Run python3 scripts/generate_sgonly_dockerfiles.py for affected tasks."
  exit 1
fi
| 399 | + |
| 400 | +echo "----------------------------------------------" |
| 401 | +echo "Ready to launch $TOTAL_AGENT_RUNS agent runs ($PARALLEL_TASKS parallel)." |
| 402 | +echo "" |
| 403 | +read -r -p "Press Enter to proceed, Ctrl+C to abort... " _ |
| 404 | +echo "" |
| 405 | + |
| 406 | +# ============================================ |
| 407 | +# DOCKER IMAGE PRE-BUILD |
| 408 | +# ============================================ |
| 409 | +if [ "$SKIP_PREBUILD" = false ]; then |
| 410 | + echo "=== Pre-building Docker images ===" |
| 411 | + ensure_base_images |
| 412 | + for bm in $(echo "${!BENCHMARK_COUNTS[@]}" | tr ' ' '\n' | sort); do |
| 413 | + prebuild_images "$bm" |
| 414 | + done |
264 | 415 | echo "" |
265 | 416 | fi |
266 | 417 |
|
|
0 commit comments