Skip to content

Commit 72a0502

Browse files
sjarmak and claude committed
fix: run_selected_tasks.sh uses Dockerfile.artifact_only for artifact configs
Previously, _launch_task_pair always swapped in Dockerfile.sg_only for the MCP config and used the original Dockerfile for the baseline. For artifact evaluation, both configs need Dockerfile.artifact_only, which sets the /tmp/.artifact_only_mode sentinel that triggers answer_json_verifier_lib.sh in the verifier. Without it, artifact-mode runs would silently fall back to direct-mode verification. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 9ad2d84 commit 72a0502

File tree

1 file changed

+57
-20
lines changed

1 file changed

+57
-20
lines changed

configs/run_selected_tasks.sh

Lines changed: 57 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -357,11 +357,10 @@ _drain_pool() {
357357
# Follows _common.sh's run_paired_configs pattern: both configs run in parallel
358358
# per task so timing is comparable and resource utilization is maximized.
359359
#
360-
# Key design: baseline uses the ORIGINAL Dockerfile (full local repos).
361-
# MCP uses a TEMP COPY of the task dir with Dockerfile.sg_only swapped in
362-
# so the agent has NO local source code and must use Sourcegraph MCP tools.
363-
# The verifier's tests/ dir (uploaded by Harbor at runtime) still works
364-
# because oracle_checks.py scores answer.json content, not compiled code.
360+
# Dockerfile swap logic (determined by VERIFIER_MODE):
361+
# direct mode: baseline=original Dockerfile, MCP=Dockerfile.sg_only
362+
# artifact mode: BOTH configs use Dockerfile.artifact_only (sets /tmp/.artifact_only_mode
363+
# sentinel so verifier parses answer.json and applies diffs)
365364
#
366365
# Args: bm task_id task_path bl_jobs_dir full_jobs_dir
367366
_launch_task_pair() {
@@ -375,16 +374,38 @@ _launch_task_pair() {
375374

376375
local pair_pids=()
377376
local _mcp_temp_dir=""
377+
local _bl_temp_dir=""
378378

379-
# Launch baseline config — uses original Dockerfile (full local repos)
379+
# Determine which Dockerfile variant to use for each config
380+
local _is_artifact=false
381+
[[ "$VERIFIER_MODE" == "artifact" ]] && _is_artifact=true
382+
383+
# Launch baseline config
380384
if [ "$RUN_BASELINE" = true ]; then
385+
local _bl_task_path="$abs_path"
386+
387+
# Artifact mode: baseline also needs Dockerfile.artifact_only
388+
# (sets /tmp/.artifact_only_mode so verifier parses answer.json)
389+
if [ "$_is_artifact" = true ]; then
390+
local _df_artifact="${abs_path}/environment/Dockerfile.artifact_only"
391+
if [ -f "$_df_artifact" ]; then
392+
_bl_temp_dir=$(mktemp -d "/tmp/bl_${task_id}_XXXXXX")
393+
cp -a "${abs_path}/." "${_bl_temp_dir}/"
394+
cp "${_bl_temp_dir}/environment/Dockerfile.artifact_only" "${_bl_temp_dir}/environment/Dockerfile"
395+
_bl_task_path="$_bl_temp_dir"
396+
echo " [artifact] Using artifact Dockerfile for baseline: $task_id"
397+
else
398+
echo " WARNING: No Dockerfile.artifact_only for $task_id — baseline verifier won't parse answer.json"
399+
fi
400+
fi
401+
381402
_wait_for_slot
382403
_pick_next_account
383404
local _bl_home="$_PICKED_HOME"
384405
(
385406
export HOME="$_bl_home"
386407
BASELINE_MCP_TYPE=$BL_MCP_TYPE harbor run \
387-
--path "$abs_path" \
408+
--path "$_bl_task_path" \
388409
--agent-import-path "$AGENT_PATH" \
389410
--model "$MODEL" \
390411
--jobs-dir "$bl_jobs_dir" \
@@ -400,21 +421,34 @@ _launch_task_pair() {
400421
sleep 2
401422
fi
402423

403-
# Launch full/MCP config — uses Dockerfile.sg_only (no local source code)
424+
# Launch full/MCP config
404425
if [ "$RUN_FULL" = true ]; then
405426
local _mcp_task_path="$abs_path"
406-
local _df_sgonly="${abs_path}/environment/Dockerfile.sg_only"
407-
408-
# Create temp copy with Dockerfile.sg_only swapped in.
409-
# This ensures baseline sees the original Dockerfile while MCP sees sg_only.
410-
if [ -f "$_df_sgonly" ]; then
411-
_mcp_temp_dir=$(mktemp -d "/tmp/mcp_${task_id}_XXXXXX")
412-
cp -a "${abs_path}/." "${_mcp_temp_dir}/"
413-
cp "${_mcp_temp_dir}/environment/Dockerfile.sg_only" "${_mcp_temp_dir}/environment/Dockerfile"
414-
_mcp_task_path="$_mcp_temp_dir"
415-
echo " [sg_only] Using empty-workspace Dockerfile for MCP config: $task_id"
427+
428+
if [ "$_is_artifact" = true ]; then
429+
# Artifact mode: use Dockerfile.artifact_only (full repo + artifact sentinel)
430+
local _df_artifact="${abs_path}/environment/Dockerfile.artifact_only"
431+
if [ -f "$_df_artifact" ]; then
432+
_mcp_temp_dir=$(mktemp -d "/tmp/mcp_${task_id}_XXXXXX")
433+
cp -a "${abs_path}/." "${_mcp_temp_dir}/"
434+
cp "${_mcp_temp_dir}/environment/Dockerfile.artifact_only" "${_mcp_temp_dir}/environment/Dockerfile"
435+
_mcp_task_path="$_mcp_temp_dir"
436+
echo " [artifact] Using artifact Dockerfile for MCP config: $task_id"
437+
else
438+
echo " WARNING: No Dockerfile.artifact_only for $task_id — MCP verifier won't parse answer.json"
439+
fi
416440
else
417-
echo " WARNING: No Dockerfile.sg_only for $task_id — MCP will have local source access"
441+
# Direct mode: use Dockerfile.sg_only (empty workspace, agent uses MCP)
442+
local _df_sgonly="${abs_path}/environment/Dockerfile.sg_only"
443+
if [ -f "$_df_sgonly" ]; then
444+
_mcp_temp_dir=$(mktemp -d "/tmp/mcp_${task_id}_XXXXXX")
445+
cp -a "${abs_path}/." "${_mcp_temp_dir}/"
446+
cp "${_mcp_temp_dir}/environment/Dockerfile.sg_only" "${_mcp_temp_dir}/environment/Dockerfile"
447+
_mcp_task_path="$_mcp_temp_dir"
448+
echo " [sg_only] Using empty-workspace Dockerfile for MCP config: $task_id"
449+
else
450+
echo " WARNING: No Dockerfile.sg_only for $task_id — MCP will have local source access"
451+
fi
418452
fi
419453

420454
_wait_for_slot
@@ -438,11 +472,14 @@ _launch_task_pair() {
438472
sleep 2
439473
fi
440474

441-
# Track temp dir for cleanup in _drain_pool (after all harbor processes finish).
475+
# Track temp dirs for cleanup in _drain_pool (after all harbor processes finish).
442476
# Cannot use background watcher because wait() only works for child processes.
443477
if [ -n "$_mcp_temp_dir" ]; then
444478
_MCP_TEMP_DIRS+=("$_mcp_temp_dir")
445479
fi
480+
if [ -n "$_bl_temp_dir" ]; then
481+
_MCP_TEMP_DIRS+=("$_bl_temp_dir")
482+
fi
446483
}
447484

448485
# ============================================

0 commit comments

Comments
 (0)