Skip to content

Commit 29bf231

Browse files
sjarmak and claude committed
fix: TAC sg_only Dockerfiles use base image, wire --tasks prebuild filter, fix perms
- 4 TAC tasks (bustub-hyperloglog, llamacpp-context-window, llamacpp-file-modify, openhands-search-file) had Dockerfile.sg_only using ubuntu:22.04, which lacks the /utils/eval.py and python_default needed by the verifier. They now use the TAC base image with workspace truncation so verifiers work in the MCP config.
- Wire the --tasks filter in run_selected_tasks.sh and sdlc_suite_2config.sh so prebuild only builds images for the selected tasks, not entire suites.
- Fix missing execute permission on ccx-migration-025 test.sh and eval.sh.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent d9dd9ac commit 29bf231

File tree

8 files changed

+82
-50
lines changed

8 files changed

+82
-50
lines changed
Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,36 @@
11
# bustub-hyperloglog-impl-001 — sg_only_env variant
2-
# No local repo clone — agent uses Sourcegraph MCP exclusively for code access.
2+
# Uses TAC base image (provides /utils/eval.py, python_default) with workspace truncated.
3+
# Agent uses Sourcegraph MCP exclusively for code access.
34

4-
FROM ubuntu:22.04
5+
FROM ghcr.io/theagentcompany/sde-implement-hyperloglog-image:1.0.0
56

67
ENV SOURCEGRAPH_REPO_NAME=sg-benchmarks/bustub--d5f79431
78

8-
ENV DEBIAN_FRONTEND=noninteractive
9+
# TAC environment variables (needed by verifier)
10+
ENV TAC_SERVER_HOSTNAME=localhost
11+
ENV DECRYPTION_KEY="theagentcompany is all you need"
912

10-
RUN apt-get update && apt-get install -y --no-install-recommends \
11-
git \
12-
ca-certificates \
13-
python3 \
14-
curl \
15-
&& rm -rf /var/lib/apt/lists/*
13+
# Create logs directory for Harbor compatibility
14+
RUN mkdir -p /logs/agent /logs/verifier /workspace
15+
16+
# Truncate workspace — agent must use MCP to discover code
17+
RUN find /workspace -type f -name '*.py' -o -name '*.cpp' -o -name '*.h' -o -name '*.c' \
18+
-o -name '*.java' -o -name '*.js' -o -name '*.ts' -o -name '*.go' -o -name '*.rs' \
19+
-o -name '*.rb' -o -name '*.sh' -o -name '*.md' -o -name '*.txt' -o -name '*.json' \
20+
-o -name '*.yaml' -o -name '*.yml' -o -name '*.toml' -o -name '*.cfg' -o -name '*.ini' \
21+
| while read f; do : > "$f"; done 2>/dev/null || true
1622

1723
WORKDIR /workspace
1824

1925
# Empty git repo so agent can commit work
20-
RUN git init && \
26+
RUN git init 2>/dev/null || (git config --global init.defaultBranch main && git init) && \
2127
git config user.email "agent@example.com" && \
2228
git config user.name "Agent"
2329

24-
RUN mkdir -p /logs/agent /logs/verifier
25-
2630
# Mark sg_only mode so verifiers can skip local-path checks
2731
RUN touch /tmp/.sg_only_mode
2832

33+
# Clone manifest for sgonly_verifier_wrapper.sh to restore repo at verify time
34+
RUN echo '{"repos":[{"mirror":"sg-benchmarks/bustub--d5f79431","dest":"/workspace"}]}' > /tmp/.sg_only_clone_manifest.json
35+
2936
ENTRYPOINT []

benchmarks/ccb_mcp_migration/ccx-migration-025/tests/eval.sh

100644 → 100755
File mode changed.

benchmarks/ccb_mcp_migration/ccx-migration-025/tests/test.sh

100644 → 100755
File mode changed.
Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,36 @@
11
# llamacpp-context-window-search-001 — sg_only_env variant
2-
# No local repo clone — agent uses Sourcegraph MCP exclusively for code access.
2+
# Uses TAC base image (provides /utils/eval.py, python_default) with workspace truncated.
3+
# Agent uses Sourcegraph MCP exclusively for code access.
34

4-
FROM ubuntu:22.04
5+
FROM ghcr.io/theagentcompany/sde-find-answer-in-codebase-1-image:1.0.0
56

67
ENV SOURCEGRAPH_REPO_NAME=sg-benchmarks/llama.cpp--56399714
78

8-
ENV DEBIAN_FRONTEND=noninteractive
9+
# TAC environment variables (needed by verifier)
10+
ENV TAC_SERVER_HOSTNAME=localhost
11+
ENV DECRYPTION_KEY="theagentcompany is all you need"
912

10-
RUN apt-get update && apt-get install -y --no-install-recommends \
11-
git \
12-
ca-certificates \
13-
python3 \
14-
curl \
15-
&& rm -rf /var/lib/apt/lists/*
13+
# Create logs directory for Harbor compatibility
14+
RUN mkdir -p /logs/agent /logs/verifier /workspace
15+
16+
# Truncate workspace — agent must use MCP to discover code
17+
RUN find /workspace -type f -name '*.py' -o -name '*.cpp' -o -name '*.h' -o -name '*.c' \
18+
-o -name '*.java' -o -name '*.js' -o -name '*.ts' -o -name '*.go' -o -name '*.rs' \
19+
-o -name '*.rb' -o -name '*.sh' -o -name '*.md' -o -name '*.txt' -o -name '*.json' \
20+
-o -name '*.yaml' -o -name '*.yml' -o -name '*.toml' -o -name '*.cfg' -o -name '*.ini' \
21+
| while read f; do : > "$f"; done 2>/dev/null || true
1622

1723
WORKDIR /workspace
1824

1925
# Empty git repo so agent can commit work
20-
RUN git init && \
26+
RUN git init 2>/dev/null || (git config --global init.defaultBranch main && git init) && \
2127
git config user.email "agent@example.com" && \
2228
git config user.name "Agent"
2329

24-
RUN mkdir -p /logs/agent /logs/verifier
25-
2630
# Mark sg_only mode so verifiers can skip local-path checks
2731
RUN touch /tmp/.sg_only_mode
2832

33+
# Clone manifest for sgonly_verifier_wrapper.sh to restore repo at verify time
34+
RUN echo '{"repos":[{"mirror":"sg-benchmarks/llama.cpp--56399714","dest":"/workspace"}]}' > /tmp/.sg_only_clone_manifest.json
35+
2936
ENTRYPOINT []
Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,36 @@
11
# llamacpp-file-modify-search-001 — sg_only_env variant
2-
# No local repo clone — agent uses Sourcegraph MCP exclusively for code access.
2+
# Uses TAC base image (provides /utils/eval.py, python_default) with workspace truncated.
3+
# Agent uses Sourcegraph MCP exclusively for code access.
34

4-
FROM ubuntu:22.04
5+
FROM ghcr.io/theagentcompany/sde-find-answer-in-codebase-2-image:1.0.0
56

67
ENV SOURCEGRAPH_REPO_NAME=sg-benchmarks/llama.cpp--56399714
78

8-
ENV DEBIAN_FRONTEND=noninteractive
9+
# TAC environment variables (needed by verifier)
10+
ENV TAC_SERVER_HOSTNAME=localhost
11+
ENV DECRYPTION_KEY="theagentcompany is all you need"
912

10-
RUN apt-get update && apt-get install -y --no-install-recommends \
11-
git \
12-
ca-certificates \
13-
python3 \
14-
curl \
15-
&& rm -rf /var/lib/apt/lists/*
13+
# Create logs directory for Harbor compatibility
14+
RUN mkdir -p /logs/agent /logs/verifier /workspace
15+
16+
# Truncate workspace — agent must use MCP to discover code
17+
RUN find /workspace -type f -name '*.py' -o -name '*.cpp' -o -name '*.h' -o -name '*.c' \
18+
-o -name '*.java' -o -name '*.js' -o -name '*.ts' -o -name '*.go' -o -name '*.rs' \
19+
-o -name '*.rb' -o -name '*.sh' -o -name '*.md' -o -name '*.txt' -o -name '*.json' \
20+
-o -name '*.yaml' -o -name '*.yml' -o -name '*.toml' -o -name '*.cfg' -o -name '*.ini' \
21+
| while read f; do : > "$f"; done 2>/dev/null || true
1622

1723
WORKDIR /workspace
1824

1925
# Empty git repo so agent can commit work
20-
RUN git init && \
26+
RUN git init 2>/dev/null || (git config --global init.defaultBranch main && git init) && \
2127
git config user.email "agent@example.com" && \
2228
git config user.name "Agent"
2329

24-
RUN mkdir -p /logs/agent /logs/verifier
25-
2630
# Mark sg_only mode so verifiers can skip local-path checks
2731
RUN touch /tmp/.sg_only_mode
2832

33+
# Clone manifest for sgonly_verifier_wrapper.sh to restore repo at verify time
34+
RUN echo '{"repos":[{"mirror":"sg-benchmarks/llama.cpp--56399714","dest":"/workspace"}]}' > /tmp/.sg_only_clone_manifest.json
35+
2936
ENTRYPOINT []
Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,36 @@
11
# openhands-search-file-test-001 — sg_only_env variant
2-
# No local repo clone — agent uses Sourcegraph MCP exclusively for code access.
2+
# Uses TAC base image (provides /utils/eval.py, python_default) with workspace truncated.
3+
# Agent uses Sourcegraph MCP exclusively for code access.
34

4-
FROM ubuntu:22.04
5+
FROM ghcr.io/theagentcompany/sde-write-a-unit-test-for-search_file-function-image:1.0.0
56

67
ENV SOURCEGRAPH_REPO_NAME=sg-benchmarks/OpenHands--latest
78

8-
ENV DEBIAN_FRONTEND=noninteractive
9+
# TAC environment variables (needed by verifier)
10+
ENV TAC_SERVER_HOSTNAME=localhost
11+
ENV DECRYPTION_KEY="theagentcompany is all you need"
912

10-
RUN apt-get update && apt-get install -y --no-install-recommends \
11-
git \
12-
ca-certificates \
13-
python3 \
14-
curl \
15-
&& rm -rf /var/lib/apt/lists/*
13+
# Create logs directory for Harbor compatibility
14+
RUN mkdir -p /logs/agent /logs/verifier /workspace
15+
16+
# Truncate workspace — agent must use MCP to discover code
17+
RUN find /workspace -type f -name '*.py' -o -name '*.cpp' -o -name '*.h' -o -name '*.c' \
18+
-o -name '*.java' -o -name '*.js' -o -name '*.ts' -o -name '*.go' -o -name '*.rs' \
19+
-o -name '*.rb' -o -name '*.sh' -o -name '*.md' -o -name '*.txt' -o -name '*.json' \
20+
-o -name '*.yaml' -o -name '*.yml' -o -name '*.toml' -o -name '*.cfg' -o -name '*.ini' \
21+
| while read f; do : > "$f"; done 2>/dev/null || true
1622

1723
WORKDIR /workspace
1824

1925
# Empty git repo so agent can commit work
20-
RUN git init && \
26+
RUN git init 2>/dev/null || (git config --global init.defaultBranch main && git init) && \
2127
git config user.email "agent@example.com" && \
2228
git config user.name "Agent"
2329

24-
RUN mkdir -p /logs/agent /logs/verifier
25-
2630
# Mark sg_only mode so verifiers can skip local-path checks
2731
RUN touch /tmp/.sg_only_mode
2832

33+
# Clone manifest for sgonly_verifier_wrapper.sh to restore repo at verify time
34+
RUN echo '{"repos":[{"mirror":"sg-benchmarks/OpenHands--latest","dest":"/workspace"}]}' > /tmp/.sg_only_clone_manifest.json
35+
2936
ENTRYPOINT []

configs/run_selected_tasks.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -410,7 +410,9 @@ if [ "$SKIP_PREBUILD" = false ]; then
410410
echo "=== Pre-building Docker images ==="
411411
ensure_base_images
412412
for bm in $(echo "${!BENCHMARK_COUNTS[@]}" | tr ' ' '\n' | sort); do
413-
prebuild_images "$bm"
413+
# Pass selected task IDs so prebuild only builds images for tasks we'll run
414+
_task_list=$(echo "${BENCHMARK_TASK_IDS[$bm]}" | grep -v '^$' | paste -sd, -)
415+
prebuild_images "$bm" --tasks "$_task_list"
414416
done
415417
echo ""
416418
fi

configs/sdlc_suite_2config.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,9 @@ run_task_batch() {
315315
# This moves Docker build time out of the critical path (API session slots).
316316
if [ "$SKIP_PREBUILD" = false ]; then
317317
log_section "Pre-building Docker images for ${SUITE}"
318-
prebuild_images "$SUITE"
318+
# Pass selected task IDs so prebuild only builds images for tasks we'll run
319+
_task_list=$(IFS=,; echo "${TASK_IDS[*]}")
320+
prebuild_images "$SUITE" --tasks "$_task_list"
319321
fi
320322

321323
BL_CONFIG=$(baseline_config_for "$FULL_CONFIG")

0 commit comments

Comments
 (0)