Skip to content

Commit 4ff6678

Browse files
sjarmak and claude committed
fix: baseline-local-artifact now gets local code + artifact sentinel
The baseline-local-artifact config was incorrectly using the same empty-workspace Dockerfile.artifact_only as the MCP config, meaning baseline had no code AND no MCP — just the instruction text.

Fixes:
- Populate local_checkout_repos in 7 fixture files (firefox, gcc, llvm, jdk, chromium, android, libreoffice, arangodb) so baseline Dockerfiles include git clone commands
- Add generate_artifact_baseline() to generator: original Dockerfile + artifact sentinel (/tmp/.artifact_only_mode)
- Update run_selected_tasks.sh to use Dockerfile.artifact_baseline for baseline (falls back to artifact_only)
- Regenerate 57 base Dockerfiles with correct clones + language packages
- Generate 81 new Dockerfile.artifact_baseline files

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent a596e26 commit 4ff6678

File tree

206 files changed

+3069
-188
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

206 files changed

+3069
-188
lines changed
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
# ccx-compliance-051 — artifact_baseline variant
#
# Baseline image: the fixture repository is checked out locally and a
# sentinel file flags artifact mode (the verifier parses answer.json).

FROM ubuntu:22.04

# Stop apt/debconf from prompting during package installation.
ENV DEBIAN_FRONTEND=noninteractive

# Base tools plus the Go toolchain, installed in a single layer; the apt
# package lists are removed in the same layer so they do not bloat the image.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        git \
        golang-go \
        python3 \
 && rm -rf /var/lib/apt/lists/*

WORKDIR /workspace

# Shallow clone (latest commit only) of the fixture repo into the workspace.
RUN git clone --depth 1 \
        https://github.com/sg-evals/prometheus--v3.2.1.git \
        /workspace/prometheus

# Global git identity for agent commits; safe.directory '*' disables git's
# repository-ownership check for every path.
RUN git config --global user.email "agent@example.com" \
 && git config --global user.name "Agent" \
 && git config --global safe.directory '*'

# Log directories for the agent and the verifier.
RUN mkdir -p /logs/agent /logs/verifier

# Sentinel file marking artifact-only mode.
RUN touch /tmp/.artifact_only_mode

# Explicitly empty entrypoint: the container runs whatever command it is given.
ENTRYPOINT []

benchmarks/ccb_mcp_compliance/ccx-compliance-051/environment/Dockerfile.artifact_only

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
FROM ubuntu:22.04
66

77
ENV DEBIAN_FRONTEND=noninteractive
8-
ENV SOURCEGRAPH_REPOS="sg-evals/prometheus,sourcegraph-testing/prometheus-common"
8+
ENV SOURCEGRAPH_REPOS="sg-evals/prometheus--v3.2.1"
99

1010
RUN apt-get update && apt-get install -y --no-install-recommends \
1111
git \
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
# ccx-compliance-052 — artifact_baseline variant
#
# Baseline image: the fixture repositories are checked out locally and a
# sentinel file flags artifact mode (the verifier parses answer.json).

FROM ubuntu:22.04

# Stop apt/debconf from prompting during package installation.
ENV DEBIAN_FRONTEND=noninteractive

# Base tools plus a C++ toolchain (g++, make), installed in a single layer;
# the apt package lists are removed in the same layer.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        g++ \
        git \
        make \
        python3 \
 && rm -rf /var/lib/apt/lists/*

WORKDIR /workspace

# Local checkouts for the baseline config, one shallow clone per layer.
# NOTE(review): `git clone --branch` resolves branch or tag names; the three
# abbreviated-hash values below only work if each mirror publishes a ref with
# that exact name — TODO confirm against the sg-evals mirrors.
RUN git clone --depth 1 --branch v1.31.2 \
        https://github.com/sg-evals/envoy--v1.31.2 \
        /workspace/envoy--v1.31.2
RUN git clone --depth 1 --branch 84e84367 \
        https://github.com/sg-evals/data-plane-api--84e84367 \
        /workspace/data-plane-api--84e84367
RUN git clone --depth 1 --branch 71637ad6 \
        https://github.com/sg-evals/go-control-plane--71637ad6 \
        /workspace/go-control-plane--71637ad6
RUN git clone --depth 1 --branch 957dba5e \
        https://github.com/sg-evals/grpc--957dba5e \
        /workspace/grpc--957dba5e

# Global git identity for agent commits; safe.directory '*' disables git's
# repository-ownership check for every path.
RUN git config --global user.email "agent@example.com" \
 && git config --global user.name "Agent" \
 && git config --global safe.directory '*'

# Log directories for the agent and the verifier.
RUN mkdir -p /logs/agent /logs/verifier

# Sentinel file marking artifact-only mode.
RUN touch /tmp/.artifact_only_mode

# Explicitly empty entrypoint: the container runs whatever command it is given.
ENTRYPOINT []

benchmarks/ccb_mcp_compliance/ccx-compliance-052/environment/Dockerfile.artifact_only

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
FROM ubuntu:22.04
66

77
ENV DEBIAN_FRONTEND=noninteractive
8-
ENV SOURCEGRAPH_REPOS="sg-evals/envoy--v1.31.2,sg-evals/data-plane-api--84e84367,sg-evals/go-control-plane--71637ad6,sg-evals/grpc--957dba5e"
8+
ENV SOURCEGRAPH_REPOS="sg-evals/data-plane-api--84e84367,sg-evals/envoy--v1.31.2,sg-evals/go-control-plane--71637ad6,sg-evals/grpc--957dba5e"
99

1010
RUN apt-get update && apt-get install -y --no-install-recommends \
1111
git \

benchmarks/ccb_mcp_compliance/ccx-compliance-053/environment/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
88
ca-certificates \
99
curl \
1010
python3 \
11-
default-jdk \
11+
default-jdk maven \
1212
&& rm -rf /var/lib/apt/lists/*
1313

1414
WORKDIR /workspace
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
# ccx-compliance-053 — artifact_baseline variant
#
# Baseline image: the fixture repositories are checked out locally and a
# sentinel file flags artifact mode (the verifier parses answer.json).

FROM ubuntu:22.04

# Stop apt/debconf from prompting during package installation.
ENV DEBIAN_FRONTEND=noninteractive

# Base tools plus a Java toolchain (default-jdk, maven), installed in a single
# layer; the apt package lists are removed in the same layer.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        default-jdk \
        git \
        maven \
        python3 \
 && rm -rf /var/lib/apt/lists/*

WORKDIR /workspace

# Local checkouts for the baseline config, one shallow clone per layer.
# NOTE(review): `git clone --branch` resolves branch or tag names; the
# abbreviated-hash values below only work if each mirror publishes a ref with
# that exact name — TODO confirm against the sg-evals mirrors.
RUN git clone --depth 1 --branch 0753c489 \
        https://github.com/sg-evals/kafka--0753c489 \
        /workspace/kafka--0753c489
RUN git clone --depth 1 --branch 0cc95fcc \
        https://github.com/sg-evals/flink--0cc95fcc \
        /workspace/flink--0cc95fcc
RUN git clone --depth 1 --branch 1006f047 \
        https://github.com/sg-evals/camel--1006f047 \
        /workspace/camel--1006f047

# Global git identity for agent commits; safe.directory '*' disables git's
# repository-ownership check for every path.
RUN git config --global user.email "agent@example.com" \
 && git config --global user.name "Agent" \
 && git config --global safe.directory '*'

# Log directories for the agent and the verifier.
RUN mkdir -p /logs/agent /logs/verifier

# Sentinel file marking artifact-only mode.
RUN touch /tmp/.artifact_only_mode

# Explicitly empty entrypoint: the container runs whatever command it is given.
ENTRYPOINT []

benchmarks/ccb_mcp_compliance/ccx-compliance-053/environment/Dockerfile.artifact_only

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
FROM ubuntu:22.04
66

77
ENV DEBIAN_FRONTEND=noninteractive
8-
ENV SOURCEGRAPH_REPOS="sg-evals/kafka--0753c489,sg-evals/flink--0cc95fcc,sg-evals/camel--1006f047"
8+
ENV SOURCEGRAPH_REPOS="sg-evals/camel--1006f047,sg-evals/flink--0cc95fcc,sg-evals/kafka--0753c489"
99

1010
RUN apt-get update && apt-get install -y --no-install-recommends \
1111
git \
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
# ccx-compliance-057-ds — artifact_baseline variant
#
# Baseline image: every fixture repository is checked out locally and a
# sentinel file flags artifact mode (the verifier parses answer.json).

FROM ubuntu:22.04

# Stop apt/debconf from prompting during package installation.
ENV DEBIAN_FRONTEND=noninteractive

# Base tools only (no language toolchain), installed in a single layer; the
# apt package lists are removed in the same layer.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        git \
        python3 \
 && rm -rf /var/lib/apt/lists/*

WORKDIR /workspace

# Shallow clones (latest commit only) of both fixture repos, one per layer.
RUN git clone --depth 1 \
        https://github.com/sg-evals/grafana--v11.4.0.git \
        /workspace/grafana
RUN git clone --depth 1 \
        https://github.com/sg-evals/loki--v3.3.4.git \
        /workspace/loki

# Global git identity for agent commits; safe.directory '*' disables git's
# repository-ownership check for every path.
RUN git config --global user.email "agent@example.com" \
 && git config --global user.name "Agent" \
 && git config --global safe.directory '*'

# Log directories for the agent and the verifier.
RUN mkdir -p /logs/agent /logs/verifier

# Sentinel file marking artifact-only mode.
RUN touch /tmp/.artifact_only_mode

# Explicitly empty entrypoint: the container runs whatever command it is given.
ENTRYPOINT []

benchmarks/ccb_mcp_compliance/ccx-compliance-057-ds/environment/Dockerfile.artifact_only

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
FROM ubuntu:22.04
66

77
ENV DEBIAN_FRONTEND=noninteractive
8-
ENV SOURCEGRAPH_REPOS="sg-evals/grafana,sg-evals/grafana-loki"
8+
ENV SOURCEGRAPH_REPOS="sg-evals/grafana--v11.4.0,sg-evals/loki--v3.3.4"
99

1010
RUN apt-get update && apt-get install -y --no-install-recommends \
1111
git \

benchmarks/ccb_mcp_compliance/ccx-compliance-115/environment/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
88
ca-certificates \
99
curl \
1010
python3 \
11-
python3 python3-pip \
11+
\
1212
&& rm -rf /var/lib/apt/lists/*
1313

1414
WORKDIR /workspace

0 commit comments

Comments
 (0)