Skip to content

Commit dc03e23

Browse files
sjarmak and claude committed
chore: untrack .pyc files and add mirror creation script
Remove 4 oracle_checks.cpython-310.pyc files from git tracking (already in .gitignore). Add create_missing_mcp_mirrors.sh for provisioning sg-benchmarks mirrors for repos missing from Sourcegraph. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 665051a commit dc03e23

File tree

5 files changed

+127
-0
lines changed

5 files changed

+127
-0
lines changed
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
#!/usr/bin/env bash
2+
# Create and populate sg-benchmarks mirrors for MCP-unique tasks that are missing.
3+
#
4+
# These repos are cloned at specific tags in baseline Dockerfiles but have no
5+
# corresponding sg-benchmarks mirror, so the MCP agent cannot search them at
6+
# the correct version.
7+
#
8+
# Repos handled:
9+
# - kubernetes/kubernetes @ v1.32.0 → sg-benchmarks/kubernetes-kubernetes (NEW)
10+
# - nodejs/node @ v22.13.0 → sg-benchmarks/nodejs-node (NEW)
11+
# - pandas-dev/pandas @ v2.2.3 → sg-benchmarks/pandas (NEW)
12+
# - scikit-learn/scikit-learn @ 1.6.1 → sg-benchmarks/scikit-learn (NEW)
13+
#
14+
# Usage: bash scripts/create_missing_mcp_mirrors.sh [--dry-run]
15+
set -euo pipefail

#######################################
# Parse the script's command-line arguments.
#
# Unknown arguments are rejected rather than ignored: a mistyped flag such as
# `--dryrun` must never fall through to a real run that creates repos and
# force-pushes to them.
#
# Globals:   DRY_RUN (written) - always the word "true" or "false"; the rest
#            of the script executes it as a command (`if $DRY_RUN; then`).
# Arguments: the script's own arguments; only `--dry-run` is recognised.
#            Empty arguments are tolerated so that wrappers can pass an
#            optional flag variable that happens to be empty.
# Outputs:   dry-run banner on stdout; usage error on stderr.
# Returns:   0 on success; exits the script with status 2 on an unknown
#            argument.
#######################################
parse_args() {
  DRY_RUN=false

  local arg
  for arg in "$@"; do
    case "$arg" in
      --dry-run)
        DRY_RUN=true
        ;;
      '')
        # Nothing to do: treat an empty argument as "flag not given".
        ;;
      *)
        printf 'ERROR: unknown argument: %s\n' "$arg" >&2
        printf 'Usage: bash scripts/create_missing_mcp_mirrors.sh [--dry-run]\n' >&2
        exit 2
        ;;
    esac
  done

  if $DRY_RUN; then
    echo "=== DRY RUN MODE — no repos will be created or pushed ==="
    echo ""
  fi
}

parse_args "$@"
23+
24+
# GitHub organisation that owns every mirror repository.
SG_ORG="sg-benchmarks"

# Scratch directory for the temporary clones; removed again when the script
# exits, whatever the exit path.
WORK_DIR="$(mktemp -d)"
_remove_work_dir() {
  rm -rf "$WORK_DIR"
}
trap _remove_work_dir EXIT

# Per-run tallies reported in the final summary.
SUCCESS=0; FAILED=0; SKIPPED=0
31+
32+
# One record per mirror, as a single whitespace-separated string:
#   "<upstream owner/repo> <tag> <mirror repo name> <description ...>"
# The first three fields must not contain whitespace; everything after the
# third field is the free-text description of the mirror repository.
REPOS=()
REPOS+=("kubernetes/kubernetes v1.32.0 kubernetes-kubernetes Mirror of kubernetes/kubernetes at v1.32.0")
REPOS+=("nodejs/node v22.13.0 nodejs-node Mirror of nodejs/node at v22.13.0")
REPOS+=("pandas-dev/pandas v2.2.3 pandas Mirror of pandas-dev/pandas at v2.2.3")
REPOS+=("scikit-learn/scikit-learn 1.6.1 scikit-learn Mirror of scikit-learn/scikit-learn at 1.6.1")
39+
40+
#######################################
# Run the clone -> orphan commit -> force-push pipeline for one repository.
#
# Every command is checked explicitly. This function is called from an `if`
# condition, where `set -e` is inactive, so an unchecked failure would
# otherwise be ignored and the pipeline would carry on with a broken tree.
#
# Globals:   SG_ORG (read)
# Arguments: $1 - upstream GitHub repo ("owner/name")
#            $2 - tag to mirror
#            $3 - mirror repo name inside SG_ORG
#            $4 - directory to clone the upstream repo into
#            $5 - directory in which the single-commit repo is built
# Outputs:   progress and error messages on stdout
# Returns:   0 if the mirror was pushed, 1 as soon as any step fails
#######################################
_mirror_repo_steps() {
  local github_repo=$1 tag=$2 sg_name=$3 clone_dir=$4 fresh_dir=$5
  local actual_commit

  # Phase 2a: shallow clone at the pinned tag.
  echo " Cloning ${github_repo} at tag ${tag} (shallow)..."
  if ! git clone --depth 1 --branch "$tag" \
    "https://github.com/${github_repo}.git" "$clone_dir" 2>&1; then
    echo " ERROR: Failed to clone ${github_repo} at tag ${tag}"
    return 1
  fi

  if ! actual_commit=$(git -C "$clone_dir" rev-parse HEAD); then
    echo " ERROR: Failed to resolve HEAD of the ${github_repo} clone"
    return 1
  fi
  echo " Cloned at commit ${actual_commit:0:12}"

  # Phase 2b: rebuild the tree as one orphan commit in a fresh repository
  # (avoids shallow-pack push errors).
  echo " Creating orphan commit..."
  if ! {
    mkdir -p "$fresh_dir" &&
      git -C "$fresh_dir" init -b main --quiet 2>&1 &&
      rsync -a --exclude='.git' "$clone_dir/" "$fresh_dir/" 2>&1 &&
      # --force: the copied tree carries the upstream .gitignore; without it,
      # files that upstream tracks but that match an ignore rule would be
      # silently left out of the mirror.
      git -C "$fresh_dir" add -A --force 2>&1 &&
      git -C "$fresh_dir" \
        -c user.email="benchmark@sg-benchmarks.dev" -c user.name="sg-benchmarks" \
        commit -m "Mirror ${github_repo} @ ${tag} (${actual_commit:0:8}) — pinned for CCB MCP-unique tasks" --quiet 2>&1
  }; then
    echo " ERROR: Failed to create orphan commit for ${github_repo} @ ${tag}"
    return 1
  fi

  # The clone is no longer needed; free the disk space before pushing.
  rm -rf -- "$clone_dir"

  # Phase 2c: force-push the single commit to the mirror's main branch.
  echo " Force-pushing to ${SG_ORG}/${sg_name}..."
  if ! {
    git -C "$fresh_dir" remote add sg-target "https://github.com/${SG_ORG}/${sg_name}.git" 2>&1 &&
      git -C "$fresh_dir" push sg-target main --force 2>&1
  }; then
    echo " ERROR: Failed to push to ${SG_ORG}/${sg_name}"
    return 1
  fi

  echo " SUCCESS: ${SG_ORG}/${sg_name} pinned to ${tag} (${actual_commit:0:8})"
  return 0
}

#######################################
# Mirror one upstream repository and always remove its scratch directories,
# whether the pipeline succeeded or failed.
#
# Globals:   SG_ORG, WORK_DIR (read)
# Arguments: $1 - upstream GitHub repo ("owner/name")
#            $2 - tag to mirror
#            $3 - mirror repo name inside SG_ORG
# Outputs:   progress and error messages on stdout
# Returns:   0 if the mirror was pushed, 1 otherwise
#######################################
mirror_repo() {
  local github_repo=$1 tag=$2 sg_name=$3
  # ${WORK_DIR:?} aborts instead of building paths under "/" if it is empty.
  local clone_dir="${WORK_DIR:?}/clone_${sg_name}"
  local fresh_dir="${WORK_DIR:?}/fresh_${sg_name}"
  local status=0

  _mirror_repo_steps "$github_repo" "$tag" "$sg_name" "$clone_dir" "$fresh_dir" || status=$?
  rm -rf -- "$clone_dir" "$fresh_dir"
  return "$status"
}

for entry in "${REPOS[@]}"; do
  # The first three whitespace-delimited fields are repo, tag and mirror name;
  # `read` assigns the remainder of the line (the description) to the last
  # variable.
  read -r github_repo tag sg_name description <<<"$entry"

  echo ""
  if [[ -z "$github_repo" || -z "$tag" || -z "$sg_name" ]]; then
    echo " ERROR: Malformed REPOS entry: '${entry}'"
    FAILED=$((FAILED + 1))
    continue
  fi
  echo "=== ${github_repo} @ ${tag} → ${SG_ORG}/${sg_name} ==="

  # Phase 1: Create repo if it doesn't exist
  if gh api "repos/${SG_ORG}/${sg_name}" --jq '.full_name' &>/dev/null; then
    echo " Repo ${SG_ORG}/${sg_name} already exists — will force-push"
  else
    echo " Creating repo ${SG_ORG}/${sg_name}..."
    if $DRY_RUN; then
      echo " [DRY RUN] Would create: gh repo create ${SG_ORG}/${sg_name} --public --description '${description}'"
    elif ! gh repo create "${SG_ORG}/${sg_name}" --public --description "${description}" 2>&1; then
      echo " ERROR: Failed to create repo ${SG_ORG}/${sg_name}"
      FAILED=$((FAILED + 1))
      continue
    else
      echo " Created ${SG_ORG}/${sg_name}"
    fi
  fi

  if $DRY_RUN; then
    echo " [DRY RUN] Would clone ${github_repo} at ${tag}, orphan commit, force-push"
    SKIPPED=$((SKIPPED + 1))
    continue
  fi

  # Phase 2: a failure here is counted for this repo only; the loop moves on.
  if mirror_repo "$github_repo" "$tag" "$sg_name"; then
    SUCCESS=$((SUCCESS + 1))
  else
    FAILED=$((FAILED + 1))
  fi
done
112+
113+
#######################################
# Print the final tally and, when at least one mirror was pushed, the searches
# to run once Sourcegraph has indexed the new repositories.
#
# Globals:   SUCCESS, FAILED, SKIPPED (read; an unset counter counts as 0)
# Arguments: none
# Outputs:   summary on stdout
# Returns:   0 if no repository failed, 1 otherwise
#######################################
print_summary() {
  local succeeded=${SUCCESS:-0} failed=${FAILED:-0} skipped=${SKIPPED:-0}

  echo ""
  echo "=============================================="
  echo "Mirror creation complete!"
  echo "=============================================="
  echo "Succeeded: $succeeded"
  echo "Failed: $failed"
  echo "Skipped: $skipped"
  echo ""
  if ((succeeded > 0)); then
    echo "Wait ~10-30 minutes for Sourcegraph indexing, then verify with:"
    echo " keyword_search: repo:^github.com/sg-benchmarks/kubernetes-kubernetes$ apiVersion"
    echo " keyword_search: repo:^github.com/sg-benchmarks/nodejs-node$ http.createServer"
    echo " keyword_search: repo:^github.com/sg-benchmarks/pandas$ DataFrame"
    echo " keyword_search: repo:^github.com/sg-benchmarks/scikit-learn$ sklearn"
  fi

  ((failed == 0))
}

# Propagate failures through the exit status so that callers and CI can tell
# a partially failed run from a clean one.
print_summary || exit 1

0 commit comments

Comments
 (0)