Skip to content

Commit ba67770

Browse files
sjarmak and claude committed
fix: oracle sg-benchmarks alias resolution + account round-robin bug
Add _SG_MIRROR_ALIASES to all 12 MCP-unique oracle_checks.py files so that agent answers using sg-benchmarks mirror names (e.g. sg-benchmarks/etcd-io-etcd) are correctly resolved to upstream canonical names (etcd-io/etcd) during scoring. Fix _next_account in run_selected_tasks.sh: calling it via $() ran it in a subshell, which prevented _ACCOUNT_IDX from advancing in the parent shell, so all tasks used account 0. Rename it to _pick_next_account, which sets _PICKED_HOME directly. Also add an etcd mirror entry to instance_to_mirror.json and add etcd + k8s-api to the remirror script. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 94b1319 commit ba67770

File tree

15 files changed

+260
-58
lines changed

15 files changed

+260
-58
lines changed

benchmarks/ccb_mcp_crossorg/ccx-crossorg-061/tests/oracle_checks.py

Lines changed: 20 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -31,16 +31,32 @@
3131
# Agents sometimes forget to strip these; normalize before comparison.
3232
_HOSTING_PREFIX_RE = re.compile(r"^(?:github\.com|gitlab\.com|bitbucket\.org)/")
3333

34+
# sg-benchmarks mirrors → upstream canonical names.
35+
# Allows oracle answers and agent answers to use either name interchangeably.
36+
_SG_MIRROR_ALIASES = {
37+
"sg-benchmarks/kubernetes-client-go": "kubernetes/client-go",
38+
"sg-benchmarks/kubernetes-api": "kubernetes/api",
39+
"sg-benchmarks/etcd-io-etcd": "etcd-io/etcd",
40+
"sg-benchmarks/expressjs-express": "expressjs/express",
41+
"sg-benchmarks/grafana-loki": "grafana/loki",
42+
"sg-benchmarks/grafana-mimir": "grafana/mimir",
43+
"sg-benchmarks/prisma-prisma": "prisma/prisma",
44+
"sg-benchmarks/lodash": "lodash/lodash",
45+
}
46+
3447

3548
def _normalize_repo(repo: str) -> str:
36-
"""Strip hosting-provider prefix from a repo name for fuzzy matching.
49+
"""Strip hosting-provider prefix and resolve sg-benchmarks aliases.
3750
3851
>>> _normalize_repo("github.com/sg-benchmarks/kubernetes-client-go")
39-
'sg-benchmarks/kubernetes-client-go'
52+
'kubernetes/client-go'
4053
>>> _normalize_repo("sg-benchmarks/kubernetes-client-go")
41-
'sg-benchmarks/kubernetes-client-go'
54+
'kubernetes/client-go'
55+
>>> _normalize_repo("etcd-io/etcd")
56+
'etcd-io/etcd'
4257
"""
43-
return _HOSTING_PREFIX_RE.sub("", repo)
58+
repo = _HOSTING_PREFIX_RE.sub("", repo)
59+
return _SG_MIRROR_ALIASES.get(repo, repo)
4460

4561

4662
def check_file_set_match(

benchmarks/ccb_mcp_crossorg/ccx-crossorg-066/tests/oracle_checks.py

Lines changed: 20 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -31,16 +31,32 @@
3131
# Agents sometimes forget to strip these; normalize before comparison.
3232
_HOSTING_PREFIX_RE = re.compile(r"^(?:github\.com|gitlab\.com|bitbucket\.org)/")
3333

34+
# sg-benchmarks mirrors → upstream canonical names.
35+
# Allows oracle answers and agent answers to use either name interchangeably.
36+
_SG_MIRROR_ALIASES = {
37+
"sg-benchmarks/kubernetes-client-go": "kubernetes/client-go",
38+
"sg-benchmarks/kubernetes-api": "kubernetes/api",
39+
"sg-benchmarks/etcd-io-etcd": "etcd-io/etcd",
40+
"sg-benchmarks/expressjs-express": "expressjs/express",
41+
"sg-benchmarks/grafana-loki": "grafana/loki",
42+
"sg-benchmarks/grafana-mimir": "grafana/mimir",
43+
"sg-benchmarks/prisma-prisma": "prisma/prisma",
44+
"sg-benchmarks/lodash": "lodash/lodash",
45+
}
46+
3447

3548
def _normalize_repo(repo: str) -> str:
36-
"""Strip hosting-provider prefix from a repo name for fuzzy matching.
49+
"""Strip hosting-provider prefix and resolve sg-benchmarks aliases.
3750
3851
>>> _normalize_repo("github.com/sg-benchmarks/kubernetes-client-go")
39-
'sg-benchmarks/kubernetes-client-go'
52+
'kubernetes/client-go'
4053
>>> _normalize_repo("sg-benchmarks/kubernetes-client-go")
41-
'sg-benchmarks/kubernetes-client-go'
54+
'kubernetes/client-go'
55+
>>> _normalize_repo("etcd-io/etcd")
56+
'etcd-io/etcd'
4257
"""
43-
return _HOSTING_PREFIX_RE.sub("", repo)
58+
repo = _HOSTING_PREFIX_RE.sub("", repo)
59+
return _SG_MIRROR_ALIASES.get(repo, repo)
4460

4561

4662
def check_file_set_match(

benchmarks/ccb_mcp_crossrepo_tracing/ccx-config-trace-010/tests/oracle_checks.py

Lines changed: 20 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -31,16 +31,32 @@
3131
# Agents sometimes forget to strip these; normalize before comparison.
3232
_HOSTING_PREFIX_RE = re.compile(r"^(?:github\.com|gitlab\.com|bitbucket\.org)/")
3333

34+
# sg-benchmarks mirrors → upstream canonical names.
35+
# Allows oracle answers and agent answers to use either name interchangeably.
36+
_SG_MIRROR_ALIASES = {
37+
"sg-benchmarks/kubernetes-client-go": "kubernetes/client-go",
38+
"sg-benchmarks/kubernetes-api": "kubernetes/api",
39+
"sg-benchmarks/etcd-io-etcd": "etcd-io/etcd",
40+
"sg-benchmarks/expressjs-express": "expressjs/express",
41+
"sg-benchmarks/grafana-loki": "grafana/loki",
42+
"sg-benchmarks/grafana-mimir": "grafana/mimir",
43+
"sg-benchmarks/prisma-prisma": "prisma/prisma",
44+
"sg-benchmarks/lodash": "lodash/lodash",
45+
}
46+
3447

3548
def _normalize_repo(repo: str) -> str:
36-
"""Strip hosting-provider prefix from a repo name for fuzzy matching.
49+
"""Strip hosting-provider prefix and resolve sg-benchmarks aliases.
3750
3851
>>> _normalize_repo("github.com/sg-benchmarks/kubernetes-client-go")
39-
'sg-benchmarks/kubernetes-client-go'
52+
'kubernetes/client-go'
4053
>>> _normalize_repo("sg-benchmarks/kubernetes-client-go")
41-
'sg-benchmarks/kubernetes-client-go'
54+
'kubernetes/client-go'
55+
>>> _normalize_repo("etcd-io/etcd")
56+
'etcd-io/etcd'
4257
"""
43-
return _HOSTING_PREFIX_RE.sub("", repo)
58+
repo = _HOSTING_PREFIX_RE.sub("", repo)
59+
return _SG_MIRROR_ALIASES.get(repo, repo)
4460

4561

4662
def check_file_set_match(

benchmarks/ccb_mcp_crossrepo_tracing/ccx-dep-trace-001/tests/oracle_checks.py

Lines changed: 20 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -31,16 +31,32 @@
3131
# Agents sometimes forget to strip these; normalize before comparison.
3232
_HOSTING_PREFIX_RE = re.compile(r"^(?:github\.com|gitlab\.com|bitbucket\.org)/")
3333

34+
# sg-benchmarks mirrors → upstream canonical names.
35+
# Allows oracle answers and agent answers to use either name interchangeably.
36+
_SG_MIRROR_ALIASES = {
37+
"sg-benchmarks/kubernetes-client-go": "kubernetes/client-go",
38+
"sg-benchmarks/kubernetes-api": "kubernetes/api",
39+
"sg-benchmarks/etcd-io-etcd": "etcd-io/etcd",
40+
"sg-benchmarks/expressjs-express": "expressjs/express",
41+
"sg-benchmarks/grafana-loki": "grafana/loki",
42+
"sg-benchmarks/grafana-mimir": "grafana/mimir",
43+
"sg-benchmarks/prisma-prisma": "prisma/prisma",
44+
"sg-benchmarks/lodash": "lodash/lodash",
45+
}
46+
3447

3548
def _normalize_repo(repo: str) -> str:
36-
"""Strip hosting-provider prefix from a repo name for fuzzy matching.
49+
"""Strip hosting-provider prefix and resolve sg-benchmarks aliases.
3750
3851
>>> _normalize_repo("github.com/sg-benchmarks/kubernetes-client-go")
39-
'sg-benchmarks/kubernetes-client-go'
52+
'kubernetes/client-go'
4053
>>> _normalize_repo("sg-benchmarks/kubernetes-client-go")
41-
'sg-benchmarks/kubernetes-client-go'
54+
'kubernetes/client-go'
55+
>>> _normalize_repo("etcd-io/etcd")
56+
'etcd-io/etcd'
4257
"""
43-
return _HOSTING_PREFIX_RE.sub("", repo)
58+
repo = _HOSTING_PREFIX_RE.sub("", repo)
59+
return _SG_MIRROR_ALIASES.get(repo, repo)
4460

4561

4662
def check_file_set_match(

benchmarks/ccb_mcp_crossrepo_tracing/ccx-dep-trace-004/tests/oracle_checks.py

Lines changed: 20 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -31,16 +31,32 @@
3131
# Agents sometimes forget to strip these; normalize before comparison.
3232
_HOSTING_PREFIX_RE = re.compile(r"^(?:github\.com|gitlab\.com|bitbucket\.org)/")
3333

34+
# sg-benchmarks mirrors → upstream canonical names.
35+
# Allows oracle answers and agent answers to use either name interchangeably.
36+
_SG_MIRROR_ALIASES = {
37+
"sg-benchmarks/kubernetes-client-go": "kubernetes/client-go",
38+
"sg-benchmarks/kubernetes-api": "kubernetes/api",
39+
"sg-benchmarks/etcd-io-etcd": "etcd-io/etcd",
40+
"sg-benchmarks/expressjs-express": "expressjs/express",
41+
"sg-benchmarks/grafana-loki": "grafana/loki",
42+
"sg-benchmarks/grafana-mimir": "grafana/mimir",
43+
"sg-benchmarks/prisma-prisma": "prisma/prisma",
44+
"sg-benchmarks/lodash": "lodash/lodash",
45+
}
46+
3447

3548
def _normalize_repo(repo: str) -> str:
36-
"""Strip hosting-provider prefix from a repo name for fuzzy matching.
49+
"""Strip hosting-provider prefix and resolve sg-benchmarks aliases.
3750
3851
>>> _normalize_repo("github.com/sg-benchmarks/kubernetes-client-go")
39-
'sg-benchmarks/kubernetes-client-go'
52+
'kubernetes/client-go'
4053
>>> _normalize_repo("sg-benchmarks/kubernetes-client-go")
41-
'sg-benchmarks/kubernetes-client-go'
54+
'kubernetes/client-go'
55+
>>> _normalize_repo("etcd-io/etcd")
56+
'etcd-io/etcd'
4257
"""
43-
return _HOSTING_PREFIX_RE.sub("", repo)
58+
repo = _HOSTING_PREFIX_RE.sub("", repo)
59+
return _SG_MIRROR_ALIASES.get(repo, repo)
4460

4561

4662
def check_file_set_match(

benchmarks/ccb_mcp_incident/ccx-incident-031/tests/oracle_checks.py

Lines changed: 20 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -31,16 +31,32 @@
3131
# Agents sometimes forget to strip these; normalize before comparison.
3232
_HOSTING_PREFIX_RE = re.compile(r"^(?:github\.com|gitlab\.com|bitbucket\.org)/")
3333

34+
# sg-benchmarks mirrors → upstream canonical names.
35+
# Allows oracle answers and agent answers to use either name interchangeably.
36+
_SG_MIRROR_ALIASES = {
37+
"sg-benchmarks/kubernetes-client-go": "kubernetes/client-go",
38+
"sg-benchmarks/kubernetes-api": "kubernetes/api",
39+
"sg-benchmarks/etcd-io-etcd": "etcd-io/etcd",
40+
"sg-benchmarks/expressjs-express": "expressjs/express",
41+
"sg-benchmarks/grafana-loki": "grafana/loki",
42+
"sg-benchmarks/grafana-mimir": "grafana/mimir",
43+
"sg-benchmarks/prisma-prisma": "prisma/prisma",
44+
"sg-benchmarks/lodash": "lodash/lodash",
45+
}
46+
3447

3548
def _normalize_repo(repo: str) -> str:
36-
"""Strip hosting-provider prefix from a repo name for fuzzy matching.
49+
"""Strip hosting-provider prefix and resolve sg-benchmarks aliases.
3750
3851
>>> _normalize_repo("github.com/sg-benchmarks/kubernetes-client-go")
39-
'sg-benchmarks/kubernetes-client-go'
52+
'kubernetes/client-go'
4053
>>> _normalize_repo("sg-benchmarks/kubernetes-client-go")
41-
'sg-benchmarks/kubernetes-client-go'
54+
'kubernetes/client-go'
55+
>>> _normalize_repo("etcd-io/etcd")
56+
'etcd-io/etcd'
4257
"""
43-
return _HOSTING_PREFIX_RE.sub("", repo)
58+
repo = _HOSTING_PREFIX_RE.sub("", repo)
59+
return _SG_MIRROR_ALIASES.get(repo, repo)
4460

4561

4662
def check_file_set_match(

benchmarks/ccb_mcp_onboarding/ccx-explore-042-ds/tests/oracle_checks.py

Lines changed: 20 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -31,16 +31,32 @@
3131
# Agents sometimes forget to strip these; normalize before comparison.
3232
_HOSTING_PREFIX_RE = re.compile(r"^(?:github\.com|gitlab\.com|bitbucket\.org)/")
3333

34+
# sg-benchmarks mirrors → upstream canonical names.
35+
# Allows oracle answers and agent answers to use either name interchangeably.
36+
_SG_MIRROR_ALIASES = {
37+
"sg-benchmarks/kubernetes-client-go": "kubernetes/client-go",
38+
"sg-benchmarks/kubernetes-api": "kubernetes/api",
39+
"sg-benchmarks/etcd-io-etcd": "etcd-io/etcd",
40+
"sg-benchmarks/expressjs-express": "expressjs/express",
41+
"sg-benchmarks/grafana-loki": "grafana/loki",
42+
"sg-benchmarks/grafana-mimir": "grafana/mimir",
43+
"sg-benchmarks/prisma-prisma": "prisma/prisma",
44+
"sg-benchmarks/lodash": "lodash/lodash",
45+
}
46+
3447

3548
def _normalize_repo(repo: str) -> str:
36-
"""Strip hosting-provider prefix from a repo name for fuzzy matching.
49+
"""Strip hosting-provider prefix and resolve sg-benchmarks aliases.
3750
3851
>>> _normalize_repo("github.com/sg-benchmarks/kubernetes-client-go")
39-
'sg-benchmarks/kubernetes-client-go'
52+
'kubernetes/client-go'
4053
>>> _normalize_repo("sg-benchmarks/kubernetes-client-go")
41-
'sg-benchmarks/kubernetes-client-go'
54+
'kubernetes/client-go'
55+
>>> _normalize_repo("etcd-io/etcd")
56+
'etcd-io/etcd'
4257
"""
43-
return _HOSTING_PREFIX_RE.sub("", repo)
58+
repo = _HOSTING_PREFIX_RE.sub("", repo)
59+
return _SG_MIRROR_ALIASES.get(repo, repo)
4460

4561

4662
def check_file_set_match(

benchmarks/ccb_mcp_onboarding/ccx-onboard-041/tests/oracle_checks.py

Lines changed: 20 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -31,16 +31,32 @@
3131
# Agents sometimes forget to strip these; normalize before comparison.
3232
_HOSTING_PREFIX_RE = re.compile(r"^(?:github\.com|gitlab\.com|bitbucket\.org)/")
3333

34+
# sg-benchmarks mirrors → upstream canonical names.
35+
# Allows oracle answers and agent answers to use either name interchangeably.
36+
_SG_MIRROR_ALIASES = {
37+
"sg-benchmarks/kubernetes-client-go": "kubernetes/client-go",
38+
"sg-benchmarks/kubernetes-api": "kubernetes/api",
39+
"sg-benchmarks/etcd-io-etcd": "etcd-io/etcd",
40+
"sg-benchmarks/expressjs-express": "expressjs/express",
41+
"sg-benchmarks/grafana-loki": "grafana/loki",
42+
"sg-benchmarks/grafana-mimir": "grafana/mimir",
43+
"sg-benchmarks/prisma-prisma": "prisma/prisma",
44+
"sg-benchmarks/lodash": "lodash/lodash",
45+
}
46+
3447

3548
def _normalize_repo(repo: str) -> str:
36-
"""Strip hosting-provider prefix from a repo name for fuzzy matching.
49+
"""Strip hosting-provider prefix and resolve sg-benchmarks aliases.
3750
3851
>>> _normalize_repo("github.com/sg-benchmarks/kubernetes-client-go")
39-
'sg-benchmarks/kubernetes-client-go'
52+
'kubernetes/client-go'
4053
>>> _normalize_repo("sg-benchmarks/kubernetes-client-go")
41-
'sg-benchmarks/kubernetes-client-go'
54+
'kubernetes/client-go'
55+
>>> _normalize_repo("etcd-io/etcd")
56+
'etcd-io/etcd'
4257
"""
43-
return _HOSTING_PREFIX_RE.sub("", repo)
58+
repo = _HOSTING_PREFIX_RE.sub("", repo)
59+
return _SG_MIRROR_ALIASES.get(repo, repo)
4460

4561

4662
def check_file_set_match(

benchmarks/ccb_mcp_onboarding/ccx-onboard-050-ds/tests/oracle_checks.py

Lines changed: 20 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -31,16 +31,32 @@
3131
# Agents sometimes forget to strip these; normalize before comparison.
3232
_HOSTING_PREFIX_RE = re.compile(r"^(?:github\.com|gitlab\.com|bitbucket\.org)/")
3333

34+
# sg-benchmarks mirrors → upstream canonical names.
35+
# Allows oracle answers and agent answers to use either name interchangeably.
36+
_SG_MIRROR_ALIASES = {
37+
"sg-benchmarks/kubernetes-client-go": "kubernetes/client-go",
38+
"sg-benchmarks/kubernetes-api": "kubernetes/api",
39+
"sg-benchmarks/etcd-io-etcd": "etcd-io/etcd",
40+
"sg-benchmarks/expressjs-express": "expressjs/express",
41+
"sg-benchmarks/grafana-loki": "grafana/loki",
42+
"sg-benchmarks/grafana-mimir": "grafana/mimir",
43+
"sg-benchmarks/prisma-prisma": "prisma/prisma",
44+
"sg-benchmarks/lodash": "lodash/lodash",
45+
}
46+
3447

3548
def _normalize_repo(repo: str) -> str:
36-
"""Strip hosting-provider prefix from a repo name for fuzzy matching.
49+
"""Strip hosting-provider prefix and resolve sg-benchmarks aliases.
3750
3851
>>> _normalize_repo("github.com/sg-benchmarks/kubernetes-client-go")
39-
'sg-benchmarks/kubernetes-client-go'
52+
'kubernetes/client-go'
4053
>>> _normalize_repo("sg-benchmarks/kubernetes-client-go")
41-
'sg-benchmarks/kubernetes-client-go'
54+
'kubernetes/client-go'
55+
>>> _normalize_repo("etcd-io/etcd")
56+
'etcd-io/etcd'
4257
"""
43-
return _HOSTING_PREFIX_RE.sub("", repo)
58+
repo = _HOSTING_PREFIX_RE.sub("", repo)
59+
return _SG_MIRROR_ALIASES.get(repo, repo)
4460

4561

4662
def check_file_set_match(

benchmarks/ccb_mcp_platform/ccx-explore-091-ds/tests/oracle_checks.py

Lines changed: 20 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -31,16 +31,32 @@
3131
# Agents sometimes forget to strip these; normalize before comparison.
3232
_HOSTING_PREFIX_RE = re.compile(r"^(?:github\.com|gitlab\.com|bitbucket\.org)/")
3333

34+
# sg-benchmarks mirrors → upstream canonical names.
35+
# Allows oracle answers and agent answers to use either name interchangeably.
36+
_SG_MIRROR_ALIASES = {
37+
"sg-benchmarks/kubernetes-client-go": "kubernetes/client-go",
38+
"sg-benchmarks/kubernetes-api": "kubernetes/api",
39+
"sg-benchmarks/etcd-io-etcd": "etcd-io/etcd",
40+
"sg-benchmarks/expressjs-express": "expressjs/express",
41+
"sg-benchmarks/grafana-loki": "grafana/loki",
42+
"sg-benchmarks/grafana-mimir": "grafana/mimir",
43+
"sg-benchmarks/prisma-prisma": "prisma/prisma",
44+
"sg-benchmarks/lodash": "lodash/lodash",
45+
}
46+
3447

3548
def _normalize_repo(repo: str) -> str:
36-
"""Strip hosting-provider prefix from a repo name for fuzzy matching.
49+
"""Strip hosting-provider prefix and resolve sg-benchmarks aliases.
3750
3851
>>> _normalize_repo("github.com/sg-benchmarks/kubernetes-client-go")
39-
'sg-benchmarks/kubernetes-client-go'
52+
'kubernetes/client-go'
4053
>>> _normalize_repo("sg-benchmarks/kubernetes-client-go")
41-
'sg-benchmarks/kubernetes-client-go'
54+
'kubernetes/client-go'
55+
>>> _normalize_repo("etcd-io/etcd")
56+
'etcd-io/etcd'
4257
"""
43-
return _HOSTING_PREFIX_RE.sub("", repo)
58+
repo = _HOSTING_PREFIX_RE.sub("", repo)
59+
return _SG_MIRROR_ALIASES.get(repo, repo)
4460

4561

4662
def check_file_set_match(

0 commit comments

Comments
 (0)