Skip to content

Commit f99c131

Browse files
committed
for release 2
1 parent 327f451 commit f99c131

File tree

6 files changed

+69
-22
lines changed

6 files changed

+69
-22
lines changed

docs/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
python update_submissions_dataset.py
33
cd ../
44
python docs/render_submissions.py --analyze_submissions --split SPLIT
5-
python docs/render_submissions.py --render_webpages --overwrite_previous_eval --split SPLIT
5+
python docs/render_submissions.py --render_webpages --overwrite_previous_eval
66
```
77

88
```

docs/javascripts/tablesort.js

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,20 @@
11
document$.subscribe(function() {
2-
var tables = document.querySelectorAll("article table:not([class])")
3-
tables.forEach(function(table) {
4-
new Tablesort(table)
5-
})
6-
})
2+
var tables = document.querySelectorAll("article table:not([class])")
3+
tables.forEach(function(table) {
4+
new Tablesort(table);
5+
// Automatically sort the table by the specified column
6+
var defaultSortColumn = 2; // Index of the column to sort (0-based)
7+
var isAscending = false; // false => descending order (header is clicked a second time below)
8+
9+
// Delay to ensure Tablesort is fully initialized
10+
setTimeout(function () {
11+
var header = table.querySelectorAll("thead th")[defaultSortColumn];
12+
if (header) {
13+
header.click(); // Simulate a click on the header
14+
if (!isAscending) {
15+
header.click(); // Click again for descending order
16+
}
17+
}
18+
}, 100);
19+
});
20+
});

docs/javascripts/tablesort.number.js

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
(function(){
2+
var cleanNumber = function(i) {
3+
return i.replace(/[^\-?0-9.]/g, '');
4+
},
5+
6+
compareNumber = function(a, b) {
7+
a = parseFloat(a);
8+
b = parseFloat(b);
9+
10+
a = isNaN(a) ? 0 : a;
11+
b = isNaN(b) ? 0 : b;
12+
13+
return a - b;
14+
};
15+
16+
Tablesort.extend('number', function(item) {
17+
return item.match(/^[-+]?[£\x24Û¢´]?\d+\s*([,\.]\d{0,2})/) || // Prefixed currency
18+
item.match(/^[-+]?\d+\s*([,\.]\d{0,2})?[£\x24Û¢´]/) || // Suffixed currency
19+
item.match(/^[-+]?(\d)*-?([,\.]){0,1}-?(\d)+([E,e][\-+][\d]+)?%?$/); // Number
20+
}, function(a, b) {
21+
a = cleanNumber(a);
22+
b = cleanNumber(b);
23+
24+
return compareNumber(b, a);
25+
});
26+
}());

docs/render_submissions.py

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -185,28 +185,29 @@ def render_mds(overwrite_previous, subfolder="docs"):
185185
"lite": 3628,
186186
"all": 140926,
187187
} # hard-coded to skip running it later
188-
for split in tqdm.tqdm(["lite", "all"]):
188+
for split in ["lite", "all"]:
189189
num_repos = len(SPLIT[split])
190190
# total_num_tests = 0
191191
# for repo_name in SPLIT[split]:
192192
# repo_tests = subprocess.run(['commit0', 'get-tests', repo_name], capture_output=True, text=True).stdout.strip()
193193
# total_num_tests += len(repo_tests.splitlines())
194-
leaderboard[split] = leaderboard_header.format(
194+
leaderboard[split] = []
195+
leaderboard[split].append((split_to_total_tests[split]+1, leaderboard_header.format(
195196
split=split,
196197
num_repos=num_repos,
197198
total_num_tests=split_to_total_tests[split],
198-
)
199+
)))
199200

200201
for org_path in tqdm.tqdm(glob.glob(os.path.join(analysis_files_path, "*"))):
201202
org_name = os.path.basename(org_path)
202203
if org_name in {"blank", "repos", "submission_repos"}:
203204
continue
204205
for branch_path in glob.glob(os.path.join(org_path, "*.json")):
205206
cum_tests_passed = 0
206-
lite_cum_tests_passed = 0
207207
repos_resolved = 0
208-
lite_repos_resolved = 0
209208
total_duration = 0.0
209+
lite_cum_tests_passed = 0
210+
lite_repos_resolved = 0
210211
lite_total_duration = 0.0
211212
branch_metrics = json.load(open(branch_path))
212213
submission_info = branch_metrics["submission_info"]
@@ -301,10 +302,11 @@ def render_mds(overwrite_previous, subfolder="docs"):
301302
cum_tests_passed += pytest_info["summary"]["passed"]
302303
total_duration += pytest_info["duration"]
303304
repos_resolved += int(resolved)
304-
if repo_name in SPLIT["lite"]:
305+
if split == "all":
305306
lite_cum_tests_passed += pytest_info["summary"]["passed"]
306307
lite_total_duration += pytest_info["duration"]
307308
lite_repos_resolved += int(resolved)
309+
308310
if write_submission:
309311
pytest_details = f"{pytest_info['summary']['passed']} / {pytest_info['summary']['total']}"
310312
duration = f"{pytest_info['duration']:.2f}"
@@ -329,29 +331,33 @@ def render_mds(overwrite_previous, subfolder="docs"):
329331
wf.write(back_button + "\n" + submission_page)
330332
analysis_link = f"[Analysis](/{f'analysis_{org_name}_{branch_name}'})"
331333
github_link = f"[Github]({project_page_link})"
332-
leaderboard[split] += (
334+
leaderboard[split].append((cum_tests_passed,
333335
f"\n|{display_name}|"
334336
f"{repos_resolved}|"
335337
f"{cum_tests_passed}|"
336338
f"{total_duration:.2f}|"
337339
f"{submission_date}|"
338340
f"{analysis_link}|"
339341
f"{github_link}|"
340-
)
341-
if split == "all":
342-
leaderboard["lite"] += (
343-
f"\n|{display_name}|"
342+
))
343+
if ((split == "all") and ("Reference (Gold)" not in display_name)):
344+
leaderboard["lite"].append((lite_cum_tests_passed,
345+
f"\n|{display_name} (subset of `all`)|"
344346
f"{lite_repos_resolved}|"
345347
f"{lite_cum_tests_passed}|"
346348
f"{lite_total_duration:.2f}|"
347349
f"{submission_date}|"
348350
f"{analysis_link}|"
349351
f"{github_link}|"
350-
)
352+
))
351353

352354
leaderboard_filepath = os.path.join(subfolder, "analysis.md")
355+
for split in ["lite", "all"]:
356+
leaderboard[split] = sorted(leaderboard[split], key=lambda elt: -elt[0])
353357
with open(leaderboard_filepath, "w") as wf:
354-
wf.write(leaderboard["lite"] + "\n\n" + leaderboard["all"])
358+
lite_leaderboard_string = "".join(string for (_, string) in leaderboard["lite"])
359+
all_leaderboard_string = "".join(string for (_, string) in leaderboard["all"])
360+
wf.write(lite_leaderboard_string + "\n\n" + all_leaderboard_string)
355361

356362

357363
def get_args():

docs/update_submissions_dataset.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22

33
submissions = {
44
"org_name": ["test-save-commit0", "commit0-lite-with-test", "commit0-lite-plain", "commit0-all-plain", "openhands-commit0", "sweagent-commit0"],
5-
"branch": ["baseline", "fillin", "fillin", "fillin", "openhands", "sweagent"],
5+
"branch": ["baseline", "fillin", "fillin", "fillin", "openhands", "sweagent", "openhands", "sweagent"],
66
"display_name": ["Claude Sonnet 3.5 - Base", "Claude Sonnet 3.5 - Fill-in + Unit Test Feedback", "Claude Sonnet 3.5 - Fill-in", "Claude Sonnet 3.5 - Fill-in", "OpenHands", "SWE-Agent"],
77
"submission_date": ["09/25/2024", "09/25/2024", "09/25/2024", "09/25/2024", "11/25/2024", "11/26/2024"],
8-
"split": ["lite", "lite", "lite", "all", "all", "all"],
8+
"split": ["lite", "lite", "lite", "all", "all", "all", "lite", "lite"],
99
"project_page": ["https://github.com/test-save-commit0", "https://github.com/commit0-lite-with-test", "https://github.com/commit0-lite-plain", "https://github.com/commit0-all-plain", "https://github.com/openhands-commit0", "https://github.com/sweagent-commit0"]
1010
}
1111

mkdocs.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,5 @@ markdown_extensions:
2222

2323
extra_javascript:
2424
- https://unpkg.com/tablesort@5.3.0/dist/tablesort.min.js
25-
- javascripts/tablesort.js
25+
- javascripts/tablesort.js
26+
- javascripts/tablesort.number.js

0 commit comments

Comments
 (0)