Skip to content

Commit 3dadfa5

Browse files
committed
annotate app
1 parent 645b945 commit 3dadfa5

File tree

3 files changed

+1009
-320
lines changed

3 files changed

+1009
-320
lines changed

annotate/server.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -55,13 +55,13 @@ def upload():
5555
def list_pages():
    """Return a JSON list with one summary entry per page in ``PAGES``.

    Each entry carries the page ``id``, its ``w`` and ``h``, an ``annotated``
    flag and the number of stored pairs (``n_pairs``).
    """

    def _summarise(page):
        # storage.load gives None when the page has no GT JSON yet; an
        # empty list still counts as annotated.
        stored = storage.load(page["id"], OUTPUT_DIR)
        is_done = stored is not None
        return {
            "id": page["id"],
            "w": page["w"],
            "h": page["h"],
            "annotated": is_done,
            "n_pairs": len(stored) if is_done else 0,
        }

    return jsonify([_summarise(page) for page in PAGES])
6767

@@ -78,11 +78,11 @@ def serve_image(page_id):
7878
def get_annotations(page_id):
    """Return the stored pairs and image size for ``page_id`` as JSON.

    When ``_find`` yields nothing for the id, an empty, un-annotated payload
    with zero dimensions is returned instead of an error.
    """
    page = _find(page_id)
    if page:
        stored = storage.load(page_id, OUTPUT_DIR)
        payload = {
            # None (never annotated) and [] (empty page) both serialise as [].
            "pairs": stored if stored else [],
            "annotated": stored is not None,
            "w": page["w"],
            "h": page["h"],
        }
    else:
        payload = {"pairs": [], "annotated": False, "w": 0, "h": 0}
    return jsonify(payload)
@@ -94,8 +94,8 @@ def post_annotations(page_id):
9494
if not page:
9595
return jsonify({"error": "unknown page"}), 404
9696
data = request.get_json()
97-
storage.save(page_id, data["boxes"], page["w"], page["h"], OUTPUT_DIR)
98-
return jsonify({"saved": len(data["boxes"]), "annotated": True})
97+
storage.save(page_id, data["pairs"], page["w"], page["h"], OUTPUT_DIR)
98+
return jsonify({"saved": len(data["pairs"]), "annotated": True})
9999

100100

101101
@app.route("/export", methods=["POST"])
@@ -110,8 +110,8 @@ def export():
110110

111111
exported, skipped = 0, 0
112112
for page in PAGES:
113-
boxes = storage.load(page["id"], OUTPUT_DIR)
114-
if boxes is None: # never annotated — discard
113+
pairs = storage.load(page["id"], OUTPUT_DIR)
114+
if pairs is None: # never annotated — discard
115115
skipped += 1
116116
continue
117117
src = Path(page["path"]) # currently in tmp/
@@ -141,6 +141,6 @@ def stats():
141141
"pages": len(PAGES),
142142
"annotated": annotated,
143143
"pending": len(PAGES) - annotated,
144-
"total_boxes": sum(p["n_boxes"] for p in pages_data),
144+
"total_pairs": sum(p["n_pairs"] for p in pages_data),
145145
"output_dir": str(OUTPUT_DIR),
146146
})

annotate/storage.py

Lines changed: 37 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,23 @@
11
"""Annotation persistence — load/save in both GT JSON and YOLO formats.
22
3-
Ground-truth JSON schema:
3+
Ground-truth JSON schema (pair format):
44
[
55
{
6-
"class_id": 0, # 0 = chemical_structure, 1 = compound_label
7-
"bbox": [x1, y1, x2, y2], # pixel coords of what the user drew
8-
"label_text": "",
9-
"smiles": ""
6+
"struct_bbox": [x1, y1, x2, y2], # pixel coords of chemical structure
7+
"label_bbox": [x1, y1, x2, y2], # pixel coords of label ID (null = skipped)
8+
"label_text": "", # filled in by post-processing, not annotator
9+
"smiles": "" # filled in by post-processing, not annotator
1010
},
1111
...
1212
]
1313
14-
YOLO .txt (written only when boxes are non-empty):
15-
<class_id> cx cy w h (all values normalised 0-1)
14+
YOLO .txt (written only when pairs are non-empty):
15+
0 cx cy w h (normalised 0-1; class 0 = compound_panel = union bbox)
1616
1717
Annotation states:
1818
- GT JSON absent → page not yet visited
19-
- GT JSON = [] → page explicitly marked as "no panels" (empty page)
20-
- GT JSON = [...] → page annotated with N boxes
19+
- GT JSON = [] → page explicitly marked as "no panels"
20+
- GT JSON = [...] → page annotated with N pairs
2121
"""
2222

2323
import json
@@ -33,54 +33,56 @@ def lbl_path(page_id: str, output_dir: Path) -> Path:
3333

3434

3535
def load(page_id: str, output_dir: Path) -> list[dict] | None:
    """Return the stored pairs for a page, or None if not yet annotated.

    Args:
        page_id: Identifier passed to ``gt_path`` to locate the GT JSON file.
        output_dir: Root directory of the annotation output.

    Returns:
        The decoded content of the GT JSON file (a list of pair records such
        as ``{"struct_bbox": ..., "label_bbox": ..., ...}``), or ``None`` when
        the file does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    # NOTE(review): records are returned exactly as stored; files written
    # with an earlier per-box schema are not converted -- confirm none exist.
    try:
        # Read directly instead of exists()-then-read, so a file removed in
        # between is reported as "not annotated" rather than raising.
        text = gt_path(page_id, output_dir).read_text(encoding="utf-8")
    except FileNotFoundError:
        return None  # not yet visited
    return json.loads(text)
41+
42+
43+
def save(page_id: str, pairs: list[dict], img_w: int, img_h: int,
         output_dir: Path) -> None:
    """Persist the annotations of one page as GT JSON and a YOLO label file.

    GT JSON is *always* written (even for empty pages) so the page is
    tracked as 'done'.  The YOLO .txt is only written when pairs are
    present; for an empty page any existing label file is removed.

    Each YOLO line is ``0 cx cy w h`` for the union of ``struct_bbox`` and
    ``label_bbox`` (``struct_bbox`` alone when the label was skipped),
    divided by the image width/height.

    Args:
        page_id: Identifier passed to ``gt_path`` / ``lbl_path``.
        pairs: Records with ``struct_bbox`` ([x1, y1, x2, y2]) and optionally
            ``label_bbox``, ``label_text`` and ``smiles``.
        img_w: Image width in pixels, used to normalise x values.
        img_h: Image height in pixels, used to normalise y values.
        output_dir: Root directory of the annotation output.

    Raises:
        KeyError: If a pair has no ``struct_bbox``.
        ValueError: If a bbox cannot be unpacked into four values.
        ZeroDivisionError: If pairs are given and ``img_w`` or ``img_h`` is 0.
    """
    # Ensure each record has the full schema.
    records = [
        {
            "struct_bbox": pair["struct_bbox"],
            "label_bbox": pair.get("label_bbox"),  # None = skipped
            "label_text": pair.get("label_text", ""),
            "smiles": pair.get("smiles", ""),
        }
        for pair in pairs
    ]

    # Build every YOLO line before touching the disk, so malformed input
    # fails without leaving a GT JSON that has no matching label file.
    yolo_lines = [_yolo_line(record, img_w, img_h) for record in records]

    gt_dir = output_dir / "ground_truth"
    gt_dir.mkdir(parents=True, exist_ok=True)
    gt_path(page_id, output_dir).write_text(json.dumps(records, indent=2))

    lbl = lbl_path(page_id, output_dir)
    if not yolo_lines:
        lbl.unlink(missing_ok=True)  # no YOLO file for empty pages
        return

    lbl.parent.mkdir(parents=True, exist_ok=True)
    lbl.write_text("".join(yolo_lines))


def _yolo_line(record: dict, img_w: int, img_h: int) -> str:
    """Format the union bbox of one pair record as a class-0 YOLO line."""
    boxes = [record["struct_bbox"]]
    if record["label_bbox"]:
        boxes.append(record["label_bbox"])

    xs, ys = [], []
    for bx1, by1, bx2, by2 in boxes:
        xs += (bx1, bx2)
        ys += (by1, by2)

    # Taking min/max over both corners of every box gives the union and also
    # copes with boxes whose corners arrive swapped (drawn right-to-left or
    # bottom-to-top), which would otherwise yield a negative width/height.
    x1, x2 = min(xs), max(xs)
    y1, y2 = min(ys), max(ys)

    cx = (x1 + x2) / 2 / img_w
    cy = (y1 + y2) / 2 / img_h
    w = (x2 - x1) / img_w
    h = (y2 - y1) / img_h
    return f"0 {cx:.6f} {cy:.6f} {w:.6f} {h:.6f}\n"

0 commit comments

Comments
 (0)