"""Annotation persistence — load/save in both GT JSON and YOLO formats.

Ground-truth JSON schema (pair format):
    [
      {
        "struct_bbox": [x1, y1, x2, y2],  # pixel coords of chemical structure
        "label_bbox":  [x1, y1, x2, y2],  # pixel coords of label ID (null = skipped)
        "label_text": "",                 # filled in by post-processing, not annotator
        "smiles": ""                      # filled in by post-processing, not annotator
      },
      ...
    ]

YOLO .txt (written only when pairs are non-empty):
    0 cx cy w h   (normalised 0-1; class 0 = compound_panel = union bbox)

Annotation states:
  - GT JSON absent  → page not yet visited
  - GT JSON = []    → page explicitly marked as "no panels"
  - GT JSON = [...] → page annotated with N pairs
"""
2222
2323import json
@@ -33,54 +33,56 @@ def lbl_path(page_id: str, output_dir: Path) -> Path:
3333
3434
def load(page_id: str, output_dir: Path) -> list[dict] | None:
    """Return the stored annotation pairs for a page, or None if not yet annotated.

    Args:
        page_id: Identifier of the page whose ground-truth file is read.
        output_dir: Root output directory, forwarded to ``gt_path``.

    Returns:
        The decoded content of the ground-truth JSON file (a list of pair
        records such as ``{"struct_bbox": ..., "label_bbox": ..., ...}``;
        an empty list means the page was saved with no pairs), or ``None``
        when the file does not exist, i.e. the page was never saved.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    # EAFP: read directly instead of exists()-then-read, so a file removed
    # between the check and the read cannot raise unexpectedly.
    try:
        raw = gt_path(page_id, output_dir).read_text(encoding="utf-8")
    except FileNotFoundError:
        return None  # not yet visited
    return json.loads(raw)
41+
42+
def save(page_id: str, pairs: list[dict], img_w: int, img_h: int,
         output_dir: Path) -> None:
    """Persist the annotation pairs of one page as GT JSON and a YOLO label file.

    GT JSON is *always* written (even for an empty ``pairs`` list) so the
    page is tracked as 'done'. The YOLO .txt is only written when pairs are
    present; for an empty page any existing label file is removed.

    Each YOLO line is ``0 cx cy w h`` (class 0, values normalised by the
    image size), where the box is the union of ``struct_bbox`` and
    ``label_bbox``, or just ``struct_bbox`` when the label was skipped.

    Args:
        page_id: Identifier of the page being saved.
        pairs: Records with a required ``"struct_bbox"`` ([x1, y1, x2, y2])
            and optional ``"label_bbox"``, ``"label_text"`` and ``"smiles"``.
        img_w: Image width in pixels, used to normalise x coordinates.
        img_h: Image height in pixels, used to normalise y coordinates.
        output_dir: Root output directory, forwarded to the path helpers.

    Raises:
        ValueError: If ``pairs`` is non-empty and ``img_w`` or ``img_h`` is
            not positive.
        KeyError: If a pair has no ``"struct_bbox"`` entry.
    """
    # Fail before touching the disk: dividing by a zero image size would
    # otherwise abort midway, after the GT JSON was already written.
    if pairs and (img_w <= 0 or img_h <= 0):
        raise ValueError(
            f"image size must be positive, got {img_w}x{img_h}"
        )

    gt_dir = output_dir / "ground_truth"
    gt_dir.mkdir(parents=True, exist_ok=True)

    # Ensure each record has the full schema
    records = [
        {
            "struct_bbox": pair["struct_bbox"],
            "label_bbox": pair.get("label_bbox"),  # None = skipped
            "label_text": pair.get("label_text", ""),
            "smiles": pair.get("smiles", ""),
        }
        for pair in pairs
    ]
    gt_path(page_id, output_dir).write_text(
        json.dumps(records, indent=2), encoding="utf-8"
    )

    lbl = lbl_path(page_id, output_dir)
    if not pairs:
        lbl.unlink(missing_ok=True)  # no YOLO file for empty pages
        return

    # Build every line first so a malformed pair cannot leave a
    # half-written label file behind.
    yolo_lines = []
    for pair in pairs:
        struct_box = pair["struct_bbox"]    # [x1, y1, x2, y2]
        label_box = pair.get("label_bbox")  # [x1, y1, x2, y2] or None
        boxes = [struct_box, label_box] if label_box else [struct_box]

        # Take min/max over both corners of every box: this yields the
        # union and also tolerates boxes whose corners arrive reversed,
        # which would otherwise produce a negative width or height.
        xs = [coord for box in boxes for coord in (box[0], box[2])]
        ys = [coord for box in boxes for coord in (box[1], box[3])]
        x1, x2 = min(xs), max(xs)
        y1, y2 = min(ys), max(ys)

        cx = (x1 + x2) / 2 / img_w
        cy = (y1 + y2) / 2 / img_h
        w = (x2 - x1) / img_w
        h = (y2 - y1) / img_h
        yolo_lines.append(f"0 {cx:.6f} {cy:.6f} {w:.6f} {h:.6f}\n")

    lbl.parent.mkdir(parents=True, exist_ok=True)
    lbl.write_text("".join(yolo_lines), encoding="utf-8")
0 commit comments