Skip to content

Commit 94c78a3

Browse files
committed
Added birthplace extraction for pre-annotated data and made extraction more robust
1 parent ef0d697 commit 94c78a3

File tree

1 file changed

+11
-5
lines changed

1 file changed

+11
-5
lines changed

api/table_annotator/pre_annotated.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
KEY_FIRST_NAME_ANNOTATOR = "VORNAME"
1717
KEY_PRISONER_NUMBER_ANNOTATOR = "HAEFTLINGSNUMMER"
1818
KEY_DATE_OF_BIRTH_ANNOTATOR = "GEBURTSDATUM"
19+
KEY_BIRTH_PLACE_ANNOTATOR = "GEBURTSORT"
1920

2021

2122
def image_has_pre_annotated_data(image_path: str) -> bool:
@@ -56,13 +57,15 @@ def apply_pre_annotated_csv(image_path: str, table: Table, offset: int = 0) -> C
5657
first_name_column = find_column(table, KEY_FIRST_NAME_ANNOTATOR)
5758
prisoner_number_column = find_column(table, KEY_PRISONER_NUMBER_ANNOTATOR)
5859
birthdate_column = find_column(table, KEY_DATE_OF_BIRTH_ANNOTATOR)
60+
birthplace_colum = find_column(table, KEY_BIRTH_PLACE_ANNOTATOR)
5961

6062
for line in lines:
6163
key = int(line[KEY_ORDER])
6264
if key not in required_lines:
6365
continue
6466
row = cells[key-offset]
65-
if last_name_column is not None and last_name_column == first_name_column:
67+
if last_name_column is not None and last_name_column == first_name_column \
68+
and KEY_LAST_NAME in line and KEY_FIRST_NAME in line:
6669
last_name_index = table.columnTypes[last_name_column].index(
6770
KEY_LAST_NAME_ANNOTATOR)
6871
first_name_index = table.columnTypes[last_name_column].index(
@@ -73,15 +76,18 @@ def apply_pre_annotated_csv(image_path: str, table: Table, offset: int = 0) -> C
7376
text = f"{line[KEY_FIRST_NAME].title()} {line[KEY_LAST_NAME].title()}"
7477
row[last_name_column].ocr_text = text
7578
else:
76-
if last_name_column is not None:
79+
if last_name_column is not None and KEY_LAST_NAME in line:
7780
row[last_name_column].ocr_text = line[KEY_LAST_NAME].title()
78-
if first_name_column is not None:
81+
if first_name_column is not None and KEY_FIRST_NAME in line:
7982
row[first_name_column].ocr_text = line[KEY_FIRST_NAME].title()
8083

81-
if prisoner_number_column is not None:
84+
if prisoner_number_column is not None and KEY_PRISONER_NUMBER in line:
8285
row[prisoner_number_column].ocr_text = line[KEY_PRISONER_NUMBER]
83-
if birthdate_column is not None:
86+
if birthdate_column is not None and KEY_DATE_OF_BIRTH in line:
8487
row[birthdate_column].ocr_text = \
8588
transform_birthdate(line[KEY_DATE_OF_BIRTH])
8689

90+
if birthplace_colum is not None and KEY_BIRTH_PLACE in line:
91+
row[birthplace_colum].ocr_text = line[KEY_BIRTH_PLACE].title()
92+
8793
return cells

0 commit comments

Comments
 (0)