-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
f1fb84b
commit b207739
Showing
9 changed files
with
748 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
#!/usr/bin/env python3 | ||
""" | ||
Reconcile data from a "Label Babel" expedition. | ||
We need training data for the label finder model. We use volunteers to build the | ||
initial batch of training data. That is, we use a Zooniverse "Notes from Nature" | ||
expedition to have volunteers (often 3 or more) draw the label bounding boxes. Every | ||
bounding box will be slightly different, so we use this script to reconcile the differences | ||
into a single best label. There are many wrinkles to this process, some of which are: | ||
- Sometimes a person will draw a box around many labels. | ||
- Sometimes a box gets drawn around nothing. | ||
- Sometimes the drawn boxes are really large or small (outliers). | ||
- Etc. | ||
So we cannot just take a simple average of the box coordinates. | ||
""" | ||
import argparse | ||
import textwrap | ||
from pathlib import Path | ||
|
||
from util.pylib import log | ||
|
||
from finder.pylib.rise_of_machines import reconcile_expedition | ||
|
||
|
||
def main():
    """Log the start, parse the arguments, reconcile the expedition, log the end."""
    log.started()
    reconcile_expedition.reconcile(parse_args())
    log.finished()
|
||
|
||
def parse_args() -> argparse.Namespace:
    """Parse the command-line arguments for the "Label Babel" reconciliation.

    Arguments may also be placed in a file and passed as ``@filename``
    (``fromfile_prefix_chars="@"``).

    Returns:
        The parsed namespace with ``database`` and ``unreconciled_csv`` as
        ``Path`` objects, and ``reconciled_set`` and ``notes`` as strings.
    """
    description = """Reconcile data from a "Label Babel" expedition.
        We need training data for the label finder model and we use volunteers to build
        the initial batch of training data. That is, we use a "Notes from Nature" Zooniverse
        expedition to have volunteers (often 3 or more) draw all label bounding boxes around
        every label. Every volunteer draws a slightly different bounding box, so we use this
        script to reconcile the differences into a single "best" label."""

    arg_parser = argparse.ArgumentParser(
        description=textwrap.dedent(description),
        fromfile_prefix_chars="@",
    )

    arg_parser.add_argument(
        "--database",
        required=True,
        type=Path,
        metavar="PATH",
        help="""Path to a digi-leap database.""",
    )

    # Converted to a Path (like --database) because the reconciler opens this
    # argument with Path methods; a bare string has no .open().
    arg_parser.add_argument(
        "--unreconciled-csv",
        required=True,
        type=Path,
        metavar="PATH",
        help="""Get volunteer drawn labels from this CSV file.""",
    )

    arg_parser.add_argument(
        "--reconciled-set",
        required=True,
        metavar="NAME",
        help="""Write reconciled labels to this set.""",
    )

    arg_parser.add_argument(
        "--notes",
        default="",
        metavar="TEXT",
        help="""Notes about this run. Enclose them in quotes.""",
    )

    args = arg_parser.parse_args()
    return args
|
||
|
||
if __name__ == "__main__": | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
#!/usr/bin/env python3 | ||
"""Build an expedition to determine the quality of label finder output.""" | ||
import argparse | ||
import textwrap | ||
from pathlib import Path | ||
|
||
from traiter.pylib import log | ||
|
||
from finder.pylib.finder.rise_of_machines import build_expedition | ||
|
||
|
||
def main():
    """Log the start, parse the arguments, build the expedition, log the end."""
    log.started()
    cli_args = parse_args()
    build_expedition.build(cli_args)
    log.finished()
|
||
|
||
def parse_args() -> argparse.Namespace:
    """Parse the command-line arguments for building the expedition.

    Arguments may also be placed in a file and passed as ``@filename``
    (``fromfile_prefix_chars="@"``).

    Returns:
        The parsed namespace: ``database`` and ``expedition_dir`` are ``Path``
        objects, ``label_conf`` is a float, ``limit`` and ``reduce_by`` are
        ints, and ``label_set`` and ``notes`` are strings.
    """
    description = """Build an expedition to determine the quality of the
        label builder.
        The "Rise of Machines" expedition.
        """

    arg_parser = argparse.ArgumentParser(
        description=textwrap.dedent(description), fromfile_prefix_chars="@"
    )

    arg_parser.add_argument(
        "--database",
        required=True,
        type=Path,
        metavar="PATH",
        help="""Path to a digi-leap database.""",
    )

    arg_parser.add_argument(
        "--expedition-dir",
        required=True,
        type=Path,
        metavar="PATH",
        help="""Place expedition files in this directory.""",
    )

    arg_parser.add_argument(
        "--label-set",
        required=True,
        metavar="NAME",
        help="""Get labels from this label set.""",
    )

    arg_parser.add_argument(
        "--label-conf",
        type=float,
        default=0.25,
        help="""Use labels that have a confidence >= to this. (default: %(default)s)""",
    )

    # A sheet count is a whole number: parse it as an int so it can be used
    # directly as a sample size or row limit.
    arg_parser.add_argument(
        "--limit",
        type=int,
        default=3000,
        help="""Sample this many sheets. (default: %(default)s)""",
    )

    arg_parser.add_argument(
        "--reduce-by",
        type=int,
        default=1,
        metavar="N",
        help="""Shrink images by this factor. (default: %(default)s)""",
    )

    arg_parser.add_argument(
        "--notes",
        default="",
        metavar="TEXT",
        help="""Notes about this run. Enclose them in quotes.""",
    )

    args = arg_parser.parse_args()
    return args
|
||
|
||
if __name__ == "__main__": | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
#!/usr/bin/env python3 | ||
import argparse | ||
import textwrap | ||
from pathlib import Path | ||
|
||
from traiter.pylib import log | ||
|
||
from finder.pylib.finder.rise_of_machines import reconcile_expedition | ||
|
||
|
||
def main():
    """Parse the arguments first, then log the start, reconcile, and log the end."""
    options = parse_args()
    log.started()
    reconcile_expedition.reconcile(options)
    log.finished()
|
||
|
||
def parse_args() -> argparse.Namespace:
    """Parse the command-line arguments for the "Rise of the Machines" reconciliation.

    Every option is declared in one table and registered in declaration order.
    Arguments may also be placed in a file and passed as ``@filename``.

    Returns:
        The parsed namespace: ``database`` is a ``Path``, ``label_conf`` a float,
        ``increase_by`` an int, and the remaining options are strings.
    """
    parser = argparse.ArgumentParser(
        fromfile_prefix_chars="@",
        description=textwrap.dedent(
            """Reconcile data from a "Rise of the Machines" expedition.
            This expedition is a quality control check on the label finder's output. It
            presents volunteers with herbarium sheets with outlines of the labels. The
            type of labels is indicated by the color of the outline of the label. The
            volunteers judge the correctness of found labels by clicking inside of the
            label (a point) with a correct/incorrect indicator. If the label finder
            completely missed a label a volunteer draws a bounding box around the missing
            label."""
        ),
    )

    # Option flag -> keyword arguments for add_argument(), in display order.
    options = {
        "--database": dict(
            required=True,
            type=Path,
            metavar="PATH",
            help="""Path to a digi-leap database.""",
        ),
        "--unreconciled-csv": dict(
            required=True,
            metavar="PATH",
            help="""Get volunteer input from this CSV file.""",
        ),
        "--label-set": dict(
            required=True,
            metavar="NAME",
            help="""Get labels from this set.""",
        ),
        "--sheet-set": dict(
            required=True,
            metavar="NAME",
            help="""Write reconciled sheets to this set.""",
        ),
        "--train-set": dict(
            required=True,
            metavar="NAME",
            help="""Write new reconciled labels to this set.""",
        ),
        "--label-conf": dict(
            type=float,
            default=0.25,
            help="""Only include labels that have a confidence >= to this. Set it to 0.0 to
            get all of the labels. (default: %(default)s)""",
        ),
        "--increase-by": dict(
            type=int,
            default=1,
            metavar="N",
            help="""Increase image size by this factor. This must match the --reduce-by N
            argument when you built the expedition with rise_of_machines_build.py.
            (default: %(default)s)""",
        ),
        "--notes": dict(
            default="",
            metavar="TEXT",
            help="""Notes about this run. Enclose them in quotes.""",
        ),
    }

    for flag, spec in options.items():
        parser.add_argument(flag, **spec)

    return parser.parse_args()
|
||
|
||
if __name__ == "__main__": | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
#!/usr/bin/env python3 | ||
import argparse | ||
import textwrap | ||
from pathlib import Path | ||
|
||
from util.pylib import log | ||
|
||
from old import inference_ingest_yolo | ||
|
||
|
||
def main():
    """Log the start, parse the arguments, ingest the YOLO results, log the end."""
    log.started()
    inference_ingest_yolo.ingest(parse_args())
    log.finished()
|
||
|
||
def parse_args():
    """Parse the command-line arguments for ingesting YOLO inference results.

    The only option is the required ``--yolo-dir``, converted to a ``Path``.
    Arguments may also be placed in a file and passed as ``@filename``.
    """
    summary = textwrap.dedent("""Read in YOLO inference results.""")
    parser = argparse.ArgumentParser(description=summary, fromfile_prefix_chars="@")

    parser.add_argument(
        "--yolo-dir",
        required=True,
        metavar="PATH",
        type=Path,
        help="""Read YOLO results from this directory.""",
    )

    return parser.parse_args()
|
||
|
||
if __name__ == "__main__": | ||
main() |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
from argparse import Namespace | ||
from collections import defaultdict | ||
|
||
import numpy as np | ||
import pandas as pd | ||
from tqdm import tqdm | ||
|
||
from finder.pylib.old_subject import Subject | ||
|
||
|
||
def reconcile(args: Namespace) -> None:
    """Reconcile an expedition's volunteer boxes -- currently a stub that does nothing.

    The intended implementation is preserved below as commented-out reference
    code: record a run in the database, read the classifications CSV, group them
    into subjects, merge each subject's boxes, and append the reconciled boxes to
    the "label_train" table.
    """
    # with db.connect(args.database) as cxn:
    #     run_id = db.insert_run(cxn, args)
    #
    #     with args.unreconciled_csv.open() as csv_file:
    #         reader = csv.DictReader(csv_file)
    #         classifications = list(reader)
    #
    #     subjects = get_subjects(classifications)
    #
    #     for subject in tqdm(subjects):
    #         subject.merge_box_groups()
    #
    #     df = get_reconciled_boxes(subjects, args.reconciled_set)
    #     df.to_sql("label_train", cxn, if_exists="append", index=False)
    #
    #     db.update_run_finished(cxn, run_id)
|
||
|
||
def get_reconciled_boxes(subjects, reconciled_set): | ||
rec_boxes = [] | ||
|
||
for subject in subjects: | ||
boxes = subject.merged_boxes | ||
|
||
if not boxes: | ||
continue | ||
|
||
classes = subject.merged_types | ||
|
||
if len(boxes) != len(classes): | ||
msg = f"Malformed subject {subject.subject_id}" | ||
raise ValueError(msg) | ||
|
||
for box, cls in zip(boxes, classes, strict=False): | ||
rec_boxes.append( | ||
{ | ||
"sheet_id": subject.sheet_id, | ||
"train_set": reconciled_set, | ||
"train_class": cls, | ||
"train_left": box[0], | ||
"train_top": box[1], | ||
"train_right": box[2], | ||
"train_bottom": box[3], | ||
}, | ||
) | ||
|
||
df = pd.DataFrame(rec_boxes) | ||
return df | ||
|
||
|
||
def get_subjects(classifications):
    """Group classification rows into one Subject per "subject_id".

    For each row, every non-empty "Box(es): box..." value is converted with
    ``Subject.bbox_from_json`` and stacked onto the subject's ``boxes``; the
    first ``len(boxes)`` "Box(es): select..." values (empty ones become "") are
    appended to the subject's ``types``.

    Args:
        classifications: Iterable of dict-like rows that contain "subject_id" and
            "sheet_id" keys -- presumably ``csv.DictReader`` rows; verify
            against the caller.

    Returns:
        A list of the Subject objects, in first-seen order.
    """
    by_id: dict[str, Subject] = defaultdict(Subject)

    for row in tqdm(classifications):
        subject_id = row["subject_id"]
        subject = by_id[subject_id]

        subject.subject_id = subject_id
        subject.sheet_id = row["sheet_id"]

        box_cells = [
            value
            for key, value in row.items()
            if key.startswith("Box(es): box") and value
        ]
        boxes = np.array([Subject.bbox_from_json(cell) for cell in box_cells])
        if len(boxes):
            subject.boxes = np.vstack((subject.boxes, boxes))

        select_cells = [
            value or ""
            for key, value in row.items()
            if key.startswith("Box(es): select")
        ]
        # Only keep as many type selections as there are boxes in this row.
        types = np.array(select_cells[: len(boxes)], dtype=str)
        if len(types):
            subject.types = np.hstack((subject.types, types))

    return list(by_id.values())
Empty file.
Oops, something went wrong.