Skip to content

Commit

Permalink
Add old expedition scripts back
Browse files Browse the repository at this point in the history
  • Loading branch information
rafelafrance committed Feb 14, 2024
1 parent f1fb84b commit b207739
Show file tree
Hide file tree
Showing 9 changed files with 748 additions and 0 deletions.
80 changes: 80 additions & 0 deletions finder/__finder_label_babel_reconcile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#!/usr/bin/env python3
"""
Reconcile data from a "Label Babel" expedition.
We need training data for the label finder model. We use volunteers to build the
initial batch of training data. That is, we use a Zooniverse "Notes from Nature"
expedition to have volunteers (often 3 or more) draw the label bounding boxes. Every
bounding box will be slightly different, so we use this script to reconcile the differences
into a single best label. There are many wrinkles to this process, some of which are:
- Sometimes a person will draw a box around many labels.
- Sometimes a box gets drawn around nothing.
- Sometimes the drawn boxes are really large or small (outliers).
- Etc.
So we cannot just take a simple average of the box coordinates.
"""
import argparse
import textwrap
from pathlib import Path

from util.pylib import log

from finder.pylib.rise_of_machines import reconcile_expedition


def main():
    """Run the Label Babel reconcile script from the command line.

    Calls ``log.started()``, parses the command-line arguments, passes them to
    ``reconcile_expedition.reconcile`` and finally calls ``log.finished()``.
    """
    log.started()
    reconcile_expedition.reconcile(parse_args())
    log.finished()


def parse_args() -> argparse.Namespace:
    """Parse the command-line arguments of the Label Babel reconcile script.

    Arguments may also be placed in a file and passed as ``@filename``.

    Returns:
        A namespace with ``database`` and ``unreconciled_csv`` as ``Path``
        objects, and ``reconciled_set`` and ``notes`` as strings.
    """
    description = (
        'Reconcile data from a "Label Babel" expedition.\n'
        "We need training data for the label finder model and we use volunteers to\n"
        'build the initial batch of training data. That is, we use a "Notes from\n'
        'Nature" Zooniverse expedition to have volunteers (often 3 or more) draw all\n'
        "label bounding boxes around every label. Every volunteer draws a slightly\n"
        "different bounding box, so we use this script to reconcile the differences\n"
        'into a single "best" label.'
    )

    arg_parser = argparse.ArgumentParser(
        description=textwrap.dedent(description),
        fromfile_prefix_chars="@",
    )

    arg_parser.add_argument(
        "--database",
        required=True,
        type=Path,
        metavar="PATH",
        help="""Path to a digi-leap database.""",
    )

    # Converted to a Path like --database, so code that receives the arguments
    # can call Path methods such as .open() on it.
    arg_parser.add_argument(
        "--unreconciled-csv",
        required=True,
        type=Path,
        metavar="PATH",
        help="""Get volunteer drawn labels from this CSV file.""",
    )

    arg_parser.add_argument(
        "--reconciled-set",
        required=True,
        metavar="NAME",
        help="""Write reconciled labels to this set.""",
    )

    arg_parser.add_argument(
        "--notes",
        default="",
        metavar="TEXT",
        help="""Notes about this run. Enclose them in quotes.""",
    )

    return arg_parser.parse_args()


if __name__ == "__main__":
main()
87 changes: 87 additions & 0 deletions finder/__finder_rise_of_machines_build.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#!/usr/bin/env python3
"""Build an expedition to determine the quality of label finder output."""
import argparse
import textwrap
from pathlib import Path

from traiter.pylib import log

from finder.pylib.finder.rise_of_machines import build_expedition


def main():
    """Run the "Rise of Machines" expedition builder from the command line.

    Calls ``log.started()``, parses the command-line arguments, passes them to
    ``build_expedition.build`` and finally calls ``log.finished()``.
    """
    log.started()
    build_expedition.build(parse_args())
    log.finished()


def parse_args() -> argparse.Namespace:
    """Parse the command-line arguments of the expedition build script.

    Arguments may also be placed in a file and passed as ``@filename``.

    Returns:
        A namespace with ``database`` and ``expedition_dir`` as ``Path``
        objects, ``label_set`` and ``notes`` as strings, ``label_conf`` as a
        float, and ``limit`` and ``reduce_by`` as integers.
    """
    description = (
        "Build an expedition to determine the quality of the\n"
        "label builder.\n"
        'The "Rise of Machines" expedition.\n'
    )

    arg_parser = argparse.ArgumentParser(
        description=textwrap.dedent(description), fromfile_prefix_chars="@"
    )

    arg_parser.add_argument(
        "--database",
        required=True,
        type=Path,
        metavar="PATH",
        help="""Path to a digi-leap database.""",
    )

    arg_parser.add_argument(
        "--expedition-dir",
        required=True,
        type=Path,
        metavar="PATH",
        help="""Place expedition files in this directory.""",
    )

    arg_parser.add_argument(
        "--label-set",
        required=True,
        metavar="NAME",
        help="""Get labels from this label set.""",
    )

    arg_parser.add_argument(
        "--label-conf",
        type=float,
        default=0.25,
        help="""Use labels that have a confidence >= to this. (default: %(default)s)""",
    )

    # A number of sheets is a whole number, so parse it as an int (the default
    # already is one) rather than as a float.
    arg_parser.add_argument(
        "--limit",
        type=int,
        default=3000,
        help="""Sample this many sheets. (default: %(default)s)""",
    )

    arg_parser.add_argument(
        "--reduce-by",
        type=int,
        default=1,
        metavar="N",
        help="""Shrink images by this factor. (default: %(default)s)""",
    )

    arg_parser.add_argument(
        "--notes",
        default="",
        metavar="TEXT",
        help="""Notes about this run. Enclose them in quotes.""",
    )

    return arg_parser.parse_args()


if __name__ == "__main__":
main()
99 changes: 99 additions & 0 deletions finder/__finder_rise_of_machines_reconcile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/usr/bin/env python3
import argparse
import textwrap
from pathlib import Path

from traiter.pylib import log

from finder.pylib.finder.rise_of_machines import reconcile_expedition


def main():
    """Run the "Rise of the Machines" reconcile script from the command line.

    The arguments are parsed before ``log.started()`` is called; they are then
    passed to ``reconcile_expedition.reconcile`` and ``log.finished()`` is
    called last.
    """
    cli_args = parse_args()

    log.started()
    reconcile_expedition.reconcile(cli_args)
    log.finished()


def parse_args() -> argparse.Namespace:
    """Parse the command-line arguments of the Rise of the Machines reconciler.

    Arguments may also be placed in a file and passed as ``@filename``.

    Returns:
        A namespace with ``database`` as a ``Path``; ``unreconciled_csv``,
        ``label_set``, ``sheet_set``, ``train_set`` and ``notes`` as strings;
        ``label_conf`` as a float; and ``increase_by`` as an integer.
    """
    description = (
        'Reconcile data from a "Rise of the Machines" expedition.\n'
        "This expedition is a quality control check on the label finder's output. It\n"
        "presents volunteers with herbarium sheets with outlines of the labels. The\n"
        "type of labels is indicated by the color of the outline of the label. The\n"
        "volunteers judge the correctness of found labels by clicking inside of the\n"
        "label (a point) with a correct/incorrect indicator. If the label finder\n"
        "completely missed a label a volunteer draws a bounding box around the missing\n"
        "label."
    )

    # One entry per command-line flag, in the order they appear in --help.
    options = {
        "--database": {
            "required": True,
            "type": Path,
            "metavar": "PATH",
            "help": "Path to a digi-leap database.",
        },
        "--unreconciled-csv": {
            "required": True,
            "metavar": "PATH",
            "help": "Get volunteer input from this CSV file.",
        },
        "--label-set": {
            "required": True,
            "metavar": "NAME",
            "help": "Get labels from this set.",
        },
        "--sheet-set": {
            "required": True,
            "metavar": "NAME",
            "help": "Write reconciled sheets to this set.",
        },
        "--train-set": {
            "required": True,
            "metavar": "NAME",
            "help": "Write new reconciled labels to this set.",
        },
        "--label-conf": {
            "type": float,
            "default": 0.25,
            "help": (
                "Only include labels that have a confidence >= to this. Set it to 0.0 to\n"
                "get all of the labels. (default: %(default)s)"
            ),
        },
        "--increase-by": {
            "type": int,
            "default": 1,
            "metavar": "N",
            "help": (
                "Increase image size by this factor. This must match the --reduce-by N\n"
                "argument when you built the expedition with rise_of_machines_build.py.\n"
                "(default: %(default)s)"
            ),
        },
        "--notes": {
            "default": "",
            "metavar": "TEXT",
            "help": "Notes about this run. Enclose them in quotes.",
        },
    }

    parser = argparse.ArgumentParser(
        fromfile_prefix_chars="@",
        description=textwrap.dedent(description),
    )
    for flag, settings in options.items():
        parser.add_argument(flag, **settings)

    return parser.parse_args()


if __name__ == "__main__":
main()
39 changes: 39 additions & 0 deletions finder/__finder_yolo_inference_ingest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/usr/bin/env python3
import argparse
import textwrap
from pathlib import Path

from util.pylib import log

from old import inference_ingest_yolo


def main():
    """Run the YOLO inference ingest script from the command line.

    Calls ``log.started()``, parses the command-line arguments, passes them to
    ``inference_ingest_yolo.ingest`` and finally calls ``log.finished()``.
    """
    log.started()
    inference_ingest_yolo.ingest(parse_args())
    log.finished()


def parse_args():
    """Parse the command-line arguments of the YOLO inference ingest script.

    Arguments may also be placed in a file and passed as ``@filename``.

    Returns:
        A namespace whose only attribute, ``yolo_dir``, is the required
        ``--yolo-dir`` value converted to a ``Path``.
    """
    parser = argparse.ArgumentParser(
        fromfile_prefix_chars="@",
        description=textwrap.dedent("""Read in YOLO inference results."""),
    )
    parser.add_argument(
        "--yolo-dir",
        required=True,
        metavar="PATH",
        type=Path,
        help="""Read YOLO results from this directory.""",
    )
    return parser.parse_args()


if __name__ == "__main__":
main()
Empty file.
86 changes: 86 additions & 0 deletions finder/pylib/label_babel/reconcile_expedition.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
from argparse import Namespace
from collections import defaultdict

import numpy as np
import pandas as pd
from tqdm import tqdm

from finder.pylib.old_subject import Subject


def reconcile(args: Namespace) -> None:
    """Do nothing: the Label Babel reconciliation is currently disabled.

    The function ignores ``args`` and returns ``None``. The earlier
    implementation is kept below as comments for reference only.
    """
    # with db.connect(args.database) as cxn:
    #     run_id = db.insert_run(cxn, args)
    #
    #     with args.unreconciled_csv.open() as csv_file:
    #         reader = csv.DictReader(csv_file)
    #         classifications = list(reader)
    #
    #     subjects = get_subjects(classifications)
    #
    #     for subject in tqdm(subjects):
    #         subject.merge_box_groups()
    #
    #     df = get_reconciled_boxes(subjects, args.reconciled_set)
    #     df.to_sql("label_train", cxn, if_exists="append", index=False)
    #
    #     db.update_run_finished(cxn, run_id)
    return


def get_reconciled_boxes(subjects, reconciled_set):
rec_boxes = []

for subject in subjects:
boxes = subject.merged_boxes

if not boxes:
continue

classes = subject.merged_types

if len(boxes) != len(classes):
msg = f"Malformed subject {subject.subject_id}"
raise ValueError(msg)

for box, cls in zip(boxes, classes, strict=False):
rec_boxes.append(
{
"sheet_id": subject.sheet_id,
"train_set": reconciled_set,
"train_class": cls,
"train_left": box[0],
"train_top": box[1],
"train_right": box[2],
"train_bottom": box[3],
},
)

df = pd.DataFrame(rec_boxes)
return df


def get_subjects(classifications):
    """Group classification rows into one ``Subject`` per subject ID.

    Each row is a mapping with ``subject_id`` and ``sheet_id`` keys. Non-empty
    values under keys starting with ``"Box(es): box"`` are converted with
    ``Subject.bbox_from_json`` and stacked onto the subject's ``boxes``. Values
    under keys starting with ``"Box(es): select"`` (empty ones become ``""``)
    are truncated to the number of boxes found in the row and appended to the
    subject's ``types``.

    Returns:
        A list of the ``Subject`` objects, in order of first appearance.
    """
    by_id: dict[str, Subject] = defaultdict(Subject)

    for row in tqdm(classifications):
        subject_id = row["subject_id"]

        # The defaultdict creates the Subject the first time its ID is seen.
        subject = by_id[subject_id]
        subject.subject_id = subject_id
        subject.sheet_id = row["sheet_id"]

        drawn = [
            Subject.bbox_from_json(value)
            for key, value in row.items()
            if key.startswith("Box(es): box") and value
        ]
        boxes = np.array(drawn)
        if len(boxes):
            subject.boxes = np.vstack((subject.boxes, boxes))

        chosen = [
            value or ""
            for key, value in row.items()
            if key.startswith("Box(es): select")
        ]
        # Keep only as many selections as there are boxes in this row.
        types = np.array(chosen[: len(boxes)], dtype=str)
        if len(types):
            subject.types = np.hstack((subject.types, types))

    return list(by_id.values())
Empty file.
Loading

0 comments on commit b207739

Please sign in to comment.