example: add object detection example #3049

Merged
merged 1 commit into from Dec 5, 2023
49 changes: 49 additions & 0 deletions example/object-detection/README.md
@@ -0,0 +1,49 @@
Object Detection
======

Object detection is a computer vision technique for locating instances of objects in images or videos. Object detection algorithms typically leverage machine learning or deep learning to produce meaningful results.

In these examples, we will use Starwhale to evaluate a set of object detection models on COCO datasets.

Thanks to [ultralytics](https://github.com/ultralytics/ultralytics), running Starwhale Model Evaluation on YOLO models is straightforward.

Links
------

- Github Example Code: <https://github.com/star-whale/starwhale/tree/main/example/object-detection>
- Starwhale Cloud Demo: <https://cloud.starwhale.cn/projects/397/overview>

What we learn
------

- build a Starwhale Dataset with the Starwhale Python SDK and browse it with the Starwhale Dataset Web Viewer (see the sketch below).
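
The dataset scripts in this example follow roughly the pattern below; this is only a minimal sketch of the Starwhale Python SDK calls they rely on, and the dataset name and sample values here are made up:

```python
from starwhale import Image, dataset, BoundingBox

# open (or create) a dataset on the standalone instance and add one sample
with dataset("my-detection-dataset") as ds:
    ds["sample-0"] = {
        "image": Image("path/to/sample-0.jpg"),
        "annotations": [
            {
                "class_name": "person",
                "bbox": BoundingBox(x=10, y=20, width=30, height=40),
            }
        ],
    }
    ds.commit()  # commit a new dataset version that the Web Viewer can browse
```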

Models
------

- [YOLO](https://docs.ultralytics.com/): We will compare YOLOv8-{n,s,m,l,x} and YOLOv6-{n,s,m,l,l6} model evaluations.

Datasets
------

- [COCO128](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco128.yaml)

- Introduction: Ultralytics COCO128 is a small but versatile object detection dataset composed of the first 128 images of the COCO train 2017 set. This dataset is ideal for testing and debugging object detection models.
- Size: 128 validation images.
- Dataset build command:

```bash
swcli runtime activate object-detection
python3 datasets/coco128.py
```

- [COCO_val2017](https://cocodataset.org/#download)

- Introduction: The COCO (Common Objects in Context) dataset is a large-scale object detection, segmentation, and captioning dataset. It is designed to encourage research on a wide variety of object categories and is commonly used for benchmarking computer vision models. The dataset comprises 80 object categories.
- Size: 5,000 validation images.
- Dataset build command:

```bash
swcli runtime activate object-detection
python3 datasets/coco_val2017.py
```
1 change: 1 addition & 0 deletions example/object-detection/datasets/.gitignore
@@ -0,0 +1 @@
data/*
76 changes: 76 additions & 0 deletions example/object-detection/datasets/coco128.py
@@ -0,0 +1,76 @@
from __future__ import annotations

from pathlib import Path

from utils import download, extract_zip, get_name_by_coco_category_id

from starwhale import Image, dataset, BoundingBox, init_logger
from starwhale.utils import console

init_logger(3)

ROOT = Path(__file__).parent
DATA_DIR = ROOT / "data" / "coco128"

# Copy from https://www.kaggle.com/datasets/ultralytics/coco128.


def build() -> None:
    _zip_path = DATA_DIR / "coco128.zip"
    download("https://ultralytics.com/assets/coco128.zip", _zip_path)
    extract_zip(
        _zip_path, DATA_DIR, DATA_DIR / "coco128/images/train2017/000000000650.jpg"
    )

    with dataset("coco128") as ds:
        for img_path in (DATA_DIR / "coco128/images/train2017").glob("*.jpg"):
            name = img_path.name.split(".jpg")[0]

            # YOLO Darknet format: https://docs.plainsight.ai/labels/exporting-labels/yolo
            # Format: <object-class> <x_center> <y_center> <width> <height>
            #   <object-class>: zero-based class index in obj.names, from 0 to (classes - 1).
            #   <x_center> <y_center> <width> <height>: floats relative to the image width
            #   and height, each in the range (0.0, 1.0], e.g.:
            #     <x_center> = <absolute_x_center> / <image_width>
            #     <height> = <absolute_height> / <image_height>

            annotations = []
            image = Image(img_path)
            i_width, i_height = image.to_pil().size

            label_path = DATA_DIR / "coco128/labels/train2017" / f"{name}.txt"
            if not label_path.exists():
                continue

            for line in label_path.read_text().splitlines():
                class_id, x, y, w, h = line.split()
                class_id, x, y, w, h = (
                    int(class_id),
                    float(x),
                    float(y),
                    float(w),
                    float(h),
                )
                annotations.append(
                    {
                        "class_id": class_id,
                        "class_name": get_name_by_coco_category_id(class_id),
                        "darknet_bbox": [x, y, w, h],
                        "bbox": BoundingBox(
                            x=(x - w / 2) * i_width,
                            y=(y - h / 2) * i_height,
                            width=w * i_width,
                            height=h * i_height,
                        ),
                    }
                )

            ds[name] = {"image": image, "annotations": annotations}

        console.print("commit dataset...")
        ds.commit()

    console.print(f"{ds} has been built successfully!")


if __name__ == "__main__":
    build()
76 changes: 76 additions & 0 deletions example/object-detection/datasets/coco_val2017.py
@@ -0,0 +1,76 @@
from __future__ import annotations

import json
from pathlib import Path
from collections import defaultdict

from tqdm import tqdm
from utils import download, extract_zip, get_name_by_coco_category_id
from ultralytics.data.converter import coco91_to_coco80_class

from starwhale import Image, dataset, init_logger
from starwhale.utils import console
from starwhale.base.data_type import BoundingBox

init_logger(3)

ROOT = Path(__file__).parent
DATA_DIR = ROOT / "data" / "coco2017"

# The coco2017 val set is from https://cocodataset.org/#download.


def build() -> None:
    _zip_path = DATA_DIR / "val2017.zip"
    download(
        "https://starwhale-examples.oss-cn-beijing.aliyuncs.com/dataset/coco2017/val2017.zip",
        _zip_path,
    )
    extract_zip(_zip_path, DATA_DIR, DATA_DIR / "val2017/000000000139.jpg")

    _zip_path = DATA_DIR / "annotations_trainval2017.zip"
    download(
        "https://starwhale-examples.oss-cn-beijing.aliyuncs.com/dataset/coco2017/annotations_trainval2017.zip",
        _zip_path,
    )
    json_path = DATA_DIR / "annotations/instances_val2017.json"
    extract_zip(_zip_path, DATA_DIR, json_path)

    coco_classes = coco91_to_coco80_class()
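    # coco_classes: lookup from COCO's original 91 category ids to the contiguous
    # 80-class indexing; ids that were dropped from COCO map to None.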

    with json_path.open() as f:
        content = json.load(f)
        annotations = defaultdict(list)
        for ann in content["annotations"]:
            class_id = coco_classes[ann["category_id"] - 1]
            annotations[ann["image_id"]].append(
                {
                    "bbox": BoundingBox(*ann["bbox"]),
                    "class_id": class_id,
                    "class_name": get_name_by_coco_category_id(class_id),
                }
            )

    with dataset("coco_val2017") as ds:
        for image in tqdm(content["images"]):
            name = image["file_name"].split(".jpg")[0]
            for ann in annotations[image["id"]]:
                bbox = ann["bbox"]
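                # COCO stores bboxes as [x_top_left, y_top_left, width, height] in pixels;
                # also record the normalized Darknet [x_center, y_center, width, height]
                # form so the sample layout matches the coco128 dataset in this example.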
ann["darknet_bbox"] = [
(bbox.x + bbox.width / 2) / image["width"],
(bbox.y + bbox.height / 2) / image["height"],
bbox.width / image["width"],
bbox.height / image["height"],
]
ds[name] = {
"image": Image(DATA_DIR / "val2017" / image["file_name"]),
"annotations": annotations[image["id"]],
}
console.print("commit dataset...")
ds.commit()

console.print(f"{ds} has been built successfully!")


if __name__ == "__main__":
build()
65 changes: 65 additions & 0 deletions example/object-detection/datasets/utils.py
@@ -0,0 +1,65 @@
from __future__ import annotations

import zipfile
from pathlib import Path

import requests
from tqdm import tqdm

from starwhale.utils import console

_COCO_CLASSES_MAP = None
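# Cache for the COCO class-name map; it is populated lazily from models/yolo/consts.py
# on first use, so importing this module does not require the yolo directory on sys.path.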


def get_name_by_coco_category_id(category_id: int | None) -> str:
    global _COCO_CLASSES_MAP

    if _COCO_CLASSES_MAP is None:
        import sys

        sys.path.append(str(Path(__file__).parent.parent / "models/yolo"))

        from consts import COCO_CLASSES_MAP

        _COCO_CLASSES_MAP = COCO_CLASSES_MAP

    return (
        _COCO_CLASSES_MAP[category_id] if category_id is not None else "uncategorized"
    )


def extract_zip(from_path: Path, to_path: Path, chk_path: Path) -> None:
    if chk_path.exists():
        console.log(f"skip extract {from_path}, dir {chk_path} already exists")
        return

    with zipfile.ZipFile(from_path, "r", zipfile.ZIP_STORED) as z:
        for file in tqdm(
            iterable=z.namelist(),
            total=len(z.namelist()),
            desc=f"extract {from_path.name}",
        ):
            z.extract(member=file, path=to_path)


def download(url: str, to_path: Path) -> None:
    if to_path.exists():
        console.log(f"skip download {url}, file {to_path} already exists")
        return

    to_path.parent.mkdir(parents=True, exist_ok=True)

    with requests.get(url, timeout=60, stream=True) as r:
        r.raise_for_status()
        size = int(r.headers.get("content-length", 0))
        with tqdm(
            total=size,
            unit="B",
            unit_scale=True,
            desc=f"download {url}",
        ) as pbar:
            with open(to_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=1024):
                    f.write(chunk)
                    pbar.update(len(chunk))
3 changes: 3 additions & 0 deletions example/object-detection/models/yolo/.gitignore
@@ -0,0 +1,3 @@
checkpoints/*
runs/*
flagged/*
2 changes: 2 additions & 0 deletions example/object-detection/models/yolo/.swignore
@@ -0,0 +1,2 @@
runs/*
checkpoints/cache/*
38 changes: 38 additions & 0 deletions example/object-detection/models/yolo/README.md
@@ -0,0 +1,38 @@
YOLO meets Starwhale
======

YOLO (You Only Look Once) is a deep-learning-based object detection algorithm proposed by Redmon et al. in 2016. Its main strengths are speed and accuracy, and through continuous improvement the YOLO family has now reached its 8th version.

In this example, we use the [ultralytics](https://github.com/ultralytics/ultralytics) library to evaluate YOLOv8 and YOLOv5 models.
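
For orientation, the snippet below is a minimal sketch of how the ultralytics API loads a pretrained checkpoint and runs detection on a single image; it is only illustrative, and the checkpoint and image paths are hypothetical:

```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")  # load a pretrained YOLOv8-nano checkpoint
results = model("path/to/image.jpg")  # run inference on one image
for r in results:
    # predicted boxes in absolute [x1, y1, x2, y2] pixel coordinates
    print(r.boxes.xyxy)
```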

Build Starwhale Model
------

```bash
swcli runtime activate object-detection
# build yolov8n model
python3 build.py yolov8n
# build all YOLOv8 and YOLOv5 models
python3 build.py all
```

Run Offline Evaluation in a Standalone instance
------

```bash
# use source code
swcli -vvv model run -w . -m evaluation --handler evaluation:summary_detection --dataset coco128 --dataset-head 4 --runtime object-detection

# use Starwhale Model
swcli -vvv model run -u yolov8n --handler evaluation:summary_detection --dataset coco128 --runtime object-detection
```

Run Online Evaluation in a Standalone instance
------

```bash
swcli runtime activate object-detection
swcli -vvv model serve -w . -m evaluation
```

Then visit <http://127.0.0.1:8080>.
56 changes: 56 additions & 0 deletions example/object-detection/models/yolo/build.py
@@ -0,0 +1,56 @@
from __future__ import annotations

import sys
from pathlib import Path

from starwhale import model as starwhale_model

ROOT = Path(__file__).parent
CHECKPOINTS_DIR = ROOT / "checkpoints"

SUPPORT_MODELS = (
    "yolov8n",
    "yolov8s",
    "yolov8m",
    "yolov8l",
    "yolov8x",
    "yolov5nu",
    "yolov5su",
    "yolov5xu",
    "yolov5mu",
    "yolov5lu",
)


def build(model: str) -> None:
    print(f"start to build {model} yolo model...")
    fpath = CHECKPOINTS_DIR / "cache" / f"{model}.pt"
    if not fpath.exists():
        from torch.hub import download_url_to_file

        fpath.parent.mkdir(parents=True, exist_ok=True)
        download_url_to_file(
            url=f"https://github.com/ultralytics/assets/releases/download/v0.0.0/{model}.pt",
            dst=str(fpath),
        )

    (CHECKPOINTS_DIR / ".model").write_text(model)
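    # Refresh the hard link so checkpoints/ exposes exactly one <model>.pt for the
    # Starwhale model package (checkpoints/cache/ itself is excluded via .swignore).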
    for pt in CHECKPOINTS_DIR.glob("*.pt"):
        pt.unlink()
    fpath.link_to(CHECKPOINTS_DIR / f"{model}.pt")

    starwhale_model.build(name=model, modules=["evaluation"])


if __name__ == "__main__":
    if len(sys.argv[1:]) == 0:
        print(f"please specify model name, supported: {SUPPORT_MODELS}")
        sys.exit(1)
    elif sys.argv[1] == "all":
        print("build all supported yolo models")
        models = SUPPORT_MODELS
    else:
        models = [sys.argv[1]]

    for model in models:
        build(model)