example: add object detection example #3049

Merged
merged 1 commit into from Dec 5, 2023
49 changes: 49 additions & 0 deletions example/object-detection/README.md
@@ -0,0 +1,49 @@
Object Detection
======

Object detection is a computer vision technique for locating instances of objects in images or videos. Object detection algorithms typically leverage machine learning or deep learning to produce meaningful results.

In these examples, we will use Starwhale to evaluate a set of object detection models on COCO datasets.

Thanks to [ultralytics](https://github.com/ultralytics/ultralytics), running Starwhale Model Evaluation on YOLO models is straightforward.

Links
------

- Github Example Code: <https://github.com/star-whale/starwhale/tree/main/example/object-detection>
- Starwhale Cloud Demo: <https://cloud.starwhale.cn/projects/397/overview>

What we learn
------

- build a Starwhale Dataset with the Starwhale Python SDK and browse it with the Starwhale Dataset Web Viewer (see the sketch below).
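
The dataset scripts in this example follow roughly the pattern below; this is only a minimal sketch of the Starwhale Python SDK calls they rely on, and the dataset name and sample values here are made up:

```python
from starwhale import Image, dataset, BoundingBox

# open (or create) a dataset on the standalone instance and add one sample
with dataset("my-detection-dataset") as ds:
    ds["sample-0"] = {
        "image": Image("path/to/sample-0.jpg"),
        "annotations": [
            {
                "class_name": "person",
                "bbox": BoundingBox(x=10, y=20, width=30, height=40),
            }
        ],
    }
    ds.commit()  # commit a new dataset version that the Web Viewer can browse
```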

Models
------

- [YOLO](https://docs.ultralytics.com/): We will compare YOLOv8-{n,s,m,l,x} and YOLOv6-{n,s,m,l,l6} model evaluations.

Datasets
------

- [COCO128](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco128.yaml)

- Introduction: Ultralytics COCO128 is a small but versatile object detection dataset composed of the first 128 images of the COCO train 2017 set. This dataset is ideal for testing and debugging object detection models.
- Size: 128 validation images.
- Dataset build command:

```bash
swcli runtime activate object-detection
python3 datasets/coco128.py
```

- [COCO_val2017](https://cocodataset.org/#download)

- Introduction: The COCO (Common Objects in Context) dataset is a large-scale object detection, segmentation, and captioning dataset. It is designed to encourage research on a wide variety of object categories and is commonly used for benchmarking computer vision models. The dataset comprises 80 object categories.
- Size: 5,000 validation images.
- Dataset build command:

```bash
swcli runtime activate object-detection
python3 datasets/coco_val2017.py
```
1 change: 1 addition & 0 deletions example/object-detection/datasets/.gitignore
@@ -0,0 +1 @@
data/*
76 changes: 76 additions & 0 deletions example/object-detection/datasets/coco128.py
@@ -0,0 +1,76 @@
from __future__ import annotations

from pathlib import Path

from utils import download, extract_zip, get_name_by_coco_category_id

from starwhale import Image, dataset, BoundingBox, init_logger
from starwhale.utils import console

init_logger(3)

ROOT = Path(__file__).parent
DATA_DIR = ROOT / "data" / "coco128"

# Copy from https://www.kaggle.com/datasets/ultralytics/coco128.


def build() -> None:
    _zip_path = DATA_DIR / "coco128.zip"
    download("https://ultralytics.com/assets/coco128.zip", _zip_path)
    extract_zip(
        _zip_path, DATA_DIR, DATA_DIR / "coco128/images/train2017/000000000650.jpg"
    )

    with dataset("coco128") as ds:
        for img_path in (DATA_DIR / "coco128/images/train2017").glob("*.jpg"):
            name = img_path.name.split(".jpg")[0]

            # YOLO Darknet format: https://docs.plainsight.ai/labels/exporting-labels/yolo
            # Format: <object-class> <x_center> <y_center> <width> <height>
            #   <object-class>: zero-based class index in obj.names, from 0 to (classes - 1).
            #   <x_center> <y_center> <width> <height>: floats relative to the image width
            #   and height, each in the range (0.0, 1.0], e.g.:
            #     <x_center> = <absolute_x_center> / <image_width>
            #     <height> = <absolute_height> / <image_height>

            annotations = []
            image = Image(img_path)
            i_width, i_height = image.to_pil().size

            label_path = DATA_DIR / "coco128/labels/train2017" / f"{name}.txt"
            if not label_path.exists():
                continue

            for line in label_path.read_text().splitlines():
                class_id, x, y, w, h = line.split()
                class_id, x, y, w, h = (
                    int(class_id),
                    float(x),
                    float(y),
                    float(w),
                    float(h),
                )
                annotations.append(
                    {
                        "class_id": class_id,
                        "class_name": get_name_by_coco_category_id(class_id),
                        "darknet_bbox": [x, y, w, h],
                        "bbox": BoundingBox(
                            x=(x - w / 2) * i_width,
                            y=(y - h / 2) * i_height,
                            width=w * i_width,
                            height=h * i_height,
                        ),
                    }
                )

            ds[name] = {"image": image, "annotations": annotations}

        console.print("commit dataset...")
        ds.commit()

    console.print(f"{ds} has been built successfully!")


if __name__ == "__main__":
    build()
76 changes: 76 additions & 0 deletions example/object-detection/datasets/coco_val2017.py
@@ -0,0 +1,76 @@
from __future__ import annotations

import json
from pathlib import Path
from collections import defaultdict

from tqdm import tqdm
from utils import download, extract_zip, get_name_by_coco_category_id
from ultralytics.data.converter import coco91_to_coco80_class

from starwhale import Image, dataset, init_logger
from starwhale.utils import console
from starwhale.base.data_type import BoundingBox

init_logger(3)

ROOT = Path(__file__).parent
DATA_DIR = ROOT / "data" / "coco2017"

# The coco2017 val set is from https://cocodataset.org/#download.


def build() -> None:
    _zip_path = DATA_DIR / "val2017.zip"
    download(
        "https://starwhale-examples.oss-cn-beijing.aliyuncs.com/dataset/coco2017/val2017.zip",
        _zip_path,
    )
    extract_zip(_zip_path, DATA_DIR, DATA_DIR / "val2017/000000000139.jpg")

    _zip_path = DATA_DIR / "annotations_trainval2017.zip"
    download(
        "https://starwhale-examples.oss-cn-beijing.aliyuncs.com/dataset/coco2017/annotations_trainval2017.zip",
        _zip_path,
    )
    json_path = DATA_DIR / "annotations/instances_val2017.json"
    extract_zip(_zip_path, DATA_DIR, json_path)

    coco_classes = coco91_to_coco80_class()
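    # coco_classes: lookup from COCO's original 91 category ids to the contiguous
    # 80-class indexing; ids that were dropped from COCO map to None.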

    with json_path.open() as f:
        content = json.load(f)
        annotations = defaultdict(list)
        for ann in content["annotations"]:
            class_id = coco_classes[ann["category_id"] - 1]
            annotations[ann["image_id"]].append(
                {
                    "bbox": BoundingBox(*ann["bbox"]),
                    "class_id": class_id,
                    "class_name": get_name_by_coco_category_id(class_id),
                }
            )

    with dataset("coco_val2017") as ds:
        for image in tqdm(content["images"]):
            name = image["file_name"].split(".jpg")[0]
            for ann in annotations[image["id"]]:
                bbox = ann["bbox"]
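                # COCO stores bboxes as [x_top_left, y_top_left, width, height] in pixels;
                # also record the normalized Darknet [x_center, y_center, width, height]
                # form so the sample layout matches the coco128 dataset in this example.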
ann["darknet_bbox"] = [
(bbox.x + bbox.width / 2) / image["width"],
(bbox.y + bbox.height / 2) / image["height"],
bbox.width / image["width"],
bbox.height / image["height"],
]
ds[name] = {
"image": Image(DATA_DIR / "val2017" / image["file_name"]),
"annotations": annotations[image["id"]],
}
console.print("commit dataset...")
ds.commit()

console.print(f"{ds} has been built successfully!")


if __name__ == "__main__":
build()
65 changes: 65 additions & 0 deletions example/object-detection/datasets/utils.py
@@ -0,0 +1,65 @@
from __future__ import annotations

import zipfile
from pathlib import Path

import requests
from tqdm import tqdm

from starwhale.utils import console

_COCO_CLASSES_MAP = None
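# Cache for the COCO class-name map; it is populated lazily from models/yolo/consts.py
# on first use, so importing this module does not require the yolo directory on sys.path.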


def get_name_by_coco_category_id(category_id: int | None) -> str:
    global _COCO_CLASSES_MAP

    if _COCO_CLASSES_MAP is None:
        import sys

        sys.path.append(str(Path(__file__).parent.parent / "models/yolo"))

        from consts import COCO_CLASSES_MAP

        _COCO_CLASSES_MAP = COCO_CLASSES_MAP

    return (
        _COCO_CLASSES_MAP[category_id] if category_id is not None else "uncategorized"
    )


def extract_zip(from_path: Path, to_path: Path, chk_path: Path) -> None:
    if chk_path.exists():
        console.log(f"skip extract {from_path}, dir {chk_path} already exists")
        return

    with zipfile.ZipFile(from_path, "r", zipfile.ZIP_STORED) as z:
        for file in tqdm(
            iterable=z.namelist(),
            total=len(z.namelist()),
            desc=f"extract {from_path.name}",
        ):
            z.extract(member=file, path=to_path)


def download(url: str, to_path: Path) -> None:
    if to_path.exists():
        console.log(f"skip download {url}, file {to_path} already exists")
        return

    to_path.parent.mkdir(parents=True, exist_ok=True)

    with requests.get(url, timeout=60, stream=True) as r:
        r.raise_for_status()
        size = int(r.headers.get("content-length", 0))
        with tqdm(
            total=size,
            unit="B",
            unit_scale=True,
            desc=f"download {url}",
        ) as pbar:
            with open(to_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=1024):
                    f.write(chunk)
                    pbar.update(len(chunk))
3 changes: 3 additions & 0 deletions example/object-detection/models/yolo/.gitignore
@@ -0,0 +1,3 @@
checkpoints/*
runs/*
flagged/*
2 changes: 2 additions & 0 deletions example/object-detection/models/yolo/.swignore
@@ -0,0 +1,2 @@
runs/*
checkpoints/cache/*
38 changes: 38 additions & 0 deletions example/object-detection/models/yolo/README.md
@@ -0,0 +1,38 @@
YOLO meets Starwhale
======

YOLO (You Only Look Once) is a deep-learning-based object detection algorithm proposed by Redmon et al. in 2016. Its main strengths are speed and accuracy, and through continuous improvement the YOLO family has now reached its 8th version.

In this example, we use the [ultralytics](https://github.com/ultralytics/ultralytics) library to evaluate YOLOv8 and YOLOv5 models.
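
For orientation, the snippet below is a minimal sketch of how the ultralytics API loads a pretrained checkpoint and runs detection on a single image; it is only illustrative, and the checkpoint and image paths are hypothetical:

```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")  # load a pretrained YOLOv8-nano checkpoint
results = model("path/to/image.jpg")  # run inference on one image
for r in results:
    # predicted boxes in absolute [x1, y1, x2, y2] pixel coordinates
    print(r.boxes.xyxy)
```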

Build Starwhale Model
------

```bash
swcli runtime activate object-detection
# build yolov8n model
python3 build.py yolov8n
# build all YOLOv8 and YOLOv5 models
python3 build.py all
```

Run Offline Evaluation in a Standalone instance
------

```bash
# use source code
swcli -vvv model run -w . -m evaluation --handler evaluation:summary_detection --dataset coco128 --dataset-head 4 --runtime object-detection

# use Starwhale Model
swcli -vvv model run -u yolov8n --handler evaluation:summary_detection --dataset coco128 --runtime object-detection
```

Run Online Evaluation in a Standalone instance
------

```bash
swcli runtime activate object-detection
swcli -vvv model serve -w . -m evaluation
```

Then visit <http://127.0.0.1:8080>.
56 changes: 56 additions & 0 deletions example/object-detection/models/yolo/build.py
@@ -0,0 +1,56 @@
from __future__ import annotations

import sys
from pathlib import Path

from starwhale import model as starwhale_model

ROOT = Path(__file__).parent
CHECKPOINTS_DIR = ROOT / "checkpoints"

SUPPORT_MODELS = (
    "yolov8n",
    "yolov8s",
    "yolov8m",
    "yolov8l",
    "yolov8x",
    "yolov5nu",
    "yolov5su",
    "yolov5xu",
    "yolov5mu",
    "yolov5lu",
)


def build(model: str) -> None:
    print(f"start to build {model} yolo model...")
    fpath = CHECKPOINTS_DIR / "cache" / f"{model}.pt"
    if not fpath.exists():
        from torch.hub import download_url_to_file

        fpath.parent.mkdir(parents=True, exist_ok=True)
        download_url_to_file(
            url=f"https://github.com/ultralytics/assets/releases/download/v0.0.0/{model}.pt",
            dst=str(fpath),
        )

    (CHECKPOINTS_DIR / ".model").write_text(model)
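    # Refresh the hard link so checkpoints/ exposes exactly one <model>.pt for the
    # Starwhale model package (checkpoints/cache/ itself is excluded via .swignore).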
    for pt in CHECKPOINTS_DIR.glob("*.pt"):
        pt.unlink()
    fpath.link_to(CHECKPOINTS_DIR / f"{model}.pt")

    starwhale_model.build(name=model, modules=["evaluation"])


if __name__ == "__main__":
    if len(sys.argv[1:]) == 0:
        print(f"please specify model name, supported: {SUPPORT_MODELS}")
        sys.exit(1)
    elif sys.argv[1] == "all":
        print("build all supported yolo models")
        models = SUPPORT_MODELS
    else:
        models = [sys.argv[1]]

    for model in models:
        build(model)