-
Notifications
You must be signed in to change notification settings - Fork 35
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
14 changed files
with
252 additions
and
23 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Download and extract the COCO panoptic val2017 data into ./data.
# Every step is guarded so the target is safe to re-run after a partial download.
.PHONY: raw
raw:
	mkdir -p data
	[ -f data/panoptic_annotations_trainval2017.zip ] || wget http://images.cocodataset.org/annotations/panoptic_annotations_trainval2017.zip -O data/panoptic_annotations_trainval2017.zip
	[ -f data/val2017.zip ] || wget http://images.cocodataset.org/zips/val2017.zip -O data/val2017.zip
	[ -d data/val2017 ] || unzip data/val2017.zip -d data
	[ -d data/annotations ] || unzip data/panoptic_annotations_trainval2017.zip -d data
	[ -d data/annotations/panoptic_val2017 ] || unzip data/annotations/panoptic_val2017.zip -d data/annotations
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
--- | ||
title: The `coco-raw` Dataset | ||
--- | ||
|
||
## The COCO Dataset Description | ||
|
||
- [Homepage](https://cocodataset.org/#home) | ||
|
||
## The `coco-raw` dataset Structure | ||
|
||
### Data Fields | ||
|
||
- `data`: `starwhale.Image` loaded as bytes array | ||
- `annotations` of type dict: | ||
- `mask`: `starwhale.Link` loaded as dict | ||
- `uri`: the path where the `mask` file sits | ||
- `segments_info`: array of `segment_info` | ||
- `bbox_view`: `starwhale.BoundingBox` used by viewer | ||
- other original fields | ||
|
||
|
||
## Build `coco-raw` Dataset locally | ||
|
||
- download raw data | ||
|
||
```shell | ||
make raw | ||
``` | ||
|
||
- build `coco-raw` dataset | ||
|
||
```shell | ||
swcli dataset build . --name coco-raw --handler dataset:do_iter_item | ||
``` | ||
|
||
## Example | ||
|
||
Output the first record of the `coco-raw` dataset. | ||
|
||
```shell | ||
python3 example.py | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
import os | ||
import json | ||
from pathlib import Path | ||
|
||
import boto3 | ||
from botocore.client import Config as S3Config | ||
|
||
from starwhale import Link, Image, MIMEType, S3LinkAuth, BoundingBox # noqa: F401 | ||
from starwhale.core.dataset.store import S3Connection, S3StorageBackend # noqa: F401 | ||
|
||
ROOT_DIR = Path(__file__).parent | ||
DATA_DIR = ROOT_DIR / "data" | ||
|
||
|
||
def do_iter_item():
    """Yield ``(image Link, annotation dict)`` pairs for the local COCO panoptic val2017 set.

    Loads the panoptic annotation index from ``data/annotations``, attaches a
    viewer-friendly ``BoundingBox`` to every segment and a local-filesystem
    mask ``Link`` to every annotation, then yields a ``Link`` to the raw image
    file together with its (mutated) annotation dict.
    """
    index_path = DATA_DIR / "annotations" / "panoptic_val2017.json"
    with index_path.open("r") as f:
        index = json.load(f)

    # Index image metadata by id so each annotation gets an O(1) lookup.
    images_by_id = {}
    for img in index["images"]:
        images_by_id[img["id"]] = img

    for anno in index["annotations"]:
        meta = images_by_id[anno["image_id"]]
        image_name = meta["file_name"]
        image_path = DATA_DIR / "val2017" / image_name
        # (height, width) — the mask PNG shares the image's dimensions.
        shape = (meta["height"], meta["width"])
        mask_name = anno["file_name"]
        mask_path = DATA_DIR / "annotations" / "panoptic_val2017" / mask_name

        # COCO bboxes are [x, y, width, height]; expose them as BoundingBox
        # objects for the dataset viewer.
        for seg in anno["segments_info"]:
            x, y, w, h = seg["bbox"]
            seg["bbox_view"] = BoundingBox(x=x, y=y, width=w, height=h)

        anno["mask"] = Link(
            auth=None,
            with_local_fs_data=True,
            data_type=Image(
                display_name=mask_name, shape=shape, mime_type=MIMEType.PNG
            ),
            uri=str(mask_path.absolute()),
        )
        yield Link(
            uri=str(image_path.absolute()),
            data_type=Image(display_name=image_name, shape=shape),
            with_local_fs_data=True,
        ), anno
|
||
|
||
# Remote (S3) layout and credentials for the pre-extracted COCO data.
# Every value can be overridden through the SW_S3_* environment variables;
# the defaults target a local MinIO-style deployment.
PATH_ROOT = "dataset/coco/extracted"  # object-key prefix inside the bucket
_ak = os.environ.get("SW_S3_AK", "starwhale")  # access key
_sk = os.environ.get("SW_S3_SK", "starwhale")  # secret key
_endpoint = os.environ.get("SW_S3_EDP", "http://10.131.0.1:9000")  # endpoint URL
_region = os.environ.get("SW_S3_REGION", "local")
_auth = S3LinkAuth(
    name="SW_S3", access_key=_ak, secret=_sk, endpoint=_endpoint, region=_region
)
_bucket = "users"
# NOTE(review): "RUI" looks like a typo for "URI"; kept as-is because
# do_iter_item_from_remote references this exact name.
RUI_ROOT = f"{_bucket}/{PATH_ROOT}"
|
||
|
||
def do_iter_item_from_remote():
    """Yield ``(image Link, annotation dict)`` pairs for the COCO data hosted on S3.

    Same output structure as ``do_iter_item``, but the annotation index is
    fetched from the configured S3 endpoint and every yielded ``Link`` points
    at the remote bucket instead of the local filesystem.
    """
    s3 = boto3.resource(
        "s3",
        endpoint_url=_endpoint,
        aws_access_key_id=_ak,
        aws_secret_access_key=_sk,
        config=S3Config(
            s3={},
            connect_timeout=6000,
            read_timeout=6000,
            signature_version="s3v4",
            retries={
                "total_max_attempts": 1,
                "mode": "standard",
            },
        ),
        region_name=_region,
    )

    index = json.loads(
        s3.Object(_bucket, f"{PATH_ROOT}/annotations/panoptic_val2017.json")
        .get()["Body"]
        .read()
        .decode("utf8")
    )
    # Build the id -> image-meta map inline. The original code called an
    # undefined helper ``images2dict`` here, which raised NameError at
    # runtime; this mirrors what ``do_iter_item`` does.
    img_dict = {img["id"]: img for img in index["images"]}
    for anno in index["annotations"]:
        img_meta = img_dict[anno["image_id"]]
        img_name = img_meta["file_name"]
        # (height, width) — the mask PNG shares the image's dimensions.
        img_shape = (img_meta["height"], img_meta["width"])
        msk_f_name = anno["file_name"]
        # COCO bboxes are [x, y, width, height]; expose them for the viewer.
        for sg in anno["segments_info"]:
            x, y, w, h = sg["bbox"]
            sg["bbox_view"] = BoundingBox(x=x, y=y, width=w, height=h)

        anno["mask"] = Link(
            auth=None,
            with_local_fs_data=False,
            data_type=Image(
                display_name=msk_f_name, shape=img_shape, mime_type=MIMEType.PNG
            ),
            uri=f"s3://{RUI_ROOT}/annotations/panoptic_val2017/{msk_f_name}",
        )
        yield Link(
            auth=_auth,
            uri=f"s3://{RUI_ROOT}/val2017/{img_name}",
            data_type=Image(display_name=img_name, shape=img_shape),
            with_local_fs_data=False,
        ), anno
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
import io | ||
import os | ||
from urllib.parse import urlparse | ||
|
||
from PIL import Image as PILImage | ||
from PIL import ImageDraw | ||
|
||
from starwhale import URI, URIType, get_data_loader | ||
from starwhale.core.dataset.store import S3Connection, S3StorageBackend | ||
|
||
|
||
def draw_bbox(img, bbox_view_):
    """Outline *bbox_view_* (a dict with ``x``/``y``/``width``/``height``) on *img* in red.

    Mutates *img* in place and returns nothing.
    """
    left = bbox_view_["x"]
    top = bbox_view_["y"]
    right = left + bbox_view_["width"]
    bottom = top + bbox_view_["height"]
    ImageDraw.Draw(img).rectangle(
        [(left, top), (right, bottom)],
        fill=None,
        outline="red",
    )
|
||
|
||
def raw():
    """Display the first sample of the local ``coco-raw`` dataset.

    Draws every segment's bounding box on the image, then overlays the
    panoptic mask at 50% opacity and opens the result in the system viewer.
    """
    dataset_uri = URI("coco-raw/version/latest", expected_type=URIType.DATASET)
    for _idx, data, annotations in get_data_loader(dataset_uri, 0, 1):
        img = PILImage.open(io.BytesIO(data.fp))
        msk = PILImage.open(annotations["mask"]["uri"]).convert("RGBA")
        with img, msk:
            for seg in annotations["segments_info"]:
                draw_bbox(img, seg["bbox_view"])

            # Half-transparent mask pasted over the original image.
            msk.putalpha(127)
            img.paste(msk, (0, 0), mask=msk)
            img.show()
|
||
|
||
# S3 connection settings for the link() example; overridable via the
# SW_S3_* environment variables. Defaults target a local MinIO-style server.
_ak = os.environ.get("SW_S3_AK", "starwhale")  # access key
_sk = os.environ.get("SW_S3_SK", "starwhale")  # secret key
_endpoint = os.environ.get("SW_S3_EDP", "http://10.131.0.1:9000")  # endpoint URL
_region = os.environ.get("SW_S3_REGION", "local")
_bucket = "users"
|
||
|
||
def link():
    """Display the first sample of the remote ``coco-link`` dataset.

    Fetches the panoptic mask bytes directly from S3 (note: uses the
    storage backend's private ``_make_file`` API), draws every segment's
    bounding box, then overlays the mask at 50% opacity.
    """
    backend = S3StorageBackend(S3Connection(_endpoint, _ak, _sk, _region, _bucket))
    dataset_uri = URI("coco-link/version/latest", expected_type=URIType.DATASET)
    for _idx, data, annotations in get_data_loader(dataset_uri, 0, 1):
        mask_key = urlparse(annotations["mask"]["uri"]).path
        mask_bytes = backend._make_file(_bucket, mask_key).read(-1)
        img = PILImage.open(io.BytesIO(data.fp))
        msk = PILImage.open(io.BytesIO(mask_bytes)).convert("RGBA")
        with img, msk:
            for seg in annotations["segments_info"]:
                draw_bbox(img, seg["bbox_view"])

            # Half-transparent mask pasted over the original image.
            msk.putalpha(127)
            img.paste(msk, (0, 0), mask=msk)
            img.show()
|
||
|
||
if __name__ == "__main__":
    # Run the local-filesystem demo by default; call link() for the S3 variant.
    raw()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
starwhale==0.3.1 | ||
Pillow==9.2.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
# NOTE(review): the scraped diff collapsed the removed/added lines, leaving
# duplicate FROM and ENTRYPOINT lines; this is the reconstructed post-commit
# file (base image tag bumped 0.6 -> 0.7).
FROM homepage-ca.intra.starwhale.ai:5000/docker-e2e:0.7
COPY entrypoint.sh /
ENTRYPOINT ["/entrypoint.sh"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters