Skip to content

Commit

Permalink
feat(example): add video type & example (#1473)
Browse files Browse the repository at this point in the history
add video example
  • Loading branch information
goldenxinxing authored Nov 13, 2022
1 parent 6426568 commit 92c3baf
Show file tree
Hide file tree
Showing 24 changed files with 2,250 additions and 0 deletions.
2 changes: 2 additions & 0 deletions client/starwhale/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
Text,
Audio,
Image,
Video,
Binary,
LinkType,
MIMEType,
Expand Down Expand Up @@ -49,6 +50,7 @@
"Binary",
"Text",
"Audio",
"Video",
"Image",
"ClassLabel",
"BoundingBox",
Expand Down
2 changes: 2 additions & 0 deletions client/starwhale/api/_impl/dataset/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
Text,
Audio,
Image,
Video,
Binary,
LinkType,
MIMEType,
Expand Down Expand Up @@ -34,6 +35,7 @@
"Binary",
"Text",
"Audio",
"Video",
"Image",
"ClassLabel",
"BoundingBox",
Expand Down
2 changes: 2 additions & 0 deletions client/starwhale/api/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
Text,
Audio,
Image,
Video,
Binary,
LinkType,
MIMEType,
Expand Down Expand Up @@ -40,6 +41,7 @@
"Binary",
"Text",
"Audio",
"Video",
"Image",
"ClassLabel",
"BoundingBox",
Expand Down
27 changes: 27 additions & 0 deletions client/starwhale/core/dataset/type.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ class MIMEType(Enum):
AVIF = "image/avif"
MP4 = "video/mp4"
AVI = "video/avi"
WEBM = "video/webm"
WAV = "audio/wav"
MP3 = "audio/mp3"
PLAIN = "text/plain"
Expand Down Expand Up @@ -145,6 +146,7 @@ def create_by_file_suffix(cls, name: str) -> MIMEType:
".mp4": cls.MP4,
".avif": cls.AVIF,
".avi": cls.AVI,
".webm": cls.WEBM,
".wav": cls.WAV,
".csv": cls.CSV,
".txt": cls.PLAIN,
Expand Down Expand Up @@ -221,6 +223,10 @@ def reflect(cls, raw_data: bytes, data_type: t.Dict[str, t.Any]) -> BaseArtifact
return Audio(
raw_data, mime_type=mime_type, shape=shape, display_name=display_name
)
elif dtype == ArtifactType.Video.value:
return Video(
raw_data, mime_type=mime_type, shape=shape, display_name=display_name
)
elif not dtype or dtype == ArtifactType.Binary.value:
return Binary(raw_data)
elif dtype == ArtifactType.Link.value:
Expand Down Expand Up @@ -350,6 +356,27 @@ def _do_validate(self) -> None:
raise NoSupportError(f"Audio type: {self.mime_type}")


class Video(BaseArtifact):
    """Artifact wrapper for video content.

    All constructor arguments are forwarded to ``BaseArtifact`` together
    with ``ArtifactType.Video``.
    """

    def __init__(
        self,
        fp: _TArtifactFP = "",
        display_name: str = "",
        shape: t.Optional[_TShape] = None,
        mime_type: t.Optional[MIMEType] = None,
    ) -> None:
        # A falsy shape (None or empty) falls back to the placeholder (None,).
        super().__init__(
            fp,
            ArtifactType.Video,
            display_name,
            shape or (None,),
            mime_type,
        )

    def _do_validate(self) -> None:
        """Raise NoSupportError unless the MIME type is MP4, AVI, WEBM or UNDEFINED."""
        supported = (
            MIMEType.MP4,
            MIMEType.AVI,
            MIMEType.WEBM,
            MIMEType.UNDEFINED,
        )
        if self.mime_type in supported:
            return
        raise NoSupportError(f"Video type: {self.mime_type}")


class ClassLabel(ASDictMixin):
def __init__(self, names: t.List[t.Union[int, float, str]]) -> None:
self.type = "class_label"
Expand Down
10 changes: 10 additions & 0 deletions client/tests/sdk/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
Text,
Audio,
Image,
Video,
Binary,
ClassLabel,
BoundingBox,
Expand Down Expand Up @@ -482,6 +483,15 @@ def test_audio(self) -> None:
assert _asdict["type"] == "audio"
assert audio.to_bytes() == b"test"

def test_video(self) -> None:
    """A Video built from a fake ``.avi`` file reports AVI / "video" and returns the file bytes."""
    video_path = "/test/1.avi"
    self.fs.create_file(video_path, contents="test")
    video = Video(video_path)

    # Round-trip through JSON so the check runs on the serialized form.
    serialized = json.dumps(video.asdict())
    restored = json.loads(serialized)

    assert restored["mime_type"] == MIMEType.AVI.value
    assert restored["type"] == "video"
    assert video.to_bytes() == b"test"

def test_bbox(self) -> None:
bbox = BoundingBox(1, 2, 3, 4)
assert bbox.to_list() == [1, 2, 3, 4]
Expand Down
2 changes: 2 additions & 0 deletions example/ucf101/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
data/
models/
5 changes: 5 additions & 0 deletions example/ucf101/.swignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
venv
.git
.history
.vscode
.venv
17 changes: 17 additions & 0 deletions example/ucf101/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Create models/ if needed and run the training script.
# `.PHONY` (previously misspelled `.POHNY`) makes the target run even if a
# file or directory named `train` exists.
.PHONY: train
train:
	mkdir -p models
	python3 ucf101/train.py

# Recreate data/ from scratch: download and unpack UCF101, build
# data/all_list.txt with generate_data.sh, then sample the train, validation
# and test lists from it.
# `.PHONY` (previously misspelled `.POHNY`) makes the target run even if a
# file named `download-data` exists.
# NOTE(review): each shuf call samples independently from all_list.txt, so the
# three lists can share entries — confirm that overlap is acceptable.
.PHONY: download-data
download-data:
	rm -rf data
	mkdir -p data
	wget http://www.crcv.ucf.edu/data/UCF101/UCF101.rar --no-check-certificate -P data
	unrar x data/UCF101.rar data
	rm -rf data/UCF101.rar
	rm -f data/all_list.txt
	bash generate_data.sh
	shuf data/all_list.txt -n 9000 -o data/train_list.txt
	shuf data/all_list.txt -n 1000 -o data/validation_list.txt
	shuf data/all_list.txt -n 200 -o data/test_list.txt
9 changes: 9 additions & 0 deletions example/ucf101/dataset.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
name: ucf101

handler: ucf101.dataset:UCFDatasetBuildExecutor

desc: ucf101 data and label test dataset

attr:
alignment_size: 128
volume_size: 10M
20 changes: 20 additions & 0 deletions example/ucf101/generate_data.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#! /bin/bash
#
# Index every file under data/UCF-101 into data/all_list.txt.
# Each appended line is:
#   <global_index> <label_index> <path relative to data/UCF-101>
# Paths are relative to the current directory, so run this from the directory
# that contains data/.

dataset_root="data/UCF-101"
list_file="data/all_list.txt"

global_index=0   # id given to the next file written
label_index=0    # bumped once per directory visited; handed down as the label

# read_dir DIR [LABEL]
#   Walk DIR recursively. Files are appended to $list_file tagged with LABEL;
#   each sub-directory is walked with the current label_index as its LABEL.
read_dir(){
    local dir=$1
    local label=$2
    local entry name rel

    # Path of DIR below $dataset_root (empty for the root itself). Stripping
    # the prefix replaces the former hard-coded character offset.
    rel=${dir#"$dataset_root"}
    rel=${rel#/}

    # Glob instead of parsing `ls` so names containing whitespace stay intact.
    for entry in "$dir"/*
    do
        # An empty directory leaves the pattern unexpanded; skip that case.
        [ -e "$entry" ] || [ -L "$entry" ] || continue
        name=${entry##*/}
        if [ -d "$entry" ]
        then
            read_dir "$entry" "$label_index"
            label_index=$((label_index + 1))
        else
            # ${label:+...} omits the label column when no LABEL was passed,
            # which is what echo printed for an unquoted empty argument.
            echo "$global_index ${label:+$label }$rel/$name" >> "$list_file"
            global_index=$((global_index + 1))
        fi
    done
}

read_dir "$dataset_root"
10 changes: 10 additions & 0 deletions example/ucf101/model.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
version: 1.0
name: ucf101

model:
- models/PyTorch-MFNet_ep-0000.pth

run:
handler: ucf101.evaluator:UCF101PipelineHandler

desc: ucf101 by pytorch
54 changes: 54 additions & 0 deletions example/ucf101/requirements-sw-lock.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Generated by Starwhale(0.3.1) Runtime Lock
--index-url 'https://pypi.doubanio.com/simple/'
--extra-index-url 'https://mirrors.bfsu.edu.cn/pypi/web/simple/'
--trusted-host 'mirrors.bfsu.edu.cn\npypi.doubanio.com'
appdirs==1.4.4
attrs==21.4.0
boto3==1.21.0
botocore==1.24.46
cattrs==1.7.1
certifi==2022.9.24
charset-normalizer==2.1.1
click==8.1.3
click-option-group==0.5.5
commonmark==0.9.1
conda-pack==0.6.0
dill==0.3.5.1
distlib==0.3.6
filelock==3.8.0
fs==2.4.16
idna==3.4
Jinja2==3.1.2
jmespath==0.10.0
joblib==1.2.0
jsonlines==3.0.0
loguru==0.6.0
MarkupSafe==2.1.1
numpy==1.23.4
nvidia-cublas-cu11==11.10.3.66
nvidia-cuda-nvrtc-cu11==11.7.99
nvidia-cuda-runtime-cu11==11.7.99
nvidia-cudnn-cu11==8.5.0.96
opencv-python==4.6.0.66
packaging==21.3
platformdirs==2.5.3
pyarrow==10.0.0
Pygments==2.13.0
pyparsing==3.0.9
python-dateutil==2.8.2
PyYAML==6.0
requests==2.28.1
requests-toolbelt==0.10.1
rich==12.6.0
s3transfer==0.5.2
scikit-learn==1.1.3
scipy==1.9.3
shellingham==1.5.0
six==1.16.0
tenacity==8.1.0
textual==0.1.18
threadpoolctl==3.1.0
torch==1.13.0
typing_extensions==4.4.0
urllib3==1.26.12
virtualenv==20.16.6
13 changes: 13 additions & 0 deletions example/ucf101/runtime.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
api_version: '1.1'
dependencies:
- requirements-sw-lock.txt
- pip:
- starwhale==0.3.1
- wheels:
- starwhale-0.0.0.dev0-py3-none-any.whl
environment:
arch: noarch
os: ubuntu:20.04
python: '3.9'
mode: venv
name: ucf101
16 changes: 16 additions & 0 deletions example/ucf101/transform_video.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#! /bin/bash
#
# Convert every file under data/UCF-101 to WebM with ffmpeg, mirroring the
# directory layout under data/UCF-101-WEBM.
# Paths are relative to the current directory, so run this from the directory
# that contains data/.

source_root="data/UCF-101"
target_root="data/UCF-101-WEBM"

# read_dir DIR
#   Walk DIR recursively: create the matching output directory for each
#   sub-directory, and run ffmpeg on every other entry.
read_dir(){
    local dir=$1
    local entry name rel

    # Path of DIR below $source_root (empty for the root itself). Stripping
    # the prefix replaces the former hard-coded character offset.
    rel=${dir#"$source_root"}
    rel=${rel#/}

    # Glob instead of parsing `ls` so names containing whitespace stay intact.
    for entry in "$dir"/*
    do
        # An empty directory leaves the pattern unexpanded; skip that case.
        [ -e "$entry" ] || [ -L "$entry" ] || continue
        name=${entry##*/}
        if [ -d "$entry" ]
        then
            mkdir -p "$target_root/$rel/$name"
            read_dir "$entry"
        else
            # -y overwrites an existing output file; ${name%.*} drops the
            # final extension before ".webm" is appended.
            ffmpeg -i "$entry" -y "$target_root/$rel/${name%.*}.webm"
        fi
    done
}

read_dir "$source_root"
Empty file.
29 changes: 29 additions & 0 deletions example/ucf101/ucf101/dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import typing as t
from pathlib import Path

from starwhale import Video, MIMEType, BuildExecutor

# Parent of the directory that contains this module.
root_dir = Path(__file__).parent.parent
# Directory the video sub paths in the list files are resolved against.
dataset_dir = root_dir.joinpath("data", "UCF-101")
# List files read by the dataset build executor.
test_ds_path = [root_dir.joinpath("data", "test_list.txt")]


class UCFDatasetBuildExecutor(BuildExecutor):
    """Build the ucf101 dataset from the list files named in ``test_ds_path``."""

    def iter_item(self) -> t.Generator[t.Tuple, None, None]:
        """Yield one ``(id, Video, annotations)`` tuple per listed video.

        Every non-blank line of a list file holds three whitespace-separated
        fields: an index (ignored here), the label, and the video path
        relative to ``dataset_dir``.

        Raises:
            ValueError: if a non-blank line has fewer than three fields.
        """
        for path in test_ds_path:
            with path.open() as f:
                # Iterate the file object directly instead of loading every
                # line into memory first.
                for line in f:
                    record = line.strip()
                    if not record:
                        # Skip blank lines rather than failing the unpack.
                        continue

                    # maxsplit=2 keeps a sub path that contains spaces in
                    # one piece.
                    _, label, video_sub_path = record.split(maxsplit=2)

                    data_path = dataset_dir / video_sub_path
                    data = Video(
                        data_path,
                        display_name=video_sub_path,
                        shape=(1,),
                        mime_type=MIMEType.AVI,
                    )

                    annotations = {
                        "label": label,
                    }
                    yield f"{label}_{video_sub_path}", data, annotations
Loading

0 comments on commit 92c3baf

Please sign in to comment.