pytorch · pmeier · Jan 17, 2022 · Mar 18, 2022 · Mar 21, 2022 · Mar 21, 2022
diff --git a/test/builtin_dataset_mocks.py b/test/builtin_dataset_mocks.py
@@ -16,7 +16,7 @@
 import PIL.Image
 import pytest
 import torch
-from datasets_utils import make_zip, make_tar, create_image_folder, create_image_file
+from datasets_utils import make_zip, make_tar, create_image_folder, create_image_file, random_group
 from torch.nn.functional import one_hot
 from torch.testing import make_tensor as _make_tensor
 from torchvision._utils import sequence_to_str
@@ -1431,3 +1431,69 @@ def stanford_cars(info, root, config):
         make_tar(root, "car_devkit.tgz", devkit, compression="gz")
 
     return num_samples
+
+
+@register_mock
+def widerface(info, root, config):
+    """Create a fake WIDERFace dataset inside ``root``.
+
+    Requests three images for each of three categories from ``create_image_folder`` below
+    ``WIDER_{config.split}/images``, randomly distributes them over the ``"train"``, ``"val"``, and ``"test"``
+    splits, and writes one annotation file per split into ``wider_face_split``. Both folders are passed to
+    ``make_zip`` afterwards.
+
+    Returns:
+        Number of images that were assigned to ``config.split``.
+    """
+    labels_and_categories = [
+        (0, "Parade"),
+        (59, "people"),
+        (60, "Street_Battle"),
+    ]
+
+    image_folder = root / f"WIDER_{config.split}"
+    image_folder.mkdir()
+    image_files = []
+    for label, category in labels_and_categories:
+        image_files.extend(
+            create_image_folder(
+                image_folder / "images",
+                f"{label}--{category}",
+                lambda idx: f"{label}_{category}_{idx}.jpg",
+                num_examples=3,
+            )
+        )
+    make_zip(root, f"{image_folder.name}.zip")
+
+    def annotation_key(path):
+        # Path relative to its grandparent directory, i.e. "<parent folder>/<file name>".
+        return path.relative_to(path.parents[1]).as_posix()
+
+    anns_folder = root / "wider_face_split"
+    anns_folder.mkdir()
+    for split, image_files_in_split in random_group(image_files, ["train", "val", "test"]).items():
+        if split == "test":
+            # For the test split only the image paths are written, one per line.
+            with open(anns_folder / "wider_face_test_filelist.txt", "w") as file:
+                file.write("\n".join(annotation_key(path) for path in image_files_in_split) + "\n")
+        else:
+            # Per image: the path, the number of objects, and one line of attributes per object.
+            with open(anns_folder / f"wider_face_{split}_bbx_gt.txt", "w") as file:
+                for path in image_files_in_split:
+                    file.write(f"{annotation_key(path)}\n")
+
+                    num_objects = int(torch.randint(1, 5, ()))
+                    file.write(f"{num_objects}\n")
+                    for _ in range(num_objects):
+                        attrs = [
+                            *torch.randint(10, (4,)).tolist(),  # bounding box
+                            int(torch.randint(3, ())),  # blur
+                            int(torch.randint(2, ())),  # expression
+                            int(torch.randint(2, ())),  # illumination
+                            int(torch.randint(2, ())),  # invalid
+                            int(torch.randint(3, ())),  # occlusion
+                            int(torch.randint(2, ())),  # pose
+                        ]
+                        file.write(" ".join(str(attr) for attr in attrs) + "\n")
+
+        if split == config.split:
+            num_samples = len(image_files_in_split)
+
+    make_zip(root, f"{anns_folder.name}.zip")
+
+    return num_samples
diff --git a/test/datasets_utils.py b/test/datasets_utils.py
@@ -952,3 +952,26 @@ def make_fake_flo_file(h, w, file_name):
     )
     with open(file_name, "wb") as f:
         f.write(content)
+
+
+def random_group(collection, groups):
+    """Randomly put items into groups without overlap.
+
+    Args:
+        collection: Collection of items to be grouped.
+        groups: Collection of group keys.
+
+    Returns:
+        Plain dictionary with ``groups`` as keys. Each value is a list of random items from ``collection`` without
+            overlap to the other values. Each list has at least length ``1``.
+
+    Raises:
+        ValueError: If ``groups`` is empty or if ``collection`` has fewer items than there are ``groups``. In the
+            latter case not every group could receive an item and the sampling below would never terminate.
+    """
+    collection = list(collection)
+    groups = list(groups)
+    if not groups:
+        raise ValueError("`groups` has to contain at least one key.")
+    if len(collection) < len(groups):
+        raise ValueError(
+            f"Cannot put {len(collection)} item(s) into {len(groups)} non-empty groups: "
+            f"`collection` needs at least as many items as there are `groups`."
+        )
+
+    # Rejection sampling: redraw the assignment until every group index was drawn at least once.
+    while True:
+        idcs = torch.randint(len(groups), (len(collection),)).tolist()
+        if len(set(idcs)) == len(groups):
+            break
+
+    grouping = {group: [] for group in groups}
+    for idx, item in zip(idcs, collection):
+        grouping[groups[idx]].append(item)
+    return grouping
diff --git a/torchvision/prototype/datasets/_builtin/__init__.py b/torchvision/prototype/datasets/_builtin/__init__.py
@@ -18,3 +18,4 @@
 from .stanford_cars import StanfordCars
 from .svhn import SVHN
 from .voc import VOC
+from .widerface import WIDERFace
diff --git a/torchvision/prototype/datasets/_builtin/widerface.categories b/torchvision/prototype/datasets/_builtin/widerface.categories
@@ -0,0 +1,61 @@
+Parade
+Handshaking
+Demonstration
+Riot
+Dancing
+Car_Accident
+Funeral
+Cheering
+Election_Campain
+Press_Conference
+People_Marching
+Meeting
+Group
+Interview
+Traffic
+Stock_Market
+Award_Ceremony
+Ceremony
+Concerts
+Couple
+Family_Group
+Festival
+Picnic
+Shoppers
+Soldier_Firing
+Soldier_Patrol
+Soldier_Drilling
+Spa
+Sports_Fan
+Students_Schoolkids
+Surgeons
+Waiter_Waitress
+Worker_Laborer
+Running
+Baseball
+Basketball
+Football
+Soccer
+Tennis
+Ice_Skating
+Gymnastics
+Swimming
+Car_Racing
+Row_Boat
+Aerobics
+Balloonist
+Jockey
+Matador_Bullfighter
+Parachutist_Paratrooper
+Greeting
+Celebration_Or_Party
+Dresses
+Photographers
+Raid
+Rescue
+Sports_Coach_Trainer
+Voter
+Angler
+Hockey
+people
+Street_Battle
diff --git a/torchvision/prototype/datasets/_builtin/widerface.py b/torchvision/prototype/datasets/_builtin/widerface.py
@@ -0,0 +1,175 @@
+import itertools
+import pathlib
+from typing import Any, Dict, List, Optional, Tuple, BinaryIO, Iterator
+
+from torchdata.datapipes.iter import IterDataPipe, Mapper, Filter, IterKeyZipper, LineReader
+from torchvision.prototype.datasets.utils import (
+    Dataset,
+    DatasetConfig,
+    DatasetInfo,
+    HttpResource,
+    OnlineResource,
+    GDriveResource,
+)
+from torchvision.prototype.datasets.utils._internal import (
+    INFINITE_BUFFER_SIZE,
+    hint_sharding,
+    hint_shuffling,
+    path_accessor,
+    path_comparator,
+    getitem,
+)
+from torchvision.prototype.features import BoundingBox, EncodedImage, Label
+
+
+class WIDERFaceAnnotationParser(IterDataPipe[Tuple[str, List[Dict[str, str]]]]):
+    """Group the lines of a bounding box annotation file into one record per image.
+
+    A record is expected to consist of a line with the image path, a line with the number of annotated objects,
+    and one line of space-separated attribute values per object.
+
+    Yields:
+        Tuples of the image file name (last component of the path) and a list with one dictionary per object. Each
+        dictionary maps the names in ``_FIELDS`` to the corresponding, still unparsed string values.
+    """
+
+    # Attribute names in the order in which their values appear in an annotation row.
+    _FIELDS = ("x", "y", "w", "h", "blur", "expression", "illumination", "invalid", "occlusion", "pose")
+
+    def __init__(self, datapipe: IterDataPipe[str]) -> None:
+        self.datapipe = datapipe
+
+    def __iter__(self) -> Iterator[Tuple[str, List[Dict[str, str]]]]:
+        lines = iter(self.datapipe)
+        # Line that was read ahead, but turned out to be the path line of the next record.
+        lookahead: Optional[str] = None
+        while True:
+            if lookahead is None:
+                line = next(lines, None)
+                if line is None:
+                    return
+            else:
+                line, lookahead = lookahead, None
+
+            # Only split off the last path component, consistent with how the test file list is parsed.
+            name = line.rsplit("/", 1)[1]
+            num_anns = int(next(lines))
+            anns = [dict(zip(self._FIELDS, next(lines).split(" "))) for _ in range(num_anns)]
+
+            if num_anns == 0:
+                # NOTE(review): the official annotation files reportedly still list a single all-zero attribute
+                # row for images without any face -- TODO confirm. If it is not skipped, that row is mistaken for
+                # the next image path. Only drop the following line if it does not look like a path.
+                candidate = next(lines, None)
+                if candidate is not None and "/" in candidate:
+                    lookahead = candidate
+
+            yield name, anns
+
+
+class WIDERFace(Dataset):
+    """WIDER FACE dataset with the splits ``"train"``, ``"val"``, and ``"test"``.
+
+    Each sample is a dictionary with the keys ``path``, ``image``, and ``label`` as well as ``bounding_boxes``,
+    ``blur``, ``expression``, ``illumination``, ``occlusion``, ``pose``, and ``invalid``. The latter seven are
+    ``None`` if there are no annotations for an image, which is always the case for the ``"test"`` split.
+    """
+
+    def _make_info(self) -> DatasetInfo:
+        return DatasetInfo(
+            "widerface",
+            homepage="http://shuoyang1213.me/WIDERFACE/",
+            valid_options=dict(split=("train", "val", "test")),
+        )
+
+    def resources(self, config: DatasetConfig) -> List[OnlineResource]:
+        """Return the image archive of ``config.split`` followed by the annotation archive shared by all splits."""
+        # Google Drive file ID and SHA256 checksum of the image archive of each split.
+        file_id, sha256 = {
+            "train": (
+                "15hGDLhsx8bLgLcIRD5DhYt5iBxnjNF1M",
+                "e23b76129c825cafae8be944f65310b2e1ba1c76885afe732f179c41e5ed6d59",
+            ),
+            "val": (
+                "1GUCogbp16PMGa39thoMMeWxp7Rp5oM8Q",
+                "f9efbd09f28c5d2d884be8c0eaef3967158c866a593fc36ab0413e4b2a58a17a",
+            ),
+            "test": (
+                "1HIfDbVEWKmsYKJZm4lchTBDLW5N7dY5T",
+                "3b0313e11ea292ec58894b47ac4c0503b230e12540330845d70a7798241f88d3",
+            ),
+        }[config.split]
+        images = GDriveResource(file_id, file_name=f"WIDER_{config.split}.zip", sha256=sha256)
+
+        anns = HttpResource(
+            "http://shuoyang1213.me/WIDERFACE/support/bbx_annotation/wider_face_split.zip",
+            sha256="c7561e4f5e7a118c249e0a5c5c902b0de90bbf120d7da9fa28d99041f68a8a5c",
+        )
+        return [images, anns]
+
+    def _parse_test_annotation(self, data: str) -> Tuple[str, None]:
+        """Turn a line of the test file list into a ``(file name, None)`` tuple, i.e. a record without annotations."""
+        return data.rsplit("/", 1)[1], None
+
+    # The maps below translate the raw attribute codes of an annotation row into human-readable values.
+    _BLUR_MAP = {
+        "0": "clear",
+        "1": "normal",
+        "2": "heavy",
+    }
+
+    _EXPRESSION_MAP = {
+        "0": "typical",
+        "1": "exaggerate",
+    }
+
+    _ILLUMINATION_MAP = {
+        "0": "normal",
+        "1": "extreme",
+    }
+
+    _OCCLUSION_MAP = {
+        "0": "no",
+        "1": "partial",
+        "2": "heavy",
+    }
+
+    _POSE_MAP = {
+        "0": "typical",
+        "1": "atypical",
+    }
+
+    def _prepare_anns(self, anns: Optional[List[Dict[str, Any]]], image_size: Tuple[int, int]) -> Dict[str, Any]:
+        """Convert the raw annotations of one image into the annotation part of a sample.
+
+        Args:
+            anns: One dictionary of raw string values per object, or ``None`` / an empty list if the image has no
+                annotations.
+            image_size: Size of the image the bounding boxes belong to.
+
+        Returns:
+            Dictionary with the keys ``bounding_boxes``, ``blur``, ``expression``, ``illumination``, ``occlusion``,
+            ``pose``, and ``invalid``. All values are ``None`` if there are no annotations.
+
+        Raises:
+            KeyError: If an attribute code is not part of the corresponding map.
+        """
+        if not anns:
+            # Keep the set of keys identical for annotated and unannotated images.
+            return dict(
+                zip(
+                    ("bounding_boxes", "blur", "expression", "illumination", "occlusion", "pose", "invalid"),
+                    itertools.repeat(None),
+                )
+            )
+
+        return dict(
+            bounding_boxes=BoundingBox(
+                [[int(part) for part in (ann["x"], ann["y"], ann["w"], ann["h"])] for ann in anns],
+                format="xywh",
+                image_size=image_size,
+            ),
+            blur=[self._BLUR_MAP[ann["blur"]] for ann in anns],
+            expression=[self._EXPRESSION_MAP[ann["expression"]] for ann in anns],
+            illumination=[self._ILLUMINATION_MAP[ann["illumination"]] for ann in anns],
+            occlusion=[self._OCCLUSION_MAP[ann["occlusion"]] for ann in anns],
+            pose=[self._POSE_MAP[ann["pose"]] for ann in anns],
+            invalid=[ann["invalid"] == "1" for ann in anns],
+        )
+
+    def _prepare_sample(
+        self,
+        data: Tuple[Tuple[str, Optional[List[Dict[str, Any]]]], Tuple[str, BinaryIO]],
+    ) -> Dict[str, Any]:
+        """Merge a matched ``(annotation record, image record)`` pair into a single sample dictionary."""
+        ann_data, image_data = data
+        _, anns = ann_data
+        path, buffer = image_data
+        image = EncodedImage.from_file(buffer)
+
+        # The parent folder of an image is named "<id>--<category>". Only split at the first separator.
+        category = pathlib.Path(path).parent.name.split("--", 1)[1]
+
+        return dict(
+            self._prepare_anns(anns, image.image_size),
+            path=path,
+            label=Label.from_category(category, categories=self.categories),
+            image=image,
+        )
+
+    def _make_datapipe(
+        self, resource_dps: List[IterDataPipe], *, config: DatasetConfig
+    ) -> IterDataPipe[Dict[str, Any]]:
+        """Build the datapipe that yields the samples of ``config.split``."""
+        images_dp, anns_dp = resource_dps
+
+        # Both branches yield tuples whose first item is the image file name and whose second item holds the
+        # annotations (``None`` for the test split).
+        if config.split == "test":
+            anns_dp = Filter(anns_dp, path_comparator("name", "wider_face_test_filelist.txt"))
+            anns_dp = LineReader(anns_dp, decode=True, return_path=False)
+            anns_dp = Mapper(anns_dp, self._parse_test_annotation)
+        else:
+            anns_dp = Filter(anns_dp, path_comparator("name", f"wider_face_{config.split}_bbx_gt.txt"))
+            anns_dp = LineReader(anns_dp, decode=True, return_path=False)
+            anns_dp = WIDERFaceAnnotationParser(anns_dp)
+        anns_dp = hint_sharding(anns_dp)
+        anns_dp = hint_shuffling(anns_dp)
+
+        # Join every annotation record with its image, using the first tuple item as key on the annotation side.
+        dp = IterKeyZipper(
+            anns_dp,
+            images_dp,
+            key_fn=getitem(0),
+            ref_key_fn=path_accessor("name"),
+            buffer_size=INFINITE_BUFFER_SIZE,
+        )
+        return Mapper(dp, self._prepare_sample)
+
+    def _generate_categories(self, root: pathlib.Path) -> Tuple[str, ...]:
+        """Collect the category names from the "<id>--<category>" folders of the default image archive.
+
+        Returns:
+            Category names sorted by their numeric ID.
+        """
+        resource = self.resources(self.default_config)[0]
+
+        ids_and_categories = {tuple(pathlib.Path(path).parent.name.split("--", 1)) for path, _ in resource.load(root)}
+        _, categories = zip(*sorted(ids_and_categories, key=lambda id_and_category: int(id_and_category[0])))
+        return categories