pytorch · bjuncek · Oct 21, 2021 · Oct 21, 2021 · Oct 31, 2021 · Nov 2, 2021
diff --git a/torchvision/datasets/utils.py b/torchvision/datasets/utils.py
@@ -286,9 +286,20 @@ def _extract_zip(from_path: str, to_path: str, compression: Optional[str]) -> No
         zip.extractall(to_path)
 
 
+def _extract_rar(from_path: str, to_path: str, compression: Optional[str]) -> None:
+    """Unpack the rar archive at ``from_path`` into ``to_path``; an extra compression layer is rejected."""
+    if compression is not None:
+        raise RuntimeError("Compressed rar archives are currently not supported")
+    # Function-scope import: rarfile is only needed once a rar archive is actually extracted.
+    import rarfile
+    with rarfile.RarFile(from_path) as archive:
+        archive.extractall(to_path)
+
+
 _ARCHIVE_EXTRACTORS: Dict[str, Callable[[str, str, Optional[str]], None]] = {
     ".tar": _extract_tar,
     ".zip": _extract_zip,
+    ".rar": _extract_rar,
 }
 _COMPRESSED_FILE_OPENERS: Dict[str, Callable[..., IO]] = {
     ".bz2": bz2.open,

diff --git a/torchvision/prototype/datasets/_api.py b/torchvision/prototype/datasets/_api.py
@@ -51,9 +51,10 @@ def info(name: str) -> DatasetInfo:
 
 DEFAULT_DECODER = object()
 
-DEFAULT_DECODER_MAP: Dict[DatasetType, Callable[[io.IOBase], torch.Tensor]] = {
+DEFAULT_DECODER_MAP: Dict[DatasetType, Optional[Callable[[io.IOBase], torch.Tensor]]] = {
     DatasetType.RAW: raw,
     DatasetType.IMAGE: pil,
+    DatasetType.VIDEO: None,
 }
 
 

diff --git a/torchvision/prototype/datasets/_builtin/__init__.py b/torchvision/prototype/datasets/_builtin/__init__.py
@@ -6,4 +6,5 @@
 from .mnist import MNIST, FashionMNIST, KMNIST, EMNIST, QMNIST
 from .sbd import SBD
 from .semeion import SEMEION
+from .ucf101 import UCF101
 from .voc import VOC
diff --git a/torchvision/prototype/datasets/_builtin/ucf101.categories b/torchvision/prototype/datasets/_builtin/ucf101.categories
@@ -0,0 +1,101 @@
+ApplyEyeMakeup
+ApplyLipstick
+Archery
+BabyCrawling
+BalanceBeam
+BandMarching
+BaseballPitch
+Basketball
+BasketballDunk
+BenchPress
+Biking
+Billiards
+BlowDryHair
+BlowingCandles
+BodyWeightSquats
+Bowling
+BoxingPunchingBag
+BoxingSpeedBag
+BreastStroke
+BrushingTeeth
+CleanAndJerk
+CliffDiving
+CricketBowling
+CricketShot
+CuttingInKitchen
+Diving
+Drumming
+Fencing
+FieldHockeyPenalty
+FloorGymnastics
+FrisbeeCatch
+FrontCrawl
+GolfSwing
+Haircut
+Hammering
+HammerThrow
+HandstandPushups
+HandstandWalking
+HeadMassage
+HighJump
+HorseRace
+HorseRiding
+HulaHoop
+IceDancing
+JavelinThrow
+JugglingBalls
+JumpingJack
+JumpRope
+Kayaking
+Knitting
+LongJump
+Lunges
+MilitaryParade
+Mixing
+MoppingFloor
+Nunchucks
+ParallelBars
+PizzaTossing
+PlayingCello
+PlayingDaf
+PlayingDhol
+PlayingFlute
+PlayingGuitar
+PlayingPiano
+PlayingSitar
+PlayingTabla
+PlayingViolin
+PoleVault
+PommelHorse
+PullUps
+Punch
+PushUps
+Rafting
+RockClimbingIndoor
+RopeClimbing
+Rowing
+SalsaSpin
+ShavingBeard
+Shotput
+SkateBoarding
+Skiing
+Skijet
+SkyDiving
+SoccerJuggling
+SoccerPenalty
+StillRings
+SumoWrestling
+Surfing
+Swing
+TableTennisShot
+TaiChi
+TennisSwing
+ThrowDiscus
+TrampolineJumping
+Typing
+UnevenBars
+VolleyballSpiking
+WalkingWithDog
+WallPushups
+WritingOnBoard
+YoYo
diff --git a/torchvision/prototype/datasets/_builtin/ucf101.py b/torchvision/prototype/datasets/_builtin/ucf101.py
@@ -0,0 +1,102 @@
+import csv
+import io
+import pathlib
+from typing import Any, Callable, Dict, List, Optional, Tuple, cast
+
+import torch
+from torch.utils.data import IterDataPipe
+from torch.utils.data.datapipes.iter import Filter, Mapper
+from torchdata.datapipes.iter import CSVParser, IterKeyZipper
+from torchvision.prototype.datasets.utils import (
+    Dataset,
+    DatasetConfig,
+    DatasetInfo,
+    HttpResource,
+    OnlineResource,
+    DatasetType,
+)
+from torchvision.prototype.datasets.utils._internal import (
+    path_accessor,
+    path_comparator,
+    hint_sharding,
+    hint_shuffling,
+)
+from torchvision.prototype.features import Label
+
+csv.register_dialect("ucf101", delimiter=" ")
+
+
+class UCF101(Dataset):
+    """
+    `UCF101 <https://www.crcv.ucf.edu/data/UCF101.php>`_ dataset.
+
+    UCF101 is an action recognition video dataset, containing 101 classes
+    of various human actions.
+    """
+
+    def _make_info(self) -> DatasetInfo:
+        """Declare a video dataset with ``split`` and ``fold`` options that depends on ``rarfile``."""
+        return DatasetInfo(
+            "ucf101",
+            type=DatasetType.VIDEO,
+            dependencies=("rarfile",),
+            valid_options=dict(split=("train", "test"), fold=("1", "2", "3")),
+            homepage="https://www.crcv.ucf.edu/data/UCF101.php",
+        )
+
+    def resources(self, config: DatasetConfig) -> List[OnlineResource]:
+        """Return the split lists (zip), then the videos (rar) - the order ``_make_datapipe`` unpacks."""
+        return [
+            HttpResource(
+                "https://www.crcv.ucf.edu/data/UCF101/UCF101TrainTestSplits-RecognitionTask.zip",
+                sha256="5c0d1a53b8ed364a2ac830a73f405e51bece7d98ce1254fd19ed4a36b224bd27",
+            ),
+            HttpResource(
+                "https://www.crcv.ucf.edu/data/UCF101/UCF101.rar",
+                sha256="ca8dfadb4c891cb11316f94d52b6b0ac2a11994e67a0cae227180cd160bd8e55",
+                extract=True,
+            ),
+        ]
+
+    def _collate_and_decode(
+        self,
+        data: Tuple[Tuple[str, str], Tuple[str, io.IOBase]],
+        *,
+        decoder: Optional[Callable[[io.IOBase], Dict[str, Any]]] = None,
+    ) -> Dict[str, Any]:
+        """Build a sample dict from a ``(split list row, (video path, video buffer))`` pair."""
+        split_data, (path, buffer) = data
+        # The row starts with "category/video-file". Take the class from that folder name instead of the
+        # trailing class id: the id is 1-based (off by one as an index) and the "test" lists carry none.
+        category = pathlib.PurePosixPath(split_data[0]).parent.name
+        return dict(
+            label=Label(self.categories.index(category), category=category),
+            path=path,
+            video=decoder(buffer) if decoder else buffer,
+        )
+
+    def _make_datapipe(
+        self,
+        resource_dps: List[IterDataPipe],
+        *,
+        config: DatasetConfig,
+        decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
+    ) -> IterDataPipe[Dict[str, Any]]:
+        """Build the sample datapipe by matching the rows of the selected split list with the videos."""
+        splits_dp, videos_dp = resource_dps
+        # Only the list of the requested split and fold is parsed, e.g. "trainlist01.txt".
+        splits_dp = Filter(splits_dp, path_comparator("name", f"{config.split}list0{config.fold}.txt"))
+        splits_dp = CSVParser(splits_dp, dialect="ucf101")
+        splits_dp = hint_sharding(splits_dp)
+        splits_dp = hint_shuffling(splits_dp)
+        # Rows and videos are joined on the key produced by path_accessor("name").
+        dp = IterKeyZipper(splits_dp, videos_dp, path_accessor("name"))
+        return Mapper(dp, self._collate_and_decode, fn_kwargs=dict(decoder=decoder))
+
+    def _generate_categories(self, root: pathlib.Path) -> Tuple[str, ...]:
+        """Collect the second column of ``classInd.txt`` from the split archive as the category names."""
+        dp = self.resources(self.default_config)[0].load(pathlib.Path(root) / self.name)
+        dp = Filter(dp, path_comparator("name", "classInd.txt"))
+        dp = CSVParser(dp, dialect="ucf101")
+        _, categories = zip(*dp)
+        return cast(Tuple[str, ...], categories)
diff --git a/torchvision/prototype/datasets/utils/_dataset.py b/torchvision/prototype/datasets/utils/_dataset.py
@@ -21,6 +21,7 @@
 class DatasetType(enum.Enum):
     RAW = enum.auto()
     IMAGE = enum.auto()
+    VIDEO = enum.auto()
 
 
 class DatasetConfig(FrozenBunch):

diff --git a/torchvision/prototype/datasets/utils/_resource.py b/torchvision/prototype/datasets/utils/_resource.py
@@ -95,14 +95,21 @@ def load(
         # Instead of the raw file, there might also be files with fewer suffixes after decompression or directories
         # with no suffixes at all. Thus, we look for all paths that share the same name without suffixes as the raw
         # file.
-        path_candidates = {file for file in path.parent.glob(path.name.replace("".join(path.suffixes), "") + "*")}
+        stem = path.name.replace("".join(path.suffixes), "")
+        path_candidates = {file for file in path.parent.glob(stem + ".*")}
+        folder_candidate = path.parent / stem
+        if folder_candidate.exists() and folder_candidate.is_dir():
+            path_candidates.add(folder_candidate)
         # If we don't find anything, we try to download the raw file.
         if not path_candidates:
             path_candidates = {self.download(root, skip_integrity_check=skip_integrity_check)}
         # If the only thing we find is the raw file, we use it and optionally perform some preprocessing steps.
         if path_candidates == {path}:
             if self._preprocess:
                 path = self._preprocess(path)
+        # If we only have one candidate, we use it.
+        elif len(path_candidates) == 1:
+            path = path_candidates.pop()
         # Otherwise we use the path with the fewest suffixes. This gives us the extracted > decompressed > raw priority
         # that we want.
         else: