Skip to content

Commit 7f7eab8

Browse files
committed
v0.5.1: adds several datasets from the WILDS benchmark
1 parent e78e9b0 commit 7f7eab8

File tree

13 files changed

+246
-2
lines changed

13 files changed

+246
-2
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ A drawback of Dassl is that it doesn't (yet? hmm) support distributed multi-GPU
2020
We don't provide detailed documentation for Dassl, unlike another [project](https://kaiyangzhou.github.io/deep-person-reid/) of ours. This is because Dassl is developed for research purposes and, as researchers, we think it's important to be able to read source code and we highly encourage you to do so---definitely not because we are lazy. :-)
2121

2222
## What's new
23+
- Jun 2022: Adds three datasets from the [WILDS](https://wilds.stanford.edu/) benchmark: IWildCam, FMoW and Camelyon17.
2324
- May 2022: A new domain generalization method [DDG](https://arxiv.org/abs/2205.13913) developed by [Zhishu Sun](https://github.com/siaimes) and to appear at IJCAI'22 is added to this repo. See [here](https://github.com/MetaVisionLab/DDG) for more details.
2425
- Mar 2022: A new domain generalization method [EFDM](https://arxiv.org/abs/2203.07740) developed by [Yabin Zhang (PolyU)](https://ybzh.github.io/) and to appear at CVPR'22 is added to this repo. See [here](https://github.com/KaiyangZhou/Dassl.pytorch/pull/36) for more details.
2526
- Feb 2022: In case you don't know, a class in the painting domain of DomainNet (the official splits) only has test images (no training images), which could affect performance. See section 4.a in our [paper](https://arxiv.org/abs/2003.07325) for more details.

configs/datasets/dg/camelyon17.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
INPUT:
2+
SIZE: (224, 224)
3+
TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"]
4+
5+
DATASET:
6+
NAME: "Camelyon17"

configs/datasets/dg/fmow.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
INPUT:
2+
SIZE: (224, 224)
3+
TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"]
4+
5+
DATASET:
6+
NAME: "FMoW"

configs/datasets/dg/iwildcam.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
INPUT:
2+
SIZE: (224, 224)
3+
TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"]
4+
5+
DATASET:
6+
NAME: "IWildCam"

dassl/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,6 @@
1313
}
1414
"""
1515

16-
__version__ = "0.5.0"
16+
__version__ = "0.5.1"
1717
__author__ = "Kaiyang Zhou"
1818
__homepage__ = "https://kaiyangzhou.github.io/"

dassl/data/datasets/base_dataset.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@ def __init__(self, train_x=None, train_u=None, val=None, test=None):
6060
self._train_u = train_u # unlabeled training data (optional)
6161
self._val = val # validation data (optional)
6262
self._test = test # test data
63-
6463
self._num_classes = self.get_num_classes(train_x)
6564
self._lab2cname, self._classnames = self.get_lab2cname(train_x)
6665

dassl/data/datasets/dg/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from .pacs import PACS
22
from .vlcs import VLCS
3+
from .wilds import *
34
from .cifar_c import CIFAR10C, CIFAR100C
45
from .digits_dg import DigitsDG
56
from .digit_single import DigitSingle
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from .fmow import FMoW
2+
from .iwildcam import IWildCam
3+
from .camelyon17 import Camelyon17
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
from dassl.data.datasets import DATASET_REGISTRY
2+
3+
from .wilds_base import WILDSBase
4+
5+
6+
@DATASET_REGISTRY.register()
class Camelyon17(WILDSBase):
    """Tumor tissue recognition.

    2 classes (whether a given region of tissue contains tumor tissue).

    Reference:
        - Bandi et al. "From detection of individual metastases to
        classification of lymph node status at the patient level: the
        CAMELYON17 challenge." TMI 2021.
        - Koh et al. "Wilds: A benchmark of in-the-wild distribution
        shifts." ICML 2021.
    """

    # Name of the dataset folder. How it is resolved into a full path is
    # decided by WILDSBase, which is not visible from this file.
    dataset_dir = "camelyon17_v1.0"

    def __init__(self, cfg):
        """Forward ``cfg`` unchanged to the ``WILDSBase`` constructor."""
        super().__init__(cfg)

    def load_classnames(self):
        """Return the mapping from integer label to class name."""
        return {0: "healthy tissue", 1: "tumor tissue"}

dassl/data/datasets/dg/wilds/fmow.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import os.path as osp
2+
3+
from dassl.data.datasets import DATASET_REGISTRY
4+
5+
from .wilds_base import WILDSBase
6+
7+
# Class names of the FMoW dataset (62 entries). The position of a name in
# this list is used as its integer label by ``FMoW.load_classnames``, so the
# order must not be changed.
CATEGORIES = [
    "airport",
    "airport_hangar",
    "airport_terminal",
    "amusement_park",
    "aquaculture",
    "archaeological_site",
    "barn",
    "border_checkpoint",
    "burial_site",
    "car_dealership",
    "construction_site",
    "crop_field",
    "dam",
    "debris_or_rubble",
    "educational_institution",
    "electric_substation",
    "factory_or_powerplant",
    "fire_station",
    "flooded_road",
    "fountain",
    "gas_station",
    "golf_course",
    "ground_transportation_station",
    "helipad",
    "hospital",
    "impoverished_settlement",
    "interchange",
    "lake_or_pond",
    "lighthouse",
    "military_facility",
    "multi-unit_residential",
    "nuclear_powerplant",
    "office_building",
    "oil_or_gas_facility",
    "park",
    "parking_lot_or_garage",
    "place_of_worship",
    "police_station",
    "port",
    "prison",
    "race_track",
    "railway_bridge",
    "recreational_facility",
    "road_bridge",
    "runway",
    "shipyard",
    "shopping_mall",
    "single-unit_residential",
    "smokestack",
    "solar_farm",
    "space_facility",
    "stadium",
    "storage_tank",
    "surface_mine",
    "swimming_pool",
    "toll_booth",
    "tower",
    "tunnel_opening",
    "waste_disposal",
    "water_treatment_facility",
    "wind_farm",
    "zoo",
]
25+
26+
27+
@DATASET_REGISTRY.register()
class FMoW(WILDSBase):
    """Satellite imagery classification.

    62 classes (building or land use categories).

    Reference:
        - Christie et al. "Functional Map of the World." CVPR 2018.
        - Koh et al. "Wilds: A benchmark of in-the-wild distribution
        shifts." ICML 2021.
    """

    # Name of the dataset folder. How it is resolved into a full path is
    # decided by WILDSBase, which is not visible from this file.
    dataset_dir = "fmow_v1.1"

    def __init__(self, cfg):
        """Forward ``cfg`` unchanged to the ``WILDSBase`` constructor."""
        super().__init__(cfg)

    def get_image_path(self, dataset, idx):
        """Return the path of the image at position ``idx`` of ``dataset``.

        ``idx`` is first translated through ``dataset.full_idxs``; the
        translated index is what appears in the image file name.
        NOTE(review): presumably ``full_idxs`` maps a split-local index to
        the index in the full dataset -- confirm against the WILDS API.
        """
        full_idx = dataset.full_idxs[idx]
        return osp.join(self.dataset_dir, "images", f"rgb_img_{full_idx}.png")

    def get_domain(self, dataset, idx):
        """Return an integer domain label combining region and year.

        The first two entries of ``dataset.metadata_array[idx]`` are read
        as the region id and the year id respectively.
        """
        # number of regions: 5 or 6
        # number of years: 16
        num_years = 16
        meta = dataset.metadata_array[idx]
        region_id = int(meta[0])
        year_id = int(meta[1])
        # Distinct (region, year) pairs get distinct labels as long as
        # year_id stays below num_years.
        return region_id * num_years + year_id

    def load_classnames(self):
        """Return the mapping from integer label to class name."""
        return dict(enumerate(CATEGORIES))

0 commit comments

Comments
 (0)