Skip to content

Commit

Permalink
Merge pull request isaaccorley#33 from isaaccorley/datasets-ucm
Browse files Browse the repository at this point in the history
UC Merced Dataset
  • Loading branch information
isaaccorley authored Sep 2, 2021
2 parents 49bc6cf + 14bee43 commit 89d3416
Show file tree
Hide file tree
Showing 6 changed files with 89 additions and 3 deletions.
35 changes: 34 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ pip install 'git+https://github.com/isaaccorley/torchrs.git#egg=torch-rs[train]'
* [Dubai - Semantic Segmentation](https://github.com/isaaccorley/torchrs#dubai-segmentation)
* [GID-15 - Semantic Segmentation](https://github.com/isaaccorley/torchrs#gid-15)
* [TiSeLaC - Time-Series Land Cover Classification](https://github.com/isaaccorley/torchrs#tiselac)
* [UC Merced - Land Use Classification](https://github.com/isaaccorley/torchrs#uc-merced-ucm)

### PROBA-V Super Resolution

Expand Down Expand Up @@ -825,7 +826,7 @@ dataset.classes

The TiSeLaC dataset from the [Time Series Land Cover Classification Challenge](https://sites.google.com/site/dinoienco/tiselac-time-series-land-cover-classification-challenge) is a time series land cover classification dataset consisting of 23 2866x2633 medium resolution (30m) multispectral 10 band (7 reflectance + NDVI/NDWI/Brightness Index) images taken by the [USGS Landsat 8 satellite](https://www.usgs.gov/core-science-systems/nli/landsat/landsat-8). The imagery was captured over Reunion Island in 2014 and contains 9 land cover classes derived from the [Corine Land Cover (CLC) map](https://land.copernicus.eu/pan-european/corine-land-cover). Note that the dataset is formatted for pixelwise time-series classification where each time series is of the form `(t, b)` where `t=23 samples` and `b=10 bands`. This dataset is very easy with the top score currently standing at `0.9929` F1 Score.

The dataset can be downloaded (.08GB) using `scripts/download_tiselac.sh` and instantiated below:
The dataset can be downloaded (0.08GB) using `scripts/download_tiselac.sh` and instantiated below:

```python
from torchrs.transforms import Compose, ToTensor
Expand All @@ -850,7 +851,39 @@ dataset.classes
['Urban Areas', 'Other built-up surfaces', 'Forests', 'Sparse Vegetation', 'Rocks and bare soil',
'Grassland', 'Sugarcane crops', 'Other crops', 'Water']
"""
```

### UC Merced (UCM)

<img src="./assets/ucm_captions.png" width="500px"></img>

The [UC Merced (UCM)](http://weegee.vision.ucmerced.edu/datasets/landuse.html) dataset, proposed in ["Bag-Of-Visual-Words and Spatial Extensions for Land-Use Classification", Yang et al.](https://faculty.ucmerced.edu/snewsam/papers/Yang_ACMGIS10_BagOfVisualWords.pdf), is a land use classification dataset of 2.1k 256x256 1ft resolution RGB images of urban locations around the U.S. extracted from the [USGS National Map Urban Area Imagery collection](https://www.usgs.gov/core-science-systems/national-geospatial-program/national-map) with 21 land use classes (100 images per class).

The dataset can be downloaded (0.42GB) using `scripts/download_ucm.sh` and instantiated below:

```python
import torchvision.transforms as T
from torchrs.datasets import UCM

transform = T.Compose([T.ToTensor()])

dataset = UCM(
root="path/to/dataset/",
transform=transform
)

x, y = dataset[0]
"""
x: (3, 256, 256)
y: int
"""

dataset.classes
"""
['agricultural', 'airplane', 'baseballdiamond', 'beach', 'buildings', 'chaparral', 'denseresidential',
'forest', 'freeway', 'golfcourse', 'harbor', 'intersection', 'mediumresidential', 'mobilehomepark',
'overpass', 'parkinglot', 'river', 'runway', 'sparseresidential', 'storagetanks', 'tenniscourt']
"""
```

## Models
Expand Down
4 changes: 4 additions & 0 deletions scripts/download_ucm.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Download the UC Merced Land Use archive and extract it into .data/.
ucm_url="http://weegee.vision.ucmerced.edu/datasets/UCMerced_LandUse.zip"
ucm_archive="UCMerced_LandUse.zip"
ucm_dest=".data/"

# Create the destination directory if it does not exist yet.
mkdir -p "$ucm_dest"

# Fetch the archive into the current working directory.
wget "$ucm_url" -O "$ucm_archive"

# Unpack into the destination, then discard the archive.
unzip "$ucm_archive" -d "$ucm_dest"
rm "$ucm_archive"
3 changes: 2 additions & 1 deletion torchrs/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,13 @@
from .aid import AID
from .dubai_segmentation import DubaiSegmentation
from .hkh_glacier import HKHGlacierMapping
from .ucm import UCM


__all__ = [
"PROBAV", "ETCI2021", "RSVQALR", "RSVQAxBEN", "EuroSATRGB", "EuroSATMS",
"RESISC45", "RSICD", "OSCD", "S2Looking", "LEVIRCDPlus", "FAIR1M",
"SydneyCaptions", "UCMCaptions", "S2MTCP", "ADVANCE", "SAT4", "SAT6",
"HRSCD", "InriaAIL", "Tiselac", "GID15", "ZueriCrop", "AID", "DubaiSegmentation",
"HKHGlacierMapping"
"HKHGlacierMapping", "UCM"
]
21 changes: 21 additions & 0 deletions torchrs/datasets/ucm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import os

import torchvision.transforms as T
from torchvision.datasets import ImageFolder


class UCM(ImageFolder):
    """UC Merced (UCM) Land Use dataset.

    Introduced in 'Bag-Of-Visual-Words and Spatial Extensions for
    Land-Use Classification', Yang et al. (2010)
    https://faculty.ucmerced.edu/snewsam/papers/Yang_ACMGIS10_BagOfVisualWords.pdf

    The images are loaded through torchvision's ``ImageFolder`` from the
    ``Images`` subdirectory of ``root``.

    Args:
        root: dataset directory that contains the ``Images`` folder.
        transform: transform handed to ``ImageFolder``; defaults to
            ``T.Compose([T.ToTensor()])``.
    """

    def __init__(
        self,
        root: str = ".data/UCMerced_LandUse",
        transform: T.Compose = T.Compose([T.ToTensor()]),
    ):
        # ImageFolder is pointed at the "Images" subdirectory, not at root.
        image_dir = os.path.join(root, "Images")
        super().__init__(root=image_dir, transform=transform)
3 changes: 2 additions & 1 deletion torchrs/train/datamodules/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from .aid import AIDDataModule
from .dubai_segmentation import DubaiSegmentationDataModule
from .hkh_glacier import HKHGlacierMappingDataModule
from .ucm import UCMDataModule


__all__ = [
Expand All @@ -31,5 +32,5 @@
"FAIR1MDataModule", "SydneyCaptionsDataModule", "UCMCaptionsDataModule", "S2MTCPDataModule",
"ADVANCEDataModule", "SAT4DataModule", "SAT6DataModule", "HRSCDDataModule", "InriaAILDataModule",
"TiselacDataModule", "GID15DataModule", "ZueriCropDataModule", "AIDDataModule",
"DubaiSegmentationDataModule", "HKHGlacierMappingDataModule"
"DubaiSegmentationDataModule", "HKHGlacierMappingDataModule", "UCMDataModule"
]
26 changes: 26 additions & 0 deletions torchrs/train/datamodules/ucm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from typing import Optional

import torchvision.transforms as T

from torchrs.datasets.utils import dataset_split
from torchrs.train.datamodules import BaseDataModule
from torchrs.datasets import UCM


class UCMDataModule(BaseDataModule):
    """Data module that splits the UCM dataset into train/val/test subsets.

    Args:
        root: dataset directory forwarded to ``UCM``.
        transform: transform forwarded to ``UCM``; defaults to
            ``T.Compose([T.ToTensor()])``.
        *args: positional arguments forwarded to ``BaseDataModule``.
        **kwargs: keyword arguments forwarded to ``BaseDataModule``.
    """

    def __init__(
        self,
        root: str = ".data/UCMerced_LandUse",
        transform: T.Compose = T.Compose([T.ToTensor()]),
        *args, **kwargs
    ):
        super().__init__(*args, **kwargs)
        self.transform = transform
        self.root = root

    def setup(self, stage: Optional[str] = None):
        """Build the UCM dataset and store its train/val/test splits.

        ``stage`` is accepted but not used here.
        """
        # NOTE(review): val_split / test_split are not set in this class;
        # presumably BaseDataModule provides them -- confirm.
        full_dataset = UCM(root=self.root, transform=self.transform)
        splits = dataset_split(
            full_dataset,
            val_pct=self.val_split,
            test_pct=self.test_split,
        )
        self.train_dataset, self.val_dataset, self.test_dataset = splits

0 comments on commit 89d3416

Please sign in to comment.