Skip to content

Dwhc datasets 91 121 #107

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions docs/datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,37 @@ The Torchhd library provides many popular built-in datasets to work with.
PostOperative
PrimaryTumor
Ringnorm
Seeds
Semeion
Soybean
Spambase
Spect
Spectf
StatlogAustralianCredit
StatlogGermanCredit
StatlogHeart
StatlogImage
StatlogLandsat
StatlogShuttle
StatlogVehicle
SteelPlates
SyntheticControl
Teaching
Thyroid
TicTacToe
Titanic
Trains
Twonorm
VertebralColumn2Clases
VertebralColumn3Clases
WallFollowing
Waveform
WaveformNoise
Wine
WineQualityRed
WineQualityWhite
Yeast
Zoo

Base classes
------------------------
Expand Down
62 changes: 62 additions & 0 deletions torchhd/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,37 @@
from torchhd.datasets.post_operative import PostOperative
from torchhd.datasets.primary_tumor import PrimaryTumor
from torchhd.datasets.ringnorm import Ringnorm
from torchhd.datasets.seeds import Seeds
from torchhd.datasets.semeion import Semeion
from torchhd.datasets.soybean import Soybean
from torchhd.datasets.spambase import Spambase
from torchhd.datasets.spect import Spect
from torchhd.datasets.spectf import Spectf
from torchhd.datasets.statlog_australian_credit import StatlogAustralianCredit
from torchhd.datasets.statlog_german_credit import StatlogGermanCredit
from torchhd.datasets.statlog_heart import StatlogHeart
from torchhd.datasets.statlog_image import StatlogImage
from torchhd.datasets.statlog_landsat import StatlogLandsat
from torchhd.datasets.statlog_shuttle import StatlogShuttle
from torchhd.datasets.statlog_vehicle import StatlogVehicle
from torchhd.datasets.steel_plates import SteelPlates
from torchhd.datasets.synthetic_control import SyntheticControl
from torchhd.datasets.teaching import Teaching
from torchhd.datasets.thyroid import Thyroid
from torchhd.datasets.tic_tac_toe import TicTacToe
from torchhd.datasets.titanic import Titanic
from torchhd.datasets.trains import Trains
from torchhd.datasets.twonorm import Twonorm
from torchhd.datasets.vertebral_column_2clases import VertebralColumn2Clases
from torchhd.datasets.vertebral_column_3clases import VertebralColumn3Clases
from torchhd.datasets.wall_following import WallFollowing
from torchhd.datasets.waveform import Waveform
from torchhd.datasets.waveform_noise import WaveformNoise
from torchhd.datasets.wine import Wine
from torchhd.datasets.wine_quality_red import WineQualityRed
from torchhd.datasets.wine_quality_white import WineQualityWhite
from torchhd.datasets.yeast import Yeast
from torchhd.datasets.zoo import Zoo

__all__ = [
"BeijingAirQuality",
Expand Down Expand Up @@ -202,4 +233,35 @@
"PostOperative",
"PrimaryTumor",
"Ringnorm",
"Seeds",
"Semeion",
"Soybean",
"Spambase",
"Spect",
"Spectf",
"StatlogAustralianCredit",
"StatlogGermanCredit",
"StatlogHeart",
"StatlogImage",
"StatlogLandsat",
"StatlogShuttle",
"StatlogVehicle",
"SteelPlates",
"SyntheticControl",
"Teaching",
"Thyroid",
"TicTacToe",
"Titanic",
"Trains",
"Twonorm",
"VertebralColumn2Clases",
"VertebralColumn3Clases",
"WallFollowing",
"Waveform",
"WaveformNoise",
"Wine",
"WineQualityRed",
"WineQualityWhite",
"Yeast",
"Zoo",
]
32 changes: 32 additions & 0 deletions torchhd/datasets/seeds.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from typing import List
from torchhd.datasets import DatasetFourFold


class Seeds(DatasetFourFold):
    """`Seeds <https://archive.ics.uci.edu/ml/datasets/seeds>`_ dataset.

    Args:
        root (string): Root directory containing the files of the dataset.
        train (bool, optional): If True, returns the training (sub)set taken from the file storing training data,
            as further determined by the ``fold`` and ``hyper_search`` arguments.
            If False and ``hyper_search = True``, returns a subset of the training data used for the hyperparameter search.
            If False and ``hyper_search = False``, returns the subset of the training data specified in
            ``conxuntos_kfold.dat``, provided the fold number is correct; otherwise an error is issued.
        fold (int, optional): Which fold number to use. The default value of -1 returns all the training data from the corresponding file.
            Values between 0 and 3 select a fold in ``conxuntos_kfold.dat``. Relevant only if ``hyper_search`` is False and ``0 <= fold <= 3``.
            In ``conxuntos_kfold.dat``, even rows (zero indexing) hold the indices of train subsets and odd rows hold the indices of test subsets.
        hyper_search (bool, optional): If True, creates the dataset using the indices in ``conxuntos.dat``, the split used for hyperparameter search.
            Its first row holds the train indices (used if ``train = True``) and its second row holds the test indices (used if ``train = False``).
        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
            and returns a transformed version.
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If True, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    name = "seeds"
    # Label strings for the three classes.
    classes: List[str] = ["Kama", "Rosa", "Canadian"]
39 changes: 39 additions & 0 deletions torchhd/datasets/semeion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from typing import List
from torchhd.datasets import DatasetFourFold


class Semeion(DatasetFourFold):
    """`Semeion Handwritten Digit <https://archive.ics.uci.edu/ml/datasets/semeion+handwritten+digit>`_ dataset.

    Args:
        root (string): Root directory containing the files of the dataset.
        train (bool, optional): If True, returns the training (sub)set taken from the file storing training data,
            as further determined by the ``fold`` and ``hyper_search`` arguments.
            If False and ``hyper_search = True``, returns a subset of the training data used for the hyperparameter search.
            If False and ``hyper_search = False``, returns the subset of the training data specified in
            ``conxuntos_kfold.dat``, provided the fold number is correct; otherwise an error is issued.
        fold (int, optional): Which fold number to use. The default value of -1 returns all the training data from the corresponding file.
            Values between 0 and 3 select a fold in ``conxuntos_kfold.dat``. Relevant only if ``hyper_search`` is False and ``0 <= fold <= 3``.
            In ``conxuntos_kfold.dat``, even rows (zero indexing) hold the indices of train subsets and odd rows hold the indices of test subsets.
        hyper_search (bool, optional): If True, creates the dataset using the indices in ``conxuntos.dat``, the split used for hyperparameter search.
            Its first row holds the train indices (used if ``train = True``) and its second row holds the test indices (used if ``train = False``).
        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
            and returns a transformed version.
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If True, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    name = "semeion"
    # Digit labels "0" through "9", in ascending order.
    classes: List[str] = [str(digit) for digit in range(10)]
43 changes: 43 additions & 0 deletions torchhd/datasets/soybean.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from typing import List
from torchhd.datasets import DatasetTrainTest


class Soybean(DatasetTrainTest):
    """`Soybean (Large) <https://archive.ics.uci.edu/ml/datasets/Soybean+(Large)>`_ dataset.

    Args:
        root (string): Root directory containing the files of the dataset.
        train (bool, optional): If True, returns the training (sub)set taken from the file storing training data,
            as further determined by the ``hyper_search`` argument.
            If False and ``hyper_search = True``, returns a subset of the training data used for the hyperparameter search.
            If False and ``hyper_search = False``, returns the test set.
        hyper_search (bool, optional): If True, creates the dataset using the indices in ``conxuntos.dat``, the split used for hyperparameter search.
            Its first row holds the train indices (used if ``train = True``) and its second row holds the test indices (used if ``train = False``).
        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
            and returns a transformed version.
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If True, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    name = "soybean"
    # Disease label strings; the order of this list is preserved as given.
    classes: List[str] = [
        "diaporthe-stem-canker",
        "charcoal-rot",
        "rhizoctonia-root-rot",
        "phytophthora-rot",
        "brown-stem-rot",
        "powdery-mildew",
        "downy-mildew",
        "brown-spot",
        "bacterial-blight",
        "bacterial-pustule",
        "purple-seed-stain",
        "anthracnose",
        "phyllosticta-leaf-spot",
        "alternarialeaf-spot",
        "frog-eye-leaf-spot",
        "diaporthe-pod-&-stem-blight",
        "cyst-nematode",
        "herbicide-injury",
    ]
31 changes: 31 additions & 0 deletions torchhd/datasets/spambase.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from typing import List
from torchhd.datasets import DatasetFourFold


class Spambase(DatasetFourFold):
    """`Spambase <https://archive.ics.uci.edu/ml/datasets/spambase>`_ dataset.

    Args:
        root (string): Root directory containing the files of the dataset.
        train (bool, optional): If True, returns the training (sub)set taken from the file storing training data,
            as further determined by the ``fold`` and ``hyper_search`` arguments.
            If False and ``hyper_search = True``, returns a subset of the training data used for the hyperparameter search.
            If False and ``hyper_search = False``, returns the subset of the training data specified in
            ``conxuntos_kfold.dat``, provided the fold number is correct; otherwise an error is issued.
        fold (int, optional): Which fold number to use. The default value of -1 returns all the training data from the corresponding file.
            Values between 0 and 3 select a fold in ``conxuntos_kfold.dat``. Relevant only if ``hyper_search`` is False and ``0 <= fold <= 3``.
            In ``conxuntos_kfold.dat``, even rows (zero indexing) hold the indices of train subsets and odd rows hold the indices of test subsets.
        hyper_search (bool, optional): If True, creates the dataset using the indices in ``conxuntos.dat``, the split used for hyperparameter search.
            Its first row holds the train indices (used if ``train = True``) and its second row holds the test indices (used if ``train = False``).
        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
            and returns a transformed version.
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If True, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    name = "spambase"
    # Label strings for the two classes.
    classes: List[str] = ["non-spam", "spam"]
27 changes: 27 additions & 0 deletions torchhd/datasets/spect.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from typing import List
from torchhd.datasets import DatasetTrainTest


class Spect(DatasetTrainTest):
    """`SPECT Heart Data <https://archive.ics.uci.edu/ml/datasets/spect+heart>`_ dataset.

    Args:
        root (string): Root directory containing the files of the dataset.
        train (bool, optional): If True, returns the training (sub)set taken from the file storing training data,
            as further determined by the ``hyper_search`` argument.
            If False and ``hyper_search = True``, returns a subset of the training data used for the hyperparameter search.
            If False and ``hyper_search = False``, returns the test set.
        hyper_search (bool, optional): If True, creates the dataset using the indices in ``conxuntos.dat``, the split used for hyperparameter search.
            Its first row holds the train indices (used if ``train = True``) and its second row holds the test indices (used if ``train = False``).
        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
            and returns a transformed version.
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If True, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    name = "spect"
    # Label strings for the two classes.
    classes: List[str] = ["normal", "abnormal"]
27 changes: 27 additions & 0 deletions torchhd/datasets/spectf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from typing import List
from torchhd.datasets import DatasetTrainTest


class Spectf(DatasetTrainTest):
    """`SPECTF Heart Data <https://archive.ics.uci.edu/ml/datasets/SPECTF+Heart>`_ dataset.

    Args:
        root (string): Root directory containing the files of the dataset.
        train (bool, optional): If True, returns the training (sub)set taken from the file storing training data,
            as further determined by the ``hyper_search`` argument.
            If False and ``hyper_search = True``, returns a subset of the training data used for the hyperparameter search.
            If False and ``hyper_search = False``, returns the test set.
        hyper_search (bool, optional): If True, creates the dataset using the indices in ``conxuntos.dat``, the split used for hyperparameter search.
            Its first row holds the train indices (used if ``train = True``) and its second row holds the test indices (used if ``train = False``).
        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
            and returns a transformed version.
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If True, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    name = "spectf"
    # Label strings for the two classes.
    classes: List[str] = ["normal", "abnormal"]
31 changes: 31 additions & 0 deletions torchhd/datasets/statlog_australian_credit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from typing import List
from torchhd.datasets import DatasetFourFold


class StatlogAustralianCredit(DatasetFourFold):
    """`Statlog (Australian Credit Approval) <https://archive.ics.uci.edu/ml/datasets/statlog+(australian+credit+approval)>`_ dataset.

    Args:
        root (string): Root directory containing the files of the dataset.
        train (bool, optional): If True, returns the training (sub)set taken from the file storing training data,
            as further determined by the ``fold`` and ``hyper_search`` arguments.
            If False and ``hyper_search = True``, returns a subset of the training data used for the hyperparameter search.
            If False and ``hyper_search = False``, returns the subset of the training data specified in
            ``conxuntos_kfold.dat``, provided the fold number is correct; otherwise an error is issued.
        fold (int, optional): Which fold number to use. The default value of -1 returns all the training data from the corresponding file.
            Values between 0 and 3 select a fold in ``conxuntos_kfold.dat``. Relevant only if ``hyper_search`` is False and ``0 <= fold <= 3``.
            In ``conxuntos_kfold.dat``, even rows (zero indexing) hold the indices of train subsets and odd rows hold the indices of test subsets.
        hyper_search (bool, optional): If True, creates the dataset using the indices in ``conxuntos.dat``, the split used for hyperparameter search.
            Its first row holds the train indices (used if ``train = True``) and its second row holds the test indices (used if ``train = False``).
        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
            and returns a transformed version.
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If True, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    name = "statlog-australian-credit"
    # Label strings for the two classes.
    classes: List[str] = ["+", "-"]
31 changes: 31 additions & 0 deletions torchhd/datasets/statlog_german_credit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from typing import List
from torchhd.datasets import DatasetFourFold


class StatlogGermanCredit(DatasetFourFold):
    """`Statlog (German Credit Data) <https://archive.ics.uci.edu/ml/datasets/statlog+(german+credit+data)>`_ dataset.

    Args:
        root (string): Root directory containing the files of the dataset.
        train (bool, optional): If True, returns the training (sub)set taken from the file storing training data,
            as further determined by the ``fold`` and ``hyper_search`` arguments.
            If False and ``hyper_search = True``, returns a subset of the training data used for the hyperparameter search.
            If False and ``hyper_search = False``, returns the subset of the training data specified in
            ``conxuntos_kfold.dat``, provided the fold number is correct; otherwise an error is issued.
        fold (int, optional): Which fold number to use. The default value of -1 returns all the training data from the corresponding file.
            Values between 0 and 3 select a fold in ``conxuntos_kfold.dat``. Relevant only if ``hyper_search`` is False and ``0 <= fold <= 3``.
            In ``conxuntos_kfold.dat``, even rows (zero indexing) hold the indices of train subsets and odd rows hold the indices of test subsets.
        hyper_search (bool, optional): If True, creates the dataset using the indices in ``conxuntos.dat``, the split used for hyperparameter search.
            Its first row holds the train indices (used if ``train = True``) and its second row holds the test indices (used if ``train = False``).
        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
            and returns a transformed version.
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If True, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    name = "statlog-german-credit"
    # Label strings for the two classes.
    classes: List[str] = ["Good", "Bad"]
Loading