Skip to content

Datasets 1 20 #103

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion docs/datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,24 @@ The Torchhd library provides many popular built-in datasets to work with.
CyclePowerPlant
Abalone
Adult

AcuteInflammation
AcuteNephritis
Annealing
Arrhythmia
AudiologyStd
BalanceScale
Balloons
Bank
Blood
BreastCancer
BreastCancerWisc
BreastCancerWiscDiag
BreastCancerWiscProg
BreastTissue
Car
Cardiotocography3Clases
Cardiotocography10Clases
ChessKrvk

Base classes
------------------------
Expand Down
1 change: 1 addition & 0 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ torch
torchvision
pandas
requests
tqdm
numpy
sphinx
sphinx-rtd-theme
36 changes: 36 additions & 0 deletions torchhd/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,24 @@
from torchhd.datasets.dataset import DatasetTrainTest
from torchhd.datasets.abalone import Abalone
from torchhd.datasets.adult import Adult
from torchhd.datasets.acute_inflammation import AcuteInflammation
from torchhd.datasets.acute_nephritis import AcuteNephritis
from torchhd.datasets.annealing import Annealing
from torchhd.datasets.arrhythmia import Arrhythmia
from torchhd.datasets.audiology_std import AudiologyStd
from torchhd.datasets.balance_scale import BalanceScale
from torchhd.datasets.balloons import Balloons
from torchhd.datasets.bank import Bank
from torchhd.datasets.blood import Blood
from torchhd.datasets.breast_cancer import BreastCancer
from torchhd.datasets.breast_cancer_wisc import BreastCancerWisc
from torchhd.datasets.breast_cancer_wisc_diag import BreastCancerWiscDiag
from torchhd.datasets.breast_cancer_wisc_prog import BreastCancerWiscProg
from torchhd.datasets.breast_tissue import BreastTissue
from torchhd.datasets.car import Car
from torchhd.datasets.cardiotocography_3clases import Cardiotocography3Clases
from torchhd.datasets.cardiotocography_10clases import Cardiotocography10Clases
from torchhd.datasets.chess_krvk import ChessKrvk


__all__ = [
Expand All @@ -27,4 +45,22 @@
"DatasetTrainTest",
"Abalone",
"Adult",
"AcuteInflammation",
"AcuteNephritis",
"Annealing",
"Arrhythmia",
"AudiologyStd",
"BalanceScale",
"Balloons",
"Bank",
"Blood",
"BreastCancer",
"BreastCancerWisc",
"BreastCancerWiscDiag",
"BreastCancerWiscProg",
"BreastTissue",
"Car",
"Cardiotocography3Clases",
"Cardiotocography10Clases",
"ChessKrvk",
]
31 changes: 31 additions & 0 deletions torchhd/datasets/acute_inflammation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from typing import List
from torchhd.datasets import DatasetFourFold


class AcuteInflammation(DatasetFourFold):
    """`Acute Inflammation of urinary bladder <https://archive.ics.uci.edu/ml/datasets/Acute+Inflammations>`_ dataset.

    Args:
        root (string): Root directory containing the files of the dataset.
        train (bool, optional): If True, the training (sub)set is read from the file
            storing the training data, as further determined by ``fold`` and
            ``hyper_search``. If False, a subset of the training data is returned:
            the one used for hyperparameter search when ``hyper_search = True``, or
            the one specified in ``conxuntos_kfold.dat`` when ``hyper_search = False``
            and the fold number is correct. Otherwise an error is issued.
        fold (int, optional): Fold number to use. The default value of -1 returns all
            the training data from the corresponding file. Values between 0 and 3
            select a fold in ``conxuntos_kfold.dat``, in which even rows (zero
            indexing) hold the indices of train subsets and odd rows hold the indices
            of test subsets. Relevant only if ``hyper_search`` is False and
            ``0 <= fold <= 3``.
        hyper_search (bool, optional): If True, the dataset is created from the
            indices in ``conxuntos.dat``, the split used for hyperparameter search.
            Its first row holds the train indices (used if ``train = True``) and its
            second row holds the test indices (used if ``train = False``).
        transform (callable, optional): A function/transform that takes in a
            torch.FloatTensor and returns a transformed version.
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If True, downloads the dataset from the internet
            and puts it in the root directory. If the dataset is already downloaded,
            it is not downloaded again.
    """

    name = "acute-inflammation"
    classes: List[str] = ["yes", "no"]
31 changes: 31 additions & 0 deletions torchhd/datasets/acute_nephritis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from typing import List
from torchhd.datasets import DatasetFourFold


class AcuteNephritis(DatasetFourFold):
    """`Acute Nephritis of renal pelvis origin <https://archive.ics.uci.edu/ml/datasets/Acute+Inflammations>`_ dataset.

    Args:
        root (string): Root directory containing the files of the dataset.
        train (bool, optional): If True, the training (sub)set is read from the file
            storing the training data, as further determined by ``fold`` and
            ``hyper_search``. If False, a subset of the training data is returned:
            the one used for hyperparameter search when ``hyper_search = True``, or
            the one specified in ``conxuntos_kfold.dat`` when ``hyper_search = False``
            and the fold number is correct. Otherwise an error is issued.
        fold (int, optional): Fold number to use. The default value of -1 returns all
            the training data from the corresponding file. Values between 0 and 3
            select a fold in ``conxuntos_kfold.dat``, in which even rows (zero
            indexing) hold the indices of train subsets and odd rows hold the indices
            of test subsets. Relevant only if ``hyper_search`` is False and
            ``0 <= fold <= 3``.
        hyper_search (bool, optional): If True, the dataset is created from the
            indices in ``conxuntos.dat``, the split used for hyperparameter search.
            Its first row holds the train indices (used if ``train = True``) and its
            second row holds the test indices (used if ``train = False``).
        transform (callable, optional): A function/transform that takes in a
            torch.FloatTensor and returns a transformed version.
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If True, downloads the dataset from the internet
            and puts it in the root directory. If the dataset is already downloaded,
            it is not downloaded again.
    """

    name = "acute-nephritis"
    classes: List[str] = ["yes", "no"]
30 changes: 30 additions & 0 deletions torchhd/datasets/annealing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from typing import List
from torchhd.datasets import DatasetTrainTest


class Annealing(DatasetTrainTest):
    """`Annealing <https://archive.ics.uci.edu/ml/datasets/Annealing>`_ dataset.

    Args:
        root (string): Root directory containing the files of the dataset.
        train (bool, optional): If True, the training (sub)set is read from the file
            storing the training data, as further determined by ``hyper_search``.
            If False, a subset of the training data is returned when hyperparameter
            search is performed (``hyper_search = True``), and the test set is
            returned when it is not (``hyper_search = False``).
        hyper_search (bool, optional): If True, the dataset is created from the
            indices in ``conxuntos.dat``, the split used for hyperparameter search.
            Its first row holds the train indices (used if ``train = True``) and its
            second row holds the test indices (used if ``train = False``).
        transform (callable, optional): A function/transform that takes in a
            torch.FloatTensor and returns a transformed version.
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If True, downloads the dataset from the internet
            and puts it in the root directory. If the dataset is already downloaded,
            it is not downloaded again.
    """

    name = "annealing"
    classes: List[str] = ["1", "2", "3", "4", "5"]
42 changes: 42 additions & 0 deletions torchhd/datasets/arrhythmia.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from typing import List
from torchhd.datasets import DatasetFourFold


class Arrhythmia(DatasetFourFold):
    """`Arrhythmia <https://archive.ics.uci.edu/ml/datasets/arrhythmia>`_ dataset.

    Args:
        root (string): Root directory containing the files of the dataset.
        train (bool, optional): If True, the training (sub)set is read from the file
            storing the training data, as further determined by ``fold`` and
            ``hyper_search``. If False, a subset of the training data is returned:
            the one used for hyperparameter search when ``hyper_search = True``, or
            the one specified in ``conxuntos_kfold.dat`` when ``hyper_search = False``
            and the fold number is correct. Otherwise an error is issued.
        fold (int, optional): Fold number to use. The default value of -1 returns all
            the training data from the corresponding file. Values between 0 and 3
            select a fold in ``conxuntos_kfold.dat``, in which even rows (zero
            indexing) hold the indices of train subsets and odd rows hold the indices
            of test subsets. Relevant only if ``hyper_search`` is False and
            ``0 <= fold <= 3``.
        hyper_search (bool, optional): If True, the dataset is created from the
            indices in ``conxuntos.dat``, the split used for hyperparameter search.
            Its first row holds the train indices (used if ``train = True``) and its
            second row holds the test indices (used if ``train = False``).
        transform (callable, optional): A function/transform that takes in a
            torch.FloatTensor and returns a transformed version.
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If True, downloads the dataset from the internet
            and puts it in the root directory. If the dataset is already downloaded,
            it is not downloaded again.
    """

    name = "arrhythmia"
    # The label list jumps from "10" to "14": it has no entries for 11, 12 or 13.
    classes: List[str] = [
        "1 - normal",
        "2",
        "3",
        "4",
        "5",
        "6",
        "7",
        "8",
        "9",
        "10",
        "14",
        "15",
        "16 - unclassified",
    ]
43 changes: 43 additions & 0 deletions torchhd/datasets/audiology_std.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from typing import List
from torchhd.datasets import DatasetTrainTest


class AudiologyStd(DatasetTrainTest):
    """`Audiology (Standardized) <https://archive.ics.uci.edu/ml/datasets/Audiology+%28Standardized%29>`_ dataset.

    Args:
        root (string): Root directory containing the files of the dataset.
        train (bool, optional): If True, the training (sub)set is read from the file
            storing the training data, as further determined by ``hyper_search``.
            If False, a subset of the training data is returned when hyperparameter
            search is performed (``hyper_search = True``), and the test set is
            returned when it is not (``hyper_search = False``).
        hyper_search (bool, optional): If True, the dataset is created from the
            indices in ``conxuntos.dat``, the split used for hyperparameter search.
            Its first row holds the train indices (used if ``train = True``) and its
            second row holds the test indices (used if ``train = False``).
        transform (callable, optional): A function/transform that takes in a
            torch.FloatTensor and returns a transformed version.
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If True, downloads the dataset from the internet
            and puts it in the root directory. If the dataset is already downloaded,
            it is not downloaded again.
    """

    name = "audiology-std"
    # One label per line; the entries are listed in alphabetical order.
    classes: List[str] = [
        "cochlear_age",
        "cochlear_age_and_noise",
        "cochlear_noise_and_heredity",
        "cochlear_poss_noise",
        "cochlear_unknown",
        "conductive_discontinuity",
        "conductive_fixation",
        "mixed_cochlear_age_otitis_media",
        "mixed_cochlear_age_s_om",
        "mixed_cochlear_unk_discontinuity",
        "mixed_cochlear_unk_fixation",
        "mixed_cochlear_unk_ser_om",
        "mixed_poss_noise_om",
        "normal_ear",
        "otitis_media",
        "possible_brainstem_disorder",
        "possible_menieres",
        "retrocochlear_unknown",
    ]
32 changes: 32 additions & 0 deletions torchhd/datasets/balance_scale.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from typing import List
from torchhd.datasets import DatasetFourFold


class BalanceScale(DatasetFourFold):
    """`Balance Scale <https://archive.ics.uci.edu/ml/datasets/balance+scale>`_ dataset.

    Args:
        root (string): Root directory containing the files of the dataset.
        train (bool, optional): If True, the training (sub)set is read from the file
            storing the training data, as further determined by ``fold`` and
            ``hyper_search``. If False, a subset of the training data is returned:
            the one used for hyperparameter search when ``hyper_search = True``, or
            the one specified in ``conxuntos_kfold.dat`` when ``hyper_search = False``
            and the fold number is correct. Otherwise an error is issued.
        fold (int, optional): Fold number to use. The default value of -1 returns all
            the training data from the corresponding file. Values between 0 and 3
            select a fold in ``conxuntos_kfold.dat``, in which even rows (zero
            indexing) hold the indices of train subsets and odd rows hold the indices
            of test subsets. Relevant only if ``hyper_search`` is False and
            ``0 <= fold <= 3``.
        hyper_search (bool, optional): If True, the dataset is created from the
            indices in ``conxuntos.dat``, the split used for hyperparameter search.
            Its first row holds the train indices (used if ``train = True``) and its
            second row holds the test indices (used if ``train = False``).
        transform (callable, optional): A function/transform that takes in a
            torch.FloatTensor and returns a transformed version.
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If True, downloads the dataset from the internet
            and puts it in the root directory. If the dataset is already downloaded,
            it is not downloaded again.
    """

    name = "balance-scale"
    classes: List[str] = ["B", "L", "R"]
31 changes: 31 additions & 0 deletions torchhd/datasets/balloons.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from typing import List
from torchhd.datasets import DatasetFourFold


class Balloons(DatasetFourFold):
    """`Balloons <https://archive.ics.uci.edu/ml/datasets/balloons>`_ dataset.

    Args:
        root (string): Root directory containing the files of the dataset.
        train (bool, optional): If True, the training (sub)set is read from the file
            storing the training data, as further determined by ``fold`` and
            ``hyper_search``. If False, a subset of the training data is returned:
            the one used for hyperparameter search when ``hyper_search = True``, or
            the one specified in ``conxuntos_kfold.dat`` when ``hyper_search = False``
            and the fold number is correct. Otherwise an error is issued.
        fold (int, optional): Fold number to use. The default value of -1 returns all
            the training data from the corresponding file. Values between 0 and 3
            select a fold in ``conxuntos_kfold.dat``, in which even rows (zero
            indexing) hold the indices of train subsets and odd rows hold the indices
            of test subsets. Relevant only if ``hyper_search`` is False and
            ``0 <= fold <= 3``.
        hyper_search (bool, optional): If True, the dataset is created from the
            indices in ``conxuntos.dat``, the split used for hyperparameter search.
            Its first row holds the train indices (used if ``train = True``) and its
            second row holds the test indices (used if ``train = False``).
        transform (callable, optional): A function/transform that takes in a
            torch.FloatTensor and returns a transformed version.
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If True, downloads the dataset from the internet
            and puts it in the root directory. If the dataset is already downloaded,
            it is not downloaded again.
    """

    name = "balloons"
    classes: List[str] = ["inflated - F", "inflated - T"]
31 changes: 31 additions & 0 deletions torchhd/datasets/bank.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from typing import List
from torchhd.datasets import DatasetFourFold


class Bank(DatasetFourFold):
    """`Bank Marketing <https://archive.ics.uci.edu/ml/datasets/Bank+Marketing>`_ dataset.

    Args:
        root (string): Root directory containing the files of the dataset.
        train (bool, optional): If True, the training (sub)set is read from the file
            storing the training data, as further determined by ``fold`` and
            ``hyper_search``. If False, a subset of the training data is returned:
            the one used for hyperparameter search when ``hyper_search = True``, or
            the one specified in ``conxuntos_kfold.dat`` when ``hyper_search = False``
            and the fold number is correct. Otherwise an error is issued.
        fold (int, optional): Fold number to use. The default value of -1 returns all
            the training data from the corresponding file. Values between 0 and 3
            select a fold in ``conxuntos_kfold.dat``, in which even rows (zero
            indexing) hold the indices of train subsets and odd rows hold the indices
            of test subsets. Relevant only if ``hyper_search`` is False and
            ``0 <= fold <= 3``.
        hyper_search (bool, optional): If True, the dataset is created from the
            indices in ``conxuntos.dat``, the split used for hyperparameter search.
            Its first row holds the train indices (used if ``train = True``) and its
            second row holds the test indices (used if ``train = False``).
        transform (callable, optional): A function/transform that takes in a
            torch.FloatTensor and returns a transformed version.
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If True, downloads the dataset from the internet
            and puts it in the root directory. If the dataset is already downloaded,
            it is not downloaded again.
    """

    name = "bank"
    classes: List[str] = ["no", "yes"]
Loading