Skip to content

Add DWHC datasets 41-60 #105

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions docs/datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,27 @@ The Torchhd library provides many popular built-in datasets to work with.
HayesRoth
HeartCleveland
HeartHungarian
HeartSwitzerland
HeartVa
Hepatitis
HillValley
HorseColic
IlpdIndianLiver
ImageSegmentation
Ionosphere
Iris
LedDisplay
Lenses
Letter
Libras
LowResSpect
LungCancer
Lymphography
Magic
Mammographic
Miniboone
MolecBiolPromoter


Base classes
------------------------
Expand Down
40 changes: 40 additions & 0 deletions torchhd/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,26 @@
from torchhd.datasets.hayes_roth import HayesRoth
from torchhd.datasets.heart_cleveland import HeartCleveland
from torchhd.datasets.heart_hungarian import HeartHungarian
from torchhd.datasets.heart_switzerland import HeartSwitzerland
from torchhd.datasets.heart_va import HeartVa
from torchhd.datasets.hepatitis import Hepatitis
from torchhd.datasets.hill_valley import HillValley
from torchhd.datasets.horse_colic import HorseColic
from torchhd.datasets.ilpd_indian_liver import IlpdIndianLiver
from torchhd.datasets.image_segmentation import ImageSegmentation
from torchhd.datasets.ionosphere import Ionosphere
from torchhd.datasets.iris import Iris
from torchhd.datasets.led_display import LedDisplay
from torchhd.datasets.lenses import Lenses
from torchhd.datasets.letter import Letter
from torchhd.datasets.libras import Libras
from torchhd.datasets.low_res_spect import LowResSpect
from torchhd.datasets.lung_cancer import LungCancer
from torchhd.datasets.lymphography import Lymphography
from torchhd.datasets.magic import Magic
from torchhd.datasets.mammographic import Mammographic
from torchhd.datasets.miniboone import Miniboone
from torchhd.datasets.molec_biol_promoter import MolecBiolPromoter


__all__ = [
Expand Down Expand Up @@ -103,4 +123,24 @@
"HayesRoth",
"HeartCleveland",
"HeartHungarian",
"HeartSwitzerland",
"HeartVa",
"Hepatitis",
"HillValley",
"HorseColic",
"IlpdIndianLiver",
"ImageSegmentation",
"Ionosphere",
"Iris",
"LedDisplay",
"Lenses",
"Letter",
"Libras",
"LowResSpect",
"LungCancer",
"Lymphography",
"Magic",
"Mammographic",
"Miniboone",
"MolecBiolPromoter",
]
34 changes: 34 additions & 0 deletions torchhd/datasets/heart_switzerland.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from typing import List
from torchhd.datasets import DatasetFourFold


class HeartSwitzerland(DatasetFourFold):
    """`Heart Disease <https://archive.ics.uci.edu/ml/datasets/heart+disease>`_ dataset.

    Args:
        root (string): Directory in which the dataset files are stored.
        train (bool, optional): When True, the training (sub)set is taken from
            the file holding the training data, as further determined by
            ``fold`` and ``hyper_search``. When False, a subset of the training
            data is returned instead: the one used for hyperparameter search if
            ``hyper_search = True``, or the one given in ``conxuntos_kfold.dat``
            if ``hyper_search = False`` and the fold number is valid. Otherwise
            an error is issued.
        fold (int, optional): Number of the fold to use. The default, -1,
            yields all training data from the corresponding file. Values from
            0 to 3 pick a fold in ``conxuntos_kfold.dat`` and only matter when
            ``hyper_search`` is False. In that file, even rows (counting from
            zero) hold the indices of train subsets and odd rows those of test
            subsets.
        hyper_search (bool, optional): When True, the dataset is built from the
            indices in ``conxuntos.dat``, the split used for hyperparameter
            search. Its first row holds the train indices (used if
            ``train = True``) and its second row the test indices (used if
            ``train = False``).
        transform (callable, optional): Function/transform that receives a
            torch.FloatTensor and returns a transformed version of it.
        target_transform (callable, optional): Function/transform that receives
            the target and transforms it.
        download (bool, optional): When True, the dataset is downloaded from the
            internet into the root directory. A dataset that has already been
            downloaded is not downloaded again.
    """

    # NOTE(review): presumably the identifier the base class uses to locate
    # the dataset files -- confirm against DatasetFourFold.
    name = "heart-switzerland"
    # Class labels, in list order.
    classes: List[str] = ["0", "1", "2", "3", "4"]
34 changes: 34 additions & 0 deletions torchhd/datasets/heart_va.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from typing import List
from torchhd.datasets import DatasetFourFold


class HeartVa(DatasetFourFold):
    """`Heart Disease <https://archive.ics.uci.edu/ml/datasets/heart+disease>`_ dataset.

    Args:
        root (string): Directory in which the dataset files are stored.
        train (bool, optional): When True, the training (sub)set is taken from
            the file holding the training data, as further determined by
            ``fold`` and ``hyper_search``. When False, a subset of the training
            data is returned instead: the one used for hyperparameter search if
            ``hyper_search = True``, or the one given in ``conxuntos_kfold.dat``
            if ``hyper_search = False`` and the fold number is valid. Otherwise
            an error is issued.
        fold (int, optional): Number of the fold to use. The default, -1,
            yields all training data from the corresponding file. Values from
            0 to 3 pick a fold in ``conxuntos_kfold.dat`` and only matter when
            ``hyper_search`` is False. In that file, even rows (counting from
            zero) hold the indices of train subsets and odd rows those of test
            subsets.
        hyper_search (bool, optional): When True, the dataset is built from the
            indices in ``conxuntos.dat``, the split used for hyperparameter
            search. Its first row holds the train indices (used if
            ``train = True``) and its second row the test indices (used if
            ``train = False``).
        transform (callable, optional): Function/transform that receives a
            torch.FloatTensor and returns a transformed version of it.
        target_transform (callable, optional): Function/transform that receives
            the target and transforms it.
        download (bool, optional): When True, the dataset is downloaded from the
            internet into the root directory. A dataset that has already been
            downloaded is not downloaded again.
    """

    # NOTE(review): presumably the identifier the base class uses to locate
    # the dataset files -- confirm against DatasetFourFold.
    name = "heart-va"
    # Class labels, in list order.
    classes: List[str] = ["0", "1", "2", "3", "4"]
31 changes: 31 additions & 0 deletions torchhd/datasets/hepatitis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from typing import List
from torchhd.datasets import DatasetFourFold


class Hepatitis(DatasetFourFold):
    """`Hepatitis <https://archive.ics.uci.edu/ml/datasets/hepatitis>`_ dataset.

    Args:
        root (string): Directory in which the dataset files are stored.
        train (bool, optional): When True, the training (sub)set is taken from
            the file holding the training data, as further determined by
            ``fold`` and ``hyper_search``. When False, a subset of the training
            data is returned instead: the one used for hyperparameter search if
            ``hyper_search = True``, or the one given in ``conxuntos_kfold.dat``
            if ``hyper_search = False`` and the fold number is valid. Otherwise
            an error is issued.
        fold (int, optional): Number of the fold to use. The default, -1,
            yields all training data from the corresponding file. Values from
            0 to 3 pick a fold in ``conxuntos_kfold.dat`` and only matter when
            ``hyper_search`` is False. In that file, even rows (counting from
            zero) hold the indices of train subsets and odd rows those of test
            subsets.
        hyper_search (bool, optional): When True, the dataset is built from the
            indices in ``conxuntos.dat``, the split used for hyperparameter
            search. Its first row holds the train indices (used if
            ``train = True``) and its second row the test indices (used if
            ``train = False``).
        transform (callable, optional): Function/transform that receives a
            torch.FloatTensor and returns a transformed version of it.
        target_transform (callable, optional): Function/transform that receives
            the target and transforms it.
        download (bool, optional): When True, the dataset is downloaded from the
            internet into the root directory. A dataset that has already been
            downloaded is not downloaded again.
    """

    # NOTE(review): presumably the identifier the base class uses to locate
    # the dataset files -- confirm against DatasetFourFold.
    name = "hepatitis"
    # Class labels, in list order.
    classes: List[str] = ["die", "live"]
27 changes: 27 additions & 0 deletions torchhd/datasets/hill_valley.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from typing import List
from torchhd.datasets import DatasetTrainTest


class HillValley(DatasetTrainTest):
    """`Hill-Valley <https://archive.ics.uci.edu/ml/datasets/hill-valley>`_ dataset.

    Args:
        root (string): Directory in which the dataset files are stored.
        train (bool, optional): When True, the training (sub)set is taken from
            the file holding the training data, as further determined by
            ``hyper_search``. When False, a subset of the training data is
            returned if a hyperparameter search is performed
            (``hyper_search = True``), and the test set otherwise
            (``hyper_search = False``).
        hyper_search (bool, optional): When True, the dataset is built from the
            indices in ``conxuntos.dat``, the split used for hyperparameter
            search. Its first row holds the train indices (used if
            ``train = True``) and its second row the test indices (used if
            ``train = False``).
        transform (callable, optional): Function/transform that receives a
            torch.FloatTensor and returns a transformed version of it.
        target_transform (callable, optional): Function/transform that receives
            the target and transforms it.
        download (bool, optional): When True, the dataset is downloaded from the
            internet into the root directory. A dataset that has already been
            downloaded is not downloaded again.
    """

    # NOTE(review): presumably the identifier the base class uses to locate
    # the dataset files -- confirm against DatasetTrainTest.
    name = "hill-valley"
    # Class labels, in list order.
    classes: List[str] = ["valley", "hill"]
27 changes: 27 additions & 0 deletions torchhd/datasets/horse_colic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from typing import List
from torchhd.datasets import DatasetTrainTest


class HorseColic(DatasetTrainTest):
    """`Horse Colic <https://archive.ics.uci.edu/ml/datasets/Horse+Colic>`_ dataset.

    Args:
        root (string): Directory in which the dataset files are stored.
        train (bool, optional): When True, the training (sub)set is taken from
            the file holding the training data, as further determined by
            ``hyper_search``. When False, a subset of the training data is
            returned if a hyperparameter search is performed
            (``hyper_search = True``), and the test set otherwise
            (``hyper_search = False``).
        hyper_search (bool, optional): When True, the dataset is built from the
            indices in ``conxuntos.dat``, the split used for hyperparameter
            search. Its first row holds the train indices (used if
            ``train = True``) and its second row the test indices (used if
            ``train = False``).
        transform (callable, optional): Function/transform that receives a
            torch.FloatTensor and returns a transformed version of it.
        target_transform (callable, optional): Function/transform that receives
            the target and transforms it.
        download (bool, optional): When True, the dataset is downloaded from the
            internet into the root directory. A dataset that has already been
            downloaded is not downloaded again.
    """

    # NOTE(review): presumably the identifier the base class uses to locate
    # the dataset files -- confirm against DatasetTrainTest.
    name = "horse-colic"
    # Class labels, in list order.
    classes: List[str] = ["Yes, it had surgery", "It was treated without surgery"]
31 changes: 31 additions & 0 deletions torchhd/datasets/ilpd_indian_liver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from typing import List
from torchhd.datasets import DatasetFourFold


class IlpdIndianLiver(DatasetFourFold):
    """`ILPD (Indian Liver Patient Dataset) <https://archive.ics.uci.edu/ml/datasets/ILPD+(Indian+Liver+Patient+Dataset)>`_ dataset.

    Args:
        root (string): Directory in which the dataset files are stored.
        train (bool, optional): When True, the training (sub)set is taken from
            the file holding the training data, as further determined by
            ``fold`` and ``hyper_search``. When False, a subset of the training
            data is returned instead: the one used for hyperparameter search if
            ``hyper_search = True``, or the one given in ``conxuntos_kfold.dat``
            if ``hyper_search = False`` and the fold number is valid. Otherwise
            an error is issued.
        fold (int, optional): Number of the fold to use. The default, -1,
            yields all training data from the corresponding file. Values from
            0 to 3 pick a fold in ``conxuntos_kfold.dat`` and only matter when
            ``hyper_search`` is False. In that file, even rows (counting from
            zero) hold the indices of train subsets and odd rows those of test
            subsets.
        hyper_search (bool, optional): When True, the dataset is built from the
            indices in ``conxuntos.dat``, the split used for hyperparameter
            search. Its first row holds the train indices (used if
            ``train = True``) and its second row the test indices (used if
            ``train = False``).
        transform (callable, optional): Function/transform that receives a
            torch.FloatTensor and returns a transformed version of it.
        target_transform (callable, optional): Function/transform that receives
            the target and transforms it.
        download (bool, optional): When True, the dataset is downloaded from the
            internet into the root directory. A dataset that has already been
            downloaded is not downloaded again.
    """

    # NOTE(review): presumably the identifier the base class uses to locate
    # the dataset files -- confirm against DatasetFourFold.
    name = "ilpd-indian-liver"
    # Class labels, in list order.
    classes: List[str] = ["liver patient", "not liver patient"]
32 changes: 32 additions & 0 deletions torchhd/datasets/image_segmentation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from typing import List
from torchhd.datasets import DatasetTrainTest


class ImageSegmentation(DatasetTrainTest):
    """`Image Segmentation <https://archive.ics.uci.edu/ml/datasets/image+segmentation>`_ dataset.

    Args:
        root (string): Directory in which the dataset files are stored.
        train (bool, optional): When True, the training (sub)set is taken from
            the file holding the training data, as further determined by
            ``hyper_search``. When False, a subset of the training data is
            returned if a hyperparameter search is performed
            (``hyper_search = True``), and the test set otherwise
            (``hyper_search = False``).
        hyper_search (bool, optional): When True, the dataset is built from the
            indices in ``conxuntos.dat``, the split used for hyperparameter
            search. Its first row holds the train indices (used if
            ``train = True``) and its second row the test indices (used if
            ``train = False``).
        transform (callable, optional): Function/transform that receives a
            torch.FloatTensor and returns a transformed version of it.
        target_transform (callable, optional): Function/transform that receives
            the target and transforms it.
        download (bool, optional): When True, the dataset is downloaded from the
            internet into the root directory. A dataset that has already been
            downloaded is not downloaded again.
    """

    # NOTE(review): presumably the identifier the base class uses to locate
    # the dataset files -- confirm against DatasetTrainTest.
    name = "image-segmentation"
    # Class labels, in list order.
    classes: List[str] = [
        "brickface", "sky", "foliage", "cement", "window", "path", "grass"
    ]
31 changes: 31 additions & 0 deletions torchhd/datasets/ionosphere.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from typing import List
from torchhd.datasets import DatasetFourFold


class Ionosphere(DatasetFourFold):
    """`Ionosphere <https://archive.ics.uci.edu/ml/datasets/ionosphere>`_ dataset.

    Args:
        root (string): Directory in which the dataset files are stored.
        train (bool, optional): When True, the training (sub)set is taken from
            the file holding the training data, as further determined by
            ``fold`` and ``hyper_search``. When False, a subset of the training
            data is returned instead: the one used for hyperparameter search if
            ``hyper_search = True``, or the one given in ``conxuntos_kfold.dat``
            if ``hyper_search = False`` and the fold number is valid. Otherwise
            an error is issued.
        fold (int, optional): Number of the fold to use. The default, -1,
            yields all training data from the corresponding file. Values from
            0 to 3 pick a fold in ``conxuntos_kfold.dat`` and only matter when
            ``hyper_search`` is False. In that file, even rows (counting from
            zero) hold the indices of train subsets and odd rows those of test
            subsets.
        hyper_search (bool, optional): When True, the dataset is built from the
            indices in ``conxuntos.dat``, the split used for hyperparameter
            search. Its first row holds the train indices (used if
            ``train = True``) and its second row the test indices (used if
            ``train = False``).
        transform (callable, optional): Function/transform that receives a
            torch.FloatTensor and returns a transformed version of it.
        target_transform (callable, optional): Function/transform that receives
            the target and transforms it.
        download (bool, optional): When True, the dataset is downloaded from the
            internet into the root directory. A dataset that has already been
            downloaded is not downloaded again.
    """

    # NOTE(review): presumably the identifier the base class uses to locate
    # the dataset files -- confirm against DatasetFourFold.
    name = "ionosphere"
    # Class labels, in list order.
    classes: List[str] = ["good", "bad"]
32 changes: 32 additions & 0 deletions torchhd/datasets/iris.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from typing import List
from torchhd.datasets import DatasetFourFold


class Iris(DatasetFourFold):
    """`Iris <https://archive.ics.uci.edu/ml/datasets/iris>`_ dataset.

    Args:
        root (string): Directory in which the dataset files are stored.
        train (bool, optional): When True, the training (sub)set is taken from
            the file holding the training data, as further determined by
            ``fold`` and ``hyper_search``. When False, a subset of the training
            data is returned instead: the one used for hyperparameter search if
            ``hyper_search = True``, or the one given in ``conxuntos_kfold.dat``
            if ``hyper_search = False`` and the fold number is valid. Otherwise
            an error is issued.
        fold (int, optional): Number of the fold to use. The default, -1,
            yields all training data from the corresponding file. Values from
            0 to 3 pick a fold in ``conxuntos_kfold.dat`` and only matter when
            ``hyper_search`` is False. In that file, even rows (counting from
            zero) hold the indices of train subsets and odd rows those of test
            subsets.
        hyper_search (bool, optional): When True, the dataset is built from the
            indices in ``conxuntos.dat``, the split used for hyperparameter
            search. Its first row holds the train indices (used if
            ``train = True``) and its second row the test indices (used if
            ``train = False``).
        transform (callable, optional): Function/transform that receives a
            torch.FloatTensor and returns a transformed version of it.
        target_transform (callable, optional): Function/transform that receives
            the target and transforms it.
        download (bool, optional): When True, the dataset is downloaded from the
            internet into the root directory. A dataset that has already been
            downloaded is not downloaded again.
    """

    # NOTE(review): presumably the identifier the base class uses to locate
    # the dataset files -- confirm against DatasetFourFold.
    name = "iris"
    # Class labels, in list order.
    classes: List[str] = ["Iris Setosa", "Iris Versicolour", "Iris Virginica"]
Loading