hyperdimensional-computing · mikeheddes · Jan 4, 2023 · Jan 4, 2023 · Jan 4, 2023
diff --git a/docs/datasets.rst b/docs/datasets.rst
@@ -59,6 +59,27 @@ The Torchhd library provides many popular built-in datasets to work with.
     HayesRoth
     HeartCleveland
     HeartHungarian
+    HeartSwitzerland
+    HeartVa
+    Hepatitis
+    HillValley
+    HorseColic
+    IlpdIndianLiver
+    ImageSegmentation
+    Ionosphere
+    Iris
+    LedDisplay
+    Lenses
+    Letter
+    Libras
+    LowResSpect
+    LungCancer
+    Lymphography
+    Magic
+    Mammographic
+    Miniboone
+    MolecBiolPromoter
+
 
 Base classes
 ------------------------

diff --git a/torchhd/datasets/__init__.py b/torchhd/datasets/__init__.py
@@ -49,6 +49,26 @@
 from torchhd.datasets.hayes_roth import HayesRoth
 from torchhd.datasets.heart_cleveland import HeartCleveland
 from torchhd.datasets.heart_hungarian import HeartHungarian
+from torchhd.datasets.heart_switzerland import HeartSwitzerland
+from torchhd.datasets.heart_va import HeartVa
+from torchhd.datasets.hepatitis import Hepatitis
+from torchhd.datasets.hill_valley import HillValley
+from torchhd.datasets.horse_colic import HorseColic
+from torchhd.datasets.ilpd_indian_liver import IlpdIndianLiver
+from torchhd.datasets.image_segmentation import ImageSegmentation
+from torchhd.datasets.ionosphere import Ionosphere
+from torchhd.datasets.iris import Iris
+from torchhd.datasets.led_display import LedDisplay
+from torchhd.datasets.lenses import Lenses
+from torchhd.datasets.letter import Letter
+from torchhd.datasets.libras import Libras
+from torchhd.datasets.low_res_spect import LowResSpect
+from torchhd.datasets.lung_cancer import LungCancer
+from torchhd.datasets.lymphography import Lymphography
+from torchhd.datasets.magic import Magic
+from torchhd.datasets.mammographic import Mammographic
+from torchhd.datasets.miniboone import Miniboone
+from torchhd.datasets.molec_biol_promoter import MolecBiolPromoter
 
 
 __all__ = [
@@ -103,4 +123,24 @@
     "HayesRoth",
     "HeartCleveland",
     "HeartHungarian",
+    "HeartSwitzerland",
+    "HeartVa",
+    "Hepatitis",
+    "HillValley",
+    "HorseColic",
+    "IlpdIndianLiver",
+    "ImageSegmentation",
+    "Ionosphere",
+    "Iris",
+    "LedDisplay",
+    "Lenses",
+    "Letter",
+    "Libras",
+    "LowResSpect",
+    "LungCancer",
+    "Lymphography",
+    "Magic",
+    "Mammographic",
+    "Miniboone",
+    "MolecBiolPromoter",
 ]
diff --git a/torchhd/datasets/heart_switzerland.py b/torchhd/datasets/heart_switzerland.py
@@ -0,0 +1,34 @@
+from typing import List
+from torchhd.datasets import DatasetFourFold
+
+
+class HeartSwitzerland(DatasetFourFold):
+    """`Heart Disease <https://archive.ics.uci.edu/ml/datasets/heart+disease>`_ dataset.
+
+    Args:
+        root (string): Root directory containing the files of the dataset.
+        train (bool, optional): If True, returns the training (sub)set from the file storing training data as further determined by the fold and hyper_search variables.
+            Otherwise, if a hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns a subset of the training dataset
+            as specified in ``conxuntos_kfold.dat``, provided the fold number is valid. Otherwise issues an error.
+        fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
+            Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
+            Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
+        hyper_search (bool, optional): If True, creates the dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
+            while the second row corresponds to test indices (used if ``train = False``).
+        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
+            and returns a transformed version.
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+        download (bool, optional): If True, downloads the dataset from the internet and
+            puts it in root directory. If dataset is already downloaded, it is not
+            downloaded again.
+    """
+
+    name = "heart-switzerland"
+    classes: List[str] = [
+        "0",
+        "1",
+        "2",
+        "3",
+        "4",
+    ]
diff --git a/torchhd/datasets/heart_va.py b/torchhd/datasets/heart_va.py
@@ -0,0 +1,34 @@
+from typing import List
+from torchhd.datasets import DatasetFourFold
+
+
+class HeartVa(DatasetFourFold):
+    """`Heart Disease <https://archive.ics.uci.edu/ml/datasets/heart+disease>`_ dataset.
+
+    Args:
+        root (string): Root directory containing the files of the dataset.
+        train (bool, optional): If True, returns the training (sub)set from the file storing training data as further determined by the fold and hyper_search variables.
+            Otherwise, if a hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns a subset of the training dataset
+            as specified in ``conxuntos_kfold.dat``, provided the fold number is valid. Otherwise issues an error.
+        fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
+            Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
+            Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
+        hyper_search (bool, optional): If True, creates the dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
+            while the second row corresponds to test indices (used if ``train = False``).
+        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
+            and returns a transformed version.
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+        download (bool, optional): If True, downloads the dataset from the internet and
+            puts it in root directory. If dataset is already downloaded, it is not
+            downloaded again.
+    """
+
+    name = "heart-va"
+    classes: List[str] = [
+        "0",
+        "1",
+        "2",
+        "3",
+        "4",
+    ]
diff --git a/torchhd/datasets/hepatitis.py b/torchhd/datasets/hepatitis.py
@@ -0,0 +1,31 @@
+from typing import List
+from torchhd.datasets import DatasetFourFold
+
+
+class Hepatitis(DatasetFourFold):
+    """`Hepatitis <https://archive.ics.uci.edu/ml/datasets/hepatitis>`_ dataset.
+
+    Args:
+        root (string): Root directory containing the files of the dataset.
+        train (bool, optional): If True, returns the training (sub)set from the file storing training data as further determined by the fold and hyper_search variables.
+            Otherwise, if a hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns a subset of the training dataset
+            as specified in ``conxuntos_kfold.dat``, provided the fold number is valid. Otherwise issues an error.
+        fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
+            Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
+            Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
+        hyper_search (bool, optional): If True, creates the dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
+            while the second row corresponds to test indices (used if ``train = False``).
+        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
+            and returns a transformed version.
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+        download (bool, optional): If True, downloads the dataset from the internet and
+            puts it in root directory. If dataset is already downloaded, it is not
+            downloaded again.
+    """
+
+    name = "hepatitis"
+    classes: List[str] = [
+        "die",
+        "live",
+    ]
diff --git a/torchhd/datasets/hill_valley.py b/torchhd/datasets/hill_valley.py
@@ -0,0 +1,27 @@
+from typing import List
+from torchhd.datasets import DatasetTrainTest
+
+
+class HillValley(DatasetTrainTest):
+    """`Hill-Valley <https://archive.ics.uci.edu/ml/datasets/hill-valley>`_ dataset.
+
+    Args:
+        root (string): Root directory containing the files of the dataset.
+        train (bool, optional): If True, returns the training (sub)set from the file storing training data as further determined by the hyper_search variable.
+            Otherwise, returns a subset of the training dataset if a hyperparameter search is performed (``hyper_search = True``); if not (``hyper_search = False``), returns the test set.
+        hyper_search (bool, optional): If True, creates the dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
+            while the second row corresponds to test indices (used if ``train = False``).
+        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
+            and returns a transformed version.
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+        download (bool, optional): If True, downloads the dataset from the internet and
+            puts it in root directory. If dataset is already downloaded, it is not
+            downloaded again.
+    """
+
+    name = "hill-valley"
+    classes: List[str] = [
+        "valley",
+        "hill",
+    ]
diff --git a/torchhd/datasets/horse_colic.py b/torchhd/datasets/horse_colic.py
@@ -0,0 +1,27 @@
+from typing import List
+from torchhd.datasets import DatasetTrainTest
+
+
+class HorseColic(DatasetTrainTest):
+    """`Horse Colic <https://archive.ics.uci.edu/ml/datasets/Horse+Colic>`_ dataset.
+
+    Args:
+        root (string): Root directory containing the files of the dataset.
+        train (bool, optional): If True, returns the training (sub)set from the file storing training data as further determined by the hyper_search variable.
+            Otherwise, returns a subset of the training dataset if a hyperparameter search is performed (``hyper_search = True``); if not (``hyper_search = False``), returns the test set.
+        hyper_search (bool, optional): If True, creates the dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
+            while the second row corresponds to test indices (used if ``train = False``).
+        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
+            and returns a transformed version.
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+        download (bool, optional): If True, downloads the dataset from the internet and
+            puts it in root directory. If dataset is already downloaded, it is not
+            downloaded again.
+    """
+
+    name = "horse-colic"
+    classes: List[str] = [
+        "Yes, it had surgery",
+        "It was treated without surgery",
+    ]
diff --git a/torchhd/datasets/ilpd_indian_liver.py b/torchhd/datasets/ilpd_indian_liver.py
@@ -0,0 +1,31 @@
+from typing import List
+from torchhd.datasets import DatasetFourFold
+
+
+class IlpdIndianLiver(DatasetFourFold):
+    """`ILPD (Indian Liver Patient Dataset) <https://archive.ics.uci.edu/ml/datasets/ILPD+(Indian+Liver+Patient+Dataset)>`_ dataset.
+
+    Args:
+        root (string): Root directory containing the files of the dataset.
+        train (bool, optional): If True, returns the training (sub)set from the file storing training data as further determined by the fold and hyper_search variables.
+            Otherwise, if a hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns a subset of the training dataset
+            as specified in ``conxuntos_kfold.dat``, provided the fold number is valid. Otherwise issues an error.
+        fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
+            Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
+            Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
+        hyper_search (bool, optional): If True, creates the dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
+            while the second row corresponds to test indices (used if ``train = False``).
+        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
+            and returns a transformed version.
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+        download (bool, optional): If True, downloads the dataset from the internet and
+            puts it in root directory. If dataset is already downloaded, it is not
+            downloaded again.
+    """
+
+    name = "ilpd-indian-liver"
+    classes: List[str] = [
+        "liver patient",
+        "not liver patient",
+    ]
diff --git a/torchhd/datasets/image_segmentation.py b/torchhd/datasets/image_segmentation.py
@@ -0,0 +1,32 @@
+from typing import List
+from torchhd.datasets import DatasetTrainTest
+
+
+class ImageSegmentation(DatasetTrainTest):
+    """`Image Segmentation <https://archive.ics.uci.edu/ml/datasets/image+segmentation>`_ dataset.
+
+    Args:
+        root (string): Root directory containing the files of the dataset.
+        train (bool, optional): If True, returns the training (sub)set from the file storing training data as further determined by the hyper_search variable.
+            Otherwise, returns a subset of the training dataset if a hyperparameter search is performed (``hyper_search = True``); if not (``hyper_search = False``), returns the test set.
+        hyper_search (bool, optional): If True, creates the dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
+            while the second row corresponds to test indices (used if ``train = False``).
+        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
+            and returns a transformed version.
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+        download (bool, optional): If True, downloads the dataset from the internet and
+            puts it in root directory. If dataset is already downloaded, it is not
+            downloaded again.
+    """
+
+    name = "image-segmentation"
+    classes: List[str] = [
+        "brickface",
+        "sky",
+        "foliage",
+        "cement",
+        "window",
+        "path",
+        "grass",
+    ]
diff --git a/torchhd/datasets/ionosphere.py b/torchhd/datasets/ionosphere.py
@@ -0,0 +1,31 @@
+from typing import List
+from torchhd.datasets import DatasetFourFold
+
+
+class Ionosphere(DatasetFourFold):
+    """`Ionosphere <https://archive.ics.uci.edu/ml/datasets/ionosphere>`_ dataset.
+
+    Args:
+        root (string): Root directory containing the files of the dataset.
+        train (bool, optional): If True, returns the training (sub)set from the file storing training data as further determined by the fold and hyper_search variables.
+            Otherwise, if a hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns a subset of the training dataset
+            as specified in ``conxuntos_kfold.dat``, provided the fold number is valid. Otherwise issues an error.
+        fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
+            Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
+            Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
+        hyper_search (bool, optional): If True, creates the dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
+            while the second row corresponds to test indices (used if ``train = False``).
+        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
+            and returns a transformed version.
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+        download (bool, optional): If True, downloads the dataset from the internet and
+            puts it in root directory. If dataset is already downloaded, it is not
+            downloaded again.
+    """
+
+    name = "ionosphere"
+    classes: List[str] = [
+        "good",
+        "bad",
+    ]
diff --git a/torchhd/datasets/iris.py b/torchhd/datasets/iris.py
@@ -0,0 +1,32 @@
+from typing import List
+from torchhd.datasets import DatasetFourFold
+
+
+class Iris(DatasetFourFold):
+    """`Iris <https://archive.ics.uci.edu/ml/datasets/iris>`_ dataset.
+
+    Args:
+        root (string): Root directory containing the files of the dataset.
+        train (bool, optional): If True, returns the training (sub)set from the file storing training data as further determined by the fold and hyper_search variables.
+            Otherwise, if a hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns a subset of the training dataset
+            as specified in ``conxuntos_kfold.dat``, provided the fold number is valid. Otherwise issues an error.
+        fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
+            Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
+            Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
+        hyper_search (bool, optional): If True, creates the dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
+            while the second row corresponds to test indices (used if ``train = False``).
+        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
+            and returns a transformed version.
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+        download (bool, optional): If True, downloads the dataset from the internet and
+            puts it in root directory. If dataset is already downloaded, it is not
+            downloaded again.
+    """
+
+    name = "iris"
+    classes: List[str] = [
+        "Iris Setosa",
+        "Iris Versicolour",
+        "Iris Virginica",
+    ]