hyperdimensional-computing · mikeheddes · Jan 7, 2023 · Jan 7, 2023 · Jan 7, 2023
diff --git a/docs/datasets.rst b/docs/datasets.rst
@@ -109,6 +109,37 @@ The Torchhd library provides many popular built-in datasets to work with.
     PostOperative
     PrimaryTumor
     Ringnorm
+    Seeds
+    Semeion
+    Soybean
+    Spambase
+    Spect
+    Spectf
+    StatlogAustralianCredit
+    StatlogGermanCredit
+    StatlogHeart
+    StatlogImage
+    StatlogLandsat
+    StatlogShuttle
+    StatlogVehicle
+    SteelPlates
+    SyntheticControl
+    Teaching
+    Thyroid
+    TicTacToe
+    Titanic
+    Trains
+    Twonorm
+    VertebralColumn2Clases
+    VertebralColumn3Clases
+    WallFollowing
+    Waveform
+    WaveformNoise
+    Wine
+    WineQualityRed
+    WineQualityWhite
+    Yeast
+    Zoo
 
 Base classes
 ------------------------

diff --git a/torchhd/datasets/__init__.py b/torchhd/datasets/__init__.py
@@ -99,6 +99,37 @@
 from torchhd.datasets.post_operative import PostOperative
 from torchhd.datasets.primary_tumor import PrimaryTumor
 from torchhd.datasets.ringnorm import Ringnorm
+from torchhd.datasets.seeds import Seeds
+from torchhd.datasets.semeion import Semeion
+from torchhd.datasets.soybean import Soybean
+from torchhd.datasets.spambase import Spambase
+from torchhd.datasets.spect import Spect
+from torchhd.datasets.spectf import Spectf
+from torchhd.datasets.statlog_australian_credit import StatlogAustralianCredit
+from torchhd.datasets.statlog_german_credit import StatlogGermanCredit
+from torchhd.datasets.statlog_heart import StatlogHeart
+from torchhd.datasets.statlog_image import StatlogImage
+from torchhd.datasets.statlog_landsat import StatlogLandsat
+from torchhd.datasets.statlog_shuttle import StatlogShuttle
+from torchhd.datasets.statlog_vehicle import StatlogVehicle
+from torchhd.datasets.steel_plates import SteelPlates
+from torchhd.datasets.synthetic_control import SyntheticControl
+from torchhd.datasets.teaching import Teaching
+from torchhd.datasets.thyroid import Thyroid
+from torchhd.datasets.tic_tac_toe import TicTacToe
+from torchhd.datasets.titanic import Titanic
+from torchhd.datasets.trains import Trains
+from torchhd.datasets.twonorm import Twonorm
+from torchhd.datasets.vertebral_column_2clases import VertebralColumn2Clases
+from torchhd.datasets.vertebral_column_3clases import VertebralColumn3Clases
+from torchhd.datasets.wall_following import WallFollowing
+from torchhd.datasets.waveform import Waveform
+from torchhd.datasets.waveform_noise import WaveformNoise
+from torchhd.datasets.wine import Wine
+from torchhd.datasets.wine_quality_red import WineQualityRed
+from torchhd.datasets.wine_quality_white import WineQualityWhite
+from torchhd.datasets.yeast import Yeast
+from torchhd.datasets.zoo import Zoo
 
 __all__ = [
     "BeijingAirQuality",
@@ -202,4 +233,35 @@
     "PostOperative",
     "PrimaryTumor",
     "Ringnorm",
+    "Seeds",
+    "Semeion",
+    "Soybean",
+    "Spambase",
+    "Spect",
+    "Spectf",
+    "StatlogAustralianCredit",
+    "StatlogGermanCredit",
+    "StatlogHeart",
+    "StatlogImage",
+    "StatlogLandsat",
+    "StatlogShuttle",
+    "StatlogVehicle",
+    "SteelPlates",
+    "SyntheticControl",
+    "Teaching",
+    "Thyroid",
+    "TicTacToe",
+    "Titanic",
+    "Trains",
+    "Twonorm",
+    "VertebralColumn2Clases",
+    "VertebralColumn3Clases",
+    "WallFollowing",
+    "Waveform",
+    "WaveformNoise",
+    "Wine",
+    "WineQualityRed",
+    "WineQualityWhite",
+    "Yeast",
+    "Zoo",
 ]
diff --git a/torchhd/datasets/seeds.py b/torchhd/datasets/seeds.py
@@ -0,0 +1,32 @@
+from typing import List
+from torchhd.datasets import DatasetFourFold
+
+
+class Seeds(DatasetFourFold):
+    """`Seeds <https://archive.ics.uci.edu/ml/datasets/seeds>`_ dataset.
+
+    Args:
+        root (string): Root directory containing the files of the dataset.
+        train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables.
+            Otherwise, if hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns a subset of the training dataset
+            as specified in ``conxuntos_kfold.dat`` if the fold number is correct. Otherwise issues an error.
+        fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
+            Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
+            Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
+        hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
+            while the second row corresponds to test indices (used if ``train = False``).
+        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
+            and returns a transformed version.
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+        download (bool, optional): If True, downloads the dataset from the internet and
+            puts it in root directory. If dataset is already downloaded, it is not
+            downloaded again.
+    """
+
+    name = "seeds"
+    classes: List[str] = [
+        "Kama",
+        "Rosa",
+        "Canadian",
+    ]
diff --git a/torchhd/datasets/semeion.py b/torchhd/datasets/semeion.py
@@ -0,0 +1,39 @@
+from typing import List
+from torchhd.datasets import DatasetFourFold
+
+
+class Semeion(DatasetFourFold):
+    """`Semeion Handwritten Digit <https://archive.ics.uci.edu/ml/datasets/semeion+handwritten+digit>`_ dataset.
+
+    Args:
+        root (string): Root directory containing the files of the dataset.
+        train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables.
+            Otherwise, if hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns a subset of the training dataset
+            as specified in ``conxuntos_kfold.dat`` if the fold number is correct. Otherwise issues an error.
+        fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
+            Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
+            Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
+        hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
+            while the second row corresponds to test indices (used if ``train = False``).
+        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
+            and returns a transformed version.
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+        download (bool, optional): If True, downloads the dataset from the internet and
+            puts it in root directory. If dataset is already downloaded, it is not
+            downloaded again.
+    """
+
+    name = "semeion"
+    classes: List[str] = [
+        "0",
+        "1",
+        "2",
+        "3",
+        "4",
+        "5",
+        "6",
+        "7",
+        "8",
+        "9",
+    ]
diff --git a/torchhd/datasets/soybean.py b/torchhd/datasets/soybean.py
@@ -0,0 +1,43 @@
+from typing import List
+from torchhd.datasets import DatasetTrainTest
+
+
+class Soybean(DatasetTrainTest):
+    """`Soybean (Large) <https://archive.ics.uci.edu/ml/datasets/Soybean+(Large)>`_ dataset.
+
+    Args:
+        root (string): Root directory containing the files of the dataset.
+        train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by hyper_search variable.
+            Otherwise, if hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns the test set.
+        hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
+            while the second row corresponds to test indices (used if ``train = False``).
+        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
+            and returns a transformed version.
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+        download (bool, optional): If True, downloads the dataset from the internet and
+            puts it in root directory. If dataset is already downloaded, it is not
+            downloaded again.
+    """
+
+    name = "soybean"
+    classes: List[str] = [
+        "diaporthe-stem-canker",
+        "charcoal-rot",
+        "rhizoctonia-root-rot",
+        "phytophthora-rot",
+        "brown-stem-rot",
+        "powdery-mildew",
+        "downy-mildew",
+        "brown-spot",
+        "bacterial-blight",
+        "bacterial-pustule",
+        "purple-seed-stain",
+        "anthracnose",
+        "phyllosticta-leaf-spot",
+        "alternarialeaf-spot",
+        "frog-eye-leaf-spot",
+        "diaporthe-pod-&-stem-blight",
+        "cyst-nematode",
+        "herbicide-injury",
+    ]
diff --git a/torchhd/datasets/spambase.py b/torchhd/datasets/spambase.py
@@ -0,0 +1,31 @@
+from typing import List
+from torchhd.datasets import DatasetFourFold
+
+
+class Spambase(DatasetFourFold):
+    """`Spambase <https://archive.ics.uci.edu/ml/datasets/spambase>`_ dataset.
+
+    Args:
+        root (string): Root directory containing the files of the dataset.
+        train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables.
+            Otherwise, if hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns a subset of the training dataset
+            as specified in ``conxuntos_kfold.dat`` if the fold number is correct. Otherwise issues an error.
+        fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
+            Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
+            Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
+        hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
+            while the second row corresponds to test indices (used if ``train = False``).
+        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
+            and returns a transformed version.
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+        download (bool, optional): If True, downloads the dataset from the internet and
+            puts it in root directory. If dataset is already downloaded, it is not
+            downloaded again.
+    """
+
+    name = "spambase"
+    classes: List[str] = [
+        "non-spam",
+        "spam",
+    ]
diff --git a/torchhd/datasets/spect.py b/torchhd/datasets/spect.py
@@ -0,0 +1,27 @@
+from typing import List
+from torchhd.datasets import DatasetTrainTest
+
+
+class Spect(DatasetTrainTest):
+    """`SPECT Heart Data <https://archive.ics.uci.edu/ml/datasets/spect+heart>`_ dataset.
+
+    Args:
+        root (string): Root directory containing the files of the dataset.
+        train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by hyper_search variable.
+            Otherwise, if hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns the test set.
+        hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
+            while the second row corresponds to test indices (used if ``train = False``).
+        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
+            and returns a transformed version.
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+        download (bool, optional): If True, downloads the dataset from the internet and
+            puts it in root directory. If dataset is already downloaded, it is not
+            downloaded again.
+    """
+
+    name = "spect"
+    classes: List[str] = [
+        "normal",
+        "abnormal",
+    ]
diff --git a/torchhd/datasets/spectf.py b/torchhd/datasets/spectf.py
@@ -0,0 +1,27 @@
+from typing import List
+from torchhd.datasets import DatasetTrainTest
+
+
+class Spectf(DatasetTrainTest):
+    """`SPECTF Heart Data <https://archive.ics.uci.edu/ml/datasets/SPECTF+Heart>`_ dataset.
+
+    Args:
+        root (string): Root directory containing the files of the dataset.
+        train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by hyper_search variable.
+            Otherwise, if hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns the test set.
+        hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
+            while the second row corresponds to test indices (used if ``train = False``).
+        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
+            and returns a transformed version.
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+        download (bool, optional): If True, downloads the dataset from the internet and
+            puts it in root directory. If dataset is already downloaded, it is not
+            downloaded again.
+    """
+
+    name = "spectf"
+    classes: List[str] = [
+        "normal",
+        "abnormal",
+    ]
diff --git a/torchhd/datasets/statlog_australian_credit.py b/torchhd/datasets/statlog_australian_credit.py
@@ -0,0 +1,31 @@
+from typing import List
+from torchhd.datasets import DatasetFourFold
+
+
+class StatlogAustralianCredit(DatasetFourFold):
+    """`Statlog (Australian Credit Approval) <https://archive.ics.uci.edu/ml/datasets/statlog+(australian+credit+approval)>`_ dataset.
+
+    Args:
+        root (string): Root directory containing the files of the dataset.
+        train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables.
+            Otherwise, if hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns a subset of the training dataset
+            as specified in ``conxuntos_kfold.dat`` if the fold number is correct. Otherwise issues an error.
+        fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
+            Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
+            Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
+        hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
+            while the second row corresponds to test indices (used if ``train = False``).
+        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
+            and returns a transformed version.
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+        download (bool, optional): If True, downloads the dataset from the internet and
+            puts it in root directory. If dataset is already downloaded, it is not
+            downloaded again.
+    """
+
+    name = "statlog-australian-credit"
+    classes: List[str] = [
+        "+",
+        "-",
+    ]
diff --git a/torchhd/datasets/statlog_german_credit.py b/torchhd/datasets/statlog_german_credit.py
@@ -0,0 +1,31 @@
+from typing import List
+from torchhd.datasets import DatasetFourFold
+
+
+class StatlogGermanCredit(DatasetFourFold):
+    """`Statlog (German Credit Data) <https://archive.ics.uci.edu/ml/datasets/statlog+(german+credit+data)>`_ dataset.
+
+    Args:
+        root (string): Root directory containing the files of the dataset.
+        train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables.
+            Otherwise, if hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns a subset of the training dataset
+            as specified in ``conxuntos_kfold.dat`` if the fold number is correct. Otherwise issues an error.
+        fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
+            Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
+            Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
+        hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
+            while the second row corresponds to test indices (used if ``train = False``).
+        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
+            and returns a transformed version.
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+        download (bool, optional): If True, downloads the dataset from the internet and
+            puts it in root directory. If dataset is already downloaded, it is not
+            downloaded again.
+    """
+
+    name = "statlog-german-credit"
+    classes: List[str] = [
+        "Good",
+        "Bad",
+    ]