hyperdimensional-computing · mikeheddes · Jan 3, 2023 · Jan 2, 2023 · Jan 2, 2023
diff --git a/docs/datasets.rst b/docs/datasets.rst
@@ -39,6 +39,26 @@ The Torchhd library provides many popular built-in datasets to work with.
     Cardiotocography3Clases
     Cardiotocography10Clases
     ChessKrvk
+    ChessKrvkp
+    CongressionalVoting
+    ConnBenchSonarMinesRocks
+    ConnBenchVowelDeterding
+    Connect4
+    Contrac
+    CreditApproval
+    CylinderBands
+    Dermatology
+    Echocardiogram
+    Ecoli
+    EnergyY1
+    EnergyY2
+    Fertility
+    Flags
+    Glass
+    HabermanSurvival
+    HayesRoth
+    HeartCleveland
+    HeartHungarian
 
 Base classes
 ------------------------

diff --git a/torchhd/datasets/__init__.py b/torchhd/datasets/__init__.py
@@ -29,6 +29,26 @@
 from torchhd.datasets.cardiotocography_3clases import Cardiotocography3Clases
 from torchhd.datasets.cardiotocography_10clases import Cardiotocography10Clases
 from torchhd.datasets.chess_krvk import ChessKrvk
+from torchhd.datasets.chess_krvkp import ChessKrvkp
+from torchhd.datasets.congressional_voting import CongressionalVoting
+from torchhd.datasets.conn_bench_sonar_mines_rocks import ConnBenchSonarMinesRocks
+from torchhd.datasets.conn_bench_vowel_deterding import ConnBenchVowelDeterding
+from torchhd.datasets.connect_4 import Connect4
+from torchhd.datasets.contrac import Contrac
+from torchhd.datasets.credit_approval import CreditApproval
+from torchhd.datasets.cylinder_bands import CylinderBands
+from torchhd.datasets.dermatology import Dermatology
+from torchhd.datasets.echocardiogram import Echocardiogram
+from torchhd.datasets.ecoli import Ecoli
+from torchhd.datasets.energy_y1 import EnergyY1
+from torchhd.datasets.energy_y2 import EnergyY2
+from torchhd.datasets.fertility import Fertility
+from torchhd.datasets.flags import Flags
+from torchhd.datasets.glass import Glass
+from torchhd.datasets.haberman_survival import HabermanSurvival
+from torchhd.datasets.hayes_roth import HayesRoth
+from torchhd.datasets.heart_cleveland import HeartCleveland
+from torchhd.datasets.heart_hungarian import HeartHungarian
 
 
 __all__ = [
@@ -63,4 +83,24 @@
     "Cardiotocography3Clases",
     "Cardiotocography10Clases",
     "ChessKrvk",
+    "ChessKrvkp",
+    "CongressionalVoting",
+    "ConnBenchSonarMinesRocks",
+    "ConnBenchVowelDeterding",
+    "Connect4",
+    "Contrac",
+    "CreditApproval",
+    "CylinderBands",
+    "Dermatology",
+    "Echocardiogram",
+    "Ecoli",
+    "EnergyY1",
+    "EnergyY2",
+    "Fertility",
+    "Flags",
+    "Glass",
+    "HabermanSurvival",
+    "HayesRoth",
+    "HeartCleveland",
+    "HeartHungarian",
 ]
diff --git a/torchhd/datasets/chess_krvkp.py b/torchhd/datasets/chess_krvkp.py
@@ -0,0 +1,31 @@
+from typing import List
+from torchhd.datasets import DatasetFourFold
+
+
+class ChessKrvkp(DatasetFourFold):
+    """`Chess (King-Rook vs. King-Pawn) <https://archive.ics.uci.edu/ml/datasets/Chess+(King-Rook+vs.+King-Pawn)>`_ dataset.
+
+    Args:
+        root (string): Root directory containing the files of the dataset.
+        train (bool, optional): If True, returns the training (sub)set from the file storing training data, as further determined by the ``fold`` and ``hyper_search`` arguments.
+            Otherwise, if a hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns a subset of the training dataset
+            as specified in ``conxuntos_kfold.dat``, provided the fold number is valid. If the fold number is invalid, an error is issued.
+        fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
+            Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if ``hyper_search`` is set to False and ``0 <= fold <= 3``.
+            Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
+        hyper_search (bool, optional): If True, creates the dataset using the indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
+            while the second row corresponds to test indices (used if ``train = False``).
+        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
+            and returns a transformed version.
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+        download (bool, optional): If True, downloads the dataset from the internet and
+            puts it in root directory. If dataset is already downloaded, it is not
+            downloaded again.
+    """
+
+    name = "chess-krvkp"
+    classes: List[str] = [
+        "White can win",
+        "White cannot win",
+    ]
diff --git a/torchhd/datasets/congressional_voting.py b/torchhd/datasets/congressional_voting.py
@@ -0,0 +1,31 @@
+from typing import List
+from torchhd.datasets import DatasetFourFold
+
+
+class CongressionalVoting(DatasetFourFold):
+    """`Congressional Voting Records <https://archive.ics.uci.edu/ml/datasets/congressional+voting+records>`_ dataset.
+
+    Args:
+        root (string): Root directory containing the files of the dataset.
+        train (bool, optional): If True, returns the training (sub)set from the file storing training data, as further determined by the ``fold`` and ``hyper_search`` arguments.
+            Otherwise, if a hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns a subset of the training dataset
+            as specified in ``conxuntos_kfold.dat``, provided the fold number is valid. If the fold number is invalid, an error is issued.
+        fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
+            Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if ``hyper_search`` is set to False and ``0 <= fold <= 3``.
+            Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
+        hyper_search (bool, optional): If True, creates the dataset using the indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
+            while the second row corresponds to test indices (used if ``train = False``).
+        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
+            and returns a transformed version.
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+        download (bool, optional): If True, downloads the dataset from the internet and
+            puts it in root directory. If dataset is already downloaded, it is not
+            downloaded again.
+    """
+
+    name = "congressional-voting"
+    classes: List[str] = [
+        "Democrat",
+        "Republican",
+    ]
diff --git a/torchhd/datasets/conn_bench_sonar_mines_rocks.py b/torchhd/datasets/conn_bench_sonar_mines_rocks.py
@@ -0,0 +1,31 @@
+from typing import List
+from torchhd.datasets import DatasetFourFold
+
+
+class ConnBenchSonarMinesRocks(DatasetFourFold):
+    """`Connectionist Bench (Sonar, Mines vs. Rocks) <https://archive.ics.uci.edu/ml/datasets/connectionist+bench+(sonar,+mines+vs.+rocks)>`_ dataset.
+
+    Args:
+        root (string): Root directory containing the files of the dataset.
+        train (bool, optional): If True, returns the training (sub)set from the file storing training data, as further determined by the ``fold`` and ``hyper_search`` arguments.
+            Otherwise, if a hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns a subset of the training dataset
+            as specified in ``conxuntos_kfold.dat``, provided the fold number is valid. If the fold number is invalid, an error is issued.
+        fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
+            Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if ``hyper_search`` is set to False and ``0 <= fold <= 3``.
+            Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
+        hyper_search (bool, optional): If True, creates the dataset using the indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
+            while the second row corresponds to test indices (used if ``train = False``).
+        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
+            and returns a transformed version.
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+        download (bool, optional): If True, downloads the dataset from the internet and
+            puts it in root directory. If dataset is already downloaded, it is not
+            downloaded again.
+    """
+
+    name = "conn-bench-sonar-mines-rocks"
+    classes: List[str] = [
+        "Mine",
+        "Rock",
+    ]
diff --git a/torchhd/datasets/conn_bench_vowel_deterding.py b/torchhd/datasets/conn_bench_vowel_deterding.py
@@ -0,0 +1,36 @@
+from typing import List
+from torchhd.datasets import DatasetTrainTest
+
+
+class ConnBenchVowelDeterding(DatasetTrainTest):
+    """`Connectionist Bench (Vowel Recognition - Deterding Data) <https://archive.ics.uci.edu/ml/datasets/Connectionist+Bench+(Vowel+Recognition+-+Deterding+Data)>`_ dataset.
+
+    Args:
+        root (string): Root directory containing the files of the dataset.
+        train (bool, optional): If True, returns the training (sub)set from the file storing training data, as further determined by the ``hyper_search`` argument.
+            Otherwise, if a hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns the test set.
+        hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
+            while the second row corresponds to test indices (used if ``train = False``).
+        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
+            and returns a transformed version.
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+        download (bool, optional): If True, downloads the dataset from the internet and
+            puts it in root directory. If dataset is already downloaded, it is not
+            downloaded again.
+    """
+
+    name = "conn-bench-vowel-deterding"
+    classes: List[str] = [
+        "0",
+        "1",
+        "2",
+        "3",
+        "4",
+        "5",
+        "6",
+        "7",
+        "8",
+        "9",
+        "10",
+    ]
diff --git a/torchhd/datasets/connect_4.py b/torchhd/datasets/connect_4.py
@@ -0,0 +1,32 @@
+from typing import List
+from torchhd.datasets import DatasetFourFold
+
+
+class Connect4(DatasetFourFold):
+    """`Connect-4 <https://archive.ics.uci.edu/ml/datasets/connect-4>`_ dataset.
+
+    Args:
+        root (string): Root directory containing the files of the dataset.
+        train (bool, optional): If True, returns the training (sub)set from the file storing training data, as further determined by the ``fold`` and ``hyper_search`` arguments.
+            Otherwise, if a hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns a subset of the training dataset
+            as specified in ``conxuntos_kfold.dat``, provided the fold number is valid. If the fold number is invalid, an error is issued.
+        fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
+            Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if ``hyper_search`` is set to False and ``0 <= fold <= 3``.
+            Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
+        hyper_search (bool, optional): If True, creates the dataset using the indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
+            while the second row corresponds to test indices (used if ``train = False``).
+        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
+            and returns a transformed version.
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+        download (bool, optional): If True, downloads the dataset from the internet and
+            puts it in root directory. If dataset is already downloaded, it is not
+            downloaded again.
+    """
+
+    name = "connect-4"
+    classes: List[str] = [
+        "draw",
+        "loss",
+        "win",
+    ]
diff --git a/torchhd/datasets/contrac.py b/torchhd/datasets/contrac.py
@@ -0,0 +1,32 @@
+from typing import List
+from torchhd.datasets import DatasetFourFold
+
+
+class Contrac(DatasetFourFold):
+    """`Contraceptive Method Choice <https://archive.ics.uci.edu/ml/datasets/Contraceptive+Method+Choice>`_ dataset.
+
+    Args:
+        root (string): Root directory containing the files of the dataset.
+        train (bool, optional): If True, returns the training (sub)set from the file storing training data, as further determined by the ``fold`` and ``hyper_search`` arguments.
+            Otherwise, if a hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns a subset of the training dataset
+            as specified in ``conxuntos_kfold.dat``, provided the fold number is valid. If the fold number is invalid, an error is issued.
+        fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
+            Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if ``hyper_search`` is set to False and ``0 <= fold <= 3``.
+            Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
+        hyper_search (bool, optional): If True, creates the dataset using the indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
+            while the second row corresponds to test indices (used if ``train = False``).
+        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
+            and returns a transformed version.
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+        download (bool, optional): If True, downloads the dataset from the internet and
+            puts it in root directory. If dataset is already downloaded, it is not
+            downloaded again.
+    """
+
+    name = "contrac"
+    classes: List[str] = [
+        "No-use",
+        "Long-term",
+        "Short-term",
+    ]
diff --git a/torchhd/datasets/credit_approval.py b/torchhd/datasets/credit_approval.py
@@ -0,0 +1,31 @@
+from typing import List
+from torchhd.datasets import DatasetFourFold
+
+
+class CreditApproval(DatasetFourFold):
+    """`Credit Approval <https://archive.ics.uci.edu/ml/datasets/credit+approval>`_ dataset.
+
+    Args:
+        root (string): Root directory containing the files of the dataset.
+        train (bool, optional): If True, returns the training (sub)set from the file storing training data, as further determined by the ``fold`` and ``hyper_search`` arguments.
+            Otherwise, if a hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns a subset of the training dataset
+            as specified in ``conxuntos_kfold.dat``, provided the fold number is valid. If the fold number is invalid, an error is issued.
+        fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
+            Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if ``hyper_search`` is set to False and ``0 <= fold <= 3``.
+            Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
+        hyper_search (bool, optional): If True, creates the dataset using the indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
+            while the second row corresponds to test indices (used if ``train = False``).
+        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
+            and returns a transformed version.
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+        download (bool, optional): If True, downloads the dataset from the internet and
+            puts it in root directory. If dataset is already downloaded, it is not
+            downloaded again.
+    """
+
+    name = "credit-approval"
+    classes: List[str] = [
+        "+",
+        "-",
+    ]
diff --git a/torchhd/datasets/cylinder_bands.py b/torchhd/datasets/cylinder_bands.py
@@ -0,0 +1,31 @@
+from typing import List
+from torchhd.datasets import DatasetFourFold
+
+
+class CylinderBands(DatasetFourFold):
+    """`Cylinder Bands <https://archive.ics.uci.edu/ml/datasets/Cylinder+Bands>`_ dataset.
+
+    Args:
+        root (string): Root directory containing the files of the dataset.
+        train (bool, optional): If True, returns the training (sub)set from the file storing training data, as further determined by the ``fold`` and ``hyper_search`` arguments.
+            Otherwise, if a hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns a subset of the training dataset
+            as specified in ``conxuntos_kfold.dat``, provided the fold number is valid. If the fold number is invalid, an error is issued.
+        fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
+            Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if ``hyper_search`` is set to False and ``0 <= fold <= 3``.
+            Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
+        hyper_search (bool, optional): If True, creates the dataset using the indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
+            while the second row corresponds to test indices (used if ``train = False``).
+        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
+            and returns a transformed version.
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+        download (bool, optional): If True, downloads the dataset from the internet and
+            puts it in root directory. If dataset is already downloaded, it is not
+            downloaded again.
+    """
+
+    name = "cylinder-bands"
+    classes: List[str] = [
+        "band",
+        "noband",
+    ]