Skip to content

Added datasets 21 to 40 #104

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions docs/datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,26 @@ The Torchhd library provides many popular built-in datasets to work with.
Cardiotocography3Clases
Cardiotocography10Clases
ChessKrvk
ChessKrvkp
CongressionalVoting
ConnBenchSonarMinesRocks
ConnBenchVowelDeterding
Connect4
Contrac
CreditApproval
CylinderBands
Dermatology
Echocardiogram
Ecoli
EnergyY1
EnergyY2
Fertility
Flags
Glass
HabermanSurvival
HayesRoth
HeartCleveland
HeartHungarian

Base classes
------------------------
Expand Down
40 changes: 40 additions & 0 deletions torchhd/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,26 @@
from torchhd.datasets.cardiotocography_3clases import Cardiotocography3Clases
from torchhd.datasets.cardiotocography_10clases import Cardiotocography10Clases
from torchhd.datasets.chess_krvk import ChessKrvk
from torchhd.datasets.chess_krvkp import ChessKrvkp
from torchhd.datasets.congressional_voting import CongressionalVoting
from torchhd.datasets.conn_bench_sonar_mines_rocks import ConnBenchSonarMinesRocks
from torchhd.datasets.conn_bench_vowel_deterding import ConnBenchVowelDeterding
from torchhd.datasets.connect_4 import Connect4
from torchhd.datasets.contrac import Contrac
from torchhd.datasets.credit_approval import CreditApproval
from torchhd.datasets.cylinder_bands import CylinderBands
from torchhd.datasets.dermatology import Dermatology
from torchhd.datasets.echocardiogram import Echocardiogram
from torchhd.datasets.ecoli import Ecoli
from torchhd.datasets.energy_y1 import EnergyY1
from torchhd.datasets.energy_y2 import EnergyY2
from torchhd.datasets.fertility import Fertility
from torchhd.datasets.flags import Flags
from torchhd.datasets.glass import Glass
from torchhd.datasets.haberman_survival import HabermanSurvival
from torchhd.datasets.hayes_roth import HayesRoth
from torchhd.datasets.heart_cleveland import HeartCleveland
from torchhd.datasets.heart_hungarian import HeartHungarian


__all__ = [
Expand Down Expand Up @@ -63,4 +83,24 @@
"Cardiotocography3Clases",
"Cardiotocography10Clases",
"ChessKrvk",
"ChessKrvkp",
"CongressionalVoting",
"ConnBenchSonarMinesRocks",
"ConnBenchVowelDeterding",
"Connect4",
"Contrac",
"CreditApproval",
"CylinderBands",
"Dermatology",
"Echocardiogram",
"Ecoli",
"EnergyY1",
"EnergyY2",
"Fertility",
"Flags",
"Glass",
"HabermanSurvival",
"HayesRoth",
"HeartCleveland",
"HeartHungarian",
]
31 changes: 31 additions & 0 deletions torchhd/datasets/chess_krvkp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from typing import List
from torchhd.datasets import DatasetFourFold


class ChessKrvkp(DatasetFourFold):
    """`Chess (King-Rook vs. King-Pawn) <https://archive.ics.uci.edu/ml/datasets/Chess+(King-Rook+vs.+King-Pawn)>`_ dataset.

    Args:
        root (string): Root directory containing the files of the dataset.
        train (bool, optional): If True, returns the training (sub)set read from the file storing
            the training data, as further determined by ``fold`` and ``hyper_search``.
            If False, returns a held-out subset of the training data: the test indices of
            ``conxuntos.dat`` when ``hyper_search = True``, or the test indices of the selected fold
            in ``conxuntos_kfold.dat`` when ``hyper_search = False``. In the latter case an invalid
            fold number results in an error.
        fold (int, optional): Fold number to use. The default value of -1 returns all the training
            data from the corresponding file. Values between 0 and 3 select a fold in
            ``conxuntos_kfold.dat``; this is relevant only if ``hyper_search`` is False and
            ``0 <= fold <= 3``. Even rows (zero indexing) of ``conxuntos_kfold.dat`` hold the indices
            of train subsets while odd rows hold the indices of test subsets.
        hyper_search (bool, optional): If True, creates the dataset using the indices in
            ``conxuntos.dat``. This split is used for hyperparameter search. The first row holds the
            train indices (used if ``train = True``) and the second row holds the test indices
            (used if ``train = False``).
        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
            and returns a transformed version.
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If True, downloads the dataset from the internet and
            puts it in the root directory. If the dataset is already downloaded, it is not
            downloaded again.
    """

    # Identifier of this dataset.
    name = "chess-krvkp"
    # Human-readable class labels.
    classes: List[str] = ["White can win", "White cannot win"]
31 changes: 31 additions & 0 deletions torchhd/datasets/congressional_voting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from typing import List
from torchhd.datasets import DatasetFourFold


class CongressionalVoting(DatasetFourFold):
    """`Congressional Voting Records <https://archive.ics.uci.edu/ml/datasets/congressional+voting+records>`_ dataset.

    Args:
        root (string): Root directory containing the files of the dataset.
        train (bool, optional): If True, returns the training (sub)set read from the file storing
            the training data, as further determined by ``fold`` and ``hyper_search``.
            If False, returns a held-out subset of the training data: the test indices of
            ``conxuntos.dat`` when ``hyper_search = True``, or the test indices of the selected fold
            in ``conxuntos_kfold.dat`` when ``hyper_search = False``. In the latter case an invalid
            fold number results in an error.
        fold (int, optional): Fold number to use. The default value of -1 returns all the training
            data from the corresponding file. Values between 0 and 3 select a fold in
            ``conxuntos_kfold.dat``; this is relevant only if ``hyper_search`` is False and
            ``0 <= fold <= 3``. Even rows (zero indexing) of ``conxuntos_kfold.dat`` hold the indices
            of train subsets while odd rows hold the indices of test subsets.
        hyper_search (bool, optional): If True, creates the dataset using the indices in
            ``conxuntos.dat``. This split is used for hyperparameter search. The first row holds the
            train indices (used if ``train = True``) and the second row holds the test indices
            (used if ``train = False``).
        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
            and returns a transformed version.
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If True, downloads the dataset from the internet and
            puts it in the root directory. If the dataset is already downloaded, it is not
            downloaded again.
    """

    # Identifier of this dataset.
    name = "congressional-voting"
    # Human-readable class labels.
    classes: List[str] = ["Democrat", "Republican"]
31 changes: 31 additions & 0 deletions torchhd/datasets/conn_bench_sonar_mines_rocks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from typing import List
from torchhd.datasets import DatasetFourFold


class ConnBenchSonarMinesRocks(DatasetFourFold):
    """`Connectionist Bench (Sonar, Mines vs. Rocks) <https://archive.ics.uci.edu/ml/datasets/connectionist+bench+(sonar,+mines+vs.+rocks)>`_ dataset.

    Args:
        root (string): Root directory containing the files of the dataset.
        train (bool, optional): If True, returns the training (sub)set read from the file storing
            the training data, as further determined by ``fold`` and ``hyper_search``.
            If False, returns a held-out subset of the training data: the test indices of
            ``conxuntos.dat`` when ``hyper_search = True``, or the test indices of the selected fold
            in ``conxuntos_kfold.dat`` when ``hyper_search = False``. In the latter case an invalid
            fold number results in an error.
        fold (int, optional): Fold number to use. The default value of -1 returns all the training
            data from the corresponding file. Values between 0 and 3 select a fold in
            ``conxuntos_kfold.dat``; this is relevant only if ``hyper_search`` is False and
            ``0 <= fold <= 3``. Even rows (zero indexing) of ``conxuntos_kfold.dat`` hold the indices
            of train subsets while odd rows hold the indices of test subsets.
        hyper_search (bool, optional): If True, creates the dataset using the indices in
            ``conxuntos.dat``. This split is used for hyperparameter search. The first row holds the
            train indices (used if ``train = True``) and the second row holds the test indices
            (used if ``train = False``).
        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
            and returns a transformed version.
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If True, downloads the dataset from the internet and
            puts it in the root directory. If the dataset is already downloaded, it is not
            downloaded again.
    """

    # Identifier of this dataset.
    name = "conn-bench-sonar-mines-rocks"
    # Human-readable class labels.
    classes: List[str] = ["Mine", "Rock"]
36 changes: 36 additions & 0 deletions torchhd/datasets/conn_bench_vowel_deterding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from typing import List
from torchhd.datasets import DatasetTrainTest


class ConnBenchVowelDeterding(DatasetTrainTest):
    """`Connectionist Bench (Vowel Recognition - Deterding Data) <https://archive.ics.uci.edu/ml/datasets/Connectionist+Bench+(Vowel+Recognition+-+Deterding+Data)>`_ dataset.

    Args:
        root (string): Root directory containing the files of the dataset.
        train (bool, optional): If True, returns the training (sub)set read from the file storing
            the training data, as further determined by ``hyper_search``.
            If False, returns a subset of the training data when a hyperparameter search is
            performed (``hyper_search = True``) and the test set otherwise
            (``hyper_search = False``).
        hyper_search (bool, optional): If True, creates the dataset using the indices in
            ``conxuntos.dat``. This split is used for hyperparameter search. The first row holds the
            train indices (used if ``train = True``) and the second row holds the test indices
            (used if ``train = False``).
        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
            and returns a transformed version.
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If True, downloads the dataset from the internet and
            puts it in the root directory. If the dataset is already downloaded, it is not
            downloaded again.
    """

    # Identifier of this dataset.
    name = "conn-bench-vowel-deterding"
    # Eleven numeric class labels, "0" through "10", stored as strings.
    classes: List[str] = [str(label) for label in range(11)]
32 changes: 32 additions & 0 deletions torchhd/datasets/connect_4.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from typing import List
from torchhd.datasets import DatasetFourFold


class Connect4(DatasetFourFold):
    """`Connect-4 <https://archive.ics.uci.edu/ml/datasets/connect-4>`_ dataset.

    Args:
        root (string): Root directory containing the files of the dataset.
        train (bool, optional): If True, returns the training (sub)set read from the file storing
            the training data, as further determined by ``fold`` and ``hyper_search``.
            If False, returns a held-out subset of the training data: the test indices of
            ``conxuntos.dat`` when ``hyper_search = True``, or the test indices of the selected fold
            in ``conxuntos_kfold.dat`` when ``hyper_search = False``. In the latter case an invalid
            fold number results in an error.
        fold (int, optional): Fold number to use. The default value of -1 returns all the training
            data from the corresponding file. Values between 0 and 3 select a fold in
            ``conxuntos_kfold.dat``; this is relevant only if ``hyper_search`` is False and
            ``0 <= fold <= 3``. Even rows (zero indexing) of ``conxuntos_kfold.dat`` hold the indices
            of train subsets while odd rows hold the indices of test subsets.
        hyper_search (bool, optional): If True, creates the dataset using the indices in
            ``conxuntos.dat``. This split is used for hyperparameter search. The first row holds the
            train indices (used if ``train = True``) and the second row holds the test indices
            (used if ``train = False``).
        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
            and returns a transformed version.
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If True, downloads the dataset from the internet and
            puts it in the root directory. If the dataset is already downloaded, it is not
            downloaded again.
    """

    # Identifier of this dataset.
    name = "connect-4"
    # Human-readable class labels.
    classes: List[str] = ["draw", "loss", "win"]
32 changes: 32 additions & 0 deletions torchhd/datasets/contrac.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from typing import List
from torchhd.datasets import DatasetFourFold


class Contrac(DatasetFourFold):
    """`Contraceptive Method Choice <https://archive.ics.uci.edu/ml/datasets/Contraceptive+Method+Choice>`_ dataset.

    Args:
        root (string): Root directory containing the files of the dataset.
        train (bool, optional): If True, returns the training (sub)set read from the file storing
            the training data, as further determined by ``fold`` and ``hyper_search``.
            If False, returns a held-out subset of the training data: the test indices of
            ``conxuntos.dat`` when ``hyper_search = True``, or the test indices of the selected fold
            in ``conxuntos_kfold.dat`` when ``hyper_search = False``. In the latter case an invalid
            fold number results in an error.
        fold (int, optional): Fold number to use. The default value of -1 returns all the training
            data from the corresponding file. Values between 0 and 3 select a fold in
            ``conxuntos_kfold.dat``; this is relevant only if ``hyper_search`` is False and
            ``0 <= fold <= 3``. Even rows (zero indexing) of ``conxuntos_kfold.dat`` hold the indices
            of train subsets while odd rows hold the indices of test subsets.
        hyper_search (bool, optional): If True, creates the dataset using the indices in
            ``conxuntos.dat``. This split is used for hyperparameter search. The first row holds the
            train indices (used if ``train = True``) and the second row holds the test indices
            (used if ``train = False``).
        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
            and returns a transformed version.
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If True, downloads the dataset from the internet and
            puts it in the root directory. If the dataset is already downloaded, it is not
            downloaded again.
    """

    # Identifier of this dataset.
    name = "contrac"
    # Human-readable class labels.
    classes: List[str] = ["No-use", "Long-term", "Short-term"]
31 changes: 31 additions & 0 deletions torchhd/datasets/credit_approval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from typing import List
from torchhd.datasets import DatasetFourFold


class CreditApproval(DatasetFourFold):
    """`Credit Approval <https://archive.ics.uci.edu/ml/datasets/credit+approval>`_ dataset.

    Args:
        root (string): Root directory containing the files of the dataset.
        train (bool, optional): If True, returns the training (sub)set read from the file storing
            the training data, as further determined by ``fold`` and ``hyper_search``.
            If False, returns a held-out subset of the training data: the test indices of
            ``conxuntos.dat`` when ``hyper_search = True``, or the test indices of the selected fold
            in ``conxuntos_kfold.dat`` when ``hyper_search = False``. In the latter case an invalid
            fold number results in an error.
        fold (int, optional): Fold number to use. The default value of -1 returns all the training
            data from the corresponding file. Values between 0 and 3 select a fold in
            ``conxuntos_kfold.dat``; this is relevant only if ``hyper_search`` is False and
            ``0 <= fold <= 3``. Even rows (zero indexing) of ``conxuntos_kfold.dat`` hold the indices
            of train subsets while odd rows hold the indices of test subsets.
        hyper_search (bool, optional): If True, creates the dataset using the indices in
            ``conxuntos.dat``. This split is used for hyperparameter search. The first row holds the
            train indices (used if ``train = True``) and the second row holds the test indices
            (used if ``train = False``).
        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
            and returns a transformed version.
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If True, downloads the dataset from the internet and
            puts it in the root directory. If the dataset is already downloaded, it is not
            downloaded again.
    """

    # Identifier of this dataset.
    name = "credit-approval"
    # Class labels as listed by the dataset definition.
    classes: List[str] = ["+", "-"]
31 changes: 31 additions & 0 deletions torchhd/datasets/cylinder_bands.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from typing import List
from torchhd.datasets import DatasetFourFold


class CylinderBands(DatasetFourFold):
    """`Cylinder Bands <https://archive.ics.uci.edu/ml/datasets/Cylinder+Bands>`_ dataset.

    Args:
        root (string): Root directory containing the files of the dataset.
        train (bool, optional): If True, returns the training (sub)set read from the file storing
            the training data, as further determined by ``fold`` and ``hyper_search``.
            If False, returns a held-out subset of the training data: the test indices of
            ``conxuntos.dat`` when ``hyper_search = True``, or the test indices of the selected fold
            in ``conxuntos_kfold.dat`` when ``hyper_search = False``. In the latter case an invalid
            fold number results in an error.
        fold (int, optional): Fold number to use. The default value of -1 returns all the training
            data from the corresponding file. Values between 0 and 3 select a fold in
            ``conxuntos_kfold.dat``; this is relevant only if ``hyper_search`` is False and
            ``0 <= fold <= 3``. Even rows (zero indexing) of ``conxuntos_kfold.dat`` hold the indices
            of train subsets while odd rows hold the indices of test subsets.
        hyper_search (bool, optional): If True, creates the dataset using the indices in
            ``conxuntos.dat``. This split is used for hyperparameter search. The first row holds the
            train indices (used if ``train = True``) and the second row holds the test indices
            (used if ``train = False``).
        transform (callable, optional): A function/transform that takes in a torch.FloatTensor
            and returns a transformed version.
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If True, downloads the dataset from the internet and
            puts it in the root directory. If the dataset is already downloaded, it is not
            downloaded again.
    """

    # Identifier of this dataset.
    name = "cylinder-bands"
    # Human-readable class labels.
    classes: List[str] = ["band", "noband"]
Loading