Added datasets 61-90 from DWHC #106

Merged: 2 commits, Jan 6, 2023
31 changes: 30 additions & 1 deletion docs/datasets.rst
@@ -79,7 +79,36 @@ The Torchhd library provides many popular built-in datasets to work with.
Mammographic
Miniboone
MolecBiolPromoter

MolecBiolSplice
Monks1
Monks2
Monks3
Mushroom
Musk1
Musk2
Nursery
OocytesMerlucciusNucleus4d
OocytesMerlucciusStates2f
OocytesTrisopterusNucleus2f
OocytesTrisopterusStates5b
Optical
Ozone
PageBlocks
Parkinsons
Pendigits
Pima
PittsburgBridgesMaterial
PittsburgBridgesRelL
PittsburgBridgesSpan
PittsburgBridgesTOrD
PittsburgBridgesType
Planning
PlantMargin
PlantShape
PlantTexture
PostOperative
PrimaryTumor
Ringnorm

Base classes
------------------------
61 changes: 60 additions & 1 deletion torchhd/datasets/__init__.py
@@ -69,7 +69,36 @@
from torchhd.datasets.mammographic import Mammographic
from torchhd.datasets.miniboone import Miniboone
from torchhd.datasets.molec_biol_promoter import MolecBiolPromoter

from torchhd.datasets.molec_biol_splice import MolecBiolSplice
from torchhd.datasets.monks_1 import Monks1
from torchhd.datasets.monks_2 import Monks2
from torchhd.datasets.monks_3 import Monks3
from torchhd.datasets.mushroom import Mushroom
from torchhd.datasets.musk_1 import Musk1
from torchhd.datasets.musk_2 import Musk2
from torchhd.datasets.nursery import Nursery
from torchhd.datasets.oocytes_merluccius_nucleus_4d import OocytesMerlucciusNucleus4d
from torchhd.datasets.oocytes_merluccius_states_2f import OocytesMerlucciusStates2f
from torchhd.datasets.oocytes_trisopterus_nucleus_2f import OocytesTrisopterusNucleus2f
from torchhd.datasets.oocytes_trisopterus_states_5b import OocytesTrisopterusStates5b
from torchhd.datasets.optical import Optical
from torchhd.datasets.ozone import Ozone
from torchhd.datasets.page_blocks import PageBlocks
from torchhd.datasets.parkinsons import Parkinsons
from torchhd.datasets.pendigits import Pendigits
from torchhd.datasets.pima import Pima
from torchhd.datasets.pittsburg_bridges_material import PittsburgBridgesMaterial
from torchhd.datasets.pittsburg_bridges_rel_l import PittsburgBridgesRelL
from torchhd.datasets.pittsburg_bridges_span import PittsburgBridgesSpan
from torchhd.datasets.pittsburg_bridges_t_or_d import PittsburgBridgesTOrD
from torchhd.datasets.pittsburg_bridges_type import PittsburgBridgesType
from torchhd.datasets.planning import Planning
from torchhd.datasets.plant_margin import PlantMargin
from torchhd.datasets.plant_shape import PlantShape
from torchhd.datasets.plant_texture import PlantTexture
from torchhd.datasets.post_operative import PostOperative
from torchhd.datasets.primary_tumor import PrimaryTumor
from torchhd.datasets.ringnorm import Ringnorm

__all__ = [
"BeijingAirQuality",
@@ -143,4 +172,34 @@
"Mammographic",
"Miniboone",
"MolecBiolPromoter",
"MolecBiolSplice",
"Monks1",
"Monks2",
"Monks3",
"Mushroom",
"Musk1",
"Musk2",
"Nursery",
"OocytesMerlucciusNucleus4d",
"OocytesMerlucciusStates2f",
"OocytesTrisopterusNucleus2f",
"OocytesTrisopterusStates5b",
"Optical",
"Ozone",
"PageBlocks",
"Parkinsons",
"Pendigits",
"Pima",
"PittsburgBridgesMaterial",
"PittsburgBridgesRelL",
"PittsburgBridgesSpan",
"PittsburgBridgesTOrD",
"PittsburgBridgesType",
"Planning",
"PlantMargin",
"PlantShape",
"PlantTexture",
"PostOperative",
"PrimaryTumor",
"Ringnorm",
]
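Because each new module is re-exported here and listed in __all__, the classes can be imported directly from torchhd.datasets. A minimal sketch of that usage; only the import path and class names come from this diff, the rest is illustrative:

# The new classes are importable from the package namespace added above.
from torchhd.datasets import MolecBiolSplice, Monks1, Nursery

# __all__ also exposes them to star-imports and documentation tooling.
import torchhd.datasets as hd_datasets
print("Ringnorm" in hd_datasets.__all__)  # True with this change applied
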
32 changes: 32 additions & 0 deletions torchhd/datasets/molec_biol_splice.py
@@ -0,0 +1,32 @@
from typing import List
from torchhd.datasets import DatasetFourFold


class MolecBiolSplice(DatasetFourFold):
"""`Molecular Biology (Splice-junction Gene Sequences) <https://archive.ics.uci.edu/ml/datasets/Molecular+Biology+(Splice-junction+Gene+Sequences)>`_ dataset.

Args:
root (string): Root directory containing the files of the dataset.
train (bool, optional): If True, returns the training (sub)set from the file storing training data, as further determined by the ``fold`` and ``hyper_search`` arguments.
Otherwise returns a subset of the training dataset: if hyperparameter search is performed (``hyper_search = True``), the subset used for that search is returned;
if not (``hyper_search = False``), the subset specified in ``conxuntos_kfold.dat`` is returned when the fold number is valid, otherwise an error is issued.
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if ``hyper_search`` is set to False and ``0 <= fold <= 3``.
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
hyper_search (bool, optional): If True, creates the dataset using the indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
while the second row corresponds to test indices (used if ``train = False``).
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
and returns a transformed version.
target_transform (callable, optional): A function/transform that takes in the
target and transforms it.
download (bool, optional): If True, downloads the dataset from the internet and
puts it in the root directory. If the dataset is already downloaded, it is not
downloaded again.
"""

name = "molec-biol-splice"
classes: List[str] = [
"EI",
"IE",
"N",
]
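A hedged usage sketch for the DatasetFourFold-based classes, based only on the constructor arguments documented above; the assumption that indexing returns a (features, target) pair is not confirmed by this diff:

from torchhd.datasets import MolecBiolSplice

# fold=-1 (the default) loads all training data from the file.
train_ds = MolecBiolSplice(root="data", train=True, download=True)

# Folds 0..3 select the splits defined in conxuntos_kfold.dat.
fold0_train = MolecBiolSplice(root="data", train=True, fold=0)
fold0_test = MolecBiolSplice(root="data", train=False, fold=0)

# Assumption: items are (features, target) pairs, as in torchvision-style datasets.
features, target = train_ds[0]
print(features.shape, target)
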
27 changes: 27 additions & 0 deletions torchhd/datasets/monks_1.py
@@ -0,0 +1,27 @@
from typing import List
from torchhd.datasets import DatasetTrainTest


class Monks1(DatasetTrainTest):
"""`MONK's Problems <https://archive.ics.uci.edu/ml/datasets/MONK%27s+Problems>`_ dataset.

Args:
root (string): Root directory containing the files of the dataset.
train (bool, optional): If True, returns the training (sub)set from the file storing training data, as further determined by the ``hyper_search`` argument.
Otherwise, if hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns the test set.
hyper_search (bool, optional): If True, creates the dataset using the indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
while the second row corresponds to test indices (used if ``train = False``).
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
and returns a transformed version.
target_transform (callable, optional): A function/transform that takes in the
target and transforms it.
download (bool, optional): If True, downloads the dataset from the internet and
puts it in the root directory. If the dataset is already downloaded, it is not
downloaded again.
"""

name = "monks-1"
classes: List[str] = [
"0",
"1",
]
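For the DatasetTrainTest-based MONK's problems, the split is controlled by train and hyper_search as described in the docstring; a sketch under those assumptions:

from torchhd.datasets import Monks1

# Regular train/test files.
train_ds = Monks1(root="data", train=True, download=True)
test_ds = Monks1(root="data", train=False)

# Hyperparameter-search split from conxuntos.dat:
# row 0 supplies the train indices, row 1 the test indices.
search_train = Monks1(root="data", train=True, hyper_search=True)
search_val = Monks1(root="data", train=False, hyper_search=True)
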
27 changes: 27 additions & 0 deletions torchhd/datasets/monks_2.py
@@ -0,0 +1,27 @@
from typing import List
from torchhd.datasets import DatasetTrainTest


class Monks2(DatasetTrainTest):
"""`MONK's Problems <https://archive.ics.uci.edu/ml/datasets/MONK%27s+Problems>`_ dataset.

Args:
root (string): Root directory containing the files of the dataset.
train (bool, optional): If True, returns the training (sub)set from the file storing training data, as further determined by the ``hyper_search`` argument.
Otherwise, if hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns the test set.
hyper_search (bool, optional): If True, creates the dataset using the indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
while the second row corresponds to test indices (used if ``train = False``).
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
and returns a transformed version.
target_transform (callable, optional): A function/transform that takes in the
target and transforms it.
download (bool, optional): If True, downloads the dataset from the internet and
puts it in the root directory. If the dataset is already downloaded, it is not
downloaded again.
"""

name = "monks-2"
classes: List[str] = [
"0",
"1",
]
27 changes: 27 additions & 0 deletions torchhd/datasets/monks_3.py
@@ -0,0 +1,27 @@
from typing import List
from torchhd.datasets import DatasetTrainTest


class Monks3(DatasetTrainTest):
"""`MONK's Problems <https://archive.ics.uci.edu/ml/datasets/MONK%27s+Problems>`_ dataset.

Args:
root (string): Root directory containing the files of the dataset.
train (bool, optional): If True, returns the training (sub)set from the file storing training data, as further determined by the ``hyper_search`` argument.
Otherwise, if hyperparameter search is performed (``hyper_search = True``), returns a subset of the training dataset; if not (``hyper_search = False``), returns the test set.
hyper_search (bool, optional): If True, creates the dataset using the indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
while the second row corresponds to test indices (used if ``train = False``).
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
and returns a transformed version.
target_transform (callable, optional): A function/transform that takes in the
target and transforms it.
download (bool, optional): If True, downloads the dataset from the internet and
puts it in the root directory. If the dataset is already downloaded, it is not
downloaded again.
"""

name = "monks-3"
classes: List[str] = [
"0",
"1",
]
31 changes: 31 additions & 0 deletions torchhd/datasets/mushroom.py
@@ -0,0 +1,31 @@
from typing import List
from torchhd.datasets import DatasetFourFold


class Mushroom(DatasetFourFold):
"""`Mushroom <https://archive.ics.uci.edu/ml/datasets/mushroom>`_ dataset.

Args:
root (string): Root directory containing the files of the dataset.
train (bool, optional): If True, returns the training (sub)set from the file storing training data, as further determined by the ``fold`` and ``hyper_search`` arguments.
Otherwise returns a subset of the training dataset: if hyperparameter search is performed (``hyper_search = True``), the subset used for that search is returned;
if not (``hyper_search = False``), the subset specified in ``conxuntos_kfold.dat`` is returned when the fold number is valid, otherwise an error is issued.
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if ``hyper_search`` is set to False and ``0 <= fold <= 3``.
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
hyper_search (bool, optional): If True, creates the dataset using the indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
while the second row corresponds to test indices (used if ``train = False``).
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
and returns a transformed version.
target_transform (callable, optional): A function/transform that takes in the
target and transforms it.
download (bool, optional): If True, downloads the dataset from the internet and
puts it in the root directory. If the dataset is already downloaded, it is not
downloaded again.
"""

name = "mushroom"
classes: List[str] = [
"edible",
"poisonous",
]
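The fold argument also supports a small cross-validation loop over the four predefined folds. A sketch assuming the datasets implement __len__ (reasonable for PyTorch-style datasets, but not shown in this diff):

from torchhd.datasets import Mushroom

# Iterate over the four folds defined in conxuntos_kfold.dat.
for fold in range(4):
    fold_train = Mushroom(root="data", train=True, fold=fold, download=True)
    fold_test = Mushroom(root="data", train=False, fold=fold)
    print(f"fold {fold}: {len(fold_train)} train / {len(fold_test)} test samples")
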
31 changes: 31 additions & 0 deletions torchhd/datasets/musk_1.py
@@ -0,0 +1,31 @@
from typing import List
from torchhd.datasets import DatasetFourFold


class Musk1(DatasetFourFold):
"""`Musk (Version 1) <https://archive.ics.uci.edu/ml/datasets/Musk+(Version+1)>`_ dataset.

Args:
root (string): Root directory containing the files of the dataset.
train (bool, optional): If True, returns the training (sub)set from the file storing training data, as further determined by the ``fold`` and ``hyper_search`` arguments.
Otherwise returns a subset of the training dataset: if hyperparameter search is performed (``hyper_search = True``), the subset used for that search is returned;
if not (``hyper_search = False``), the subset specified in ``conxuntos_kfold.dat`` is returned when the fold number is valid, otherwise an error is issued.
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if ``hyper_search`` is set to False and ``0 <= fold <= 3``.
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
hyper_search (bool, optional): If True, creates the dataset using the indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
while the second row corresponds to test indices (used if ``train = False``).
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
and returns a transformed version.
target_transform (callable, optional): A function/transform that takes in the
target and transforms it.
download (bool, optional): If True, downloads the dataset from the internet and
puts it in the root directory. If the dataset is already downloaded, it is not
downloaded again.
"""

name = "musk-1"
classes: List[str] = [
"non-musk",
"musk",
]
31 changes: 31 additions & 0 deletions torchhd/datasets/musk_2.py
@@ -0,0 +1,31 @@
from typing import List
from torchhd.datasets import DatasetFourFold


class Musk2(DatasetFourFold):
"""`Musk (Version 2) <https://archive.ics.uci.edu/ml/datasets/Musk+(Version+2)>`_ dataset.

Args:
root (string): Root directory containing the files of the dataset.
train (bool, optional): If True, returns the training (sub)set from the file storing training data, as further determined by the ``fold`` and ``hyper_search`` arguments.
Otherwise returns a subset of the training dataset: if hyperparameter search is performed (``hyper_search = True``), the subset used for that search is returned;
if not (``hyper_search = False``), the subset specified in ``conxuntos_kfold.dat`` is returned when the fold number is valid, otherwise an error is issued.
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if ``hyper_search`` is set to False and ``0 <= fold <= 3``.
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
hyper_search (bool, optional): If True, creates the dataset using the indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
while the second row corresponds to test indices (used if ``train = False``).
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
and returns a transformed version.
target_transform (callable, optional): A function/transform that takes in the
target and transforms it.
download (bool, optional): If True, downloads the dataset from the internet and
puts it in the root directory. If the dataset is already downloaded, it is not
downloaded again.
"""

name = "musk-2"
classes: List[str] = [
"non-musk",
"musk",
]
34 changes: 34 additions & 0 deletions torchhd/datasets/nursery.py
@@ -0,0 +1,34 @@
from typing import List
from torchhd.datasets import DatasetFourFold


class Nursery(DatasetFourFold):
"""`Nursery <https://archive.ics.uci.edu/ml/datasets/nursery>`_ dataset.

Args:
root (string): Root directory containing the files of the dataset.
train (bool, optional): If True, returns the training (sub)set from the file storing training data, as further determined by the ``fold`` and ``hyper_search`` arguments.
Otherwise returns a subset of the training dataset: if hyperparameter search is performed (``hyper_search = True``), the subset used for that search is returned;
if not (``hyper_search = False``), the subset specified in ``conxuntos_kfold.dat`` is returned when the fold number is valid, otherwise an error is issued.
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if ``hyper_search`` is set to False and ``0 <= fold <= 3``.
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
hyper_search (bool, optional): If True, creates the dataset using the indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
while the second row corresponds to test indices (used if ``train = False``).
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
and returns a transformed version.
target_transform (callable, optional): A function/transform that takes in the
target and transforms it.
download (bool, optional): If True, downloads the dataset from the internet and
puts it in the root directory. If the dataset is already downloaded, it is not
downloaded again.
"""

name = "nursery"
classes: List[str] = [
"not_recom",
"recommend",
"very_recom",
"priority",
"spec_prior",
]
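Finally, the transform hook documented above composes naturally with a standard DataLoader. A sketch; the per-sample normalization and the (sample, target) item format are illustrative assumptions:

import torch
from torch.utils.data import DataLoader
from torchhd.datasets import Nursery

# Per the docstring, transform receives a torch.FloatTensor for each sample.
def normalize(x: torch.Tensor) -> torch.Tensor:
    return (x - x.mean()) / (x.std() + 1e-8)

ds = Nursery(root="data", train=True, download=True, transform=normalize)
loader = DataLoader(ds, batch_size=32, shuffle=True)  # assumes (sample, target) items

# The class attribute lists the five target labels in index order.
print(Nursery.classes)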