Skip to content

Commit 981263b

Browse files
Added datasets 61-90 from DWHC (#106)
* Added datasets 61-90 from DWHC * [github-action] formatting fixes Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent b4dec2c commit 981263b

32 files changed

+1340
-2
lines changed

docs/datasets.rst

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,36 @@ The Torchhd library provides many popular built-in datasets to work with.
7979
Mammographic
8080
Miniboone
8181
MolecBiolPromoter
82-
82+
MolecBiolSplice
83+
Monks1
84+
Monks2
85+
Monks3
86+
Mushroom
87+
Musk1
88+
Musk2
89+
Nursery
90+
OocytesMerlucciusNucleus4d
91+
OocytesMerlucciusStates2f
92+
OocytesTrisopterusNucleus2f
93+
OocytesTrisopterusStates5b
94+
Optical
95+
Ozone
96+
PageBlocks
97+
Parkinsons
98+
Pendigits
99+
Pima
100+
PittsburgBridgesMaterial
101+
PittsburgBridgesRelL
102+
PittsburgBridgesSpan
103+
PittsburgBridgesTOrD
104+
PittsburgBridgesType
105+
Planning
106+
PlantMargin
107+
PlantShape
108+
PlantTexture
109+
PostOperative
110+
PrimaryTumor
111+
Ringnorm
83112

84113
Base classes
85114
------------------------

torchhd/datasets/__init__.py

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,36 @@
6969
from torchhd.datasets.mammographic import Mammographic
7070
from torchhd.datasets.miniboone import Miniboone
7171
from torchhd.datasets.molec_biol_promoter import MolecBiolPromoter
72-
72+
from torchhd.datasets.molec_biol_splice import MolecBiolSplice
73+
from torchhd.datasets.monks_1 import Monks1
74+
from torchhd.datasets.monks_2 import Monks2
75+
from torchhd.datasets.monks_3 import Monks3
76+
from torchhd.datasets.mushroom import Mushroom
77+
from torchhd.datasets.musk_1 import Musk1
78+
from torchhd.datasets.musk_2 import Musk2
79+
from torchhd.datasets.nursery import Nursery
80+
from torchhd.datasets.oocytes_merluccius_nucleus_4d import OocytesMerlucciusNucleus4d
81+
from torchhd.datasets.oocytes_merluccius_states_2f import OocytesMerlucciusStates2f
82+
from torchhd.datasets.oocytes_trisopterus_nucleus_2f import OocytesTrisopterusNucleus2f
83+
from torchhd.datasets.oocytes_trisopterus_states_5b import OocytesTrisopterusStates5b
84+
from torchhd.datasets.optical import Optical
85+
from torchhd.datasets.ozone import Ozone
86+
from torchhd.datasets.page_blocks import PageBlocks
87+
from torchhd.datasets.parkinsons import Parkinsons
88+
from torchhd.datasets.pendigits import Pendigits
89+
from torchhd.datasets.pima import Pima
90+
from torchhd.datasets.pittsburg_bridges_material import PittsburgBridgesMaterial
91+
from torchhd.datasets.pittsburg_bridges_rel_l import PittsburgBridgesRelL
92+
from torchhd.datasets.pittsburg_bridges_span import PittsburgBridgesSpan
93+
from torchhd.datasets.pittsburg_bridges_t_or_d import PittsburgBridgesTOrD
94+
from torchhd.datasets.pittsburg_bridges_type import PittsburgBridgesType
95+
from torchhd.datasets.planning import Planning
96+
from torchhd.datasets.plant_margin import PlantMargin
97+
from torchhd.datasets.plant_shape import PlantShape
98+
from torchhd.datasets.plant_texture import PlantTexture
99+
from torchhd.datasets.post_operative import PostOperative
100+
from torchhd.datasets.primary_tumor import PrimaryTumor
101+
from torchhd.datasets.ringnorm import Ringnorm
73102

74103
__all__ = [
75104
"BeijingAirQuality",
@@ -143,4 +172,34 @@
143172
"Mammographic",
144173
"Miniboone",
145174
"MolecBiolPromoter",
175+
"MolecBiolSplice",
176+
"Monks1",
177+
"Monks2",
178+
"Monks3",
179+
"Mushroom",
180+
"Musk1",
181+
"Musk2",
182+
"Nursery",
183+
"OocytesMerlucciusNucleus4d",
184+
"OocytesMerlucciusStates2f",
185+
"OocytesTrisopterusNucleus2f",
186+
"OocytesTrisopterusStates5b",
187+
"Optical",
188+
"Ozone",
189+
"PageBlocks",
190+
"Parkinsons",
191+
"Pendigits",
192+
"Pima",
193+
"PittsburgBridgesMaterial",
194+
"PittsburgBridgesRelL",
195+
"PittsburgBridgesSpan",
196+
"PittsburgBridgesTOrD",
197+
"PittsburgBridgesType",
198+
"Planning",
199+
"PlantMargin",
200+
"PlantShape",
201+
"PlantTexture",
202+
"PostOperative",
203+
"PrimaryTumor",
204+
"Ringnorm",
146205
]

torchhd/datasets/molec_biol_splice.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
from typing import List
2+
from torchhd.datasets import DatasetFourFold
3+
4+
5+
class MolecBiolSplice(DatasetFourFold):
6+
"""`Molecular Biology (Splice-junction Gene Sequences) <https://archive.ics.uci.edu/ml/datasets/Molecular+Biology+(Splice-junction+Gene+Sequences)>`_ dataset.
7+
8+
Args:
9+
root (string): Root directory containing the files of the dataset.
10+
train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables.
11+
Otherwise returns a subset of the train dataset if hyperparameter search is performed (``hyper_search = True``); if not (``hyper_search = False``), returns a subset of the training dataset
12+
as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error.
13+
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
14+
Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
15+
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
16+
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
17+
while the second row corresponds to test indices (used if ``train = False``).
18+
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
19+
and returns a transformed version.
20+
target_transform (callable, optional): A function/transform that takes in the
21+
target and transforms it.
22+
download (bool, optional): If True, downloads the dataset from the internet and
23+
puts it in root directory. If dataset is already downloaded, it is not
24+
downloaded again.
25+
"""
26+
27+
name = "molec-biol-splice"
28+
classes: List[str] = [
29+
"EI",
30+
"IE",
31+
"N",
32+
]

torchhd/datasets/monks_1.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from typing import List
2+
from torchhd.datasets import DatasetTrainTest
3+
4+
5+
class Monks1(DatasetTrainTest):
6+
"""`MONK's Problems <https://archive.ics.uci.edu/ml/datasets/MONK%27s+Problems>`_ dataset.
7+
8+
Args:
9+
root (string): Root directory containing the files of the dataset.
10+
train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by hyper_search variable.
11+
Otherwise returns a subset of the train dataset if hyperparameter search is performed (``hyper_search = True``); if not (``hyper_search = False``), returns the test set.
12+
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
13+
while the second row corresponds to test indices (used if ``train = False``).
14+
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
15+
and returns a transformed version.
16+
target_transform (callable, optional): A function/transform that takes in the
17+
target and transforms it.
18+
download (bool, optional): If True, downloads the dataset from the internet and
19+
puts it in root directory. If dataset is already downloaded, it is not
20+
downloaded again.
21+
"""
22+
23+
name = "monks-1"
24+
classes: List[str] = [
25+
"0",
26+
"1",
27+
]

torchhd/datasets/monks_2.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from typing import List
2+
from torchhd.datasets import DatasetTrainTest
3+
4+
5+
class Monks2(DatasetTrainTest):
6+
"""`MONK's Problems <https://archive.ics.uci.edu/ml/datasets/MONK%27s+Problems>`_ dataset.
7+
8+
Args:
9+
root (string): Root directory containing the files of the dataset.
10+
train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by hyper_search variable.
11+
Otherwise returns a subset of the train dataset if hyperparameter search is performed (``hyper_search = True``); if not (``hyper_search = False``), returns the test set.
12+
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
13+
while the second row corresponds to test indices (used if ``train = False``).
14+
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
15+
and returns a transformed version.
16+
target_transform (callable, optional): A function/transform that takes in the
17+
target and transforms it.
18+
download (bool, optional): If True, downloads the dataset from the internet and
19+
puts it in root directory. If dataset is already downloaded, it is not
20+
downloaded again.
21+
"""
22+
23+
name = "monks-2"
24+
classes: List[str] = [
25+
"0",
26+
"1",
27+
]

torchhd/datasets/monks_3.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from typing import List
2+
from torchhd.datasets import DatasetTrainTest
3+
4+
5+
class Monks3(DatasetTrainTest):
6+
"""`MONK's Problems <https://archive.ics.uci.edu/ml/datasets/MONK%27s+Problems>`_ dataset.
7+
8+
Args:
9+
root (string): Root directory containing the files of the dataset.
10+
train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by hyper_search variable.
11+
Otherwise returns a subset of the train dataset if hyperparameter search is performed (``hyper_search = True``); if not (``hyper_search = False``), returns the test set.
12+
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
13+
while the second row corresponds to test indices (used if ``train = False``).
14+
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
15+
and returns a transformed version.
16+
target_transform (callable, optional): A function/transform that takes in the
17+
target and transforms it.
18+
download (bool, optional): If True, downloads the dataset from the internet and
19+
puts it in root directory. If dataset is already downloaded, it is not
20+
downloaded again.
21+
"""
22+
23+
name = "monks-3"
24+
classes: List[str] = [
25+
"0",
26+
"1",
27+
]

torchhd/datasets/mushroom.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from typing import List
2+
from torchhd.datasets import DatasetFourFold
3+
4+
5+
class Mushroom(DatasetFourFold):
6+
"""`Mushroom <https://archive.ics.uci.edu/ml/datasets/mushroom>`_ dataset.
7+
8+
Args:
9+
root (string): Root directory containing the files of the dataset.
10+
train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables.
11+
Otherwise returns a subset of the train dataset if hyperparameter search is performed (``hyper_search = True``); if not (``hyper_search = False``), returns a subset of the training dataset
12+
as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error.
13+
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
14+
Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
15+
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
16+
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
17+
while the second row corresponds to test indices (used if ``train = False``).
18+
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
19+
and returns a transformed version.
20+
target_transform (callable, optional): A function/transform that takes in the
21+
target and transforms it.
22+
download (bool, optional): If True, downloads the dataset from the internet and
23+
puts it in root directory. If dataset is already downloaded, it is not
24+
downloaded again.
25+
"""
26+
27+
name = "mushroom"
28+
classes: List[str] = [
29+
"edible",
30+
"poisonous",
31+
]

torchhd/datasets/musk_1.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from typing import List
2+
from torchhd.datasets import DatasetFourFold
3+
4+
5+
class Musk1(DatasetFourFold):
6+
"""`Musk (Version 1) <https://archive.ics.uci.edu/ml/datasets/Musk+(Version+1)>`_ dataset.
7+
8+
Args:
9+
root (string): Root directory containing the files of the dataset.
10+
train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables.
11+
Otherwise returns a subset of the train dataset if hyperparameter search is performed (``hyper_search = True``); if not (``hyper_search = False``), returns a subset of the training dataset
12+
as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error.
13+
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
14+
Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
15+
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
16+
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
17+
while the second row corresponds to test indices (used if ``train = False``).
18+
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
19+
and returns a transformed version.
20+
target_transform (callable, optional): A function/transform that takes in the
21+
target and transforms it.
22+
download (bool, optional): If True, downloads the dataset from the internet and
23+
puts it in root directory. If dataset is already downloaded, it is not
24+
downloaded again.
25+
"""
26+
27+
name = "musk-1"
28+
classes: List[str] = [
29+
"non-musk",
30+
"musk",
31+
]

torchhd/datasets/musk_2.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from typing import List
2+
from torchhd.datasets import DatasetFourFold
3+
4+
5+
class Musk2(DatasetFourFold):
6+
"""`Musk (Version 2) <https://archive.ics.uci.edu/ml/datasets/Musk+(Version+2)>`_ dataset.
7+
8+
Args:
9+
root (string): Root directory containing the files of the dataset.
10+
train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables.
11+
Otherwise returns a subset of the train dataset if hyperparameter search is performed (``hyper_search = True``); if not (``hyper_search = False``), returns a subset of the training dataset
12+
as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error.
13+
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
14+
Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
15+
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
16+
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
17+
while the second row corresponds to test indices (used if ``train = False``).
18+
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
19+
and returns a transformed version.
20+
target_transform (callable, optional): A function/transform that takes in the
21+
target and transforms it.
22+
download (bool, optional): If True, downloads the dataset from the internet and
23+
puts it in root directory. If dataset is already downloaded, it is not
24+
downloaded again.
25+
"""
26+
27+
name = "musk-2"
28+
classes: List[str] = [
29+
"non-musk",
30+
"musk",
31+
]

torchhd/datasets/nursery.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
from typing import List
2+
from torchhd.datasets import DatasetFourFold
3+
4+
5+
class Nursery(DatasetFourFold):
6+
"""`Nursery <https://archive.ics.uci.edu/ml/datasets/nursery>`_ dataset.
7+
8+
Args:
9+
root (string): Root directory containing the files of the dataset.
10+
train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables.
11+
Otherwise returns a subset of the train dataset if hyperparameter search is performed (``hyper_search = True``); if not (``hyper_search = False``), returns a subset of the training dataset
12+
as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error.
13+
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
14+
Values between 0 and 3 specify which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
15+
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
16+
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
17+
while the second row corresponds to test indices (used if ``train = False``).
18+
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
19+
and returns a transformed version.
20+
target_transform (callable, optional): A function/transform that takes in the
21+
target and transforms it.
22+
download (bool, optional): If True, downloads the dataset from the internet and
23+
puts it in root directory. If dataset is already downloaded, it is not
24+
downloaded again.
25+
"""
26+
27+
name = "nursery"
28+
classes: List[str] = [
29+
"not_recom",
30+
"recommend",
31+
"very_recom",
32+
"priority",
33+
"spec_prior",
34+
]

0 commit comments

Comments
 (0)