
Commit ce53153

sibre28, megalinter-bot, and lars-reimann authored
feat: add fnn functionality (#529)
Closes #522

### Summary of Changes

Added model and layer classes to support Feed Forward Neural Network functionality. Added an into_dataloader() function in the TaggedTable class that creates a PyTorch DataLoader for any given tagged table, which can then be used to train an FNN. Tests for those modules are probably incomplete, as I wasn't really sure what to test for; feedback on that would be welcome.

---------

Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com>
Co-authored-by: Lars Reimann <mail@larsreimann.com>
1 parent ca23f0f commit ce53153
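The only public names confirmed by this excerpt are `FNNLayer`, `ClassificationNeuralNetwork`, and `RegressionNeuralNetwork` (see the `src/safeds/ml/nn/__init__.py` diff below); `_model.py` itself is not shown, so the sketch below is only a rough guess at how the pieces are meant to be combined, with the model constructor and `fit`/`predict` calls marked as assumptions.

```python
# Hypothetical usage sketch only. The class names come from the __init__.py
# diff in this commit; the model constructor and the fit()/predict() calls are
# assumptions, because _model.py is not part of this excerpt.
from safeds.data.tabular.containers import Table
from safeds.ml.nn import FNNLayer, RegressionNeuralNetwork

training_data = Table.from_dict(
    {"a": [1.0, 2.0, 3.0, 4.0], "b": [2.0, 4.0, 6.0, 8.0]},
).tag_columns(target_name="b")  # existing TaggedTable factory on Table

# Stack two fully connected layers; the last layer's output_size is the
# number of values predicted per row.
model = RegressionNeuralNetwork([FNNLayer(output_size=8), FNNLayer(output_size=1)])
fitted = model.fit(training_data)                     # assumed signature
prediction = fitted.predict(training_data.features)   # assumed signature
```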

File tree: 9 files changed, +792 −1 lines


src/safeds/data/tabular/containers/_table.py

Lines changed: 40 additions & 0 deletions
@@ -13,9 +13,11 @@
 import openpyxl
 import pandas as pd
 import seaborn as sns
+import torch
 import xxhash
 from pandas import DataFrame
 from scipy import stats
+from torch.utils.data import DataLoader, Dataset
 
 from safeds.data.image.containers import Image
 from safeds.data.tabular.typing import ColumnType, Schema
@@ -2392,3 +2394,41 @@ def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True): #
     data_copy = self._data.reset_index(drop=True)
     data_copy.columns = self.column_names
     return data_copy.__dataframe__(nan_as_null, allow_copy)
+
+    def _into_dataloader(self, batch_size: int) -> DataLoader:
+        """
+        Return a Dataloader for the data stored in this table, used for training neural networks.
+
+        The original table is not modified.
+
+        Parameters
+        ----------
+        batch_size
+            The size of data batches that should be loaded at one time.
+
+        Returns
+        -------
+        result :
+            The DataLoader.
+
+        """
+        features = self.to_rows()
+        all_rows = []
+        for row in features:
+            new_item = []
+            for column_name in row:
+                new_item.append(row.get_value(column_name))
+            all_rows.append(new_item.copy())
+        return DataLoader(dataset=_CustomDataset(np.array(all_rows)), batch_size=batch_size)
+
+
+class _CustomDataset(Dataset):
+    def __init__(self, features: np.array):
+        self.X = torch.from_numpy(features.astype(np.float32))
+        self.len = self.X.shape[0]
+
+    def __getitem__(self, item: int) -> torch.Tensor:
+        return self.X[item]
+
+    def __len__(self) -> int:
+        return self.len
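To make the new private helper concrete: each batch yielded by the `DataLoader` built by `Table._into_dataloader` is a single float32 tensor of feature rows, because `_CustomDataset.__getitem__` returns only `self.X[item]`. A minimal sketch, with made-up table contents:

```python
# Minimal sketch of what Table._into_dataloader yields. The method is private
# and intended for the neural-network code added in this commit; the table
# below is only illustrative.
from safeds.data.tabular.containers import Table

table = Table.from_dict({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
dataloader = table._into_dataloader(batch_size=2)

for batch in dataloader:
    # One float32 tensor per batch: (batch_size, number_of_columns)
    print(batch.shape)  # torch.Size([2, 2]) for this 4-row, 2-column table
```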

src/safeds/data/tabular/containers/_tagged_table.py

Lines changed: 45 additions & 1 deletion
@@ -3,7 +3,10 @@
 import sys
 from typing import TYPE_CHECKING
 
+import numpy as np
+import torch
 import xxhash
+from torch.utils.data import DataLoader, Dataset
 
 from safeds.data.tabular.containers import Column, Row, Table
 from safeds.exceptions import (
@@ -190,7 +193,9 @@ def __hash__(self) -> int:
         hash : int
             The hash value.
         """
-        return xxhash.xxh3_64(hash(self.target).to_bytes(8) + hash(self.features).to_bytes(8) + Table.__hash__(self).to_bytes(8)).intdigest()
+        return xxhash.xxh3_64(
+            hash(self.target).to_bytes(8) + hash(self.features).to_bytes(8) + Table.__hash__(self).to_bytes(8),
+        ).intdigest()
 
     def __sizeof__(self) -> int:
         """
@@ -871,3 +876,42 @@ def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> Tagg
             target_name=self.target.name,
             feature_names=self.features.column_names,
         )
+
+    def _into_dataloader(self, batch_size: int) -> DataLoader:
+        """
+        Return a Dataloader for the data stored in this table, used for training neural networks.
+
+        The original table is not modified.
+
+        Parameters
+        ----------
+        batch_size
+            The size of data batches that should be loaded at one time.
+
+        Returns
+        -------
+        result :
+            The DataLoader.
+
+        """
+        feature_rows = self.features.to_rows()
+        all_rows = []
+        for row in feature_rows:
+            new_item = []
+            for column_name in row:
+                new_item.append(row.get_value(column_name))
+            all_rows.append(new_item.copy())
+        return DataLoader(dataset=_CustomDataset(np.array(all_rows), np.array(self.target)), batch_size=batch_size)
+
+
+class _CustomDataset(Dataset):
+    def __init__(self, features: np.array, target: np.array):
+        self.X = torch.from_numpy(features.astype(np.float32))
+        self.Y = torch.from_numpy(target.astype(np.float32))
+        self.len = self.X.shape[0]
+
+    def __getitem__(self, item: int) -> tuple[torch.Tensor, torch.Tensor]:
+        return self.X[item], self.Y[item].unsqueeze(-1)
+
+    def __len__(self) -> int:
+        return self.len
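The `TaggedTable` variant differs in that it converts only the feature columns and pairs each row with the target column, and its `__getitem__` adds a trailing dimension to the target via `unsqueeze(-1)`. A minimal sketch, assuming the existing `tag_columns` call with only a `target_name` treats the remaining columns as features:

```python
# Minimal sketch of what TaggedTable._into_dataloader yields; the data is
# made up for illustration.
from safeds.data.tabular.containers import Table

tagged = Table.from_dict(
    {"x1": [1, 2, 3, 4], "x2": [5, 6, 7, 8], "y": [1, 0, 1, 0]},
).tag_columns(target_name="y")

dataloader = tagged._into_dataloader(batch_size=2)
for features, target in dataloader:
    # Feature batch: (batch_size, n_features); target batch: (batch_size, 1)
    print(features.shape, target.shape)  # torch.Size([2, 2]) torch.Size([2, 1])
```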

src/safeds/ml/nn/__init__.py

Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,10 @@
+"""Classes for classification tasks."""
+
+from ._fnn_layer import FNNLayer
+from ._model import ClassificationNeuralNetwork, RegressionNeuralNetwork
+
+__all__ = [
+    "FNNLayer",
+    "ClassificationNeuralNetwork",
+    "RegressionNeuralNetwork",
+]

src/safeds/ml/nn/_fnn_layer.py

Lines changed: 67 additions & 0 deletions
@@ -0,0 +1,67 @@
+from torch import nn
+
+from safeds.exceptions import ClosedBound, OutOfBoundsError
+
+
+class _InternalLayer(nn.Module):
+    def __init__(self, input_size: int, output_size: int, activation_function: str):
+        super().__init__()
+        self._layer = nn.Linear(input_size, output_size)
+        match activation_function:
+            case "sigmoid":
+                self._fn = nn.Sigmoid()
+            case "relu":
+                self._fn = nn.ReLU()
+            case "softmax":
+                self._fn = nn.Softmax()
+            case _:
+                raise ValueError("Unknown Activation Function: " + activation_function)
+
+    def forward(self, x: float) -> float:
+        return self._fn(self._layer(x))
+
+
+class FNNLayer:
+    def __init__(self, output_size: int, input_size: int | None = None):
+        """
+        Create a FNN Layer.
+
+        Parameters
+        ----------
+        input_size
+            The number of neurons in the previous layer
+        output_size
+            The number of neurons in this layer
+
+        Raises
+        ------
+        ValueError
+            If input_size < 1
+            If output_size < 1
+
+        """
+        if input_size is not None:
+            self._set_input_size(input_size=input_size)
+        if output_size < 1:
+            raise OutOfBoundsError(actual=output_size, name="output_size", lower_bound=ClosedBound(1))
+        self._output_size = output_size
+
+    def _get_internal_layer(self, activation_function: str) -> _InternalLayer:
+        return _InternalLayer(self._input_size, self._output_size, activation_function)
+
+    @property
+    def output_size(self) -> int:
+        """
+        Get the output_size of this layer.
+
+        Returns
+        -------
+        result :
+            The Number of Neurons in this layer.
+        """
+        return self._output_size
+
+    def _set_input_size(self, input_size: int) -> None:
+        if input_size < 1:
+            raise OutOfBoundsError(actual=input_size, name="input_size", lower_bound=ClosedBound(1))
+        self._input_size = input_size
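`FNNLayer` itself is only a validated description of a layer; the actual `torch.nn.Module` is created later via the private `_get_internal_layer`, presumably by the model classes that are not part of this excerpt. A small sketch that calls the private helpers directly, just to show how the pieces connect:

```python
# Sketch of the layer plumbing above. Calling the private helpers directly is
# for illustration only; in the commit they are presumably driven by the
# model classes in _model.py, which are not shown here.
import torch
from safeds.exceptions import OutOfBoundsError
from safeds.ml.nn import FNNLayer

layer = FNNLayer(output_size=3, input_size=4)
internal = layer._get_internal_layer(activation_function="relu")  # a torch nn.Module
out = internal(torch.rand(2, 4))  # Linear(4 -> 3) followed by ReLU
print(out.shape)  # torch.Size([2, 3])

try:
    FNNLayer(output_size=0)
except OutOfBoundsError as error:
    print(error)  # raised because output_size is below the closed lower bound of 1
```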

0 commit comments
