forked from cgpotts/cs224u
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtorch_shallow_neural_classifier.py
185 lines (153 loc) · 5.57 KB
/
torch_shallow_neural_classifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
import numpy as np
import torch
import torch.nn as nn
import torch.utils.data
from torch_model_base import TorchModelBase
from utils import progress_bar
# Module metadata: author credit and the course/term tag for this version.
__author__ = "Christopher Potts"
__version__ = "CS224u, Stanford, Spring 2020"
class TorchShallowNeuralClassifier(TorchModelBase):
    """Fit a model

    h = f(xW1 + b1)
    y = softmax(hW2 + b2)

    with a cross entropy loss.

    Parameters
    ----------
    hidden_dim : int
        Dimensionality of the hidden layer.
    hidden_activation : vectorized activation function
        The non-linear activation function used by the network for the
        hidden layer. Default `nn.Tanh()`.
    batch_size : int
        Number of examples per mini-batch; passed to the training
        `DataLoader` in `fit`.
    max_iter : int
        Maximum number of training epochs.
    eta : float
        Learning rate.
    optimizer : PyTorch optimizer
        Default is `torch.optim.Adam`.
    l2_strength : float
        L2 regularization strength, passed to the optimizer as
        `weight_decay`. Default 0 is no regularization.
    device : 'cpu' or 'cuda'
        The default is to use 'cuda' iff available
    warm_start : bool
        If True, calling `fit` will resume training with previously
        defined trainable parameters. If False, calling `fit` will
        reinitialize all trainable parameters. Default: False.

    Notes
    -----
    All keyword arguments are forwarded unchanged to `TorchModelBase`.
    This class assumes the base class stores them as attributes of the
    same name and also initializes `self.errors` (a list) and
    `self.dev_predictions` (a dict) -- the base class is not defined in
    this module, so confirm against `torch_model_base`.

    """
    def __init__(self, **kwargs):
        super(TorchShallowNeuralClassifier, self).__init__(**kwargs)

    def define_graph(self):
        """Build the network: linear -> hidden activation -> linear.

        Reads `self.input_dim` and `self.n_classes_`, which `fit` sets
        from the training data, so this must be called after they are
        defined.

        Returns
        -------
        nn.Sequential
            Maps inputs of width `self.input_dim` to `self.n_classes_`
            unnormalized scores (no softmax is applied here).

        """
        return nn.Sequential(
            nn.Linear(self.input_dim, self.hidden_dim),
            self.hidden_activation,
            nn.Linear(self.hidden_dim, self.n_classes_))

    def fit(self, X, y, **kwargs):
        """Standard `fit` method.

        Parameters
        ----------
        X : np.array
        y : array-like
        kwargs : dict
            For passing other parameters. If 'X_dev' is included,
            then performance is monitored every 10 epochs; use
            `dev_iter` to control this number.

        Returns
        -------
        self

        """
        # Incremental performance. `dev_iter` is always bound so that the
        # monitoring check below never depends on a conditionally
        # defined local:
        X_dev = kwargs.get('X_dev')
        dev_iter = kwargs.get('dev_iter', 10)
        # Data prep. The labels are mapped to contiguous integer indices
        # in sorted order; `predict` inverts this via `self.classes_`:
        X = np.array(X)
        self.input_dim = X.shape[1]
        self.classes_ = sorted(set(y))
        self.n_classes_ = len(self.classes_)
        class2index = dict(zip(self.classes_, range(self.n_classes_)))
        y = [class2index[label] for label in y]
        # Dataset:
        X = torch.FloatTensor(X)
        y = torch.tensor(y)
        dataset = torch.utils.data.TensorDataset(X, y)
        # Page-locked host memory only helps host-to-GPU copies, so it
        # is requested only when the target device is CUDA. This avoids
        # a pointless request (and a warning in recent PyTorch) on
        # CPU-only machines:
        use_pinned = torch.device(self.device).type == 'cuda'
        dataloader = torch.utils.data.DataLoader(
            dataset, batch_size=self.batch_size, shuffle=True,
            pin_memory=use_pinned)
        # Graph. The model and optimizer are rebuilt unless a warm start
        # was requested and a model from an earlier `fit` exists:
        if not self.warm_start or not hasattr(self, "model"):
            self.model = self.define_graph()
            self.opt = self.optimizer(
                self.model.parameters(),
                lr=self.eta,
                weight_decay=self.l2_strength)
        self.model.to(self.device)
        self.model.train()
        # Optimization:
        loss = nn.CrossEntropyLoss()
        # Train:
        for iteration in range(1, self.max_iter+1):
            # Sum (not mean) of the per-batch losses for this epoch:
            epoch_error = 0.0
            for X_batch, y_batch in dataloader:
                X_batch = X_batch.to(self.device, non_blocking=True)
                y_batch = y_batch.to(self.device, non_blocking=True)
                batch_preds = self.model(X_batch)
                err = loss(batch_preds, y_batch)
                epoch_error += err.item()
                self.opt.zero_grad()
                err.backward()
                self.opt.step()
            # Incremental predictions where possible. `iteration` starts
            # at 1, so no separate positivity check is needed:
            if X_dev is not None and iteration % dev_iter == 0:
                self.dev_predictions[iteration] = self.predict(X_dev)
                # `predict` switches the model to eval mode; switch back
                # before the next epoch:
                self.model.train()
            self.errors.append(epoch_error)
            progress_bar(
                "Finished epoch {} of {}; error is {}".format(
                    iteration, self.max_iter, epoch_error))
        return self

    def predict_proba(self, X):
        """Predicted probabilities for the examples in `X`.

        Puts the model in eval mode and leaves it there; `fit` restores
        train mode itself after its own calls.

        Parameters
        ----------
        X : np.array

        Returns
        -------
        np.array with shape (len(X), self.n_classes_)

        """
        self.model.eval()
        with torch.no_grad():
            self.model.to(self.device)
            X = torch.FloatTensor(X).to(self.device)
            preds = self.model(X)
            # The network outputs unnormalized scores; softmax over the
            # class dimension turns them into probabilities:
            return torch.softmax(preds, dim=1).cpu().numpy()

    def predict(self, X):
        """Predicted labels for the examples in `X`. These are converted
        from the integers that PyTorch needs back to their original
        values in `self.classes_`.

        Parameters
        ----------
        X : np.array

        Returns
        -------
        list of length len(X)

        """
        probs = self.predict_proba(X)
        return [self.classes_[i] for i in probs.argmax(axis=1)]
def simple_example():
    """Train and evaluate the classifier on scikit-learn's digits data.

    Holds out 33% of the examples as a test split (fixed seed), fits a
    `TorchShallowNeuralClassifier` with its default settings on the
    rest, and prints the model followed by a classification report for
    the held-out split.

    Returns
    -------
    The value of `accuracy_score` on the held-out split.

    """
    from sklearn.datasets import load_digits
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import classification_report, accuracy_score

    dataset = load_digits()
    features, labels = dataset.data, dataset.target

    # A fixed `random_state` keeps the split the same across runs.
    train_X, test_X, train_y, test_y = train_test_split(
        features, labels, test_size=0.33, random_state=42)

    classifier = TorchShallowNeuralClassifier()
    print(classifier)
    classifier.fit(train_X, train_y)
    predicted = classifier.predict(test_X)

    print("\nClassification report:")
    print(classification_report(test_y, predicted))

    return accuracy_score(test_y, predicted)
# Run the digits demo only when executed as a script, not on import.
if __name__ == '__main__':
    simple_example()