Wouldn't it feel amazing to be the creator of a deep learning library like TensorFlow or PyTorch?
This is a toy deep learning framework, written from scratch for learning purposes, that shows how automatic differentiation works and how deep learning frameworks such as PyTorch and TensorFlow work internally. If you want to learn how modern deep learning frameworks work, I hope this repository helps you.
To get started, see the example folder and the tests folder to understand the functionality.
The only dependency is NumPy.
Tensor is the main auto-differentiable multidimensional variable. It supports most of the frequently used functions, such as:
- add
- sub
- mul
- div
- matmul
- pow
- sum
- slice
- transpose
- exp
- log
- max
- he_initialization
- SGD
- softmax
- tanh
- CrossEntropyLoss
- Linear
- Flatten
- Conv2d
import oxynet as onet
import numpy as np
from oxynet.modules import Module, Conv2d, Linear, Flatten, CrossEntropyLoss
from oxynet.optims import SGD
from oxynet.modules import tanh
import gzip
from oxynet import Tensor
# Directory that holds the gzipped MNIST archives (a relative path).
root_dir = ".datasets/MNIST/"

# Archive file names for the training and test splits (images, then labels).
train_data = "train-images-idx3-ubyte.gz"
train_label = "train-labels-idx1-ubyte.gz"
test_data = "t10k-images-idx3-ubyte.gz"
test_label = "t10k-labels-idx1-ubyte.gz"
def _load_mnist(path, header_size):
    """Read a gzipped file under ``root_dir`` and return its payload bytes.

    Args:
        path: File name of the archive; it is appended to ``root_dir``.
        header_size: Number of leading bytes to skip before the payload.

    Returns:
        A one-dimensional ``np.uint8`` array with every byte after the header.
    """
    archive_path = root_dir + path
    with gzip.open(archive_path, "rb") as archive:
        raw_bytes = archive.read()
    payload = np.frombuffer(raw_bytes, dtype=np.uint8, offset=header_size)
    return np.asarray(payload, dtype=np.uint8)
# Number of bytes occupied by the first 1000 images of 28 * 28 bytes each.
data_size = 1000 * 28 * 28

# Images: skip the 16-byte header, reshape to (N, 1, 28, 28), convert the
# uint8 bytes to float and divide by 255.
x_train = (
    _load_mnist(train_data, header_size=16)[:data_size]
    .reshape((-1, 1, 28, 28))
    .astype(float)
    / 255
)
x_test = (
    _load_mnist(test_data, header_size=16)
    .reshape((-1, 1, 28, 28))
    .astype(float)
    / 255
)

# Labels: skip the 8-byte header. Only the first 1000 training labels are
# kept (flat); the test labels are reshaped into a column.
y_train = _load_mnist(train_label, header_size=8)[:1000]
y_test = _load_mnist(test_label, header_size=8).reshape((-1, 1))

print(y_train.shape)
def one_hot(Y, num_classes):
    """Convert integer class labels into one-hot rows.

    Args:
        Y: Integer class labels, either flat with shape ``(N,)`` or as a
            column with shape ``(N, 1)``. Any label layout is flattened first.
        num_classes: Width of each one-hot row.

    Returns:
        A float array of shape ``(N, num_classes)`` holding a single 1 per
        row, at the column given by that row's label.
    """
    # Flatten first: with (N, 1) labels the fancy index below would broadcast
    # against np.arange(N) and set every labelled column in every row.
    labels = np.asarray(Y).reshape(-1)
    batch_size = labels.shape[0]
    Y_tilde = np.zeros((batch_size, num_classes))
    if batch_size == 0:
        # An empty list becomes a float array, which is not a valid index.
        return Y_tilde
    Y_tilde[np.arange(batch_size), labels] = 1
    return Y_tilde
def accuracy(pred, actual):
    """Return the fraction of samples whose predicted class matches the target.

    Args:
        pred: Per-class scores, one row per sample.
        actual: Targets in the same layout as ``pred`` (for example one-hot
            rows); the class of each row is taken as its argmax.

    Returns:
        The number of matching rows divided by ``len(pred)``, or ``0.0`` when
        ``pred`` is empty.
    """
    num_samples = len(pred)
    if num_samples == 0:
        # Avoid dividing by zero (nan plus a RuntimeWarning) on an empty batch.
        return 0.0
    predicted_classes = np.argmax(pred, axis=-1)
    actual_classes = np.argmax(actual, axis=-1)
    num_correct = (predicted_classes == actual_classes).astype(int).sum()
    return num_correct / num_samples
# Model Definition
class Model(Module):
    """Small classifier: one Conv2d layer, a Flatten, and three Linear layers."""

    def __init__(self, in_channel, out_channel):
        """Build the layers.

        Args:
            in_channel: Passed to the convolution as ``in_channels``.
            out_channel: Output size of the last linear layer.
        """
        # NOTE(review): 12 * 12 * 4 presumably matches 28x28 inputs convolved
        # with a 5x5 kernel at stride 2 into 4 channels — confirm against Conv2d.
        conv_features = 12 * 12 * 4
        self.conv1 = Conv2d(
            in_channels=in_channel,
            out_channels=4,
            kernel_size=(5, 5),
            stride=2,
        )
        self.fc1 = Linear(conv_features, 64)
        self.fc2 = Linear(64, 32)
        self.fc3 = Linear(32, out_channel)
        self.flat = Flatten()

    def forward(self, input):
        """Run the network and return the output of the last linear layer.

        tanh is applied after the convolution and after the first two linear
        layers; the output of ``fc3`` is returned without an activation.
        """
        features = self.flat(tanh(self.conv1(input)))
        hidden = tanh(self.fc1(features))
        hidden = tanh(self.fc2(hidden))
        return self.fc3(hidden)
# Create the model, optimizer and loss
model = Model(1, 10)
optimizer = SGD(lr=0.0001)
criterion = CrossEntropyLoss()
batch_size = 64
out_class = 10

# Train the model
# Start index of every mini-batch; the order is reshuffled at each epoch.
starts = np.arange(0, x_train.shape[0], batch_size)
for epoch in range(500):
    epoch_loss = 0.0
    epoch_accuracy = 0.0
    np.random.shuffle(starts)

    for start in starts:
        end = start + batch_size
        batch = slice(start, end)

        model.zero_grad()
        inputs = Tensor(x_train[batch], requires_grad=True)
        actual = Tensor(one_hot(y_train[batch], out_class), requires_grad=True)

        # Forward pass, loss, backward pass, then one optimizer step.
        predicted = model(inputs)
        loss = criterion(predicted, actual)
        loss.backward()
        optimizer.step(model)

        epoch_loss += loss.data
        epoch_accuracy += accuracy(predicted.data, actual.data)

    # Average the accumulated metrics over the number of mini-batches.
    num_batches = len(starts)
    epoch_loss = epoch_loss / num_batches
    epoch_accuracy = epoch_accuracy / num_batches

    # Report every 10th epoch.
    if epoch % 10 == 0:
        print("Epoch : ", epoch, " Loss: ", epoch_loss, " Acc: ", epoch_accuracy)
# A Tensor can wrap a scalar, a flat list, or a nested list.
t1 = Tensor(10, requires_grad=True)
t2 = Tensor([1, 2, 3], requires_grad=True)
t3 = Tensor(
    [[1, 2, 3], [4, 5, 6]],
    requires_grad=True,
)
# Addition with broadcasting: a (2, 3) tensor plus a (1, 3) tensor.
t1 = Tensor([[1, 2, 3], [4, 5, 6]], requires_grad=True)
t2 = Tensor([[7, 8, 9]], requires_grad=True)

t3 = t1 + t2
assert t3.data.tolist() == [[8, 10, 12], [11, 13, 15]]

# Back-propagate a gradient of ones with the shape of the result.
t3.backward(Tensor([[1, 1, 1], [1, 1, 1]]))

assert t1.grad.data.tolist() == [[1, 1, 1], [1, 1, 1]]
# t2 was broadcast over both rows, so it receives the sum over the rows.
assert t2.grad.data.tolist() == [[2, 2, 2]]
# Element-wise multiplication with broadcasting: (2, 3) times (1, 3).
t1 = Tensor([[1, 2, 3], [4, 5, 6]], requires_grad=True)
t2 = Tensor([[7, 8, 9]], requires_grad=True)

t3 = t1 * t2
assert t3.data.tolist() == [[7, 16, 27], [28, 40, 54]]

# Back-propagate a gradient of ones with the shape of the result.
t3.backward(Tensor([[1, 1, 1], [1, 1, 1]]))

# Each entry of t1 receives the matching entry of t2.
assert t1.grad.data.tolist() == [[7, 8, 9], [7, 8, 9]]
# t2 receives the column sums of t1, because it was broadcast over the rows.
assert t2.grad.data.tolist() == [[5, 7, 9]]
# Matrix product of a (3, 2) tensor and a (2, 1) tensor.
t1 = Tensor([[1, 2], [3, 4], [5, 6]], requires_grad=True)
t2 = Tensor([[10], [20]], requires_grad=True)

t3 = t1 @ t2
assert t3.data.tolist() == [[50], [110], [170]]

# Upstream gradient with the (3, 1) shape of the result.
grad = Tensor([[-1], [-2], [-3]])
t3.backward(grad)

# For t3 = t1 @ t2: d(t1) = grad @ t2.T and d(t2) = t1.T @ grad.
np.testing.assert_array_equal(t1.grad.data, grad.data @ t2.data.T)
np.testing.assert_array_equal(t2.grad.data, t1.data.T @ grad.data)
# Scalar division: t3 = t2 / t1 with t1 = 10 and t2 = 20.
t1 = Tensor(10, requires_grad=True)
t2 = Tensor(20, requires_grad=True)

t3 = t2 / t1
assert t3.data == 2.

t3.backward()

# d(t2 / t1) / d(t1) = -t2 / t1**2
assert t1.grad.data == 20 * (-1. / 10**2)
# d(t2 / t1) / d(t2) = 1 / t1
assert t2.grad.data == 1. / 10
# Sum: every input element receives the upstream gradient unchanged.
t1 = Tensor([1, 2, 3], requires_grad=True)

t2 = t1.sum()
t2.backward(Tensor(3))

assert t1.grad.data.tolist() == [3, 3, 3]
# Slicing: take rows 2..4 and columns 5..9 of a random (10, 10) tensor.
data = np.random.randn(10, 10)
t1 = Tensor(data, requires_grad=True)

t2 = t1[2:5, 5:]
assert t2.shape == (3, 5)

t2.backward(Tensor(1))

# The gradient has the shape of the full source tensor, not of the slice.
assert t1.grad.shape == (10, 10)
# Exponential: the derivative of exp(x) is exp(x) itself.
t1 = onet.Tensor([1, 2, 3], requires_grad=True)

t2 = onet.exp(t1)
assert t2.data.tolist() == np.exp([1, 2, 3]).tolist()

t2.backward(onet.Tensor(1))

assert t1.grad.data.tolist() == np.exp([1, 2, 3]).tolist()
# Natural logarithm: the derivative of log(x) is 1 / x.
t1 = onet.Tensor([1, 2, 3], requires_grad=True)

t2 = onet.log(t1)
assert t2.data.tolist() == np.log([1, 2, 3]).tolist()

t2.backward(onet.Tensor([10, 10, 12]))

# Upstream gradient divided by the input: 10 / 1, 10 / 2, 12 / 3.
assert t1.grad.data.tolist() == [10, 5, 4]
# Max over all elements, keeping the reduced dimensions.
t1 = onet.Tensor([[2, 4, 8, 10], [3, 15, 4, 5]], requires_grad=True)

t2 = onet.max(t1, keepdims=True)
assert t2.data == [[15]]

t2.backward(onet.Tensor([[20]]))

# Only the position of the maximum (row 1, column 1) receives the gradient.
outdata = np.zeros((2, 4))
outdata[1, 1] = 20
np.testing.assert_array_almost_equal(t1.grad.data, outdata)