
Commit c6957b7

SINGA-386 Implement RNN operation for autograd
- fix bugs in the C++ parts; the code now builds without errors.
1 parent 33ddc2d commit c6957b7

3 files changed (+266, -126 lines)

python/singa/autograd.py

Lines changed: 67 additions & 43 deletions
@@ -347,19 +347,6 @@ def add_bias(x, b, axis=0):
     return AddBias(axis)(x, b)[0]


-class Add(Operation):
-
-    def forward(self, a, b):
-        return singa.__add__(a, b)
-
-    def backward(self, dy):
-        return dy, dy
-
-
-def add(a, b):
-    return Add()(a, b)[0]
-
-
 class SoftMax(Operation):
     '''
     Apply SoftMax for each row of the Tensor or each column of the Tensor
@@ -469,24 +456,22 @@ def cross_entropy(y, t):

 class SoftMaxCrossEntropy(Operation):

-    def __init__(self, t):
-        self.t = t.data
-
-    def forward(self, x):
+    def forward(self, x, t):
         self.p = singa.SoftMax(x)
+        self.t = t
         loss = CTensor((1,), self.p.device())
-        ret = singa.CrossEntropyFwd(self.p, self.t)
+        ret = singa.CrossEntropyFwd(self.p, t)
         loss.SetFloatValue(singa.SumAsFloat(ret) / x.shape()[0])
         return loss

     def backward(self, dy=1.0):
         dx = singa.SoftmaxCrossEntropyBwd(self.p, self.t)
-        return singa.DivFloat(dx, float(self.p.shape()[0]))
+        return singa.DivFloat(dx, float(self.p.shape()[0])), None


 def softmax_cross_entropy(x, t):
     # x is the logits and t is the ground truth; both are 2D.
-    return SoftMaxCrossEntropy(t)(x)[0]
+    return SoftMaxCrossEntropy()(x, t)[0]


 def ctensor2numpy(x):
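
For context, a minimal usage sketch of the new call form (a hedged example, not part of the commit; device.create_cuda_gpu, tensor.Tensor, gaussian and set_value are standard SINGA APIs, and the example assumes a CUDA build):

from singa import autograd, tensor, device

dev = device.create_cuda_gpu()      # assumes SINGA is built with CUDA
x = tensor.Tensor((4, 10), dev)     # logits, 2D
t = tensor.Tensor((4, 10), dev)     # one-hot targets, same shape
x.gaussian(0.0, 1.0)
t.set_value(0.0)

# The target is now a forward input instead of a constructor argument,
# so backward() returns (dx, None) rather than a single gradient.
loss = autograd.softmax_cross_entropy(x, t)
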
@@ -587,12 +572,12 @@ def backward(self, dy):
         return tuple(dxs)


-def cat(xs, axis=0):
+def concat(xs, axis=0):
     # xs is a tuple of multiple Tensors
     return Concat(axis)(*xs)[0]


-class _Conv2d(Operation):
+class _Conv2D(Operation):

     def __init__(self, handle):
         self.handle = handle
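
The cat wrapper is renamed to concat with the same semantics; a small hedged sketch of the call, using the same assumed device setup as above:

from singa import autograd, tensor, device

dev = device.create_cuda_gpu()        # assumes a CUDA device, as above
a = tensor.Tensor((2, 3), dev)
b = tensor.Tensor((2, 3), dev)
a.set_value(1.0)
b.set_value(2.0)

# Formerly autograd.cat((a, b), axis=1); only the Python wrapper name changed.
z = autograd.concat((a, b), axis=1)   # resulting shape (2, 6)
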
@@ -642,10 +627,10 @@ def backward(self, dy):


 def conv2d(handle, x, W, b):
-    return _Conv2d(handle)(x, W, b)[0]
+    return _Conv2D(handle)(x, W, b)[0]


-class Conv2d(Layer):
+class Conv2D(Layer):

     def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                  padding=0, dilation=1, groups=1, bias=True, **kwargs):
@@ -708,6 +693,10 @@ def __init__(self, in_channels, out_channels, kernel_size, stride=1,

     def __call__(self, x):
         assert x.shape[1] == self.in_channels, 'in_channels dismatched'
+        assert (x.shape[2] + 2 * self.padding[0] - self.kernel_size[0]
+                ) % self.stride[0] == 0, 'invalid padding or strides.'
+        assert (x.shape[3] + 2 * self.padding[1] - self.kernel_size[1]
+                ) % self.stride[1] == 0, 'invalid padding or stride.'

         self.device_check(x, self.W, self.b)

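
As a quick check of what the new asserts enforce: with a 32-pixel input dimension, kernel 3, padding 0 and stride 2, (32 + 2*0 - 3) % 2 = 1, so the call is rejected, while kernel 2 with stride 2 gives (32 - 2) % 2 = 0 and passes. A standalone sketch of the same arithmetic (conv_dims_ok is a hypothetical helper, not part of the diff):

def conv_dims_ok(h, w, kernel, stride, padding):
    # Mirrors the checks added to Conv2D.__call__: each padded spatial
    # dimension must be covered by a whole number of strides.
    return ((h + 2 * padding[0] - kernel[0]) % stride[0] == 0 and
            (w + 2 * padding[1] - kernel[1]) % stride[1] == 0)

assert conv_dims_ok(32, 32, (2, 2), (2, 2), (0, 0))       # (32 - 2) % 2 == 0
assert not conv_dims_ok(32, 32, (3, 3), (2, 2), (0, 0))   # (32 - 3) % 2 == 1
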
@@ -731,7 +720,7 @@ def __call__(self, x):
         return y


-class BatchNorm2d(Layer):
+class BatchNorm(Layer):

     def __init__(self, num_features, momentum=0.9):
         self.channels = num_features
@@ -771,12 +760,12 @@ def __call__(self, x):
                     self.momentum, x.data)
             self.handle.device_id = x.device.id()

-        y = batchnorm_2d(self.handle, x, self.scale, self.bias,
+        y = batchnorm(self.handle, x, self.scale, self.bias,
                          self.running_mean, self.running_var)
         return y


-class _BatchNorm2d(Operation):
+class _BatchNorm(Operation):

     def __init__(self, handle, running_mean, running_var):
         self.running_mean = running_mean.data
@@ -796,7 +785,7 @@ def forward(self, x, scale, bias):
         if self.handle.device_id == -1:
             raise NotImplementedError
         else:
-            y = singa.GpuBatchNormForwardInference(
+            y, _, _ = singa.GpuBatchNormForwardInference(
                 self.handle, x, scale, bias, self.running_mean, self.running_var)
         return y

@@ -816,11 +805,11 @@ def backward(self, dy):
         return dx, ds, db


-def batchnorm_2d(handle, x, scale, bias, running_mean, running_var):
-    return _BatchNorm2d(handle, running_mean, running_var)(x, scale, bias)[0]
+def batchnorm(handle, x, scale, bias, running_mean, running_var):
+    return _BatchNorm(handle, running_mean, running_var)(x, scale, bias)[0]


-class _Pooling2d(Operation):
+class _Pooling2D(Operation):

     def __init__(self, handle):
         self.handle = handle
@@ -846,10 +835,10 @@ def backward(self, dy):


 def pooling_2d(handle, x):
-    return _Pooling2d(handle)(x)[0]
+    return _Pooling2D(handle)(x)[0]


-class Pooling2d(Layer):
+class Pooling2D(Layer):

     def __init__(self, kernel_size, stride=None, padding=0, is_max=True):
         if isinstance(kernel_size, int):
@@ -896,43 +885,78 @@ def __call__(self, x):
         else:
             if not hasattr(self, 'handle'):
                 self.handle = singa.CudnnPoolingHandle(x.data, self.kernel_size, self.stride,
-                                                       self.padding, self.is_max)
+                                                       self.padding, self.is_max)  # False for nan_prop
             elif x.shape[0] != self.handle.batchsize or out_shape_h != self.handle.pooled_height or \
                     out_shape_w != self.handle.pooled_width:
                 self.handle = singa.CudnnPoolingHandle(x.data, self.kernel_size, self.stride,
-                                                       self.padding, self.is_max)
+                                                       self.padding, self.is_max)  # False for nan_prop

         self.handle.device_id = x.device.id()

         y = pooling_2d(self.handle, x)
         return y


-class MaxPool2d(Pooling2d):
+class MaxPooling2D(Pooling2D):

     def __init__(self, kernel_size, stride=None, padding=0):
-        super(MaxPool2d, self).__init__(kernel_size, stride, padding, True)
+        super(MaxPooling2D, self).__init__(kernel_size, stride, padding, True)


-class AvgPool2d(Pooling2d):
+class AvgPooling2D(Pooling2D):

     def __init__(self, kernel_size, stride=None, padding=0):
-        super(AvgPool2d, self).__init__(kernel_size, stride, padding, False)
+        super(AvgPooling2D, self).__init__(kernel_size, stride, padding, False)


-class MaxPool1d(Pooling2d):
+class MaxPooling1D(Pooling2D):

     def __init__(self, kernel_size, stride=None, padding=0):
         if stride is None:
             stride = kernel_size
-        super(MaxPool2d, self).__init__(
+        super(MaxPooling2D, self).__init__(
             (1, kernel_size), (0, stride), (0, padding), True)


-class AvgPool1d(Pooling2d):
+class AvgPooling1D(Pooling2D):

     def __init__(self, kernel_size, stride=None, padding=0):
         if stride is None:
             stride = kernel_size
-        super(MaxPool2d, self).__init__(
+        super(MaxPooling2D, self).__init__(
             (1, kernel_size), (0, stride), (0, padding), False)
+
+
+class _RNN(Operation):
+    def __init__(self, handle):
+        self.handle = handle
+
+    def forward(self, X, W):
+
+        if self.handle.device_id == -1:
+            raise NotImplementedError
+        else:
+            if training:
+                out, self.cache = singa.GpuRNNForwardTraining(self.handle, X, W)
+            else:
+                out = singa.GpuRNNForwardInference(self.handle, X, W)
+        return out
+
+    def backward(self, dY):
+        assert training is True and hasattr(
+            self, 'cache'), 'Please set training as True before do BP. '
+
+        if dY.device().id() != self.handle.device_id:
+            dY.ToDevice(self.inputs[0].device())
+
+        if self.handle.device_id == -1:
+            raise NotImplementedError
+        else:
+            dX, dW = singa.GpuRNNBackward(self.handle, dY, self.cache)
+            return dX, dW
+
+
+def rnn():
+    pass
+
+
+class RNN(Layer):
+
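
For orientation, a minimal forward-pass sketch using the layer names introduced by this diff (Conv2D, BatchNorm, MaxPooling2D). The device setup and the module-level training flag are assumptions about standard SINGA usage, not shown in the diff:

from singa import autograd, tensor, device

autograd.training = True                   # module-level flag consulted by ops such as _RNN.forward
dev = device.create_cuda_gpu()             # assumes SINGA is built with CUDA

x = tensor.Tensor((8, 3, 32, 32), dev)     # NCHW input
x.gaussian(0.0, 1.0)

conv = autograd.Conv2D(3, 16, 3, stride=1, padding=1)    # was Conv2d
bn = autograd.BatchNorm(16)                               # was BatchNorm2d
pool = autograd.MaxPooling2D(2, stride=2)                 # was MaxPool2d

y = pool(bn(conv(x)))                      # expected shape: (8, 16, 16, 16)

Note that the new _RNN operation is GPU-only in this commit (the CPU path raises NotImplementedError), and the rnn() wrapper and RNN layer are still empty stubs, so no end-to-end RNN usage is possible from this diff alone.
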
962+
