@@ -347,19 +347,6 @@ def add_bias(x, b, axis=0):
     return AddBias(axis)(x, b)[0]
 
 
-class Add(Operation):
-
-    def forward(self, a, b):
-        return singa.__add__(a, b)
-
-    def backward(self, dy):
-        return dy, dy
-
-
-def add(a, b):
-    return Add()(a, b)[0]
-
-
 class SoftMax(Operation):
     '''
     Apply SoftMax for each row of the Tensor or each column of the Tensor
@@ -469,24 +456,22 @@ def cross_entropy(y, t):
 
 class SoftMaxCrossEntropy(Operation):
 
-    def __init__(self, t):
-        self.t = t.data
-
-    def forward(self, x):
+    def forward(self, x, t):
         self.p = singa.SoftMax(x)
+        self.t = t
         loss = CTensor((1,), self.p.device())
-        ret = singa.CrossEntropyFwd(self.p, self.t)
+        ret = singa.CrossEntropyFwd(self.p, t)
         loss.SetFloatValue(singa.SumAsFloat(ret) / x.shape()[0])
         return loss
 
     def backward(self, dy=1.0):
         dx = singa.SoftmaxCrossEntropyBwd(self.p, self.t)
-        return singa.DivFloat(dx, float(self.p.shape()[0]))
+        return singa.DivFloat(dx, float(self.p.shape()[0])), None
 
 
 def softmax_cross_entropy(x, t):
     # x is the logits and t is the ground truth; both are 2D.
-    return SoftMaxCrossEntropy(t)(x)[0]
+    return SoftMaxCrossEntropy()(x, t)[0]
 
 
 def ctensor2numpy(x):
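
Note (not part of the patch): with this change the ground truth is passed to
forward() together with the logits instead of being captured by the
constructor, and backward() now returns None as the gradient for the target,
marking it as a non-differentiable input. A minimal usage sketch, with
placeholder tensor names:

    # logits: 2D Tensor (batch, num_classes); target: 2D ground-truth Tensor
    loss = softmax_cross_entropy(logits, target)   # was SoftMaxCrossEntropy(t)(x)[0]
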
@@ -587,12 +572,12 @@ def backward(self, dy):
         return tuple(dxs)
 
 
-def cat(xs, axis=0):
+def concat(xs, axis=0):
     # xs is a tuple of multiple Tensors
     return Concat(axis)(*xs)[0]
 
 
-class _Conv2d(Operation):
+class _Conv2D(Operation):
 
     def __init__(self, handle):
         self.handle = handle
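
Note (not part of the patch): a hedged sketch of the renamed helper, assuming
x1 and x2 are Tensors with compatible shapes:

    x3 = concat((x1, x2), axis=0)   # formerly cat(); xs is passed as a tuple
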
@@ -642,10 +627,10 @@ def backward(self, dy):
 
 
 def conv2d(handle, x, W, b):
-    return _Conv2d(handle)(x, W, b)[0]
+    return _Conv2D(handle)(x, W, b)[0]
 
 
-class Conv2d(Layer):
+class Conv2D(Layer):
 
     def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                  padding=0, dilation=1, groups=1, bias=True, **kwargs):
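
Note (not part of the patch): a hedged example of the renamed layer; the
channel counts and kernel size below are illustrative only:

    conv = Conv2D(in_channels=3, out_channels=32, kernel_size=3,
                  stride=1, padding=1, bias=True)
    y = conv(x)   # x is assumed to be an NCHW Tensor with x.shape[1] == 3
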
@@ -708,6 +693,10 @@ def __init__(self, in_channels, out_channels, kernel_size, stride=1,
 
     def __call__(self, x):
         assert x.shape[1] == self.in_channels, 'in_channels dismatched'
+        assert (x.shape[2] + 2 * self.padding[0] - self.kernel_size[0]
+                ) % self.stride[0] == 0, 'invalid padding or strides.'
+        assert (x.shape[3] + 2 * self.padding[1] - self.kernel_size[1]
+                ) % self.stride[1] == 0, 'invalid padding or stride.'
 
         self.device_check(x, self.W, self.b)
 
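
Note (not part of the patch): the new assertions require the padded extent to
be an exact multiple of the stride along each spatial axis. A small check with
illustrative values:

    h, k, p, s = 32, 3, 1, 2
    print((h + 2 * p - k) % s == 0)   # False: (32 + 2 - 3) % 2 == 1, so __call__ would raise
    print((h + 2 * p - k) % 1 == 0)   # True: the same kernel with stride 1 passes
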
@@ -731,7 +720,7 @@ def __call__(self, x):
         return y
 
 
-class BatchNorm2d(Layer):
+class BatchNorm(Layer):
 
     def __init__(self, num_features, momentum=0.9):
         self.channels = num_features
@@ -771,12 +760,12 @@ def __call__(self, x):
                     self.momentum, x.data)
         self.handle.device_id = x.device.id()
 
-        y = batchnorm_2d(self.handle, x, self.scale, self.bias,
+        y = batchnorm(self.handle, x, self.scale, self.bias,
                          self.running_mean, self.running_var)
         return y
 
 
-class _BatchNorm2d(Operation):
+class _BatchNorm(Operation):
 
     def __init__(self, handle, running_mean, running_var):
         self.running_mean = running_mean.data
@@ -796,7 +785,7 @@ def forward(self, x, scale, bias):
         if self.handle.device_id == -1:
             raise NotImplementedError
         else:
-            y = singa.GpuBatchNormForwardInference(
+            y, _, _ = singa.GpuBatchNormForwardInference(
                 self.handle, x, scale, bias, self.running_mean, self.running_var)
         return y
 
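
Note (not part of the patch): the inference path now unpacks three return
values and keeps only the normalized output. A hedged sketch of the renamed
layer in use; names and sizes are assumptions:

    bn = BatchNorm(num_features=32, momentum=0.9)
    y = bn(x)   # x: NCHW Tensor with 32 channels; running stats are kept on the layer
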
@@ -816,11 +805,11 @@ def backward(self, dy):
         return dx, ds, db
 
 
-def batchnorm_2d(handle, x, scale, bias, running_mean, running_var):
-    return _BatchNorm2d(handle, running_mean, running_var)(x, scale, bias)[0]
+def batchnorm(handle, x, scale, bias, running_mean, running_var):
+    return _BatchNorm(handle, running_mean, running_var)(x, scale, bias)[0]
 
 
-class _Pooling2d(Operation):
+class _Pooling2D(Operation):
 
     def __init__(self, handle):
         self.handle = handle
@@ -846,10 +835,10 @@ def backward(self, dy):
 
 
 def pooling_2d(handle, x):
-    return _Pooling2d(handle)(x)[0]
+    return _Pooling2D(handle)(x)[0]
 
 
-class Pooling2d(Layer):
+class Pooling2D(Layer):
 
     def __init__(self, kernel_size, stride=None, padding=0, is_max=True):
         if isinstance(kernel_size, int):
@@ -896,43 +885,78 @@ def __call__(self, x):
         else:
             if not hasattr(self, 'handle'):
                 self.handle = singa.CudnnPoolingHandle(x.data, self.kernel_size, self.stride,
-                                                       self.padding, self.is_max)
+                                                       self.padding, self.is_max)  # False for nan_prop
             elif x.shape[0] != self.handle.batchsize or out_shape_h != self.handle.pooled_height or \
                     out_shape_w != self.handle.pooled_width:
                 self.handle = singa.CudnnPoolingHandle(x.data, self.kernel_size, self.stride,
-                                                       self.padding, self.is_max)
+                                                       self.padding, self.is_max)  # False for nan_prop
 
         self.handle.device_id = x.device.id()
 
         y = pooling_2d(self.handle, x)
         return y
 
 
-class MaxPool2d(Pooling2d):
+class MaxPooling2D(Pooling2D):
 
     def __init__(self, kernel_size, stride=None, padding=0):
-        super(MaxPool2d, self).__init__(kernel_size, stride, padding, True)
+        super(MaxPooling2D, self).__init__(kernel_size, stride, padding, True)
 
 
-class AvgPool2d(Pooling2d):
+class AvgPooling2D(Pooling2D):
 
     def __init__(self, kernel_size, stride=None, padding=0):
-        super(AvgPool2d, self).__init__(kernel_size, stride, padding, False)
+        super(AvgPooling2D, self).__init__(kernel_size, stride, padding, False)
 
 
-class MaxPool1d(Pooling2d):
+class MaxPooling1D(Pooling2D):
 
     def __init__(self, kernel_size, stride=None, padding=0):
         if stride is None:
             stride = kernel_size
-        super(MaxPool2d, self).__init__(
+        super(MaxPooling1D, self).__init__(
             (1, kernel_size), (0, stride), (0, padding), True)
 
 
-class AvgPool1d(Pooling2d):
+class AvgPooling1D(Pooling2D):
 
     def __init__(self, kernel_size, stride=None, padding=0):
         if stride is None:
             stride = kernel_size
-        super(MaxPool2d, self).__init__(
+        super(AvgPooling1D, self).__init__(
             (1, kernel_size), (0, stride), (0, padding), False)
+
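
Note (not part of the patch): hedged examples of the renamed pooling layers;
the sizes below are placeholders:

    pool = MaxPooling2D(kernel_size=2, stride=2)
    y = pool(x)                                 # x: NCHW Tensor
    avg = AvgPooling2D(3, stride=1, padding=1)
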
+
+class _RNN(Operation):
+    def __init__(self, handle):
+        self.handle = handle
+
+    def forward(self, X, W):
+
+        if self.handle.device_id == -1:
+            raise NotImplementedError
+        else:
+            if training:
+                out, self.cache = singa.GpuRNNForwardTraining(self.handle, X, W)
+            else:
+                out = singa.GpuRNNForwardInference(self.handle, X, W)
+        return out
+
+    def backward(self, dY):
+        assert training is True and hasattr(
+            self, 'cache'), 'Please set training as True before doing BP. '
+
+        if dY.device().id() != self.handle.device_id:
+            dY.ToDevice(self.inputs[0].device())
+
+        if self.handle.device_id == -1:
+            raise NotImplementedError
+        else:
+            dX, dW = singa.GpuRNNBackward(self.handle, dY, self.cache)
+        return dX, dW
+
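
Note (not part of the patch): rnn() below is still an empty stub; judging from
conv2d() and pooling_2d() above it would presumably wrap _RNN in the same
style. A hedged sketch only; the signature and return handling are assumptions:

    def rnn(handle, X, W):
        # _RNN.forward consults the module-level `training` flag to choose the
        # cuDNN training or inference path configured in the handle.
        return _RNN(handle)(X, W)
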
+def rnn():
+    pass
+
+class RNN(Layer):
+