import numpy as np
import math

+from singa import tensor
from .tensor import Tensor
from . import layer
from singa.proto import model_pb2
@@ -969,12 +970,13 @@ class _RNN(Operation):
    def __init__(self, handle):
        self.handle = handle

-    def forward(self, X, h0, c0, W):
+    #def forward(self, X, h0, c0, W):
+    def forward(self, X, h0, W, c0=None):
        # X of shape (seq_len, batch, input_size)
        # h0_c0: (h0, c0) if lstm, else (h0,)
        # h0, c0 of shape (num_layers * num_directions, batch, hidden_size)
        if c0 is None:
-            assert self.rnn_mode != 'lstm'
+            assert self.handle.rnn_mode_ != 'lstm'
            c0 = CTensor([])  # CTensor([]) and Tensor cx are the same?

        if self.handle.device_id == -1:
@@ -992,38 +994,49 @@ def forward(self, X, h0, c0, W):
        # hout_cout: (hout, cout) if lstm, else (hout,)
        # hout, cout of shape (num_layers * num_directions, batch,
        # hidden_size)
-        oututs = _1dTo3d(Y)
+
+        #oututs = _1dTo3d(Y)
+        shape = (self.handle.seq_length_, self.handle.batch_size_, self.handle.hidden_size_)
+        outputs = singa.Reshape(Y, shape)

-        if self.rnn_mode != 'lstm':
+        if self.handle.rnn_mode_ != 'lstm':
            return outputs, hout
        else:
            return outputs, hout, cout

-    def backward(self, dY, dh, dc=CTensor([])):
+    def backward(self, dY, dh=CTensor([]), dc=CTensor([])):
        assert training is True and hasattr(
            self, 'cache'), 'Please set training as True before do BP. '

-        dY_1d = _3dTo1d(dY)
+        # dY_1d = _3dTo1d(dY)

-        if dY_1d.device().id() != self.handle.device_id:
-            dY_1d.ToDevice(self.cache[0].device())
+        if dY.device().id() != self.handle.device_id:
+            dY.ToDevice(self.cache[0].device())

        if self.handle.device_id == -1:
            raise NotImplementedError
        else:
            dX_1d, dhout, dcout, dW = singa.GpuRNNBackward(
-                self.handle, dY_1d, dh, dc, self.cache)
+                self.handle, dY, dh, dc, self.cache)

-        dX = _1dTo3d(dX_1d)
+        #dX = _1dTo3d(dX_1d)
+        shape = (self.handle.seq_length_, self.handle.batch_size_, self.handle.input_size_)
+        dX = singa.Reshape(dX_1d, shape)

-        if self.rnn_mode != 'lstm':
+        if self.handle.rnn_mode_ != 'lstm':
            return dX, dhout, dW
        else:
-            return dX, dhout, dcout, dW
+            return dX, dhout, dW, dcout


-def rnn(handle, x, h0, c0, W):
-    return _RNN(handle)(x, h0, c0, W)
+#def rnn(handle, x, h0, c0, W):
+#    return _RNN(handle)(x, h0, c0, W)
+
+def rnn(handle, x, h0, W, c0):
+    if c0 is None:
+        return _RNN(handle)(x, h0, W)
+    else:
+        return _RNN(handle)(x, h0, W, c0)


class RNN(Layer):
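Note on the reordering at the end of backward: the new forward signature is (X, h0, W, c0), and autograd engines typically pair returned gradients with inputs positionally, so backward must now return (dX, dhout, dW, dcout) in that same order. A minimal sketch of that convention, not singa's actual engine (Op here is a stand-in class):

# Sketch only: positional pairing of inputs and gradients, which is why
# backward's return order must mirror forward's argument order.
class Op:
    def __call__(self, *inputs):
        self.inputs = inputs          # remembered in forward order, e.g. (X, h0, W, c0)
        return self.forward(*inputs)

    def backprop(self, *dys):
        grads = self.backward(*dys)   # must come back as (dX, dh0, dW, dc0)
        return list(zip(self.inputs, grads))  # grads[i] is routed to inputs[i]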
@@ -1054,14 +1067,15 @@ def __init__(self, input_size, hidden_size, num_layers=1, bias=True, batch_first
        if self.bidirectional:
            mult *= 2

+        W_Size = 0
        for k in range(num_layers):
-            if k == 1:
+            if k == 0:
                w_size = self.hidden_size * \
                    (self.input_size + self.hidden_size + 2)
            else:
                w_size = self.hidden_size * \
                    (self.hidden_size + self.hidden_size + 2)
-            W_Size *= mult * w_size
+            W_Size += mult * w_size

        self.W_Size = W_Size
        self.W = Tensor(shape=(W_Size,), requires_grad=True, stores_grad=True)  # TODO: assign value of Wi separately
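The two fixes above turn the weight-size loop from broken code (W_Size was never initialized, *= would not accumulate, and k == 1 mis-identified the first layer) into a proper sum: layer 0 consumes input_size, every deeper layer consumes hidden_size. A standalone re-derivation of the corrected arithmetic; rnn_param_count is a hypothetical helper, mult stands for the multiplier computed earlier in __init__, and the +2 presumably covers two bias vectors:

# Hypothetical standalone check of the corrected W_Size arithmetic.
def rnn_param_count(input_size, hidden_size, num_layers, mult):
    total = 0
    for k in range(num_layers):
        in_dim = input_size if k == 0 else hidden_size  # only layer 0 reads the raw input
        total += mult * hidden_size * (in_dim + hidden_size + 2)
    return total

# input_size=4, hidden_size=8, num_layers=2, mult=1:
# layer 0: 8 * (4 + 8 + 2) = 112; layer 1: 8 * (8 + 8 + 2) = 144; total 256
assert rnn_param_count(4, 8, 2, 1) == 256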
@@ -1077,33 +1091,35 @@ def __call__(self, inputs, h0, c0=None):
        if self.rnn_mode == 'lstm':
            assert c0 is not None, 'Please input c0.'
            self.device_check(h0, c0)
+        else:
+            assert c0 is None, 'only lstm needs input c0'

        if not hasattr(self, 'handle'):
-            self.handle = signa.CudnnRNNHandle(inputs.data, self.input_size, self.hidden_size, self.num_layers,
+            self.handle = singa.CudnnRNNHandle(inputs.data, self.input_size, self.hidden_size, self.num_layers,
                                               self.rnn_mode, self.dropout, self.bidirectional, self.W_Size)
        elif inputs.shape[0] != self.handle.seq_length_ or inputs.shape[1] != self.handle.batch_size_:
-            self.handle = signa.CudnnRNNHandle(inputs.data, self.input_size, self.hidden_size, self.num_layers,
+            self.handle = singa.CudnnRNNHandle(inputs.data, self.input_size, self.hidden_size, self.num_layers,
                                               self.rnn_mode, self.dropout, self.bidirectional, self.W_Size)

        self.handle.device_id = inputs.device.id()

-        X = _3dTo1d(inputs)
-        outputs = rnn(self.handle, X, h0, c0, self.W)
+        #X = _3dTo1d(inputs)
+        X = inputs
+        outputs = rnn(self.handle, X, h0, self.W, c0)
+        #outputs = rnn(self.handle, X, h0, self.W)
+        #outputs = tensor.to_numpy(outputs[0])
+        #print(outputs.shape)
+        #print(outputs)
        return outputs

-    def _3dTo1d(self, inputs):
-        pass
-
-    def _1dTo3d(self, *args):
-        pass

class LSTM(RNN):

    def __init__(self, input_size, hidden_size, num_layers=1, bias=True, batch_first=False, dropout=0, bidirectional=False):
-        super(LSTM, self).__init__(input_size, hidden_size, num_layers, bias, batch_first, dropout, bidirectional, rnn_mode='lstm')
+        super(LSTM, self).__init__(input_size, hidden_size, num_layers, bias, batch_first, dropout, bidirectional, rnn_mode='lstm')


class GRU(RNN):

    def __init__(self, input_size, hidden_size, num_layers=1, bias=True, batch_first=False, dropout=0, bidirectional=False):
-        super(GRU, self).__init__(input_size, hidden_size, num_layers, bias, batch_first, dropout, bidirectional, rnn_mode='gru')
+        super(GRU, self).__init__(input_size, hidden_size, num_layers, bias, batch_first, dropout, bidirectional, rnn_mode='gru')
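For reference, a usage sketch of the call path after this change; it assumes singa's public Tensor/device API and a CUDA build, and is illustrative rather than a test from this commit:

from singa import device, tensor, autograd  # autograd = the module patched here

dev = device.create_cuda_gpu()
seq_len, batch, input_size, hidden_size = 5, 2, 3, 4

x = tensor.Tensor((seq_len, batch, input_size), dev)  # (seq_len, batch, input_size)
x.gaussian(0.0, 1.0)
h0 = tensor.Tensor((1, batch, hidden_size), dev)      # num_layers * num_directions = 1
h0.set_value(0.0)
c0 = tensor.Tensor((1, batch, hidden_size), dev)
c0.set_value(0.0)

lstm = autograd.LSTM(input_size, hidden_size)
outputs, hout, cout = lstm(x, h0, c0)   # lstm returns (Y, h, c)

gru = autograd.GRU(input_size, hidden_size)
outputs, hout = gru(x, h0)              # non-lstm modes must omit c0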