This repository was archived by the owner on Feb 12, 2022. It is now read-only.

simple bidirectional support #27

Open
wants to merge 4 commits into master
67 changes: 58 additions & 9 deletions torchqrnn/qrnn.py
@@ -8,6 +8,43 @@
from .forget_mult import ForgetMult


class BiDirQRNNLayer(nn.Module):
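"""Bidirectional QRNN layer: runs a forward and a backward QRNNLayer over the sequence and concatenates their per-timestep outputs along the feature dimension."""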
def __init__(self, input_size, hidden_size=None, save_prev_x=False, zoneout=0, window=1, output_gate=True,
use_cuda=True):
super(BiDirQRNNLayer, self).__init__()

assert window in [1, 2], "This QRNN implementation currently only handles convolutional window of size 1 or size 2"
self.window = window
self.input_size = input_size
self.hidden_size = hidden_size if hidden_size else input_size
self.zoneout = zoneout
self.save_prev_x = save_prev_x
self.prevX = None
self.output_gate = output_gate
self.use_cuda = use_cuda

self.forward_qrnn = QRNNLayer(input_size, hidden_size=hidden_size, save_prev_x=save_prev_x, zoneout=zoneout,
window=window,
output_gate=output_gate, use_cuda=use_cuda)
self.backward_qrnn = QRNNLayer(input_size, hidden_size=hidden_size, save_prev_x=save_prev_x, zoneout=zoneout,
window=window,
output_gate=output_gate, use_cuda=use_cuda)

def forward(self, X, hidden=None):
# Run one QRNN over the sequence as given and one over the time-reversed sequence
fwd, h_fwd = self.forward_qrnn(X, hidden=hidden)
bwd, h_bwd = self.backward_qrnn(torch.flip(X, [0]), hidden=hidden)
# Flip the backward outputs back into the original time order so both directions stay aligned
# per timestep, then concatenate outputs and final hidden states along the feature dimension
return torch.cat([fwd, torch.flip(bwd, [0])], dim=-1), torch.cat([h_fwd, h_bwd], dim=-1)


def fast_tanh(x):
# Softsign-style approximation of tanh, x / (1 + |x|): cheaper than torch.tanh but not numerically identical
return x / (1 + x.abs())


def fast_sigmoid(x):
# Matching softsign-based approximation of sigmoid, rescaled and shifted into (0, 1)
return (x / 2) / (1 + x.abs()) + 0.5
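# For comparison: fast_tanh(1.0) = 0.5 while torch.tanh(1.0) ≈ 0.7616, and fast_sigmoid(1.0) = 0.75 while
# torch.sigmoid(1.0) ≈ 0.7311, so swapping these in changes the gate numerics as well as the compute cost.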


class QRNNLayer(nn.Module):
r"""Applies a single layer Quasi-Recurrent Neural Network (QRNN) to an input sequence.

@@ -29,10 +66,12 @@ class QRNNLayer(nn.Module):
- h_n (batch, hidden_size): tensor containing the hidden state for t=seq_len
"""

def __init__(self, input_size, hidden_size=None, save_prev_x=False, zoneout=0, window=1, output_gate=True, use_cuda=True):
def __init__(self, input_size, hidden_size=None, save_prev_x=False, zoneout=0, window=1, output_gate=True,
use_cuda=True):
super(QRNNLayer, self).__init__()

assert window in [1, 2], "This QRNN implementation currently only handles convolutional window of size 1 or size 2"
assert window in [1,
2], "This QRNN implementation currently only handles convolutional window of size 1 or size 2"
self.window = window
self.input_size = input_size
self.hidden_size = hidden_size if hidden_size else input_size
@@ -43,7 +82,8 @@ def __init__(self, input_size, hidden_size=None, save_prev_x=False, zoneout=0, w
self.use_cuda = use_cuda

# One large matmul with concat is faster than N small matmuls and no concat
self.linear = nn.Linear(self.window * self.input_size, 3 * self.hidden_size if self.output_gate else 2 * self.hidden_size)
self.linear = nn.Linear(self.window * self.input_size,
3 * self.hidden_size if self.output_gate else 2 * self.hidden_size)

def reset(self):
# If you are saving the previous value of x, you should call this when starting with a new state
@@ -76,8 +116,8 @@ def forward(self, X, hidden=None):
Y = Y.view(seq_len, batch_size, 2 * self.hidden_size)
Z, F = Y.chunk(2, dim=2)
###
Z = torch.nn.functional.tanh(Z)
F = torch.nn.functional.sigmoid(F)
Z = fast_tanh(Z) # torch.nn.functional.tanh(Z)
F = fast_sigmoid(F) # torch.nn.functional.sigmoid(F)

# If zoneout is specified, we perform dropout on the forget gates in F
# If an element of F is zero, that means the corresponding neuron keeps the old value
@@ -100,7 +140,7 @@ def forward(self, X, hidden=None):

# Apply (potentially optional) output gate
if self.output_gate:
H = torch.nn.functional.sigmoid(O) * C
H = fast_sigmoid(O) * C # torch.nn.functional.sigmoid(O) * C
else:
H = C

@@ -137,13 +177,21 @@ class QRNN(torch.nn.Module):
def __init__(self, input_size, hidden_size,
num_layers=1, bias=True, batch_first=False,
dropout=0, bidirectional=False, layers=None, **kwargs):
assert bidirectional == False, 'Bidirectional QRNN is not yet supported'
# assert bidirectional == False, 'Bidirectional QRNN is not yet supported'
assert batch_first == False, 'Batch first mode is not yet supported'
assert bias == True, 'Removing underlying bias is not yet supported'

super(QRNN, self).__init__()

self.layers = torch.nn.ModuleList(layers if layers else [QRNNLayer(input_size if l == 0 else hidden_size, hidden_size, **kwargs) for l in range(num_layers)])
if bidirectional:
# Each bidirectional layer outputs 2 * hidden_size features, so layers after the first take a doubled input size
self.layers = torch.nn.ModuleList(
layers if layers else [BiDirQRNNLayer(input_size if l == 0 else hidden_size * 2, hidden_size, **kwargs)
for l in range(num_layers)])
else:
self.layers = torch.nn.ModuleList(
layers if layers else [QRNNLayer(input_size if l == 0 else hidden_size, hidden_size, **kwargs)
for l in range(num_layers)])

self.input_size = input_size
self.hidden_size = hidden_size
@@ -202,6 +250,7 @@ def forward(self, input, hidden=None):
assert diff < 1e-5, 'CUDA and non-CUDA QRNN layers return different results'

from torch.autograd import gradcheck
inputs = [X,]

inputs = [X, ]
test = gradcheck(QRNNLayer(hidden_size, hidden_size).cuda(), inputs)
print(test)
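
A minimal sketch of how the new bidirectional path is meant to be driven, assuming the package is importable as torchqrnn and a CUDA device is available (the layers default to use_cuda=True); the shape comments follow from the layer construction in this diff rather than from documented behaviour:

import torch

from torchqrnn import QRNN

seq_len, batch_size, input_size, hidden_size = 7, 3, 16, 32

# Sequence-first layout, since batch_first is not supported
X = torch.randn(seq_len, batch_size, input_size).cuda()

# With bidirectional=True, every layer after the first consumes the
# 2 * hidden_size features produced by the bidirectional layer below it
qrnn = QRNN(input_size, hidden_size, num_layers=2, bidirectional=True).cuda()
output, hidden = qrnn(X)

print(output.size())  # expected: (seq_len, batch_size, 2 * hidden_size)
print(hidden.size())  # expected: (num_layers, batch_size, 2 * hidden_size)

Passing use_cuda=False through to the layers should exercise the pure-PyTorch ForgetMult path instead, mirroring the CUDA versus non-CUDA comparison in the test block above.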