Add RCNN

ShawnyXiao · Jan 9, 2019 · 906c842 · 906c842
1 parent f0c3077
commit 906c842
Show file tree

Hide file tree

Showing 4 changed files with 114 additions and 0 deletions.
diff --git a/README.md b/README.md
@@ -13,6 +13,7 @@ This code repository implements a variety of **deep learning models** for **text
     4. [TextBiRNN](#4-textbirnn)
     5. [TextAttBiRNN](#5-textattbirnn)
     6. [HAN](#6-han)
+    7. [RCNN](#7-rcnn)
     999. [To Be Continued...](#to-be-continued)
 4. [Reference](#reference)
 
@@ -161,6 +162,27 @@ Network structure of HAN:
 
 The TimeDistributed wrapper is used here, since the parameters of the Embedding, Bidirectional RNN, and Attention layers are expected to be shared on the time step dimension.
 
+### 7 RCNN
+
+RCNN was proposed in the paper [Recurrent Convolutional Neural Networks for Text Classification](https://www.aaai.org/ocs/index.php/AAAI/AAAI15/paper/view/9745/9552).
+
+#### 7.1 Description in Paper
+
+<p align="center">
+	<img src="image/RCNN.png">
+</p>
+
+1. **Word Representation Learning**. RCNN uses a recurrent structure, which is a **bi-directional recurrent neural network**, to capture the contexts. Then, it combines the word and its context to represent the word, and applies a **linear transformation** together with the `tanh` activation function to the representation.
+2. **Text Representation Learning**. When all of the representations of words are calculated, it applies an element-wise **max-pooling** layer in order to capture the most important information throughout the entire text. Finally, it does the **linear transformation** and applies the **softmax** function.
+
+#### 7.2 Implementation Here
+
+Network structure of RCNN:
+
+<p align="center">
+	<img src="image/RCNN_network_structure.png">
+</p>
+
 ### To Be Continued...
 
 ## Reference

diff --git a/image/RCNN.png b/image/RCNN.png
diff --git a/model/RCNN/main.py b/model/RCNN/main.py
@@ -0,0 +1,54 @@
+# coding=utf-8
+
+import numpy as np
+from keras.callbacks import EarlyStopping
+from keras.datasets import imdb
+from keras.preprocessing import sequence
+
+from rcnn import RCNN
+
+# Experiment settings for the IMDB run below.
+max_features = 5000  # passed to imdb.load_data as num_words and to RCNN as max_features
+maxlen = 400  # passed to pad_sequences as maxlen and to RCNN as maxlen
+batch_size = 32  # batch size handed to model.fit
+embedding_dims = 50  # passed to RCNN as embedding_dims
+epochs = 10  # upper bound on training epochs (early stopping may end training sooner)
+
+print('Loading data...')
+(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
+print(len(x_train), 'train sequences')
+print(len(x_test), 'test sequences')
+
+# pad_sequences is called with maxlen so each split becomes a 2-D array
+# (samples x time); the resulting shapes are printed right after.
+print('Pad sequences (samples x time)...')
+x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
+x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
+print('x_train shape:', x_train.shape)
+print('x_test shape:', x_test.shape)
+
+# Build the three model inputs from each padded array:
+#   *_current: the padded sequences themselves.
+#   *_left:    each row shifted one step to the right; column 0 repeats the
+#              row's first token, so position j holds the token at j - 1.
+#   *_right:   each row shifted one step to the left; the last column repeats
+#              the row's last token, so position j holds the token at j + 1.
+# All three arrays keep the same shape as the padded input.
+print('Prepare input for model...')
+x_train_current = x_train
+x_train_left = np.hstack([np.expand_dims(x_train[:, 0], axis=1), x_train[:, 0:-1]])
+x_train_right = np.hstack([x_train[:, 1:], np.expand_dims(x_train[:, -1], axis=1)])
+x_test_current = x_test
+x_test_left = np.hstack([np.expand_dims(x_test[:, 0], axis=1), x_test[:, 0:-1]])
+x_test_right = np.hstack([x_test[:, 1:], np.expand_dims(x_test[:, -1], axis=1)])
+print('x_train_current shape:', x_train_current.shape)
+print('x_train_left shape:', x_train_left.shape)
+print('x_train_right shape:', x_train_right.shape)
+print('x_test_current shape:', x_test_current.shape)
+print('x_test_left shape:', x_test_left.shape)
+print('x_test_right shape:', x_test_right.shape)
+
+# RCNN is constructed with its default class_num / last_activation arguments
+# and compiled with the adam optimizer and binary cross-entropy loss.
+print('Build model...')
+model = RCNN(maxlen, max_features, embedding_dims).get_model()
+model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])
+
+# Stop training once 'val_acc' has not improved for 3 consecutive epochs.
+# NOTE(review): the monitored metric name depends on the Keras version
+# (newer releases appear to report 'val_accuracy') - confirm against the
+# installed version.
+# NOTE(review): the test split is used as validation data here, so early
+# stopping is driven by the same data that is predicted on below.
+print('Train...')
+early_stopping = EarlyStopping(monitor='val_acc', patience=3, mode='max')
+model.fit([x_train_current, x_train_left, x_train_right], y_train,
+          batch_size=batch_size,
+          epochs=epochs,
+          callbacks=[early_stopping],
+          validation_data=([x_test_current, x_test_left, x_test_right], y_test))
+
+# The predictions are stored in `result`; nothing in this script reads them
+# afterwards.
+print('Test...')
+result = model.predict([x_test_current, x_test_left, x_test_right])
diff --git a/model/RCNN/rcnn.py b/model/RCNN/rcnn.py
@@ -0,0 +1,38 @@
+# coding=utf-8
+
+from keras import Input, Model
+from keras import backend as K
+from keras.layers import Embedding, Dense, SimpleRNN, Lambda, Concatenate, Conv1D, GlobalMaxPooling1D
+
+
+class RCNN(object):
+    """Builder for a three-input Keras model combining recurrent context with a 1x1 convolution.
+
+    The constructor only stores its arguments; the Keras graph is created
+    each time `get_model` is called.
+
+    Args:
+        maxlen: Length of each of the three input sequences.
+        max_features: First size argument of the shared Embedding layer.
+        embedding_dims: Output dimension of the shared Embedding layer.
+        class_num: Number of units in the final Dense layer (default 1).
+        last_activation: Activation of the final Dense layer
+            (default 'sigmoid').
+    """
+
+    def __init__(self, maxlen, max_features, embedding_dims,
+                 class_num=1,
+                 last_activation='sigmoid'):
+        self.maxlen = maxlen
+        self.max_features = max_features
+        self.embedding_dims = embedding_dims
+        self.class_num = class_num
+        self.last_activation = last_activation
+
+    def get_model(self):
+        """Construct and return the (uncompiled) Keras Model.
+
+        Returns:
+            A Model whose inputs are, in order, [current, left, right]
+            sequences of length `maxlen`, and whose output is a Dense layer
+            with `class_num` units and `last_activation` activation.
+        """
+        # Three inputs of identical length: the sequence itself plus its
+        # left-context and right-context counterparts.
+        input_current = Input((self.maxlen,))
+        input_left = Input((self.maxlen,))
+        input_right = Input((self.maxlen,))
+
+        # A single Embedding instance is applied to all three inputs, so they
+        # share the same embedding weights.
+        embedder = Embedding(self.max_features, self.embedding_dims, input_length=self.maxlen)
+        embedding_current = embedder(input_current)
+        embedding_left = embedder(input_left)
+        embedding_right = embedder(input_right)
+
+        # Left context: SimpleRNN over the left input, keeping every step.
+        # Right context: a separate SimpleRNN run with go_backwards=True; its
+        # output is then reversed along axis 1, presumably to put the steps
+        # back in the original time order before concatenation.
+        x_left = SimpleRNN(128, return_sequences=True)(embedding_left)
+        x_right = SimpleRNN(128, return_sequences=True, go_backwards=True)(embedding_right)
+        x_right = Lambda(lambda x: K.reverse(x, axes=1))(x_right)
+        # Join [left context, current embedding, right context] on the
+        # feature axis (axis 2) for every time step.
+        x = Concatenate(axis=2)([x_left, embedding_current, x_right])
+
+        # kernel_size=1 means each time step is transformed on its own (64
+        # filters, tanh); global max pooling then collapses the time axis.
+        x = Conv1D(64, kernel_size=1, activation='tanh')(x)
+        x = GlobalMaxPooling1D()(x)
+
+        output = Dense(self.class_num, activation=self.last_activation)(x)
+        model = Model(inputs=[input_current, input_left, input_right], outputs=output)
+        return model