diff --git a/README.md b/README.md
index 47c5f59..47fbbb0 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
-
+
---
@@ -262,6 +262,7 @@ Original implementations are quite complex and not really beginner friendly. So
47. Attention is all you need + Beam Search
48. Fairseq
49. Conv-Encoder + LSTM
+50. Bytenet Greedy
@@ -362,6 +363,9 @@ Original implementations are quite complex and not really beginner friendly. So
7. Word-wise + Seq2Seq + GRU
8. Character-wise RNN + LSTM + Bahdanau Attention
9. Character-wise RNN + LSTM + Luong Attention
+10. Word-wise + Seq2Seq + GRU + Beam
+11. Character-wise + Seq2Seq + GRU + Bahdanau Attention
+12. Word-wise + Seq2Seq + GRU + Bahdanau Attention
### [Language-detection](language-detection)
diff --git a/generator/10.gru-seq2seq-beam-word.ipynb b/generator/10.gru-seq2seq-beam-word.ipynb
new file mode 100644
index 0000000..163d0b1
--- /dev/null
+++ b/generator/10.gru-seq2seq-beam-word.ipynb
@@ -0,0 +1,351 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import tensorflow as tf\n",
+ "import matplotlib.pyplot as plt\n",
+ "import seaborn as sns\n",
+ "import random\n",
+ "import time\n",
+ "import collections\n",
+ "from tqdm import tqdm\n",
+ "sns.set()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def build_dataset(words, n_words, atleast=1):\n",
+ "    # Build a vocabulary for `words` and encode the corpus as integer ids.\n",
+ "    #\n",
+ "    # words   : sequence of tokens in corpus order (it is iterated twice).\n",
+ "    # n_words : keep at most this many distinct tokens, most frequent first.\n",
+ "    # atleast : drop tokens that occur fewer than `atleast` times.\n",
+ "    #\n",
+ "    # Returns (data, count, dictionary, reversed_dictionary): the encoded\n",
+ "    # corpus, [token, n] pairs with the four reserved tokens first, the\n",
+ "    # token -> id map and the id -> token map.\n",
+ "    count = [['PAD', 0], ['GO', 1], ['EOS', 2], ['UNK', 3]]\n",
+ "    unk_slot = 3\n",
+ "    counter = collections.Counter(words).most_common(n_words)\n",
+ "    count.extend(pair for pair in counter if pair[1] >= atleast)\n",
+ "    dictionary = dict()\n",
+ "    for word, _ in count:\n",
+ "        dictionary[word] = len(dictionary)\n",
+ "    unk_index = dictionary['UNK']\n",
+ "    data = list()\n",
+ "    unk_count = 0\n",
+ "    for word in words:\n",
+ "        index = dictionary.get(word)\n",
+ "        if index is None:\n",
+ "            # Out-of-vocabulary tokens encode as UNK; id 0 is reserved for PAD.\n",
+ "            index = unk_index\n",
+ "            unk_count += 1\n",
+ "        data.append(index)\n",
+ "    # Record the out-of-vocabulary tally on the UNK entry.\n",
+ "    count[unk_slot][1] = unk_count\n",
+ "    reversed_dictionary = dict(zip(dictionary.values(), dictionary.keys()))\n",
+ "    return data, count, dictionary, reversed_dictionary"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "with open('shakespeare.txt') as fopen:\n",
+ " shakespeare = fopen.read().split()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Every distinct token is kept, so n_words is the number of unique tokens.\n",
+ "vocabulary_size = len(set(shakespeare))\n",
+ "data, count, dictionary, rev_dictionary = build_dataset(shakespeare, vocabulary_size)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "GO = dictionary['GO']\n",
+ "PAD = dictionary['PAD']\n",
+ "EOS = dictionary['EOS']\n",
+ "UNK = dictionary['UNK']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class Generator:\n",
+ "    # GRU encoder-decoder graph with a teacher-forced training decoder and a\n",
+ "    # beam-search predicting decoder that share the same decoder cells and\n",
+ "    # output projection.\n",
+ "    #\n",
+ "    # size_layer     : units per GRU cell.\n",
+ "    # num_layers     : GRU cells stacked in the encoder and in the decoder.\n",
+ "    # embedded_size  : width of both embedding tables.\n",
+ "    # from_dict_size : rows of the encoder embedding table.\n",
+ "    # to_dict_size   : rows of the decoder embedding table and width of the logits.\n",
+ "    # learning_rate  : learning rate handed to the Adam optimizer.\n",
+ "    # batch_size     : ignored; the batch size is read from the shape of the fed X.\n",
+ "    # beam_width     : number of beams kept by the predicting decoder.\n",
+ "    #\n",
+ "    # Relies on the module-level GO and EOS token ids.\n",
+ "    def __init__(self, size_layer, num_layers, embedded_size,\n",
+ "                 from_dict_size, to_dict_size, learning_rate, batch_size,\n",
+ "                 beam_width=15):\n",
+ "\n",
+ "        def cells(reuse=False):\n",
+ "            return tf.nn.rnn_cell.GRUCell(size_layer, reuse=reuse)\n",
+ "\n",
+ "        self.X = tf.placeholder(tf.int32, [None, None])\n",
+ "        self.Y = tf.placeholder(tf.int32, [None, None])\n",
+ "        # Sequence lengths are the number of non-zero ids in each row.\n",
+ "        self.X_seq_len = tf.count_nonzero(self.X, 1, dtype=tf.int32)\n",
+ "        self.Y_seq_len = tf.count_nonzero(self.Y, 1, dtype=tf.int32)\n",
+ "        # Overrides the constructor argument with the dynamic batch size.\n",
+ "        batch_size = tf.shape(self.X)[0]\n",
+ "\n",
+ "        encoder_embedding = tf.Variable(tf.random_uniform([from_dict_size, embedded_size], -1, 1))\n",
+ "        decoder_embedding = tf.Variable(tf.random_uniform([to_dict_size, embedded_size], -1, 1))\n",
+ "\n",
+ "        self.cells = tf.nn.rnn_cell.MultiRNNCell([cells() for _ in range(num_layers)])\n",
+ "        # Zero state by default; callers may feed this tensor to start from another state.\n",
+ "        self.encoder_state = self.cells.zero_state(\n",
+ "            dtype = tf.float32, batch_size = tf.shape(self.X)[0]\n",
+ "        )\n",
+ "\n",
+ "        _, encoder_state = tf.nn.dynamic_rnn(\n",
+ "            cell = self.cells,\n",
+ "            inputs = tf.nn.embedding_lookup(encoder_embedding, self.X),\n",
+ "            sequence_length = self.X_seq_len,\n",
+ "            initial_state = self.encoder_state,\n",
+ "            dtype = tf.float32)\n",
+ "\n",
+ "        # Decoder input is Y without its last column, prefixed with GO.\n",
+ "        main = tf.strided_slice(self.Y, [0, 0], [batch_size, -1], [1, 1])\n",
+ "        decoder_input = tf.concat([tf.fill([batch_size, 1], GO), main], 1)\n",
+ "        dense = tf.layers.Dense(to_dict_size)\n",
+ "        decoder_cells = tf.nn.rnn_cell.MultiRNNCell([cells() for _ in range(num_layers)])\n",
+ "\n",
+ "        training_helper = tf.contrib.seq2seq.TrainingHelper(\n",
+ "            inputs = tf.nn.embedding_lookup(decoder_embedding, decoder_input),\n",
+ "            sequence_length = self.Y_seq_len,\n",
+ "            time_major = False)\n",
+ "        training_decoder = tf.contrib.seq2seq.BasicDecoder(\n",
+ "            cell = decoder_cells,\n",
+ "            helper = training_helper,\n",
+ "            initial_state = encoder_state,\n",
+ "            output_layer = dense)\n",
+ "        training_decoder_output, self.training_state, _ = tf.contrib.seq2seq.dynamic_decode(\n",
+ "            decoder = training_decoder,\n",
+ "            impute_finished = True,\n",
+ "            maximum_iterations = tf.reduce_max(self.Y_seq_len))\n",
+ "        self.training_logits = training_decoder_output.rnn_output\n",
+ "\n",
+ "        # NOTE(review): the beam decoder starts from self.encoder_state (the encoder's\n",
+ "        # initial state), not from the encoder's final state `encoder_state` used by the\n",
+ "        # training decoder - confirm this is intended before changing it.\n",
+ "        predicting_decoder = tf.contrib.seq2seq.BeamSearchDecoder(\n",
+ "            cell = decoder_cells,\n",
+ "            embedding = decoder_embedding,\n",
+ "            start_tokens = tf.tile(tf.constant([GO], dtype=tf.int32), [batch_size]),\n",
+ "            end_token = EOS,\n",
+ "            initial_state = tf.contrib.seq2seq.tile_batch(self.encoder_state, beam_width),\n",
+ "            beam_width = beam_width,\n",
+ "            output_layer = dense,\n",
+ "            length_penalty_weight = 0.0)\n",
+ "        predicting_decoder_output, self.predict_state, _ = tf.contrib.seq2seq.dynamic_decode(\n",
+ "            decoder = predicting_decoder,\n",
+ "            impute_finished = False,\n",
+ "            maximum_iterations = tf.reduce_max(self.X_seq_len))\n",
+ "\n",
+ "        # Keep only index 0 of the last axis (presumably the best-scoring beam - TODO confirm).\n",
+ "        self.predicting_ids = predicting_decoder_output.predicted_ids[:,:,0]\n",
+ "\n",
+ "        masks = tf.sequence_mask(self.Y_seq_len, tf.reduce_max(self.Y_seq_len), dtype=tf.float32)\n",
+ "        self.cost = tf.contrib.seq2seq.sequence_loss(logits = self.training_logits,\n",
+ "                                                     targets = self.Y,\n",
+ "                                                     weights = masks)\n",
+ "        self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)\n",
+ "\n",
+ "        # Accuracy is measured only over the positions selected by the sequence mask.\n",
+ "        y_t = tf.argmax(self.training_logits,axis=2)\n",
+ "        y_t = tf.cast(y_t, tf.int32)\n",
+ "        self.prediction = tf.boolean_mask(y_t, masks)\n",
+ "        mask_label = tf.boolean_mask(self.Y, masks)\n",
+ "        correct_pred = tf.equal(self.prediction, mask_label)\n",
+ "        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "learning_rate = 0.001\n",
+ "batch_size = 32\n",
+ "sequence_length = 64\n",
+ "epoch = 3000\n",
+ "num_layers = 2\n",
+ "size_layer = 256\n",
+ "possible_batch_id = range(len(data) - sequence_length - 1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tf.reset_default_graph()\n",
+ "sess = tf.InteractiveSession()\n",
+ "model = Generator(size_layer, num_layers, size_layer, len(dictionary), \n",
+ " len(dictionary), learning_rate,batch_size)\n",
+ "sess.run(tf.global_variables_initializer())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def _sample_batch():\n",
+ "    # Draw batch_size random windows of sequence_length ids from data; each target row\n",
+ "    # is the same window shifted by one token with EOS appended.\n",
+ "    batch_x = np.zeros((batch_size, sequence_length))\n",
+ "    batch_y = np.zeros((batch_size, sequence_length + 1))\n",
+ "    for n in range(batch_size):\n",
+ "        index = np.random.randint(0, len(data) - sequence_length - 1)\n",
+ "        batch_x[n] = data[index:index + sequence_length]\n",
+ "        batch_y[n] = data[index + 1:index + sequence_length + 1] + [EOS]\n",
+ "    return batch_x, batch_y\n",
+ "\n",
+ "\n",
+ "def _last_beam_state(decoder_state):\n",
+ "    # Take the last slice along axis 1 of every layer's cell state (presumably the\n",
+ "    # beam axis - TODO confirm). Works for any number of layers.\n",
+ "    return tuple(layer[:,-1,:] for layer in decoder_state.cell_state)\n",
+ "\n",
+ "\n",
+ "def train_random_batch():\n",
+ "    # Train on randomly sampled batches for `epoch` steps.\n",
+ "    #\n",
+ "    # One unlogged optimisation step runs first to obtain a decoder state; every\n",
+ "    # later step feeds the previous step's state through model.encoder_state.\n",
+ "    # Returns (LOST, ACCURACY): the per-step loss and accuracy lists.\n",
+ "    LOST, ACCURACY = [], []\n",
+ "    pbar = tqdm(range(epoch), desc = 'epoch')\n",
+ "    batch_x, batch_y = _sample_batch()\n",
+ "    initial_state, _ = sess.run([model.predict_state, model.optimizer],\n",
+ "                                feed_dict = {model.X: batch_x,\n",
+ "                                             model.Y: batch_y})\n",
+ "    initial_state = _last_beam_state(initial_state)\n",
+ "\n",
+ "    for _ in pbar:\n",
+ "        batch_x, batch_y = _sample_batch()\n",
+ "        accuracy, _, loss, initial_state = sess.run([model.accuracy, model.optimizer,\n",
+ "                                                     model.cost, model.predict_state],\n",
+ "                                                    feed_dict = {model.X: batch_x,\n",
+ "                                                                 model.Y: batch_y,\n",
+ "                                                                 model.encoder_state: initial_state})\n",
+ "        initial_state = _last_beam_state(initial_state)\n",
+ "        ACCURACY.append(accuracy); LOST.append(loss)\n",
+ "        pbar.set_postfix(cost = loss, accuracy = accuracy)\n",
+ "    return LOST, ACCURACY"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "epoch: 100%|██████████| 3000/3000 [27:14<00:00, 2.08it/s, accuracy=0.348, cost=3.35] \n"
+ ]
+ }
+ ],
+ "source": [
+ "LOST, ACCURACY = train_random_batch()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ "