Commit
improve stackpointer
huseinzol05 committed Sep 29, 2019
1 parent d49e296 commit 97124d6
Showing 1 changed file with 87 additions and 39 deletions.
126 changes: 87 additions & 39 deletions dependency-parser/6.stackpointer.ipynb
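
This commit rewires decoding to reuse a single pre-built decoder step: placeholders (src_encoding, arc_c, hx, holder_type_h, holder_type_c) are added to the model, and decode_sentence / decode now call sess.run with a feed_dict instead of constructing new TensorFlow ops inside the beam-search loop. It also folds encoder_layers / decoder_layers into one layers argument and creates the LSTM cells with state_is_tuple=False so the recurrent state can be fed back in as a flat tensor. Below is a minimal, illustrative sketch of that TF 1.x pattern; the names step_input, step_state, num_arcs and the sizes are made up for this example and are not the notebook's actual variables.

    import numpy as np
    import tensorflow as tf  # TF 1.x API, matching the notebook

    hidden_size = 256
    num_arcs = 10  # illustrative output size, not from the notebook

    # Build the per-step decoder subgraph once, driven by placeholders.
    cell = tf.nn.rnn_cell.LSTMCell(hidden_size, state_is_tuple=False)
    step_input = tf.placeholder(tf.float32, (None, hidden_size))
    step_state = tf.placeholder(tf.float32, (None, hidden_size * 2))  # flat state, hence state_is_tuple=False
    step_output, next_state = cell(step_input, step_state)
    step_scores = tf.nn.log_softmax(tf.layers.dense(step_output, num_arcs), axis=1)

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    # The decoding loop only feeds numpy arrays; no new ops are added to the graph here.
    state = np.zeros((1, hidden_size * 2), dtype=np.float32)
    for _ in range(5):
        scores, state = sess.run(
            [step_scores, next_state],
            feed_dict={step_input: np.zeros((1, hidden_size), dtype=np.float32),
                       step_state: state})

Feeding the state as one flat array is why the cells are now built with state_is_tuple=False, and building the step once keeps the graph from growing on every call to decode, which is presumably what the repeated timed decode cells at the end of the notebook measure.
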
@@ -485,20 +485,27 @@
"\n",
"class StackPointer:\n",
" def __init__(self, word_dim, num_words, char_dim, num_chars, num_filters, kernel_size,\n",
" input_size_decoder, hidden_size, encoder_layers, decoder_layers,\n",
" input_size_decoder, hidden_size, layers,\n",
" num_labels, arc_space, type_space):\n",
" \n",
" def cells(size, reuse=False):\n",
" return tf.nn.rnn_cell.LSTMCell(size,\n",
" initializer=tf.orthogonal_initializer(),reuse=reuse)\n",
" initializer=tf.orthogonal_initializer(),reuse=reuse,\n",
" state_is_tuple=False)\n",
" \n",
" self.word_embedd = tf.Variable(tf.random_uniform([num_words, word_dim], -1, 1))\n",
" self.char_embedd = tf.Variable(tf.random_uniform([num_chars, char_dim], -1, 1))\n",
" self.conv1d = tf.layers.Conv1D(num_filters, kernel_size, 1, padding='VALID')\n",
" self.num_labels = num_labels\n",
" self.prior_order = PriorOrder.DEPTH\n",
" self.encoder = tf.nn.rnn_cell.MultiRNNCell([cells(hidden_size) for _ in range(encoder_layers)])\n",
" self.decoder = tf.nn.rnn_cell.MultiRNNCell([cells(hidden_size) for _ in range(decoder_layers)])\n",
" self.layers = layers\n",
" self.encoder = tf.nn.rnn_cell.MultiRNNCell([cells(hidden_size) for _ in range(layers)],\n",
" state_is_tuple=False)\n",
" self.decoder = tf.nn.rnn_cell.MultiRNNCell([cells(hidden_size) for _ in range(layers)],\n",
" state_is_tuple=False)\n",
" self.hidden_size = hidden_size\n",
" self.arc_space = arc_space\n",
" \n",
" \n",
" self.src_dense = tf.layers.Dense(hidden_size)\n",
" self.hx_dense = tf.layers.Dense(hidden_size)\n",
@@ -655,8 +662,7 @@
" kernel_size = 3,\n",
" input_size_decoder = 256, \n",
" hidden_size = 256, \n",
" encoder_layers = 1, \n",
" decoder_layers = 1,\n",
" layers = 1,\n",
" num_labels = len(tag2idx), \n",
" arc_space = 128, \n",
" type_space = 128)\n",
@@ -678,7 +684,32 @@
" loss_arc = loss_arc_leaf + loss_arc_non_leaf\n",
" loss_type = loss_type_leaf + loss_type_non_leaf\n",
" self.cost = loss_arc + loss_type + cov * loss_cov\n",
" self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)"
" self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)\n",
" \n",
" self.encode_output, self.encode_hidden = self.stackpointer.encode(self.words, self.chars)\n",
" self.encode_arc_c = tf.nn.elu(self.stackpointer.arc_c(self.encode_output))\n",
" self.type_c = tf.nn.elu(self.stackpointer.type_c(self.encode_output))\n",
" \n",
" self.src_encoding = tf.placeholder(tf.float32, (None, self.stackpointer.hidden_size))\n",
" self.arc_c = tf.placeholder(tf.float32, (None, self.stackpointer.arc_space))\n",
" self.hx = tf.placeholder(tf.float32, (None, \n",
" self.stackpointer.hidden_size * 2 * self.stackpointer.layers)) \n",
" \n",
" src_encoding = tf.nn.elu(self.stackpointer.src_dense(self.src_encoding))\n",
" output_dec, hx = self.stackpointer.decoder(src_encoding, self.hx)\n",
" arc_h = tf.nn.elu(self.stackpointer.arc_h(tf.expand_dims(output_dec, axis = 1)))\n",
" type_h = tf.nn.elu(self.stackpointer.type_h(output_dec))\n",
" out_arc = self.stackpointer.attention.forward(arc_h, tf.expand_dims(self.arc_c, 0))\n",
" out_arc = tf.squeeze(tf.squeeze(out_arc, axis = 1), axis = 1)\n",
" self.hyp_scores = tf.nn.log_softmax(out_arc, axis = 1)\n",
" self.type_h = type_h\n",
" self.decode_hidden = hx\n",
" \n",
" self.holder_type_h = tf.placeholder(tf.float32, (None, self.stackpointer.arc_space))\n",
" self.holder_type_c = tf.placeholder(tf.float32, (None, self.stackpointer.arc_space))\n",
" \n",
" out_type = self.stackpointer.bilinear.forward(self.holder_type_h, self.holder_type_c)\n",
" self.hyp_type_scores = tf.nn.log_softmax(out_type, axis = 1)"
]
},
{
@@ -716,7 +747,6 @@
" return child_id > child_orders[base_id, head]\n",
" \n",
" length = output_enc.shape[0] if length is None else length\n",
" hx = tuple([hx])\n",
" \n",
" stacked_heads = [[0] for _ in range(beam)]\n",
" grand_parents = [[0] for _ in range(beam)]\n",
@@ -749,13 +779,11 @@
" src_encoding = src_encoding + output_enc_sibling\n",
" output_enc_gpar = output_enc[gpars]\n",
" src_encoding = src_encoding + output_enc_gpar\n",
" src_encoding = tf.nn.elu(model.stackpointer.src_dense(src_encoding))\n",
" output_dec, hx = model.stackpointer.decoder(src_encoding, hx)\n",
" arc_h = tf.nn.elu(model.stackpointer.arc_h(tf.expand_dims(output_dec, axis = 1)))\n",
" type_h = tf.nn.elu(model.stackpointer.type_h(output_dec))\n",
" out_arc = model.stackpointer.attention.forward(arc_h, tf.expand_dims(arc_c, 0))\n",
" out_arc = tf.squeeze(tf.squeeze(out_arc, axis = 1), axis = 1)\n",
" hyp_scores, type_h = sess.run([tf.nn.log_softmax(out_arc, axis = 1), type_h])\n",
" hyp_scores, type_h, hx = sess.run([model.hyp_scores, model.type_h, model.decode_hidden],\n",
" feed_dict = {model.src_encoding: src_encoding,\n",
" model.arc_c: arc_c,\n",
" model.hx: hx})\n",
" \n",
" new_hypothesis_scores = np.expand_dims(hypothesis_scores[:num_hyp], axis = 1) + hyp_scores\n",
" new_hypothesis_scores = new_hypothesis_scores.reshape((-1))\n",
" hyp_index = np.argsort(new_hypothesis_scores)[::-1]\n",
@@ -827,8 +855,11 @@
" index = np.array(ids)\n",
" base_index = base_index[index]\n",
" child_index = child_index[index]\n",
" out_type = model.stackpointer.bilinear.forward(type_h[base_index], type_c[child_index])\n",
" hyp_type_scores = sess.run(tf.nn.log_softmax(out_type, axis = 1))\n",
" hyp_type_scores = sess.run(model.hyp_type_scores,\n",
" feed_dict = {\n",
" model.holder_type_h: type_h[base_index],\n",
" model.holder_type_c: type_c[child_index]\n",
" })\n",
" hyp_types = np.argmax(hyp_type_scores, axis = 1)\n",
" hyp_type_scores = np.max(hyp_type_scores, axis = 1)\n",
" hypothesis_scores[:num_hyp] = hypothesis_scores[:num_hyp] + hyp_type_scores\n",
@@ -866,10 +897,10 @@
" return heads, types, length, children, stacked_types \n",
" \n",
"def decode(input_word, input_char, length = None, beam = 1, leading_symbolic=0, ordered=True):\n",
" output, hn = model.stackpointer.encode(input_word, input_char)\n",
" arc_c, type_c, output, hn = sess.run([tf.nn.elu(model.stackpointer.arc_c(output)), \n",
" tf.nn.elu(model.stackpointer.type_c(output)),\n",
" output, hn])\n",
" \n",
" arc_c, type_c, output, hn = sess.run([model.encode_arc_c, model.type_c, \n",
" model.encode_output, model.encode_hidden],\n",
" feed_dict = {model.words: input_word, model.chars: input_char})\n",
" batch, max_len_e, _ = output.shape\n",
"\n",
" heads = np.zeros([batch, max_len_e], dtype=np.int32)\n",
@@ -880,10 +911,10 @@
" \n",
" for b in range(batch):\n",
" sent_len = None if length is None else length[b]\n",
" state = tf.nn.rnn_cell.LSTMStateTuple(hn[0].c[b:b+1], hn[0].h[b:b+1])\n",
" preds = decode_sentence(output[b], arc_c[b], type_c[b], state, beam, sent_len, ordered, leading_symbolic)\n",
" preds = decode_sentence(output[b], arc_c[b], type_c[b], [hn[b]], \n",
" beam, sent_len, ordered, leading_symbolic)\n",
" if preds is None:\n",
" preds = decode_sentence(output[b], arc_c[b], type_c[b], state, beam, \n",
" preds = decode_sentence(output[b], arc_c[b], type_c[b], [hn[b]], beam, \n",
" sent_len, False, leading_symbolic)\n",
" hids, tids, sent_len, chids, stids = preds\n",
" heads[b, :sent_len] = hids\n",
Expand All @@ -901,21 +932,38 @@
"metadata": {},
"outputs": [],
"source": [
"# batch_w = wid_inputs[:2]\n",
"# batch_c = cid_inputs[:2]\n",
"# batch_heads = hid_inputs[:2]\n",
"# batch_stacked_heads = stack_hid_inputs[:2]\n",
"# batch_siblings = ssid_inputs[:2]\n",
"# batch_children = chid_inputs[:2]\n",
"# batch_stacked_types = stack_tid_inputs[:2]\n",
"# batch_e = masks_e[:2]\n",
"# batch_d = masks_d[:2]\n",
"# batch_types = tid_inputs[:2]\n",
"# batch_len = np.count_nonzero(batch_w, axis = 1)\n",
"\n",
"# NUM_SYMBOLIC_TAGS = 3\n",
"# heads_pred, types_pred, _, _ = decode(batch_w, batch_c, leading_symbolic = NUM_SYMBOLIC_TAGS)\n",
"# evaluate(batch_w, heads_pred, types_pred, batch_heads, batch_types, batch_len)"
"batch_w, batch_c, batch_heads, batch_stacked_heads, batch_siblings, batch_children, \\\n",
"batch_stacked_types, batch_e, batch_d, batch_types, batch_len = train[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"decode(batch_w[:2], batch_c[:2])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"decode(batch_w[:2], batch_c[:2])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"decode(batch_w[:2], batch_c[:2])"
]
},
{