Commit
improve stackpointer
huseinzol05 committed Sep 29, 2019
1 parent d49e296 commit 97124d6
Showing 1 changed file with 87 additions and 39 deletions.
126 changes: 87 additions & 39 deletions dependency-parser/6.stackpointer.ipynb
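
This commit rewires decoding to reuse a single pre-built decoder step: placeholders (src_encoding, arc_c, hx, holder_type_h, holder_type_c) are added to the model, and decode_sentence / decode now call sess.run with a feed_dict instead of constructing new TensorFlow ops inside the beam-search loop. It also folds encoder_layers / decoder_layers into one layers argument and creates the LSTM cells with state_is_tuple=False so the recurrent state can be fed back in as a flat tensor. Below is a minimal, illustrative sketch of that TF 1.x pattern; the names step_input, step_state, num_arcs and the sizes are made up for this example and are not the notebook's actual variables.

    import numpy as np
    import tensorflow as tf  # TF 1.x API, matching the notebook

    hidden_size = 256
    num_arcs = 10  # illustrative output size, not from the notebook

    # Build the per-step decoder subgraph once, driven by placeholders.
    cell = tf.nn.rnn_cell.LSTMCell(hidden_size, state_is_tuple=False)
    step_input = tf.placeholder(tf.float32, (None, hidden_size))
    step_state = tf.placeholder(tf.float32, (None, hidden_size * 2))  # flat state, hence state_is_tuple=False
    step_output, next_state = cell(step_input, step_state)
    step_scores = tf.nn.log_softmax(tf.layers.dense(step_output, num_arcs), axis=1)

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    # The decoding loop only feeds numpy arrays; no new ops are added to the graph here.
    state = np.zeros((1, hidden_size * 2), dtype=np.float32)
    for _ in range(5):
        scores, state = sess.run(
            [step_scores, next_state],
            feed_dict={step_input: np.zeros((1, hidden_size), dtype=np.float32),
                       step_state: state})

Feeding the state as one flat array is why the cells are now built with state_is_tuple=False, and building the step once keeps the graph from growing on every call to decode, which is presumably what the repeated timed decode cells at the end of the notebook measure.
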
@@ -485,20 +485,27 @@
"\n",
"class StackPointer:\n",
" def __init__(self, word_dim, num_words, char_dim, num_chars, num_filters, kernel_size,\n",
" input_size_decoder, hidden_size, encoder_layers, decoder_layers,\n",
" input_size_decoder, hidden_size, layers,\n",
" num_labels, arc_space, type_space):\n",
" \n",
" def cells(size, reuse=False):\n",
" return tf.nn.rnn_cell.LSTMCell(size,\n",
" initializer=tf.orthogonal_initializer(),reuse=reuse)\n",
" initializer=tf.orthogonal_initializer(),reuse=reuse,\n",
" state_is_tuple=False)\n",
" \n",
" self.word_embedd = tf.Variable(tf.random_uniform([num_words, word_dim], -1, 1))\n",
" self.char_embedd = tf.Variable(tf.random_uniform([num_chars, char_dim], -1, 1))\n",
" self.conv1d = tf.layers.Conv1D(num_filters, kernel_size, 1, padding='VALID')\n",
" self.num_labels = num_labels\n",
" self.prior_order = PriorOrder.DEPTH\n",
" self.encoder = tf.nn.rnn_cell.MultiRNNCell([cells(hidden_size) for _ in range(encoder_layers)])\n",
" self.decoder = tf.nn.rnn_cell.MultiRNNCell([cells(hidden_size) for _ in range(decoder_layers)])\n",
" self.layers = layers\n",
" self.encoder = tf.nn.rnn_cell.MultiRNNCell([cells(hidden_size) for _ in range(layers)],\n",
" state_is_tuple=False)\n",
" self.decoder = tf.nn.rnn_cell.MultiRNNCell([cells(hidden_size) for _ in range(layers)],\n",
" state_is_tuple=False)\n",
" self.hidden_size = hidden_size\n",
" self.arc_space = arc_space\n",
" \n",
" \n",
" self.src_dense = tf.layers.Dense(hidden_size)\n",
" self.hx_dense = tf.layers.Dense(hidden_size)\n",
@@ -655,8 +662,7 @@
" kernel_size = 3,\n",
" input_size_decoder = 256, \n",
" hidden_size = 256, \n",
" encoder_layers = 1, \n",
" decoder_layers = 1,\n",
" layers = 1,\n",
" num_labels = len(tag2idx), \n",
" arc_space = 128, \n",
" type_space = 128)\n",
@@ -678,7 +684,32 @@
" loss_arc = loss_arc_leaf + loss_arc_non_leaf\n",
" loss_type = loss_type_leaf + loss_type_non_leaf\n",
" self.cost = loss_arc + loss_type + cov * loss_cov\n",
" self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)"
" self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)\n",
" \n",
" self.encode_output, self.encode_hidden = self.stackpointer.encode(self.words, self.chars)\n",
" self.encode_arc_c = tf.nn.elu(self.stackpointer.arc_c(self.encode_output))\n",
" self.type_c = tf.nn.elu(self.stackpointer.type_c(self.encode_output))\n",
" \n",
" self.src_encoding = tf.placeholder(tf.float32, (None, self.stackpointer.hidden_size))\n",
" self.arc_c = tf.placeholder(tf.float32, (None, self.stackpointer.arc_space))\n",
" self.hx = tf.placeholder(tf.float32, (None, \n",
" self.stackpointer.hidden_size * 2 * self.stackpointer.layers)) \n",
" \n",
" src_encoding = tf.nn.elu(self.stackpointer.src_dense(self.src_encoding))\n",
" output_dec, hx = self.stackpointer.decoder(src_encoding, self.hx)\n",
" arc_h = tf.nn.elu(self.stackpointer.arc_h(tf.expand_dims(output_dec, axis = 1)))\n",
" type_h = tf.nn.elu(self.stackpointer.type_h(output_dec))\n",
" out_arc = self.stackpointer.attention.forward(arc_h, tf.expand_dims(self.arc_c, 0))\n",
" out_arc = tf.squeeze(tf.squeeze(out_arc, axis = 1), axis = 1)\n",
" self.hyp_scores = tf.nn.log_softmax(out_arc, axis = 1)\n",
" self.type_h = type_h\n",
" self.decode_hidden = hx\n",
" \n",
" self.holder_type_h = tf.placeholder(tf.float32, (None, self.stackpointer.arc_space))\n",
" self.holder_type_c = tf.placeholder(tf.float32, (None, self.stackpointer.arc_space))\n",
" \n",
" out_type = self.stackpointer.bilinear.forward(self.holder_type_h, self.holder_type_c)\n",
" self.hyp_type_scores = tf.nn.log_softmax(out_type, axis = 1)"
]
},
{
@@ -716,7 +747,6 @@
" return child_id > child_orders[base_id, head]\n",
" \n",
" length = output_enc.shape[0] if length is None else length\n",
" hx = tuple([hx])\n",
" \n",
" stacked_heads = [[0] for _ in range(beam)]\n",
" grand_parents = [[0] for _ in range(beam)]\n",
@@ -749,13 +779,11 @@
" src_encoding = src_encoding + output_enc_sibling\n",
" output_enc_gpar = output_enc[gpars]\n",
" src_encoding = src_encoding + output_enc_gpar\n",
" src_encoding = tf.nn.elu(model.stackpointer.src_dense(src_encoding))\n",
" output_dec, hx = model.stackpointer.decoder(src_encoding, hx)\n",
" arc_h = tf.nn.elu(model.stackpointer.arc_h(tf.expand_dims(output_dec, axis = 1)))\n",
" type_h = tf.nn.elu(model.stackpointer.type_h(output_dec))\n",
" out_arc = model.stackpointer.attention.forward(arc_h, tf.expand_dims(arc_c, 0))\n",
" out_arc = tf.squeeze(tf.squeeze(out_arc, axis = 1), axis = 1)\n",
" hyp_scores, type_h = sess.run([tf.nn.log_softmax(out_arc, axis = 1), type_h])\n",
" hyp_scores, type_h, hx = sess.run([model.hyp_scores, model.type_h, model.decode_hidden],\n",
" feed_dict = {model.src_encoding: src_encoding,\n",
" model.arc_c: arc_c,\n",
" model.hx: hx})\n",
" \n",
" new_hypothesis_scores = np.expand_dims(hypothesis_scores[:num_hyp], axis = 1) + hyp_scores\n",
" new_hypothesis_scores = new_hypothesis_scores.reshape((-1))\n",
" hyp_index = np.argsort(new_hypothesis_scores)[::-1]\n",
@@ -827,8 +855,11 @@
" index = np.array(ids)\n",
" base_index = base_index[index]\n",
" child_index = child_index[index]\n",
" out_type = model.stackpointer.bilinear.forward(type_h[base_index], type_c[child_index])\n",
" hyp_type_scores = sess.run(tf.nn.log_softmax(out_type, axis = 1))\n",
" hyp_type_scores = sess.run(model.hyp_type_scores,\n",
" feed_dict = {\n",
" model.holder_type_h: type_h[base_index],\n",
" model.holder_type_c: type_c[child_index]\n",
" })\n",
" hyp_types = np.argmax(hyp_type_scores, axis = 1)\n",
" hyp_type_scores = np.max(hyp_type_scores, axis = 1)\n",
" hypothesis_scores[:num_hyp] = hypothesis_scores[:num_hyp] + hyp_type_scores\n",
@@ -866,10 +897,10 @@
" return heads, types, length, children, stacked_types \n",
" \n",
"def decode(input_word, input_char, length = None, beam = 1, leading_symbolic=0, ordered=True):\n",
" output, hn = model.stackpointer.encode(input_word, input_char)\n",
" arc_c, type_c, output, hn = sess.run([tf.nn.elu(model.stackpointer.arc_c(output)), \n",
" tf.nn.elu(model.stackpointer.type_c(output)),\n",
" output, hn])\n",
" \n",
" arc_c, type_c, output, hn = sess.run([model.encode_arc_c, model.type_c, \n",
" model.encode_output, model.encode_hidden],\n",
" feed_dict = {model.words: input_word, model.chars: input_char})\n",
" batch, max_len_e, _ = output.shape\n",
"\n",
" heads = np.zeros([batch, max_len_e], dtype=np.int32)\n",
@@ -880,10 +911,10 @@
" \n",
" for b in range(batch):\n",
" sent_len = None if length is None else length[b]\n",
" state = tf.nn.rnn_cell.LSTMStateTuple(hn[0].c[b:b+1], hn[0].h[b:b+1])\n",
" preds = decode_sentence(output[b], arc_c[b], type_c[b], state, beam, sent_len, ordered, leading_symbolic)\n",
" preds = decode_sentence(output[b], arc_c[b], type_c[b], [hn[b]], \n",
" beam, sent_len, ordered, leading_symbolic)\n",
" if preds is None:\n",
" preds = decode_sentence(output[b], arc_c[b], type_c[b], state, beam, \n",
" preds = decode_sentence(output[b], arc_c[b], type_c[b], [hn[b]], beam, \n",
" sent_len, False, leading_symbolic)\n",
" hids, tids, sent_len, chids, stids = preds\n",
" heads[b, :sent_len] = hids\n",
Expand All @@ -901,21 +932,38 @@
"metadata": {},
"outputs": [],
"source": [
"# batch_w = wid_inputs[:2]\n",
"# batch_c = cid_inputs[:2]\n",
"# batch_heads = hid_inputs[:2]\n",
"# batch_stacked_heads = stack_hid_inputs[:2]\n",
"# batch_siblings = ssid_inputs[:2]\n",
"# batch_children = chid_inputs[:2]\n",
"# batch_stacked_types = stack_tid_inputs[:2]\n",
"# batch_e = masks_e[:2]\n",
"# batch_d = masks_d[:2]\n",
"# batch_types = tid_inputs[:2]\n",
"# batch_len = np.count_nonzero(batch_w, axis = 1)\n",
"\n",
"# NUM_SYMBOLIC_TAGS = 3\n",
"# heads_pred, types_pred, _, _ = decode(batch_w, batch_c, leading_symbolic = NUM_SYMBOLIC_TAGS)\n",
"# evaluate(batch_w, heads_pred, types_pred, batch_heads, batch_types, batch_len)"
"batch_w, batch_c, batch_heads, batch_stacked_heads, batch_siblings, batch_children, \\\n",
"batch_stacked_types, batch_e, batch_d, batch_types, batch_len = train[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"decode(batch_w[:2], batch_c[:2])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"decode(batch_w[:2], batch_c[:2])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"decode(batch_w[:2], batch_c[:2])"
]
},
{