Resolve comments

gpengzhi · gpengzhi · commit a2dee7823c07 · 2019-09-20T14:10:47.000-04:00
diff --git a/examples/seq2seq_exposure_bias/interpolation_decoder.py b/examples/seq2seq_exposure_bias/interpolation_decoder.py
@@ -113,12 +113,12 @@ def step(self, time, inputs, state, name=None):
             logits, sample_ids, wrapper_outputs,
             attention_scores, attention_context)
 
-        return (outputs, sample_ids, logits, wrapper_state)
+        return (outputs, wrapper_state)
 
-    def next_inputs(self, sample_ids, time, outputs, state):
+    def next_inputs(self, time, outputs, state):
         (finished, next_inputs, next_state) = self._helper.next_inputs(
             time=time,
-            outputs=outputs,
+            outputs=outputs.logits,
             state=[state[0], state],
-            sample_ids=sample_ids)
+            sample_ids=outputs.sample_id)
         return (finished, next_inputs, next_state)
diff --git a/texar/tf/modules/decoders/__init__.py b/texar/tf/modules/decoders/__init__.py
@@ -21,9 +21,9 @@
 
 # pylint: disable=wildcard-import
 
+from texar.tf.modules.decoders.beam_search_decode import *
 from texar.tf.modules.decoders.rnn_decoder_base import *
 from texar.tf.modules.decoders.rnn_decoders import *
 from texar.tf.modules.decoders.tf_helpers import *
 from texar.tf.modules.decoders.rnn_decoder_helpers import *
 from texar.tf.modules.decoders.transformer_decoders import *
-from texar.tf.modules.decoders.beam_search_decode import *
diff --git a/texar/tf/modules/decoders/rnn_decoders.py b/texar/tf/modules/decoders/rnn_decoders.py
@@ -257,14 +257,14 @@ def step(self, time, inputs, state, name=None):
         sample_ids = self._helper.sample(
             time=time, outputs=logits, state=cell_state)
         outputs = BasicRNNDecoderOutput(logits, sample_ids, cell_outputs)
-        return outputs, sample_ids, logits, cell_state
+        return outputs, cell_state
 
-    def next_inputs(self, sample_ids, time, logits, state):
+    def next_inputs(self, time, outputs, state):
         (finished, next_inputs, next_state) = self._helper.next_inputs(
             time=time,
-            outputs=logits,
+            outputs=outputs.logits,
             state=state,
-            sample_ids=sample_ids,)
+            sample_ids=outputs.sample_id)
         return finished, next_inputs, next_state
 
     def finalize(self, outputs, final_state, sequence_lengths):
@@ -601,14 +601,14 @@ def step(self, time, inputs, state, name=None):
             logits, sample_ids, wrapper_outputs,
             attention_scores, attention_context)
 
-        return (outputs, sample_ids, logits, wrapper_state)
+        return (outputs, wrapper_state)
 
-    def next_inputs(self, sample_ids, time, outputs, state):
+    def next_inputs(self, time, outputs, state):
         (finished, next_inputs, state) = self._helper.next_inputs(
             time=time,
-            outputs=outputs,
+            outputs=outputs.logits,
             state=state,
-            sample_ids=sample_ids)
+            sample_ids=outputs.sample_id)
         return (finished, next_inputs, state)
 
     def finalize(self, outputs, final_state, sequence_lengths):
diff --git a/texar/tf/modules/decoders/rnn_decoders_test.py b/texar/tf/modules/decoders/rnn_decoders_test.py
@@ -385,5 +385,6 @@ def test_beam_search_cell(self):
         for tvar in beam_cell.trainable_variables:
             self.assertTrue(tvar in decoder.trainable_variables)
 
+
 if __name__ == "__main__":
     tf.test.main()
diff --git a/texar/tf/modules/decoders/tf_helpers.py b/texar/tf/modules/decoders/tf_helpers.py
@@ -615,13 +615,10 @@ def sample(self, time, outputs, state, name=None):
         sample_ids = math_ops.argmax(outputs, axis=-1, output_type=dtypes.int32)
         return sample_ids
 
-    def next_inputs(self, time, outputs, state, sample_ids, name=None,
-                    reach_max_time=None):
+    def next_inputs(self, time, outputs, state, sample_ids, name=None):
         """Gets the inputs for next step."""
         finished = math_ops.equal(sample_ids, self._end_token)
         all_finished = math_ops.reduce_all(finished)
-        if reach_max_time is not None:
-            all_finished = tf.logical_or(all_finished, reach_max_time)
 
         if self._embedding_args_cnt == 1:
             del time, outputs  # unused by next_inputs_fn
diff --git a/texar/tf/modules/decoders/transformer_decoders.py b/texar/tf/modules/decoders/transformer_decoders.py
@@ -826,14 +826,14 @@ def step(self, time, inputs, state, name=None):
         wrapper_outputs = TransformerDecoderOutput(
             logits=outputs,
             sample_id=sample_ids)
-        return (wrapper_outputs, sample_ids, outputs, state)
+        return (wrapper_outputs, state)
 
-    def next_inputs(self, sample_ids, time, outputs, state):
+    def next_inputs(self, time, outputs, state):
         (finished, next_inputs, state) = self._helper.next_inputs(
             time=time,
-            outputs=outputs,
+            outputs=outputs.logits,
             state=state,
-            sample_ids=sample_ids)
+            sample_ids=outputs.sample_id)
         return (finished, next_inputs, state)
 
     def finalize(self, outputs, final_state, sequence_lengths):
diff --git a/texar/tf/utils/beam_search.py b/texar/tf/utils/beam_search.py
@@ -16,8 +16,8 @@
 # Modifications copyright (C) 2019 Texar
 # ==============================================================================
 """
-Implemetation of beam seach with penalties.
-Adapted from tensor2tensor repositor.
+Implementation of beam search with penalties.
+Adapted from tensor2tensor repository.
 """
 
 from __future__ import absolute_import
@@ -32,6 +32,7 @@
 # Default value for INF
 INF = 1. * 1e7
 
+
 def _merge_beam_dim(tensor):
     """Reshapes first two dimensions in to single dimension.
 
@@ -41,6 +42,8 @@ def _merge_beam_dim(tensor):
     Returns:
         Reshaped tensor of shape [A*B, ...]
     """
+    if not isinstance(tensor, tf.Tensor) or not tensor.get_shape().as_list():
+        return tensor
     shape = shape_list(tensor)
     shape[0] *= shape[1]    # batch -> batch * beam_size
     shape.pop(1)    # Remove beam dim
@@ -58,6 +61,8 @@ def _unmerge_beam_dim(tensor, batch_size, beam_size):
     Returns:
         Reshaped tensor of shape [batch_size, beam_size, ...]
     """
+    if not isinstance(tensor, tf.Tensor) or not tensor.get_shape().as_list():
+        return tensor
     shape = shape_list(tensor)
     new_shape = [batch_size] + [beam_size] + shape[1:]
     return tf.reshape(tensor, new_shape)
@@ -73,6 +78,8 @@ def _expand_to_beam_size(tensor, beam_size):
     Returns:
         Tiled tensor [batch_size, beam_size, ...]
     """
+    if not isinstance(tensor, tf.Tensor) or not tensor.get_shape().as_list():
+        return tensor
     tensor = tf.expand_dims(tensor, axis=1)
     tile_dims = [1] * tensor.shape.ndims
     tile_dims[1] = beam_size
@@ -173,6 +180,9 @@ def compute_topk_scores_and_seq(sequences, scores, scores_to_gather, flags,
     # operations with tfdbg. Clients can capture these tensors by watching
     # these node names.
     def gather(tensor, name):
+        if not isinstance(tensor,
+                          tf.Tensor) or not tensor.get_shape().as_list():
+            return tensor
         return tf.gather_nd(tensor, top_coordinates, name=(prefix + name))
     topk_seq = gather(sequences, "_topk_seq")
     topk_flags = gather(flags, "_topk_flags")
@@ -196,7 +206,7 @@ def beam_search(symbols_to_logits_fn,
                 stop_early=True):
     """Beam search with length penalties.
 
-    Requires a function that can take the currently decoded sybmols and
+    Requires a function that can take the currently decoded symbols and
     return the logits for the next symbol. The implementation is inspired
     by https://arxiv.org/abs/1609.08144.
 
@@ -255,11 +265,11 @@ def beam_search(symbols_to_logits_fn,
     # Expand each batch and state to beam_size
     alive_seq = _expand_to_beam_size(initial_ids, beam_size)
     alive_seq = tf.expand_dims(alive_seq, axis=2)
-    #(batch_size, beam_size, 1)
+
+    # (batch_size, beam_size, 1)
     if states:
         states = nest.map_structure(
-            lambda state: _expand_to_beam_size(state, beam_size),
-                states)
+            lambda state: _expand_to_beam_size(state, beam_size), states)
     else:
         states = {}
 
@@ -384,7 +394,7 @@ def grow_topk(i, alive_seq, alive_log_probs, states):
         if states:
             flat_states = nest.map_structure(_merge_beam_dim, states)
             flat_logits, flat_states = symbols_to_logits_fn(flat_ids, i,
-                flat_states)
+                                                            flat_states)
             states = nest.map_structure(
                 lambda t: _unmerge_beam_dim(t, batch_size, beam_size),
                 flat_states)
@@ -435,20 +445,19 @@ def grow_topk(i, alive_seq, alive_log_probs, states):
         topk_seq = tf.gather_nd(alive_seq, topk_coordinates)
         if states:
             states = nest.map_structure(
-                lambda state: tf.gather_nd(state, topk_coordinates),
-                    states)
+                lambda state: tf.gather_nd(state, topk_coordinates), states)
 
         # Append the most probable alive
         topk_seq = tf.concat([topk_seq, tf.expand_dims(topk_ids, axis=2)],
-            axis=2)
+                             axis=2)
 
         topk_finished = tf.equal(topk_ids, eos_id)
 
         return topk_seq, topk_log_probs, topk_scores, topk_finished, states
 
     def inner_loop(i, alive_seq, alive_log_probs, finished_seq,
-            finished_scores, finished_flags, states):
-        """Inner beam seach loop.
+                   finished_scores, finished_flags, states):
+        """Inner beam search loop.
 
         There are three groups of tensors, alive, finished, and topk.
         The alive group contains information about the current alive
diff --git a/texar/tf/utils/dynamic_decode.py b/texar/tf/utils/dynamic_decode.py
@@ -182,8 +182,6 @@ def dynamic_decode(decoder,
                         type(decoder))
 
     with tf.variable_scope(scope, "decoder") as varscope:
-        # Determine context types.
-
         if maximum_iterations is not None:
             maximum_iterations = tf.convert_to_tensor(
                 maximum_iterations, dtype=tf.int32, name="maximum_iterations")
@@ -249,14 +247,13 @@ def body(time, outputs_ta, state, inputs, finished, sequence_lengths):
             `(time + 1, outputs_ta, next_state, next_inputs, next_finished,
             next_sequence_lengths)`.
         """
-        (next_outputs, sample_ids, logits, state) = decoder.step(
-            time, inputs, state)
-        reach_max = tf.equal(time+1, decoder.max_decoding_length)
+        (next_outputs, state) = decoder.step(time, inputs, state)
+        reach_max = tf.equal(time+1, maximum_iterations)
         (decoder_finished, next_inputs, decoder_state) = tf.cond(
             reach_max,
-            lambda: (tf.cast(tf.ones(tf.shape(sample_ids)[0]), tf.bool),
-                     decoder._helper._start_inputs, state),
-            lambda: decoder.next_inputs(sample_ids, time, logits, state)
+            lambda: (tf.cast(tf.ones_like(finished), tf.bool),
+                     inputs, state),
+            lambda: decoder.next_inputs(time, next_outputs, state)
         )
         if decoder.tracks_own_finished:
             next_finished = decoder_finished