
Commit bc1268d

rllin-fathom authored and Copybara-Service committed
internal merge of PR #1192
PiperOrigin-RevId: 219664613
1 parent cb655f0 commit bc1268d

File tree

1 file changed: +4 -3 lines changed


tensor2tensor/models/research/universal_transformer_util.py

Lines changed: 4 additions & 3 deletions
@@ -125,6 +125,8 @@ def universal_transformer_encoder(encoder_input,
     x, extra_output = universal_transformer_layer(
         x, hparams, ffn_unit, attention_unit, pad_remover=pad_remover)
 
+    if hparams.get("use_memory_as_last_state", False):
+      x = extra_output  # which is memory
     return common_layers.layer_preprocess(x, hparams), extra_output
 
 
@@ -249,9 +251,8 @@ def add_vanilla_transformer_layer(x, num_layers):
     output, _, extra_output = tf.foldl(
         ut_function, tf.range(hparams.num_rec_steps), initializer=initializer)
 
-    # Right now, this is only possible when the transition function is an lstm
-    if (hparams.recurrence_type == "lstm" and
-        hparams.get("use_memory_as_final_state", False)):
+    # This is possible only when we are using lstm as transition function.
+    if hparams.get("use_memory_as_final_state", False):
       output = extra_output
 
     if hparams.mix_with_transformer == "after_ut":
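
For context, a minimal sketch of how the flags touched by this diff might be enabled on a Universal Transformer hparams set. The flag names (use_memory_as_last_state, use_memory_as_final_state) and the "lstm" recurrence_type come from the diff above; the universal_transformer_base() hparams function and the add_hparam() calls are ordinary tensor2tensor/HParams usage and are shown only as an assumed illustration, not as part of this commit.

# Sketch only; assumes the standard tensor2tensor hparams API, not part of this commit.
from tensor2tensor.models.research import universal_transformer

hparams = universal_transformer.universal_transformer_base()
hparams.recurrence_type = "lstm"  # memory (extra_output) is produced by the lstm transition function
# Register the flags that the code above reads via hparams.get(...).
hparams.add_hparam("use_memory_as_last_state", True)   # encoder returns memory as its last state
hparams.add_hparam("use_memory_as_final_state", True)  # universal_transformer_layer outputs memory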
