This repository was archived by the owner on Jul 7, 2023. It is now read-only.

Commit 01d76fa

aeloyq authored and Copybara-Service committed

internal merge of PR #1303

PiperOrigin-RevId: 227927931
1 parent 98ec1ee commit 01d76fa

2 files changed, 21 insertions(+), 17 deletions(-)

tensor2tensor/layers/common_attention.py
Lines changed: 2 additions & 1 deletion

@@ -1584,7 +1584,8 @@ def dot_product_attention_relative(q,
     raise ValueError("Max relative position (%s) should be > 0 when using "
                      "relative self attention." % (max_relative_position))
   with tf.variable_scope(
-      name, default_name="dot_product_attention_relative", values=[q, k, v]) as scope:
+      name, default_name="dot_product_attention_relative",
+      values=[q, k, v]) as scope:
 
     # This calculation only works for self attention.
     # q, k and v must therefore have the same shape.
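For orientation, a minimal sketch of calling the function whose tf.variable_scope call was rewrapped above. The tensor shapes, bias=None, and max_relative_position=4 are illustrative assumptions, not values from this commit; the argument names beyond q, k, v, max_relative_position and name follow the library's usual signature.

import tensorflow as tf
from tensor2tensor.layers import common_attention

# Illustrative self-attention inputs: [batch, num_heads, length, depth_per_head].
# q, k and v must share the same shape, as the comment in the diff notes.
q = tf.random_normal([1, 8, 10, 64])
k = tf.random_normal([1, 8, 10, 64])
v = tf.random_normal([1, 8, 10, 64])

attention_output = common_attention.dot_product_attention_relative(
    q, k, v,
    bias=None,                # no padding or causal bias in this sketch
    max_relative_position=4,  # must be > 0, per the ValueError above
    name="relative_attention_example")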

tensor2tensor/visualization/visualization.py
Lines changed: 19 additions & 16 deletions

@@ -51,19 +51,19 @@ def __init__(
 
   def encode(self, input_str):
     """Input str to features dict, ready for inference."""
-    inputs = self.encoders['inputs'].encode(input_str) + [EOS_ID]
+    inputs = self.encoders["inputs"].encode(input_str) + [EOS_ID]
     batch_inputs = np.reshape(inputs, [1, -1, 1, 1])  # Make it 3D.
     return batch_inputs
 
   def decode(self, integers):
     """List of ints to str."""
     integers = list(np.squeeze(integers))
-    return self.encoders['inputs'].decode(integers)
+    return self.encoders["inputs"].decode(integers)
 
   def decode_list(self, integers):
     """List of ints to list of str."""
     integers = list(np.squeeze(integers))
-    return self.encoders['inputs'].decode_list(integers)
+    return self.encoders["inputs"].decode_list(integers)
 
   def get_vis_data_from_string(self, sess, input_string):
     """Constructs the data needed for visualizing attentions.
@@ -135,11 +135,11 @@ def build_model(hparams_set, model_name, data_dir, problem_name, beam_size=1):
   translate_model = registry.model(model_name)(
       hparams, tf.estimator.ModeKeys.EVAL)
 
-  inputs = tf.placeholder(tf.int32, shape=(1, None, 1, 1), name='inputs')
-  targets = tf.placeholder(tf.int32, shape=(1, None, 1, 1), name='targets')
+  inputs = tf.placeholder(tf.int32, shape=(1, None, 1, 1), name="inputs")
+  targets = tf.placeholder(tf.int32, shape=(1, None, 1, 1), name="targets")
   translate_model({
-      'inputs': inputs,
-      'targets': targets,
+      "inputs": inputs,
+      "targets": targets,
   })
 
   # Must be called after building the training graph, so that the dict will
@@ -150,8 +150,8 @@ def build_model(hparams_set, model_name, data_dir, problem_name, beam_size=1):
 
   with tf.variable_scope(tf.get_variable_scope(), reuse=True):
     samples = translate_model.infer({
-        'inputs': inputs,
-    }, beam_size=beam_size)['outputs']
+        "inputs": inputs,
+    }, beam_size=beam_size)["outputs"]
 
   return inputs, targets, samples, att_mats
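To show how the pieces above fit together, a hedged sketch of driving build_model end to end. The hparams set, model, data directory and problem names are placeholder assumptions, and the zero-filled batch only demonstrates the expected (1, length, 1, 1) placeholder shape.

import numpy as np
import tensorflow as tf
from tensor2tensor.visualization import visualization

# Placeholder configuration; substitute a real data_dir/problem and a trained checkpoint.
inputs, targets, samples, att_mats = visualization.build_model(
    "transformer_base", "transformer", "/tmp/t2t_data", "translate_ende_wmt32k",
    beam_size=1)

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())  # or restore trained variables instead
  fake_batch = np.zeros((1, 5, 1, 1), dtype=np.int32)  # already-encoded token ids
  decoded_ids = sess.run(samples, {inputs: fake_batch})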

@@ -182,19 +182,22 @@ def get_att_mats(translate_model):
   dec_atts = []
   encdec_atts = []
 
-  prefix = 'transformer/body/'
-  postfix_self_attention = '/multihead_attention/dot_product_attention'
+  prefix = "transformer/body/"
+  postfix_self_attention = "/multihead_attention/dot_product_attention"
   if translate_model.hparams.self_attention_type == "dot_product_relative":
-    postfix_self_attention = '/multihead_attention/dot_product_attention_relative'
-  postfix_encdec = '/multihead_attention/dot_product_attention'
+    postfix_self_attention = ("/multihead_attention/"
+                              "dot_product_attention_relative")
+  postfix_encdec = "/multihead_attention/dot_product_attention"
 
   for i in range(translate_model.hparams.num_hidden_layers):
     enc_att = translate_model.attention_weights[
-        '%sencoder/layer_%i/self_attention%s' % (prefix, i, postfix_self_attention)]
+        "%sencoder/layer_%i/self_attention%s"
+        % (prefix, i, postfix_self_attention)]
     dec_att = translate_model.attention_weights[
-        '%sdecoder/layer_%i/self_attention%s' % (prefix, i, postfix_self_attention)]
+        "%sdecoder/layer_%i/self_attention%s"
+        % (prefix, i, postfix_self_attention)]
     encdec_att = translate_model.attention_weights[
-        '%sdecoder/layer_%i/encdec_attention%s' % (prefix, i, postfix_encdec)]
+        "%sdecoder/layer_%i/encdec_attention%s" % (prefix, i, postfix_encdec)]
     enc_atts.append(enc_att)
     dec_atts.append(dec_att)
     encdec_atts.append(encdec_att)
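Finally, a hedged sketch of consuming the per-layer attention tensors collected here, assuming get_att_mats returns the three lists built above and reusing the sess, inputs and fake_batch names from the previous sketch. The [batch, num_heads, query_length, key_length] shape in the comment is how multihead attention weights are conventionally laid out, not something asserted by this commit.

# Assumes att_mats = (enc_atts, dec_atts, encdec_atts) from build_model above.
enc_atts, dec_atts, encdec_atts = att_mats
enc_att_values = sess.run(enc_atts, {inputs: fake_batch})
# One array per hidden layer, each expected to be
# [batch, num_heads, query_length, key_length] of attention weights.
print(len(enc_att_values), enc_att_values[0].shape)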
