
Commit 892f76c

artitwkpe authored and committed
internal merge of PR tensorflow#1290
PiperOrigin-RevId: 224943245
1 parent 18c2b3c commit 892f76c

File tree

4 files changed (+67 -40 lines)


docs/walkthrough.md

Lines changed: 20 additions & 0 deletions
@@ -47,6 +47,7 @@ pip install tensor2tensor && t2t-trainer \
 ### Contents
 
 * [Suggested Datasets and Models](#suggested-datasets-and-models)
+  * [Mathematical Language Understanding](#mathematical-language-understanding)
   * [Story, Question and Answer](#story-question-and-answer)
   * [Image Classification](#image-classification)
   * [Image Generation](#image-generation)
@@ -79,6 +80,24 @@ hyperparameters that we know works well in our setup. We usually
 run either on Cloud TPUs or on 8-GPU machines; you might need
 to modify the hyperparameters if you run on a different setup.
 
+### Mathematical Language Understanding
+
+For evaluating mathematical expressions at the character level involving addition, subtraction and multiplication of both positive and negative decimal numbers with variable digits assigned to symbolic variables, use
+
+* the [MLU](https://art.wangperawong.com/mathematical_language_understanding_train.tar.gz) data-set:
+  `--problem=mathematical_language_understanding`
+
+You can try solving the problem with different transformer models and hyperparameters as described in the [paper](https://arxiv.org/abs/1812.02825):
+* Standard transformer:
+  `--model=transformer`
+  `--hparams_set=transformer_tiny`
+* Universal transformer:
+  `--model=universal_transformer`
+  `--hparams_set=universal_transformer_tiny`
+* Adaptive universal transformer:
+  `--model=universal_transformer`
+  `--hparams_set=adaptive_universal_transformer_tiny`
+
 ### Story, Question and Answer
 
 For answering questions based on a story, use
@@ -464,5 +483,6 @@ T2T](https://research.googleblog.com/2017/06/accelerating-deep-learning-research
 * [Fast Decoding in Sequence Models using Discrete Latent Variables](https://arxiv.org/abs/1803.03382)
 * [Adafactor: Adaptive Learning Rates with Sublinear Memory Cost](https://arxiv.org/abs/1804.04235)
 * [Universal Transformers](https://arxiv.org/abs/1807.03819)
+* [Attending to Mathematical Language with Transformers](https://arxiv.org/abs/1812.02825)
 
 *Note: This is not an official Google product.*
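
For context on the documentation above: the flags map directly onto the T2T Python API, since this commit registers the problem under the name `mathematical_language_understanding`. A minimal sketch of generating the data from Python rather than via `t2t-datagen`, assuming tensor2tensor is installed; the directory paths are illustrative:

# Sketch: generate the MLU dataset through the T2T Python API.
# Importing tensor2tensor.problems pulls in the bundled problem modules,
# so the registry lookup by name below succeeds.
from tensor2tensor import problems

mlu = problems.problem("mathematical_language_understanding")

# Downloads the tarball from the MLU URL, extracts it into tmp_dir, and
# writes TFRecord files into data_dir (both paths here are placeholders).
mlu.generate_data(data_dir="/tmp/t2t_data", tmp_dir="/tmp/t2t_tmp")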

tensor2tensor/data_generators/babi_qa.py

Lines changed: 9 additions & 4 deletions
@@ -109,9 +109,11 @@ def _prepare_babi_data(tmp_dir, data_dir):
   tf.gfile.MakeDirs(data_dir)
 
   file_path = os.path.join(tmp_dir, _TAR)
-  headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}
+  headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_1) "
+                           "AppleWebKit/537.36 (KHTML, like Gecko) "
+                           "Chrome/63.0.3239.132 Safari/537.36"}
   resp = requests.get(_URL, headers=headers)
-  with open(file_path, 'wb') as f:
+  with open(file_path, "wb") as f:
     f.write(resp.content)
 
   tar = tarfile.open(file_path)
@@ -192,10 +194,12 @@ def _all_task_raw_data_generator(tmp_dir, data_file, dataset_split):
 
   tf.logging.info("Preparing dataset of all task together")
   globe_name = ("*_{}.txt")
+  mode_name = "test"
+  if dataset_split == problem.DatasetSplit.TRAIN:
+    mode_name = "train"
   files_name = os.path.join(
       tmp_dir, _DIR_NAME, subset,
-      globe_name.format("train" if dataset_split == problem.DatasetSplit.TRAIN
-                        else "test"))
+      globe_name.format(mode_name))
   with tf.gfile.GFile(data_file, "wb") as outfile:
     for filename in tf.gfile.Glob(files_name):
       if filename == data_file:
@@ -459,6 +463,7 @@ def hparams(self, defaults, unused_model_hparams):
     if "context" in p.vocab_size:
      del p.vocab_size["context"]
 
+
 def _problems_to_register():
   """Problems for which we want to create datasets.
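
Beyond the quote-style cleanup, the User-Agent rewrite above leans on Python's implicit concatenation of adjacent string literals, so the wrapped header is byte-for-byte identical to the old one-liner. A standalone sketch of that equivalence, with the literals copied from the diff:

# Adjacent string literals inside brackets are joined at compile time,
# so wrapping a long literal across lines changes only the source layout.
one_line = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"
wrapped = ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_1) "
           "AppleWebKit/537.36 (KHTML, like Gecko) "
           "Chrome/63.0.3239.132 Safari/537.36")
assert one_line == wrapped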
Lines changed: 33 additions & 33 deletions
@@ -1,5 +1,5 @@
 # coding=utf-8
-# Copyright 2018 Artit Wangperawong artitw@gmail.com
+# Copyright 2018 The Tensor2Tensor Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -15,31 +15,30 @@
 
 r"""Data generators for the Mathematical Language Understanding dataset.
 
-The training and test data were generated by assigning symbolic variables
-either positive or negative decimal integers and then describing the algebraic
-operation to perform. We restrict our variable assignments to the range
-x,y->[-1000,1000) and the operations to the set {+,-,*}. To ensure that the
-model embraces symbolic variables, the order in which x and y appears in the
-expression is randomly chosen. For instance, an input string contrasting from
-the example shown above might be y=129,x=531,x-y. Each input string is
-accompanied by its target string, which is the evaluation of the mathematical
-expression. For this study, all targets considered are decimal integers
-represented at the character level. About 12 million unique samples were thus
-generated and randomly split into training and test sets at an approximate
-ratio of 9:1, respectively.
+The training and test data were generated by assigning symbolic variables
+either positive or negative decimal integers and then describing the algebraic
+operation to perform. We restrict our variable assignments to the range
+x,y->[-1000,1000) and the operations to the set {+,-,*}. To ensure that the
+model embraces symbolic variables, the order in which x and y appears in the
+expression is randomly chosen. For instance, an input string contrasting from
+the example shown above might be y=129,x=531,x-y. Each input string is
+accompanied by its target string, which is the evaluation of the mathematical
+expression. For this study, all targets considered are decimal integers
+represented at the character level. About 12 million unique samples were thus
+generated and randomly split into training and test sets at an approximate
+ratio of 9:1, respectively.
 
 For more information check the following paper:
-Artit Wangperawong. Attending to Mathematical Language with Transformers,
-arXiv:1812.02825.
-Available at: https://arxiv.org/abs/1812.02825
-
+Artit Wangperawong. Attending to Mathematical Language with Transformers,
+arXiv:1812.02825 (https://arxiv.org/abs/1812.02825).
 """
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
 import os
+import tarfile
 
 from tensor2tensor.data_generators import generator_utils
 from tensor2tensor.data_generators import problem
@@ -48,9 +47,13 @@
 
 import tensorflow as tf
 
+
 @registry.register_problem
 class MathematicalLanguageUnderstanding(text_problems.Text2TextProblem):
-  URL = "https://art.wangperawong.com/mathematical_language_understanding_train.tar.gz"
+  """Mathematical language understanding, see arxiv.org/abs/1812.02825."""
+
+  URL = ("https://art.wangperawong.com/mathematical_language_understanding"
+         "_train.tar.gz")
 
   @property
   def vocab_type(self):
@@ -71,34 +74,31 @@ def is_generate_per_split(self):
     return False
 
   def generate_samples(self, data_dir, tmp_dir, dataset_split):
-    """Downloads and extracts the dataset and generates examples
+    """Downloads and extracts the dataset and generates examples.
 
     Args:
-      tmp_dir: temp directory to download and extract the dataset
       data_dir: The base directory where data and vocab files are stored.
+      tmp_dir: temp directory to download and extract the dataset.
+      dataset_split: split of the data-set.
 
-    Returns:
-      data generator
+    Yields:
+      The data examples.
     """
-
     if not tf.gfile.Exists(tmp_dir):
       tf.gfile.MakeDirs(tmp_dir)
 
     if not tf.gfile.Exists(data_dir):
       tf.gfile.MakeDirs(data_dir)
 
-    # Download and extract
+    # Download and extract.
     compressed_filename = os.path.basename(self.URL)
-    download_path = generator_utils.maybe_download(tmp_dir, compressed_filename,
-                                                   self.URL)
-
+    download_path = generator_utils.maybe_download(
+        tmp_dir, compressed_filename, self.URL)
     with tarfile.open(download_path, "r:gz") as tar:
       tar.extractall(tmp_dir)
-
-    filepath = os.path.join(tmp_dir, "mathematical_language_understanding_train.txt")
-
-    with open(filepath, 'r') as fp:
+    filepath = os.path.join(tmp_dir,
+                            "mathematical_language_understanding_train.txt")
+    with open(filepath, "r") as fp:
       for l in fp:
-        prob, ans = l.strip().split(':')
+        prob, ans = l.strip().split(":")
         yield {"inputs": prob, "targets": ans}
-
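
To make the tail of `generate_samples` concrete: each line of `mathematical_language_understanding_train.txt` stores an expression and its evaluation separated by a colon. A standalone sketch of the parsing step; the sample line is constructed from the module docstring's example (x-y with y=129, x=531 evaluates to 402), not taken from the actual file:

# Each dataset line is "<expression>:<answer>"; splitting on the colon
# recovers the model's inputs and targets.
line = "y=129,x=531,x-y:402\n"
prob, ans = line.strip().split(":")
sample = {"inputs": prob, "targets": ans}
assert sample == {"inputs": "y=129,x=531,x-y", "targets": "402"}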

tensor2tensor/models/research/universal_transformer.py

Lines changed: 5 additions & 3 deletions
@@ -240,10 +240,12 @@ def _greedy_infer(self, features, decode_length, use_tpu=False):
     Raises:
       NotImplementedError: If there are multiple data shards.
     """
-    return (self._slow_greedy_infer_tpu(features, decode_length) if use_tpu else
-            self._slow_greedy_infer(features, decode_length))
+    if use_tpu:
+      return self._slow_greedy_infer_tpu(features, decode_length)
+    return self._slow_greedy_infer(features, decode_length)
 
-  def _beam_decode(self, features, decode_length, beam_size, top_beams, alpha, use_tpu=False):
+  def _beam_decode(self, features, decode_length, beam_size, top_beams, alpha,
+                   use_tpu=False):
     """Beam search decoding.
 
     Args:
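
The `_greedy_infer` edit swaps a wrapped conditional expression for an explicit branch without changing behavior. A tiny standalone sketch of the equivalence, using stand-in functions for the two infer paths:

# Stand-ins for self._slow_greedy_infer_tpu / self._slow_greedy_infer.
def infer_tpu():
  return "tpu result"

def infer_cpu():
  return "cpu result"

def greedy_infer_old(use_tpu):
  # Old form: parenthesized conditional expression.
  return (infer_tpu() if use_tpu else infer_cpu())

def greedy_infer_new(use_tpu):
  # New form: explicit early return, as in the diff above.
  if use_tpu:
    return infer_tpu()
  return infer_cpu()

assert all(greedy_infer_old(t) == greedy_infer_new(t) for t in (True, False))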
