This repository was archived by the owner on Jul 7, 2023. It is now read-only.

Commit 9433a92

cfiken authored and Copybara-Service committed
internal merge of PR #1213
PiperOrigin-RevId: 221821207
1 parent 49e7cf5 commit 9433a92

3 files changed: +8 -8 lines changed


tensor2tensor/data_generators/problem.py

Lines changed: 6 additions & 4 deletions
@@ -802,7 +802,8 @@ def input_fn(self,
                config=None,
                force_repeat=False,
                prevent_repeat=False,
-               dataset_kwargs=None):
+               dataset_kwargs=None,
+               batch_shuffle_size=512):
     """Builds input pipeline for problem.
 
     Args:
@@ -817,6 +818,8 @@ def input_fn(self,
         Overrides force_repeat.
       dataset_kwargs: dict, if passed, will pass as kwargs to self.dataset
         method when called
+      batch_shuffle_size: int, the size of the buffer to shuffle batches.
+        if none, the batches will not be shuffled.
 
     Returns:
       (features_dict<str name, Tensor feature>, Tensor targets)
@@ -966,9 +969,8 @@ def define_shapes(example):
     # buffer size for record shuffling is smaller than the batch size. In such
     # cases, adding batch shuffling ensures that the data is in random order
     # during training
-    if hasattr(hparams, 'batch_shuffle_size'):
-      if is_training and hparams.batch_shuffle_size:
-        dataset = dataset.shuffle(hparams.batch_shuffle_size)
+    if is_training and batch_shuffle_size:
+      dataset = dataset.shuffle(batch_shuffle_size)
 
     def prepare_for_output(example):
       if not config or not config.use_tpu:
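A minimal sketch of the batch-level shuffling this change exposes through the new input_fn argument, assuming a standard tf.data pipeline; the helper name maybe_shuffle_batches and the toy dataset are illustrative, not part of the commit:

import tensorflow as tf

def maybe_shuffle_batches(dataset, is_training, batch_shuffle_size=512):
  # Mirrors the updated logic: shuffling whole batches now depends on the
  # explicit batch_shuffle_size argument rather than an hparams field.
  if is_training and batch_shuffle_size:
    # Useful when the record-level shuffle buffer is smaller than the batch
    # size, so records within a batch would otherwise stay in file order.
    dataset = dataset.shuffle(batch_shuffle_size)
  return dataset

# Illustrative usage on a toy batched dataset.
batched = tf.data.Dataset.range(100).batch(8)
batched = maybe_shuffle_batches(batched, is_training=True, batch_shuffle_size=4)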

tensor2tensor/layers/common_hparams.py

Lines changed: 0 additions & 1 deletion
@@ -33,7 +33,6 @@ def basic_params1():
       # of tokens per batch per GPU or per TPU core. Otherwise, this is
       # the number of examples per GPU or per TPU core.
       batch_size=4096,
-      batch_shuffle_size=512,
       # If True, then if the features are of variable length, the batch_size is
       # used as the actual batch size (and not tokens per batch).
       use_fixed_batch_size=False,
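With the hparam removed, batch shuffling is configured at the input_fn call site rather than through the hparams set. A rough before/after sketch, assuming TF 1.x's tf.contrib.training.HParams and showing only the fields visible in this diff:

from tensorflow.contrib.training import HParams

# Before: the shuffle buffer size rode along in basic_params1.
hparams_before = HParams(batch_size=4096, batch_shuffle_size=512)

# After: hparams no longer define it; callers pass batch_shuffle_size to
# Problem.input_fn directly (default 512 per the problem.py change above).
hparams_after = HParams(batch_size=4096)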

tensor2tensor/utils/decoding.py

Lines changed: 2 additions & 3 deletions
@@ -319,9 +319,8 @@ def decode_once(estimator,
     if decode_to_file:
       for i, (d_input, d_output, d_target) in enumerate(decoded_outputs):
         # Skip if all padding
-        if d_input:
-          if re.match("^({})+$".format(text_encoder.PAD), d_input):
-            continue
+        if re.match("^({})+$".format(text_encoder.PAD), d_input):
+          continue
         beam_score_str = ""
         if decode_hp.write_beam_scores:
           beam_score_str = "\t%.2f" % decoded_scores[i]
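A small sketch of the all-padding check that remains after this simplification; text_encoder.PAD is tensor2tensor's padding token string, and the literal "<pad>" below is only an assumption for illustration:

import re

PAD = "<pad>"  # stand-in for text_encoder.PAD

def is_all_padding(decoded_input):
  # True only when the decoded input is one or more PAD tokens and nothing else.
  return bool(re.match("^({})+$".format(PAD), decoded_input))

print(is_all_padding("<pad><pad><pad>"))  # True: skip this example
print(is_all_padding("hello <pad>"))      # False
print(is_all_padding(""))                 # False: the dropped `if d_input` guard was redundant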
