Skip to content
This repository has been archived by the owner on Jul 7, 2023. It is now read-only.

Commit

Permalink
Explicitly import estimator from tensorflow as a separate import instead of
accessing it via tf.estimator and depend on the tensorflow estimator target.

Browse files: browse the repository at this point in the history.

PiperOrigin-RevId: 436808246
  • Loading branch information
hertschuh authored and copybara-github committed Mar 23, 2022
1 parent a8e50c0 commit 316c9ce
Show file tree
Hide file tree
Showing 17 changed files with 60 additions and 43 deletions.
3 changes: 2 additions & 1 deletion tensor2tensor/bin/t2t_attack.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
from tensor2tensor.utils import usr_dir

import tensorflow.compat.v1 as tf
from tensorflow.compat.v1 import estimator as tf_estimator

flags = tf.flags
FLAGS = flags.FLAGS
Expand Down Expand Up @@ -134,7 +135,7 @@ def create_surrogate_run_config(hp):
def prepare_data(problem, hparams, params, config):
"""Construct input pipeline."""
input_fn = problem.make_estimator_input_fn(
tf.estimator.ModeKeys.EVAL, hparams, force_repeat=True)
tf_estimator.ModeKeys.EVAL, hparams, force_repeat=True)
dataset = input_fn(params, config)
features, _ = dataset.make_one_shot_iterator().get_next()
inputs, labels = features["targets"], features["inputs"]
Expand Down
3 changes: 2 additions & 1 deletion tensor2tensor/bin/t2t_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
from tensor2tensor.utils import usr_dir

import tensorflow.compat.v1 as tf
from tensorflow.compat.v1 import estimator as tf_estimator

flags = tf.flags
FLAGS = flags.FLAGS
Expand Down Expand Up @@ -129,7 +130,7 @@ def score_file(filename):
features = {"targets": batch_targets}

# Prepare the model and the graph when model runs on features.
model = registry.model(FLAGS.model)(hparams, tf.estimator.ModeKeys.EVAL)
model = registry.model(FLAGS.model)(hparams, tf_estimator.ModeKeys.EVAL)
_, losses = model(features)
saver = tf.train.Saver()

Expand Down
3 changes: 2 additions & 1 deletion tensor2tensor/bin/t2t_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from tensor2tensor.utils import trainer_lib
from tensor2tensor.utils import usr_dir
import tensorflow.compat.v1 as tf
from tensorflow.compat.v1 import estimator as tf_estimator

flags = tf.flags
FLAGS = flags.FLAGS
Expand All @@ -42,7 +43,7 @@ def main(_):
dataset_split = "test" if FLAGS.eval_use_test_set else None
dataset_kwargs = {"dataset_split": dataset_split}
eval_input_fn = hparams.problem.make_estimator_input_fn(
tf.estimator.ModeKeys.EVAL, hparams, dataset_kwargs=dataset_kwargs)
tf_estimator.ModeKeys.EVAL, hparams, dataset_kwargs=dataset_kwargs)
config = t2t_trainer.create_run_config(hparams)

# summary-hook in tf.estimator.EstimatorSpec requires
Expand Down
5 changes: 3 additions & 2 deletions tensor2tensor/bin/t2t_prune.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
from tensor2tensor.utils import usr_dir

import tensorflow.compat.v1 as tf
from tensorflow.compat.v1 import estimator as tf_estimator

flags = tf.flags
FLAGS = flags.FLAGS
Expand Down Expand Up @@ -79,7 +80,7 @@ def main(argv):

# add "_rev" as a hack to avoid image standardization
problem = registry.problem(FLAGS.problem)
input_fn = problem.make_estimator_input_fn(tf.estimator.ModeKeys.EVAL,
input_fn = problem.make_estimator_input_fn(tf_estimator.ModeKeys.EVAL,
hparams)
dataset = input_fn(params, config).repeat()
features, labels = dataset.make_one_shot_iterator().get_next()
Expand All @@ -91,7 +92,7 @@ def main(argv):
spec = model_fn(
features,
labels,
tf.estimator.ModeKeys.EVAL,
tf_estimator.ModeKeys.EVAL,
params=hparams,
config=config)

Expand Down
3 changes: 2 additions & 1 deletion tensor2tensor/bin/t2t_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from tensor2tensor.utils import trainer_lib
from tensor2tensor.utils import usr_dir
import tensorflow.compat.v1 as tf
from tensorflow.compat.v1 import estimator as tf_estimator


flags = tf.flags
Expand Down Expand Up @@ -243,7 +244,7 @@ def create_run_config(hp, output_dir=None):
"num_cores_per_replica":
1,
"per_host_input_for_training":
tf.estimator.tpu.InputPipelineConfig.BROADCAST,
tf_estimator.tpu.InputPipelineConfig.BROADCAST,
}

# the various custom getters we have written do not play well together yet.
Expand Down
9 changes: 5 additions & 4 deletions tensor2tensor/layers/common_image_attention.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from tensor2tensor.utils import expert_utils

import tensorflow.compat.v1 as tf
from tensorflow.compat.v1 import estimator as tf_estimator


class AttentionType(object):
Expand Down Expand Up @@ -460,7 +461,7 @@ def ffn_layer(x, hparams, losses=None):
y = tf.reshape(y, x_shape)
elif hparams.ffn_layer == "local_moe_tpu":
overhead = (hparams.moe_overhead_train
if hparams.mode == tf.estimator.ModeKeys.TRAIN
if hparams.mode == tf_estimator.ModeKeys.TRAIN
else hparams.moe_overhead_eval)
x, x_shape, is_4d = maybe_reshape_4d_to_3d(x)
y, loss = expert_utils.local_moe_tpu(
Expand Down Expand Up @@ -531,7 +532,7 @@ def postprocess_image(x, rows, cols, hparams):
use_bias=True,
activation=None,
name="output_conv")
if (hparams.mode == tf.estimator.ModeKeys.PREDICT and
if (hparams.mode == tf_estimator.ModeKeys.PREDICT and
hparams.block_raster_scan):
y = targets
yshape = common_layers.shape_list(y)
Expand Down Expand Up @@ -577,7 +578,7 @@ def prepare_decoder(targets, hparams):

# during training, images are [batch, IMG_LEN, IMG_LEN, 3].
# At inference, they are [batch, curr_infer_length, 1, 1]
if hparams.mode == tf.estimator.ModeKeys.PREDICT:
if hparams.mode == tf_estimator.ModeKeys.PREDICT:
curr_infer_length = targets_shape[1]
if hparams.block_raster_scan:
assert hparams.img_len*channels % hparams.query_shape[1] == 0
Expand Down Expand Up @@ -659,7 +660,7 @@ def create_output(decoder_output, rows, cols, targets, hparams):
batch = common_layers.shape_list(decoded_image)[0]
depth = common_layers.shape_list(decoded_image)[-1]
likelihood = getattr(hparams, "likelihood", DistributionType.CAT)
if hparams.mode == tf.estimator.ModeKeys.PREDICT:
if hparams.mode == tf_estimator.ModeKeys.PREDICT:
y = tf.reshape(decoded_image, [batch, -1, 1, 1, depth])
output = y[:, :rows, :, :, :]
elif likelihood == DistributionType.CAT:
Expand Down
7 changes: 4 additions & 3 deletions tensor2tensor/layers/common_image_attention_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from tensor2tensor.utils import hparam

import tensorflow.compat.v1 as tf
from tensorflow.compat.v1 import estimator as tf_estimator


class CommonImageAttentionTest(parameterized.TestCase, tf.test.TestCase):
Expand All @@ -40,7 +41,7 @@ def testPostProcessImageTrainMode(self, likelihood, num_mixtures, depth):
hparams = hparam.HParams(
hidden_size=2,
likelihood=likelihood,
mode=tf.estimator.ModeKeys.TRAIN,
mode=tf_estimator.ModeKeys.TRAIN,
num_mixtures=num_mixtures,
)
inputs = tf.random_uniform([batch, rows, cols, hparams.hidden_size],
Expand All @@ -63,7 +64,7 @@ def testPostProcessImageInferMode(self, likelihood, num_mixtures, depth):
block_raster_scan=True,
hidden_size=2,
likelihood=likelihood,
mode=tf.estimator.ModeKeys.PREDICT,
mode=tf_estimator.ModeKeys.PREDICT,
num_mixtures=num_mixtures,
query_shape=[block_length, block_width],
)
Expand Down Expand Up @@ -95,7 +96,7 @@ def testCreateOutputTrainMode(self, likelihood, num_mixtures, depth):
hidden_size=2,
likelihood=likelihood,
num_channels=channels,
mode=tf.estimator.ModeKeys.TRAIN,
mode=tf_estimator.ModeKeys.TRAIN,
num_mixtures=num_mixtures,
)
decoder_output = tf.random_normal([batch, rows, cols, hparams.hidden_size])
Expand Down
19 changes: 10 additions & 9 deletions tensor2tensor/layers/discretization.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from tensor2tensor.layers import common_layers

import tensorflow.compat.v1 as tf
from tensorflow.compat.v1 import estimator as tf_estimator
import tensorflow_probability as tfp

from tensorflow.python.training import moving_averages # pylint: disable=g-direct-tensorflow-import
Expand Down Expand Up @@ -472,7 +473,7 @@ def gumbel_softmax(x,
d_dev = -tf.reduce_mean(d_variance)
ret = s

if mode != tf.estimator.ModeKeys.TRAIN:
if mode != tf_estimator.ModeKeys.TRAIN:
ret = tf.reshape(maxvhot, common_layers.shape_list(s)) # Just hot @eval.
return m, ret, d_dev * 5.0 + tf.reduce_mean(kl) * 0.002

Expand Down Expand Up @@ -754,7 +755,7 @@ def discrete_bottleneck(inputs,
y_clean = common_layers.saturating_sigmoid(outputs_discrete)
if summary:
tf.summary.histogram("y_clean", tf.reshape(y_clean, [-1]))
if noise_dev > 0 and mode == tf.estimator.ModeKeys.TRAIN:
if noise_dev > 0 and mode == tf_estimator.ModeKeys.TRAIN:
noise = tf.truncated_normal(
common_layers.shape_list(outputs_discrete),
mean=0.0,
Expand All @@ -766,7 +767,7 @@ def discrete_bottleneck(inputs,
y_discrete = tf.stop_gradient(d) + y - tf.stop_gradient(y)
pd = common_layers.inverse_exp_decay(startup_steps * 2)
pd *= discrete_mix
pd = pd if mode == tf.estimator.ModeKeys.TRAIN else 1.0
pd = pd if mode == tf_estimator.ModeKeys.TRAIN else 1.0
c = tf.where(
tf.less(tf.random_uniform([common_layers.shape_list(y)[0]]), pd),
y_discrete, y)
Expand Down Expand Up @@ -1379,17 +1380,17 @@ def tanh_discrete_bottleneck(x, bottleneck_bits, bottleneck_noise,
"""Simple discretization through tanh, flip bottleneck_noise many bits."""
x = tf.layers.dense(x, bottleneck_bits, name="tanh_discrete_bottleneck")
d0 = tf.stop_gradient(2.0 * tf.to_float(tf.less(0.0, x))) - 1.0
if mode == tf.estimator.ModeKeys.TRAIN:
if mode == tf_estimator.ModeKeys.TRAIN:
x += tf.truncated_normal(
common_layers.shape_list(x), mean=0.0, stddev=0.2)
x = tf.tanh(x)
d = x + tf.stop_gradient(2.0 * tf.to_float(tf.less(0.0, x)) - 1.0 - x)
if mode == tf.estimator.ModeKeys.TRAIN:
if mode == tf_estimator.ModeKeys.TRAIN:
noise = tf.random_uniform(common_layers.shape_list(x))
noise = 2.0 * tf.to_float(tf.less(bottleneck_noise, noise)) - 1.0
d *= noise
d = common_layers.mix(d, x, discretize_warmup_steps,
mode == tf.estimator.ModeKeys.TRAIN)
mode == tf_estimator.ModeKeys.TRAIN)
return d, d0


Expand All @@ -1410,21 +1411,21 @@ def isemhash_bottleneck(x,
with tf.variable_scope("isemhash_bottleneck"):
x = tf.layers.dense(x, bottleneck_bits, name="dense")
y = common_layers.saturating_sigmoid(x)
if isemhash_noise_dev > 0 and mode == tf.estimator.ModeKeys.TRAIN:
if isemhash_noise_dev > 0 and mode == tf_estimator.ModeKeys.TRAIN:
noise = tf.truncated_normal(
common_layers.shape_list(x), mean=0.0, stddev=isemhash_noise_dev)
y = common_layers.saturating_sigmoid(x + noise)
d = tf.to_float(tf.less(0.5, y)) + y - tf.stop_gradient(y)
d = 2.0 * d - 1.0 # Move from [0, 1] to [-1, 1].
if mode == tf.estimator.ModeKeys.TRAIN: # Flip some bits.
if mode == tf_estimator.ModeKeys.TRAIN: # Flip some bits.
noise = tf.random_uniform(common_layers.shape_list(x))
noise = 2.0 * tf.to_float(tf.less(bottleneck_noise, noise)) - 1.0
d *= noise
d = common_layers.mix(
d,
2.0 * y - 1.0,
discretize_warmup_steps,
mode == tf.estimator.ModeKeys.TRAIN,
mode == tf_estimator.ModeKeys.TRAIN,
max_prob=isemhash_mix_prob)
return d, 0.0

Expand Down
9 changes: 5 additions & 4 deletions tensor2tensor/layers/latent_layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from tensor2tensor.utils import beam_search

import tensorflow.compat.v1 as tf
from tensorflow.compat.v1 import estimator as tf_estimator
import tensorflow_probability as tfp

DO_SUMMARIES = True
Expand Down Expand Up @@ -556,7 +557,7 @@ def latent_prediction_model(inputs,
latents_pred_loss: Tensor of shape [batch, length_q].
"""
with tf.variable_scope(name, default_name="latent_prediction"):
if hparams.mode != tf.estimator.ModeKeys.PREDICT:
if hparams.mode != tf_estimator.ModeKeys.PREDICT:
latents_pred = transformer_latent_decoder(tf.stop_gradient(latents_dense),
inputs,
ed_attention_bias,
Expand Down Expand Up @@ -617,10 +618,10 @@ def transformer_autoencoder(inputs,
losses = {"extra": 0.,
"extra_loss": 0.,
"latent_pred": 0.}
if hparams.mode != tf.estimator.ModeKeys.PREDICT:
if hparams.mode != tf_estimator.ModeKeys.PREDICT:
targets_compressed = compress_fn(targets, hparams, name="compress")

if hparams.mode == tf.estimator.ModeKeys.TRAIN:
if hparams.mode == tf_estimator.ModeKeys.TRAIN:
scale = common_layers.inverse_exp_decay(hparams.startup_steps)
else:
scale = 1.0
Expand Down Expand Up @@ -681,7 +682,7 @@ def transformer_autoencoder(inputs,
[-1, hparams.img_len, hparams.img_len, hparams.hidden_size])

if hparams.use_gold_targets:
if hparams.mode == tf.estimator.ModeKeys.PREDICT:
if hparams.mode == tf_estimator.ModeKeys.PREDICT:
masking = predict_mask
else:
masking = common_layers.inverse_exp_decay(hparams.mask_startup_steps)
Expand Down
3 changes: 2 additions & 1 deletion tensor2tensor/layers/latent_layers_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from tensor2tensor.utils import test_utils

import tensorflow.compat.v1 as tf
from tensorflow.compat.v1 import estimator as tf_estimator
tf.enable_eager_execution()


Expand Down Expand Up @@ -110,7 +111,7 @@ def testComputeBitsAndNats(self):
@test_utils.run_in_graph_and_eager_modes()
def testTransformerAutoencoder(self):
hparams = imagetransformer_latent_tiny()
hparams.mode = tf.estimator.ModeKeys.TRAIN
hparams.mode = tf_estimator.ModeKeys.TRAIN
block_dim = int(hparams.hidden_size // hparams.num_blocks)
block_v_size = 2**(hparams.bottleneck_bits /
(hparams.num_residuals * hparams.num_blocks))
Expand Down
7 changes: 4 additions & 3 deletions tensor2tensor/layers/modalities.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from tensor2tensor.layers import discretization

import tensorflow.compat.v1 as tf
from tensorflow.compat.v1 import estimator as tf_estimator
import tensorflow_probability as tfp


Expand Down Expand Up @@ -309,7 +310,7 @@ def _image_channel_compress_bottom(inputs, model_hparams, name="bottom"):
with tf.variable_scope(name):
inputs = tf.to_float(inputs)
hp = model_hparams
if hp.mode != tf.estimator.ModeKeys.PREDICT:
if hp.mode != tf_estimator.ModeKeys.PREDICT:
tf.summary.image(
"inputs",
common_layers.tpu_safe_image_summary(inputs),
Expand Down Expand Up @@ -600,7 +601,7 @@ def video_pixel_noise_bottom(x, model_hparams, vocab_size):
"""Bottom transformation for video."""
input_noise = getattr(model_hparams, "video_modality_input_noise", 0.25)
inputs = x
if model_hparams.mode == tf.estimator.ModeKeys.TRAIN:
if model_hparams.mode == tf_estimator.ModeKeys.TRAIN:
background = tfp.stats.percentile(inputs, 50., axis=[0, 1, 2, 3])
input_shape = common_layers.shape_list(inputs)
input_size = tf.reduce_prod(input_shape[:-1])
Expand Down Expand Up @@ -1126,7 +1127,7 @@ def symbol_top(body_output, targets, model_hparams, vocab_size):
body_output_shape = common_layers.shape_list(body_output)
var = get_weights(model_hparams, vocab_size, body_output_shape[-1])
if (model_hparams.factored_logits and
model_hparams.mode == tf.estimator.ModeKeys.TRAIN):
model_hparams.mode == tf_estimator.ModeKeys.TRAIN):
# insert channels dimension
body_output = tf.expand_dims(body_output, 3)
return common_layers.FactoredTensor(body_output, var)
Expand Down
7 changes: 4 additions & 3 deletions tensor2tensor/layers/modalities_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from tensor2tensor.utils import test_utils

import tensorflow.compat.v1 as tf
from tensorflow.compat.v1 import estimator as tf_estimator
tf.enable_eager_execution()


Expand Down Expand Up @@ -60,7 +61,7 @@ def testSymbolModalityInputs(self):
hidden_size = 9
model_hparams = common_hparams.basic_params1()
model_hparams.hidden_size = hidden_size
model_hparams.mode = tf.estimator.ModeKeys.TRAIN
model_hparams.mode = tf_estimator.ModeKeys.TRAIN
x = np.random.randint(
vocab_size, size=(batch_size, length, 1, 1))
data_parallelism = expert_utils.Parallelism(
Expand All @@ -86,7 +87,7 @@ def testSymbolModalityTargets(self):
vocab_size = 11
model_hparams = common_hparams.basic_params1()
model_hparams.hidden_size = hidden_size
model_hparams.mode = tf.estimator.ModeKeys.TRAIN
model_hparams.mode = tf_estimator.ModeKeys.TRAIN
body_output = np.random.randint(
100, size=(batch_size, length, height, hidden_size))
targets = np.random.randint(
Expand Down Expand Up @@ -127,7 +128,7 @@ def testSymbolModalityTargetsFactored(self):
model_hparams = common_hparams.basic_params1()
model_hparams.factored_logits = True
model_hparams.hidden_size = hidden_size
model_hparams.mode = tf.estimator.ModeKeys.TRAIN
model_hparams.mode = tf_estimator.ModeKeys.TRAIN
body_output = np.random.randint(
100, size=(batch_size, length, height, hidden_size))
targets = np.random.randint(
Expand Down
Loading

0 comments on commit 316c9ce

Please sign in to comment.