From 0cbaebe8a195d89e6d890fc0660ca4164fc2747c Mon Sep 17 00:00:00 2001
From: Neal Wu
Date: Wed, 7 Aug 2019 12:16:06 -0700
Subject: [PATCH] Replace all occurrences of initialize_all_variables
 (deprecated) with global_variables_initializer.

PiperOrigin-RevId: 262188420
Change-Id: Ia567000f3e3fa297ecd8363fe20d93e03272d50b
---
 .../behavioral_cloning_agent_test.py          |  6 ++---
 .../categorical_dqn_agent_test.py             |  2 +-
 tf_agents/agents/dqn/dqn_agent_test.py        | 10 ++++-----
 tf_agents/agents/ppo/ppo_agent_test.py        | 22 +++++++++----------
 .../agents/reinforce/reinforce_agent_test.py  |  2 +-
 tf_agents/agents/sac/sac_agent_test.py        |  2 +-
 tf_agents/utils/eager_utils.py                |  2 +-
 7 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/tf_agents/agents/behavioral_cloning/behavioral_cloning_agent_test.py b/tf_agents/agents/behavioral_cloning/behavioral_cloning_agent_test.py
index 2e71ff01f..8242eaa94 100644
--- a/tf_agents/agents/behavioral_cloning/behavioral_cloning_agent_test.py
+++ b/tf_agents/agents/behavioral_cloning/behavioral_cloning_agent_test.py
@@ -154,7 +154,7 @@ def testLoss(self):
         discount=discounts)

     loss_info = agent._loss(experience)
-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     total_loss, _ = self.evaluate(loss_info)

     expected_loss = tf.reduce_mean(
@@ -273,7 +273,7 @@ def testPolicy(self):
         [2] + self._action_spec[0].shape.as_list(),
         action_step.action[0].shape,
     )
-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     actions_ = self.evaluate(action_step.action)
     self.assertTrue(all(actions_[0] <= self._action_spec[0].maximum))
     self.assertTrue(all(actions_[0] >= self._action_spec[0].minimum))
@@ -289,7 +289,7 @@ def testInitializeRestoreAgent(self):
     time_steps = ts.restart(observations, batch_size=2)
     policy = agent.policy
     action_step = policy.action(time_steps)
-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())

     checkpoint = tf.train.Checkpoint(agent=agent)

diff --git a/tf_agents/agents/categorical_dqn/categorical_dqn_agent_test.py b/tf_agents/agents/categorical_dqn/categorical_dqn_agent_test.py
index 238d37e73..2f5951f71 100644
--- a/tf_agents/agents/categorical_dqn/categorical_dqn_agent_test.py
+++ b/tf_agents/agents/categorical_dqn/categorical_dqn_agent_test.py
@@ -399,7 +399,7 @@ def testTrainWithRnn(self):
     else:
       loss = agent.train(experience)

-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     self.assertEqual(self.evaluate(counter), 0)
     self.evaluate(loss)

diff --git a/tf_agents/agents/dqn/dqn_agent_test.py b/tf_agents/agents/dqn/dqn_agent_test.py
index d3ff1c60e..762070f3a 100644
--- a/tf_agents/agents/dqn/dqn_agent_test.py
+++ b/tf_agents/agents/dqn/dqn_agent_test.py
@@ -155,7 +155,7 @@ def testLoss(self, agent_class):
     expected_loss = 26.0

     loss, _ = agent._loss(experience)
-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     self.assertAllClose(self.evaluate(loss), expected_loss)

   def testLossNStep(self, agent_class):
@@ -204,7 +204,7 @@ def testLossNStep(self, agent_class):
     expected_loss = 47.42

     loss, _ = agent._loss(experience)
-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     self.assertAllClose(self.evaluate(loss), expected_loss)

   def testLossNStepMidMidLastFirst(self, agent_class):
@@ -263,7 +263,7 @@ def testLossNStepMidMidLastFirst(self, agent_class):
     expected_loss = 21.5

     loss, _ = agent._loss(experience)
-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     self.assertAllClose(self.evaluate(loss), expected_loss)

   def testPolicy(self, agent_class):
@@ -282,7 +282,7 @@ def testPolicy(self, agent_class):
         [2] + self._action_spec[0].shape.as_list(),
         action_step.action[0].shape,
     )
-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     actions_ = self.evaluate(action_step.action)
     self.assertTrue(all(actions_[0] <= self._action_spec[0].maximum))
     self.assertTrue(all(actions_[0] >= self._action_spec[0].minimum))
@@ -298,7 +298,7 @@ def testInitializeRestoreAgent(self, agent_class):
     time_steps = ts.restart(observations, batch_size=2)
     policy = agent.policy
     action_step = policy.action(time_steps)
-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())

     checkpoint = tf.train.Checkpoint(agent=agent)

diff --git a/tf_agents/agents/ppo/ppo_agent_test.py b/tf_agents/agents/ppo/ppo_agent_test.py
index a6061ee42..825aa5133 100644
--- a/tf_agents/agents/ppo/ppo_agent_test.py
+++ b/tf_agents/agents/ppo/ppo_agent_test.py
@@ -250,7 +250,7 @@ def testTrain(self, num_epochs, use_td_lambda_return):
       loss = agent.train(experience)

     # Assert that counter starts out at zero.
-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     self.assertEqual(0, self.evaluate(counter))
     self.evaluate(loss)
     # Assert that train_op ran increment_counter num_epochs times.
@@ -297,7 +297,7 @@ def testGetEpochLoss(self):
         train_step,
         debug_summaries=False)

-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     total_loss, extra_loss_info = self.evaluate(loss_info)
     (policy_gradient_loss, value_estimation_loss, l2_regularization_loss,
      entropy_reg_loss, kl_penalty_loss) = extra_loss_info
@@ -365,7 +365,7 @@ def testL2RegularizationLoss(self, not_zero):
         tensor_spec.sample_spec_nest(self._time_step_spec, outer_dims=(2,)))

     loss = agent.l2_regularization_loss()
-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     loss_ = self.evaluate(loss)
     self.assertAllClose(loss_, expected_loss)

@@ -408,7 +408,7 @@ def testEntropyRegularizationLoss(self, not_zero):
     loss = agent.entropy_regularization_loss(
         time_steps, current_policy_distribution, weights)

-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     loss_ = self.evaluate(loss)
     self.assertAllClose(loss_, expected_loss)

@@ -431,7 +431,7 @@ def testValueEstimationLoss(self):
     expected_loss = 123.205

     loss = agent.value_estimation_loss(time_steps, returns, weights)
-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     loss_ = self.evaluate(loss)
     self.assertAllClose(loss_, expected_loss)

@@ -462,7 +462,7 @@ def testPolicyGradientLoss(self):
         sample_action_log_probs, advantages, current_policy_distribution,
         weights)

-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     loss_ = self.evaluate(loss)
     self.assertAllClose(loss_, expected_loss)

@@ -505,7 +505,7 @@ def testKlPenaltyLoss(self):
     kl_penalty_loss = agent.kl_penalty_loss(
         time_steps, action_distribution_parameters,
         current_policy_distribution, weights)
-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     kl_penalty_loss_ = self.evaluate(kl_penalty_loss)
     self.assertEqual(expected_kl_penalty_loss, kl_penalty_loss_)

@@ -536,7 +536,7 @@ def testKlCutoffLoss(self, not_zero):
     expected_kl_cutoff_loss = kl_cutoff_coef * (.24**2)  # (0.74 - 0.5) ^ 2

     loss = agent.kl_cutoff_loss(kl_divergence)
-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     loss_ = self.evaluate(loss)
     self.assertAllClose([loss_], [expected_kl_cutoff_loss])

@@ -560,7 +560,7 @@ def testAdaptiveKlLoss(self):

     # Force variable creation
     agent.policy.variables()
-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())

     # Loss should not change if data kl is target kl.
     loss_1 = agent.adaptive_kl_loss([10.0])
@@ -598,7 +598,7 @@ def testUpdateAdaptiveKlBeta(self):
         adaptive_kl_tolerance=0.5,
     )

-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())

     # When KL is target kl, beta should not change.
     update_adaptive_kl_beta_fn = common.function(agent.update_adaptive_kl_beta)
@@ -630,7 +630,7 @@ def testPolicy(self):
     action_step = agent.policy.action(time_steps)
     actions = action_step.action
     self.assertEqual(actions.shape.as_list(), [1, 1])
-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     _ = self.evaluate(actions)

   def testNormalizeAdvantages(self):
diff --git a/tf_agents/agents/reinforce/reinforce_agent_test.py b/tf_agents/agents/reinforce/reinforce_agent_test.py
index 0cfa6382b..360de2fff 100644
--- a/tf_agents/agents/reinforce/reinforce_agent_test.py
+++ b/tf_agents/agents/reinforce/reinforce_agent_test.py
@@ -314,7 +314,7 @@ def testTrainWithRnn(self):
     else:
       loss = agent.train(experience)

-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     self.assertEqual(self.evaluate(counter), 0)
     self.evaluate(loss)
     self.assertEqual(self.evaluate(counter), 1)
diff --git a/tf_agents/agents/sac/sac_agent_test.py b/tf_agents/agents/sac/sac_agent_test.py
index 6c54c0666..bf75ad68a 100644
--- a/tf_agents/agents/sac/sac_agent_test.py
+++ b/tf_agents/agents/sac/sac_agent_test.py
@@ -272,7 +272,7 @@ def testTrainWithRnn(self):
     else:
       loss = agent.train(experience)

-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     self.assertEqual(self.evaluate(counter), 0)
     self.evaluate(loss)
     self.assertEqual(self.evaluate(counter), 1)
diff --git a/tf_agents/utils/eager_utils.py b/tf_agents/utils/eager_utils.py
index 5fbc41c45..42f098bc4 100644
--- a/tf_agents/utils/eager_utils.py
+++ b/tf_agents/utils/eager_utils.py
@@ -31,7 +31,7 @@ def loss_fn(x, y):
  train_step_op = eager_utils.create_train_step(loss_op, optimizer)
  # Compute the loss and apply gradients to the variables using the optimizer.
  with tf.Session() as sess:
-    sess.run(tf.global_variables_initializer())
+    sess.run(tf.compat.v1.global_variables_initializer())
     for _ in range(num_train_steps):
       loss_value = sess.run(train_step_op)
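
For reference, here is a minimal sketch (not part of the patch) of the pattern the updated tests rely on. tf.compat.v1.global_variables_initializer() returns the same kind of initialization op as the long-deprecated initialize_all_variables() alias, so the substitution is behavior-preserving in graph mode; under eager execution, variables are initialized on creation and the call is effectively a no-op. The variable and session setup below are illustrative assumptions, not code taken from the TF-Agents tests.

# Illustrative only -- hypothetical names, not from the patch.
import tensorflow as tf

tf.compat.v1.disable_eager_execution()  # run in TF1 graph mode via the compat layer

v = tf.compat.v1.get_variable(
    'v', shape=(), initializer=tf.compat.v1.zeros_initializer())
increment = v.assign_add(1.0)

with tf.compat.v1.Session() as sess:
  # global_variables_initializer() is the supported replacement for the
  # deprecated initialize_all_variables() alias; both return an op that
  # initializes every variable in the GLOBAL_VARIABLES collection.
  sess.run(tf.compat.v1.global_variables_initializer())
  print(sess.run(increment))  # 1.0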