From 0cbaebe8a195d89e6d890fc0660ca4164fc2747c Mon Sep 17 00:00:00 2001
From: Neal Wu
Date: Wed, 7 Aug 2019 12:16:06 -0700
Subject: [PATCH] Replace all occurrences of initialize_all_variables
 (deprecated) with global_variables_initializer.

PiperOrigin-RevId: 262188420
Change-Id: Ia567000f3e3fa297ecd8363fe20d93e03272d50b
---
 .../behavioral_cloning_agent_test.py          |  6 ++---
 .../categorical_dqn_agent_test.py             |  2 +-
 tf_agents/agents/dqn/dqn_agent_test.py        | 10 ++++-----
 tf_agents/agents/ppo/ppo_agent_test.py        | 22 +++++++++----------
 .../agents/reinforce/reinforce_agent_test.py  |  2 +-
 tf_agents/agents/sac/sac_agent_test.py        |  2 +-
 tf_agents/utils/eager_utils.py                |  2 +-
 7 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/tf_agents/agents/behavioral_cloning/behavioral_cloning_agent_test.py b/tf_agents/agents/behavioral_cloning/behavioral_cloning_agent_test.py
index 2e71ff01f..8242eaa94 100644
--- a/tf_agents/agents/behavioral_cloning/behavioral_cloning_agent_test.py
+++ b/tf_agents/agents/behavioral_cloning/behavioral_cloning_agent_test.py
@@ -154,7 +154,7 @@ def testLoss(self):
         discount=discounts)

     loss_info = agent._loss(experience)
-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     total_loss, _ = self.evaluate(loss_info)

     expected_loss = tf.reduce_mean(
@@ -273,7 +273,7 @@ def testPolicy(self):
         [2] + self._action_spec[0].shape.as_list(),
         action_step.action[0].shape,
     )
-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     actions_ = self.evaluate(action_step.action)
     self.assertTrue(all(actions_[0] <= self._action_spec[0].maximum))
     self.assertTrue(all(actions_[0] >= self._action_spec[0].minimum))
@@ -289,7 +289,7 @@ def testInitializeRestoreAgent(self):
     time_steps = ts.restart(observations, batch_size=2)
     policy = agent.policy
     action_step = policy.action(time_steps)
-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())

     checkpoint = tf.train.Checkpoint(agent=agent)

diff --git a/tf_agents/agents/categorical_dqn/categorical_dqn_agent_test.py b/tf_agents/agents/categorical_dqn/categorical_dqn_agent_test.py
index 238d37e73..2f5951f71 100644
--- a/tf_agents/agents/categorical_dqn/categorical_dqn_agent_test.py
+++ b/tf_agents/agents/categorical_dqn/categorical_dqn_agent_test.py
@@ -399,7 +399,7 @@ def testTrainWithRnn(self):
     else:
       loss = agent.train(experience)

-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     self.assertEqual(self.evaluate(counter), 0)
     self.evaluate(loss)

diff --git a/tf_agents/agents/dqn/dqn_agent_test.py b/tf_agents/agents/dqn/dqn_agent_test.py
index d3ff1c60e..762070f3a 100644
--- a/tf_agents/agents/dqn/dqn_agent_test.py
+++ b/tf_agents/agents/dqn/dqn_agent_test.py
@@ -155,7 +155,7 @@ def testLoss(self, agent_class):
     expected_loss = 26.0

     loss, _ = agent._loss(experience)
-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     self.assertAllClose(self.evaluate(loss), expected_loss)

   def testLossNStep(self, agent_class):
@@ -204,7 +204,7 @@ def testLossNStep(self, agent_class):
     expected_loss = 47.42

     loss, _ = agent._loss(experience)
-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     self.assertAllClose(self.evaluate(loss), expected_loss)

   def testLossNStepMidMidLastFirst(self, agent_class):
@@ -263,7 +263,7 @@ def testLossNStepMidMidLastFirst(self, agent_class):
     expected_loss = 21.5

     loss, _ = agent._loss(experience)
-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     self.assertAllClose(self.evaluate(loss), expected_loss)

   def testPolicy(self, agent_class):
@@ -282,7 +282,7 @@ def testPolicy(self, agent_class):
         [2] + self._action_spec[0].shape.as_list(),
         action_step.action[0].shape,
     )
-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     actions_ = self.evaluate(action_step.action)
     self.assertTrue(all(actions_[0] <= self._action_spec[0].maximum))
     self.assertTrue(all(actions_[0] >= self._action_spec[0].minimum))
@@ -298,7 +298,7 @@ def testInitializeRestoreAgent(self, agent_class):
     time_steps = ts.restart(observations, batch_size=2)
     policy = agent.policy
     action_step = policy.action(time_steps)
-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())

     checkpoint = tf.train.Checkpoint(agent=agent)

diff --git a/tf_agents/agents/ppo/ppo_agent_test.py b/tf_agents/agents/ppo/ppo_agent_test.py
index a6061ee42..825aa5133 100644
--- a/tf_agents/agents/ppo/ppo_agent_test.py
+++ b/tf_agents/agents/ppo/ppo_agent_test.py
@@ -250,7 +250,7 @@ def testTrain(self, num_epochs, use_td_lambda_return):
       loss = agent.train(experience)

     # Assert that counter starts out at zero.
-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     self.assertEqual(0, self.evaluate(counter))
     self.evaluate(loss)
     # Assert that train_op ran increment_counter num_epochs times.
@@ -297,7 +297,7 @@ def testGetEpochLoss(self):
         train_step,
         debug_summaries=False)

-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     total_loss, extra_loss_info = self.evaluate(loss_info)
     (policy_gradient_loss, value_estimation_loss, l2_regularization_loss,
      entropy_reg_loss, kl_penalty_loss) = extra_loss_info
@@ -365,7 +365,7 @@ def testL2RegularizationLoss(self, not_zero):
         tensor_spec.sample_spec_nest(self._time_step_spec, outer_dims=(2,)))

     loss = agent.l2_regularization_loss()
-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     loss_ = self.evaluate(loss)
     self.assertAllClose(loss_, expected_loss)

@@ -408,7 +408,7 @@ def testEntropyRegularizationLoss(self, not_zero):
     loss = agent.entropy_regularization_loss(
         time_steps, current_policy_distribution, weights)

-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     loss_ = self.evaluate(loss)
     self.assertAllClose(loss_, expected_loss)

@@ -431,7 +431,7 @@ def testValueEstimationLoss(self):
     expected_loss = 123.205

     loss = agent.value_estimation_loss(time_steps, returns, weights)
-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     loss_ = self.evaluate(loss)
     self.assertAllClose(loss_, expected_loss)

@@ -462,7 +462,7 @@ def testPolicyGradientLoss(self):
         sample_action_log_probs, advantages, current_policy_distribution,
         weights)

-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     loss_ = self.evaluate(loss)
     self.assertAllClose(loss_, expected_loss)

@@ -505,7 +505,7 @@ def testKlPenaltyLoss(self):
     kl_penalty_loss = agent.kl_penalty_loss(
         time_steps, action_distribution_parameters,
         current_policy_distribution, weights)
-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     kl_penalty_loss_ = self.evaluate(kl_penalty_loss)
     self.assertEqual(expected_kl_penalty_loss, kl_penalty_loss_)

@@ -536,7 +536,7 @@ def testKlCutoffLoss(self, not_zero):
     expected_kl_cutoff_loss = kl_cutoff_coef * (.24**2)  # (0.74 - 0.5) ^ 2

     loss = agent.kl_cutoff_loss(kl_divergence)
-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     loss_ = self.evaluate(loss)
     self.assertAllClose([loss_], [expected_kl_cutoff_loss])

@@ -560,7 +560,7 @@ def testAdaptiveKlLoss(self):

     # Force variable creation
     agent.policy.variables()
-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())

     # Loss should not change if data kl is target kl.
     loss_1 = agent.adaptive_kl_loss([10.0])
@@ -598,7 +598,7 @@ def testUpdateAdaptiveKlBeta(self):
         adaptive_kl_tolerance=0.5,
     )

-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())

     # When KL is target kl, beta should not change.
     update_adaptive_kl_beta_fn = common.function(agent.update_adaptive_kl_beta)
@@ -630,7 +630,7 @@ def testPolicy(self):
     action_step = agent.policy.action(time_steps)
     actions = action_step.action
     self.assertEqual(actions.shape.as_list(), [1, 1])
-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     _ = self.evaluate(actions)

   def testNormalizeAdvantages(self):
diff --git a/tf_agents/agents/reinforce/reinforce_agent_test.py b/tf_agents/agents/reinforce/reinforce_agent_test.py
index 0cfa6382b..360de2fff 100644
--- a/tf_agents/agents/reinforce/reinforce_agent_test.py
+++ b/tf_agents/agents/reinforce/reinforce_agent_test.py
@@ -314,7 +314,7 @@ def testTrainWithRnn(self):
     else:
       loss = agent.train(experience)

-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     self.assertEqual(self.evaluate(counter), 0)
     self.evaluate(loss)
     self.assertEqual(self.evaluate(counter), 1)
diff --git a/tf_agents/agents/sac/sac_agent_test.py b/tf_agents/agents/sac/sac_agent_test.py
index 6c54c0666..bf75ad68a 100644
--- a/tf_agents/agents/sac/sac_agent_test.py
+++ b/tf_agents/agents/sac/sac_agent_test.py
@@ -272,7 +272,7 @@ def testTrainWithRnn(self):
     else:
       loss = agent.train(experience)

-    self.evaluate(tf.compat.v1.initialize_all_variables())
+    self.evaluate(tf.compat.v1.global_variables_initializer())
     self.assertEqual(self.evaluate(counter), 0)
     self.evaluate(loss)
     self.assertEqual(self.evaluate(counter), 1)
diff --git a/tf_agents/utils/eager_utils.py b/tf_agents/utils/eager_utils.py
index 5fbc41c45..42f098bc4 100644
--- a/tf_agents/utils/eager_utils.py
+++ b/tf_agents/utils/eager_utils.py
@@ -31,7 +31,7 @@ def loss_fn(x, y):
  train_step_op = eager_utils.create_train_step(loss_op, optimizer)
  # Compute the loss and apply gradients to the variables using the optimizer.
  with tf.Session() as sess:
-    sess.run(tf.global_variables_initializer())
+    sess.run(tf.compat.v1.global_variables_initializer())
     for _ in range(num_train_steps):
       loss_value = sess.run(train_step_op)
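
For reference, here is a minimal sketch (not part of the patch) of the pattern the updated tests rely on. tf.compat.v1.global_variables_initializer() returns the same kind of initialization op as the long-deprecated initialize_all_variables() alias, so the substitution is behavior-preserving in graph mode; under eager execution, variables are initialized on creation and the call is effectively a no-op. The variable and session setup below are illustrative assumptions, not code taken from the TF-Agents tests.

# Illustrative only -- hypothetical names, not from the patch.
import tensorflow as tf

tf.compat.v1.disable_eager_execution()  # run in TF1 graph mode via the compat layer

v = tf.compat.v1.get_variable(
    'v', shape=(), initializer=tf.compat.v1.zeros_initializer())
increment = v.assign_add(1.0)

with tf.compat.v1.Session() as sess:
  # global_variables_initializer() is the supported replacement for the
  # deprecated initialize_all_variables() alias; both return an op that
  # initializes every variable in the GLOBAL_VARIABLES collection.
  sess.run(tf.compat.v1.global_variables_initializer())
  print(sess.run(increment))  # 1.0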