Update to TensorFlow r1.3: use tf.distributions instead of tf.contrib.distributions

MorvanZhou · Morvan Zhou · commit 57b04ee45b65 · 2017-08-19T17:08:58.000+10:00
diff --git a/contents/10_A3C/A3C_RNN.py b/contents/10_A3C/A3C_RNN.py
@@ -6,7 +6,7 @@
 View more on my tutorial page: https://morvanzhou.github.io/tutorials/
 
 Using:
-tensorflow 1.0
+tensorflow r1.3
 gym 0.8.0
 """
 
@@ -63,7 +63,7 @@ def __init__(self, scope, globalAC=None):
                 with tf.name_scope('wrap_a_out'):
                     mu, sigma = mu * A_BOUND[1], sigma + 1e-4
 
-                normal_dist = tf.contrib.distributions.Normal(mu, sigma)
+                normal_dist = tf.distributions.Normal(mu, sigma)
 
                 with tf.name_scope('a_loss'):
                     log_prob = normal_dist.log_prob(self.a_his)
diff --git a/contents/10_A3C/A3C_continuous_action.py b/contents/10_A3C/A3C_continuous_action.py
@@ -6,7 +6,7 @@
 View more on my tutorial page: https://morvanzhou.github.io/tutorials/
 
 Using:
-tensorflow 1.0
+tensorflow r1.3
 gym 0.8.0
 """
 
@@ -63,7 +63,7 @@ def __init__(self, scope, globalAC=None):
                 with tf.name_scope('wrap_a_out'):
                     mu, sigma = mu * A_BOUND[1], sigma + 1e-4
 
-                normal_dist = tf.contrib.distributions.Normal(mu, sigma)
+                normal_dist = tf.distributions.Normal(mu, sigma)
 
                 with tf.name_scope('a_loss'):
                     log_prob = normal_dist.log_prob(self.a_his)
diff --git a/contents/12_Proximal_Policy_Optimization/DPPO.py b/contents/12_Proximal_Policy_Optimization/DPPO.py
@@ -10,12 +10,11 @@
 View more on my tutorial website: https://morvanzhou.github.io/tutorials
 
 Dependencies:
-tensorflow r1.2
+tensorflow r1.3
 gym 0.9.2
 """
 
 import tensorflow as tf
-from tensorflow.contrib.distributions import Normal
 import numpy as np
 import matplotlib.pyplot as plt
 import gym, threading, queue
@@ -87,7 +86,7 @@ def _build_anet(self, name, trainable):
             l1 = tf.layers.dense(self.tfs, 200, tf.nn.relu, trainable=trainable)
             mu = 2 * tf.layers.dense(l1, A_DIM, tf.nn.tanh, trainable=trainable)
             sigma = tf.layers.dense(l1, A_DIM, tf.nn.softplus, trainable=trainable)
-            norm_dist = Normal(loc=mu, scale=sigma)
+            norm_dist = tf.distributions.Normal(loc=mu, scale=sigma)
         params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=name)
         return norm_dist, params
 
diff --git a/contents/12_Proximal_Policy_Optimization/simply_PPO.py b/contents/12_Proximal_Policy_Optimization/simply_PPO.py
@@ -13,7 +13,6 @@
 """
 
 import tensorflow as tf
-from tensorflow.contrib.distributions import Normal, kl_divergence
 import numpy as np
 import matplotlib.pyplot as plt
 import gym
@@ -65,7 +64,7 @@ def __init__(self):
                 surr = ratio * self.tfadv
             if METHOD['name'] == 'kl_pen':
                 self.tflam = tf.placeholder(tf.float32, None, 'lambda')
-                kl = tf.stop_gradient(kl_divergence(oldpi, pi))
+                kl = tf.stop_gradient(tf.distributions.kl_divergence(oldpi, pi))
                 self.kl_mean = tf.reduce_mean(kl)
                 self.aloss = -(tf.reduce_mean(surr - self.tflam * kl))
             else:   # clipping method, find this is better
@@ -109,7 +108,7 @@ def _build_anet(self, name, trainable):
             l1 = tf.layers.dense(self.tfs, 100, tf.nn.relu, trainable=trainable)
             mu = 2 * tf.layers.dense(l1, A_DIM, tf.nn.tanh, trainable=trainable)
             sigma = tf.layers.dense(l1, A_DIM, tf.nn.softplus, trainable=trainable)
-            norm_dist = Normal(loc=mu, scale=sigma)
+            norm_dist = tf.distributions.Normal(loc=mu, scale=sigma)
         params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=name)
         return norm_dist, params
 
diff --git a/contents/8_Actor_Critic_Advantage/AC_continue_Pendulum.py b/contents/8_Actor_Critic_Advantage/AC_continue_Pendulum.py
@@ -8,7 +8,7 @@
 View more on my tutorial page: https://morvanzhou.github.io/tutorials/
 
 Using:
-tensorflow 1.0
+tensorflow r1.3
 gym 0.8.0
 """
 
@@ -57,7 +57,7 @@ def __init__(self, sess, n_features, action_bound, lr=0.0001):
         global_step = tf.Variable(0, trainable=False)
         # self.e = epsilon = tf.train.exponential_decay(2., global_step, 1000, 0.9)
         self.mu, self.sigma = tf.squeeze(mu*2), tf.squeeze(sigma+0.1)
-        self.normal_dist = tf.contrib.distributions.Normal(self.mu, self.sigma)
+        self.normal_dist = tf.distributions.Normal(self.mu, self.sigma)
 
         self.action = tf.clip_by_value(self.normal_dist.sample(1), action_bound[0], action_bound[1])