 from scipy.sparse import lil_matrix, csr_matrix, save_npz, load_npz
 from datetime import datetime
 
+if tf.__version__.startswith('2'):
+  tf.compat.v1.disable_eager_execution()
+
 
 # is it possible to one-hot encode the data prior to feeding it
 # into the neural network, so that we don't have to do it on the fly?
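Note: the guard added above is what lets the graph-mode code in the rest of this file (compat.v1 placeholders, Session, AdamOptimizer) keep running on TensorFlow 2.x. A minimal standalone sketch of the pattern, not part of this file:

import tensorflow as tf

# compat.v1 placeholders and Sessions require eager execution to be off on TF 2.x.
if tf.__version__.startswith('2'):
  tf.compat.v1.disable_eager_execution()

x = tf.compat.v1.placeholder(tf.float32, shape=(None, 3))
y = tf.reduce_sum(input_tensor=x, axis=1)
with tf.compat.v1.Session() as sess:
  print(sess.run(y, feed_dict={x: [[1.0, 2.0, 3.0]]}))  # prints [6.]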
@@ -84,13 +87,13 @@ def __init__(self, D, M, K):
 
   def build(self, D, M, K):
     # params
-    self.W = tf.Variable(tf.random_normal(shape=(D, K, M)) * np.sqrt(2.0 / M))
+    self.W = tf.Variable(tf.random.normal(shape=(D, K, M)) * np.sqrt(2.0 / M))
     self.c = tf.Variable(np.zeros(M).astype(np.float32))
     self.b = tf.Variable(np.zeros((D, K)).astype(np.float32))
 
     # data
-    self.X_in = tf.placeholder(tf.float32, shape=(None, D, K))
-    self.mask = tf.placeholder(tf.float32, shape=(None, D, K))
+    self.X_in = tf.compat.v1.placeholder(tf.float32, shape=(None, D, K))
+    self.mask = tf.compat.v1.placeholder(tf.float32, shape=(None, D, K))
 
     # conditional probabilities
     # NOTE: tf.contrib.distributions.Bernoulli API has changed in Tensorflow v1.2
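X_in and mask above expect the ratings already one-hot encoded to shape (N, D, K), which is what the comment at the top of the file asks about. A rough sketch of such a pre-encoding helper, assuming half-star ratings 0.5..5.0 (so K = 10) with 0 meaning "not rated"; the function name and rating scheme are illustrative assumptions, not taken from this diff:

import numpy as np

def one_hot_encode(ratings, K=10):
  # ratings: (N, D) float array, 0 = missing
  N, D = ratings.shape
  X = np.zeros((N, D, K), dtype=np.float32)
  mask = np.zeros((N, D, K), dtype=np.float32)
  n_idx, d_idx = np.nonzero(ratings)
  k_idx = (ratings[n_idx, d_idx] * 2 - 1).astype(np.int32)  # 0.5 -> 0, ..., 5.0 -> 9
  X[n_idx, d_idx, k_idx] = 1.0   # one-hot rating per observed (user, item) pair
  mask[n_idx, d_idx, :] = 1.0    # 1 across all K classes wherever a rating exists
  return X, mask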
@@ -99,39 +102,39 @@ def build(self, D, M, K):
     self.p_h_given_v = p_h_given_v # save for later
 
     # draw a sample from p(h | v)
-    r = tf.random_uniform(shape=tf.shape(p_h_given_v))
-    H = tf.to_float(r < p_h_given_v)
+    r = tf.random.uniform(shape=tf.shape(input=p_h_given_v))
+    H = tf.cast(r < p_h_given_v, dtype=tf.float32)
 
     # draw a sample from p(v | h)
     # note: we don't have to actually do the softmax
     logits = dot2(H, self.W) + self.b
-    cdist = tf.distributions.Categorical(logits=logits)
+    cdist = tf.compat.v1.distributions.Categorical(logits=logits)
     X_sample = cdist.sample() # shape is (N, D)
     X_sample = tf.one_hot(X_sample, depth=K) # turn it into (N, D, K)
     X_sample = X_sample * self.mask # missing ratings shouldn't contribute to objective
 
 
     # build the objective
-    objective = tf.reduce_mean(self.free_energy(self.X_in)) - tf.reduce_mean(self.free_energy(X_sample))
-    self.train_op = tf.train.AdamOptimizer(1e-2).minimize(objective)
+    objective = tf.reduce_mean(input_tensor=self.free_energy(self.X_in)) - tf.reduce_mean(input_tensor=self.free_energy(X_sample))
+    self.train_op = tf.compat.v1.train.AdamOptimizer(1e-2).minimize(objective)
     # self.train_op = tf.train.GradientDescentOptimizer(1e-3).minimize(objective)
 
     # build the cost
     # we won't use this to optimize the model parameters
     # just to observe what happens during training
     logits = self.forward_logits(self.X_in)
     self.cost = tf.reduce_mean(
-      tf.nn.softmax_cross_entropy_with_logits(
-        labels=self.X_in,
+      input_tensor=tf.nn.softmax_cross_entropy_with_logits(
+        labels=tf.stop_gradient(self.X_in),
         logits=logits,
       )
     )
 
     # to get the output
     self.output_visible = self.forward_output(self.X_in)
 
-    initop = tf.global_variables_initializer()
-    self.session = tf.Session()
+    initop = tf.compat.v1.global_variables_initializer()
+    self.session = tf.compat.v1.Session()
     self.session.run(initop)
 
   def fit(self, X, mask, X_test, mask_test, epochs=10, batch_sz=256, show_fig=True):
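The objective built in this hunk is the usual free-energy contrast for one-step contrastive divergence (CD-1): the first reduce_mean runs over the training batch, the second over the reconstructions X_sample drawn via p(h | v) and then p(v | h). Read off the code (notation mine):

J(\theta) = \frac{1}{N}\sum_n F\!\left(v^{(n)}\right) - \frac{1}{N}\sum_n F\!\left(\tilde v^{(n)}\right)

Minimizing J with Adam approximates the CD-1 gradient of the negative log-likelihood. The separate self.cost (softmax cross-entropy) is only monitored during training; the labels are wrapped in tf.stop_gradient because, unlike the v1 op, the TF 2.x softmax_cross_entropy_with_logits can propagate gradients into its labels, and the wrapper preserves the old behavior.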
@@ -202,10 +205,10 @@ def fit(self, X, mask, X_test, mask_test, epochs=10, batch_sz=256, show_fig=True
     plt.show()
 
   def free_energy(self, V):
-    first_term = -tf.reduce_sum(dot1(V, self.b))
+    first_term = -tf.reduce_sum(input_tensor=dot1(V, self.b))
     second_term = -tf.reduce_sum(
       # tf.log(1 + tf.exp(tf.matmul(V, self.W) + self.c)),
-      tf.nn.softplus(dot1(V, self.W) + self.c),
+      input_tensor=tf.nn.softplus(dot1(V, self.W) + self.c),
       axis=1
     )
     return first_term + second_term
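For reference, assuming dot1(V, A) contracts V's (D, K) axes against the first two axes of A (which is what the shapes above require), free_energy evaluates, per sample v of shape (D, K):

F(v) = -\sum_{d,k} v_{dk}\, b_{dk} - \sum_{m=1}^{M} \operatorname{softplus}\!\Big(c_m + \sum_{d,k} W_{dkm}\, v_{dk}\Big), \qquad \operatorname{softplus}(x) = \log(1 + e^{x})

i.e. the standard RBM free energy with one-hot (softmax) visible units, which is what both terms of the CD objective above plug into.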