Comments Cleanup
SuyashLakhotia committed Jan 22, 2018
1 parent ba2d621 commit 2495b6c
Showing 6 changed files with 19 additions and 19 deletions.
4 changes: 2 additions & 2 deletions baseline.py
@@ -32,10 +32,10 @@
 y_train = train.labels
 y_test = test.labels

-# Print information about the dataset.
+# Print information about the dataset
 utils.print_data_info(train, x_train, x_test, y_train, y_test)

-# To print for results.csv.
+# To print for results.csv
 data_str = "{{format: 'word2ind', vocab_size: {}}}".format(len(train.vocab))
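
A side note on the data_str template kept in context above: in Python's str.format, doubled braces render as literal braces and only the bare {} is a placeholder. A minimal sketch with an assumed vocabulary size of 10000:

# Doubled braces escape to literal "{" / "}"; {} is the only placeholder.
data_str = "{{format: 'word2ind', vocab_size: {}}}".format(10000)
print(data_str)  # {format: 'word2ind', vocab_size: 10000}
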
8 changes: 4 additions & 4 deletions cnn_ykim_train.py
@@ -80,17 +80,17 @@
 # Correct sequence length if padding was overriden in data.py
 seq_len = x_train.shape[1]

-# Construct reverse lookup vocabulary.
+# Construct reverse lookup vocabulary
 reverse_vocab = {w: i for i, w in enumerate(train.vocab)}

-# Process Google News word2vec file (in a memory-friendly way) and store relevant embeddings.
+# Process Google News word2vec file (in a memory-friendly way) and store relevant embeddings
 print("Loading pre-trained embeddings from {}...".format(embedding_file))
 embeddings = data.load_word2vec(embedding_file, reverse_vocab, embedding_dim)

-# Print information about the dataset.
+# Print information about the dataset
 utils.print_data_info(train, x_train, x_test, y_train, y_test)

-# To print for results.csv.
+# To print for results.csv
 data_str = "{{format: 'word2ind', vocab_size: {}}}".format(len(train.vocab))
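
The comment above refers to data.load_word2vec, which this diff does not show. A common memory-friendly pattern, sketched here purely as an assumption about what that helper does, is to stream the binary GoogleNews file one word at a time and keep only the vectors whose words appear in reverse_vocab:

import numpy as np

def load_word2vec_sketch(path, reverse_vocab, embedding_dim):
    # Words absent from the file keep small random vectors (assumed init).
    embeddings = np.random.uniform(-0.25, 0.25, (len(reverse_vocab), embedding_dim))
    with open(path, "rb") as f:
        vocab_size, vector_size = map(int, f.readline().split())
        binary_len = np.dtype(np.float32).itemsize * vector_size
        for _ in range(vocab_size):
            word = b""
            while True:  # words are space-terminated in the binary format
                ch = f.read(1)
                if ch == b" ":
                    break
                if ch != b"\n":  # some entries are newline-prefixed
                    word += ch
            vector = np.frombuffer(f.read(binary_len), dtype=np.float32)
            w = word.decode("utf-8", errors="ignore")
            if w in reverse_vocab:
                embeddings[reverse_vocab[w]] = vector
    return embeddings
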
12 changes: 6 additions & 6 deletions data.py
@@ -54,11 +54,11 @@ def remove_short_documents(self, nwords, vocab="selected"):
         Remove documents that contain less than nwords.
         """
         if vocab is "selected":
-            # Word count with selected vocabulary.
+            # Word count with selected vocabulary
             wc = self.data_count.sum(axis=1)
             wc = np.squeeze(np.asarray(wc))
         elif vocab is "full":
-            # Word count with full vocabulary.
+            # Word count with full vocabulary
             wc = np.empty(len(self.documents), dtype=np.int)
             for i, doc in enumerate(self.documents):
                 wc[i] = len(doc.split())
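
For the "selected" branch above, data_count is evidently the document-term count matrix, so a row sum gives each document's word count over the kept vocabulary. A tiny illustration with made-up counts:

import numpy as np
from scipy.sparse import csr_matrix

# Rows are documents, columns are vocabulary words (illustrative values).
data_count = csr_matrix(np.array([[2, 0, 1],
                                  [0, 0, 0]]))
wc = np.squeeze(np.asarray(data_count.sum(axis=1)))
print(wc)  # [3 0]; the second document is removed for any nwords >= 1
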
@@ -96,20 +96,20 @@ def generate_word2ind(self, maxlen=None, padding="post", truncating="post"):
         Transforms documents to list of self.vocab indexes of the same length (i.e. maxlen). Do this at the
         very end.
         """
-        # Add "<UNK>" to vocabulary and create a reverse vocabulary lookup.
+        # Add "<UNK>" to vocabulary and create a reverse vocabulary lookup
         if self.vocab[-1] != "<UNK>":
             self.vocab = self.vocab + ["<UNK>"]
         reverse_vocab = {w: i for i, w in enumerate(self.vocab)}

-        # Tokenize all the documents using the CountVectorizer's analyzer.
+        # Tokenize all the documents using the CountVectorizer's analyzer
         analyzer = self.count_vectorizer.build_analyzer()
         tokenized_docs = np.array([analyzer(doc) for doc in self.documents])

-        # Transform documents from words to indexes using vocabulary.
+        # Transform documents from words to indexes using vocabulary
        sequences = np.array([[reverse_vocab[w] for w in tokens if w in reverse_vocab]
                              for tokens in tokenized_docs])

-        # Truncate or pad sequences to match maxlen. Adapted from tflearn.data_utils.pad_sequences.
+        # Truncate or pad sequences to match maxlen (adapted from tflearn.data_utils.pad_sequences)
         lengths = [len(s) for s in sequences]
         num_samples = len(sequences)
         if maxlen is None:
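
The truncate-or-pad step above is adapted from tflearn.data_utils.pad_sequences. This minimal reconstruction (mine, not the repository's exact code) shows the padding="post", truncating="post" behavior used here:

import numpy as np

def pad_post(sequences, maxlen, value=0):
    # value=0 assumes index 0 can serve as padding.
    x = np.full((len(sequences), maxlen), value, dtype=np.int32)
    for i, s in enumerate(sequences):
        trunc = s[:maxlen]         # truncating="post": drop tokens past maxlen
        x[i, :len(trunc)] = trunc  # padding="post": padding fills the tail
    return x

print(pad_post([[3, 1, 4], [1, 5, 9, 2, 6]], maxlen=4))
# [[3 1 4 0]
#  [1 5 9 2]]
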
2 changes: 1 addition & 1 deletion graph_cnn.py
@@ -212,7 +212,7 @@ def graph_max_pool(self, x, p):
         return x

     def filter_in_fourier(self, x, L, K, F_out, U, W):
-        # TODO: B x F x V would avoid the permutations
+        # TODO: B x F x V would avoid the permutations?
         B, V, F_in = x.get_shape()
         B, V, F_in = int(B), int(V), int(F_in)
         x = tf.transpose(x, perm=[1, 2, 0])  # V x F_in x B
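
On the TODO above: filter_in_fourier permutes x to V x F_in x B so the graph Fourier transform becomes a plain matmul with U. Purely as an illustration of that filtering step (numpy stand-ins, not the repository's TensorFlow code), an einsum formulation sidesteps the permutations altogether:

import numpy as np

B, V, F_in, F_out = 2, 5, 3, 4
x = np.random.randn(B, V, F_in)              # activations, B x V x F_in
U = np.linalg.qr(np.random.randn(V, V))[0]   # stand-in graph Fourier basis
W = np.random.randn(V, F_in, F_out)          # spectral filter coefficients

x_hat = np.einsum("vu,bvf->buf", U, x)       # Fourier transform: U^T x
y_hat = np.einsum("buf,ufo->buo", x_hat, W)  # filter every frequency
y = np.einsum("vu,buo->bvo", U, y_hat)       # inverse transform: U y_hat
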
8 changes: 4 additions & 4 deletions graph_cnn_train.py
@@ -91,17 +91,17 @@
 y_train = train.labels
 y_test = test.labels

-# Construct reverse lookup vocabulary.
+# Construct reverse lookup vocabulary
 reverse_vocab = {w: i for i, w in enumerate(train.vocab)}

-# Process Google News word2vec file (in a memory-friendly way) and store relevant embeddings.
+# Process Google News word2vec file (in a memory-friendly way) and store relevant embeddings
 print("Loading pre-trained embeddings from {}...".format(embedding_file))
 embeddings = data.load_word2vec(embedding_file, reverse_vocab, embedding_dim)

-# Print information about the dataset.
+# Print information about the dataset
 utils.print_data_info(train, x_train, x_test, y_train, y_test)

-# To print for results.csv.
+# To print for results.csv
 data_str = "{{format: 'word2ind', vocab_size: {}}}".format(len(train.vocab))
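
The reverse lookup built above maps each word to its vocabulary index; tokens are later converted the same way data.py's generate_word2ind does, silently dropping out-of-vocabulary words. A toy example with a made-up vocabulary:

vocab = ["graph", "cnn", "text", "<UNK>"]
reverse_vocab = {w: i for i, w in enumerate(vocab)}

tokens = ["text", "cnn", "kernel"]
indices = [reverse_vocab[w] for w in tokens if w in reverse_vocab]
print(indices)  # [2, 1]; "kernel" is out-of-vocabulary and dropped
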
4 changes: 2 additions & 2 deletions mlp_train.py
@@ -67,10 +67,10 @@
 y_train = train.labels
 y_test = test.labels

-# Print information about the dataset.
+# Print information about the dataset
 utils.print_data_info(train, x_train, x_test, y_train, y_test)

-# To print for results.csv.
+# To print for results.csv
 data_str = "{{format: 'word2ind', vocab_size: {}}}".format(len(train.vocab))
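
utils.print_data_info itself is not part of this diff; the hypothetical stand-in below only suggests the kind of summary such a helper prints (names and format are assumptions, not taken from the repository):

def print_data_info(train, x_train, x_test, y_train, y_test):
    # Hypothetical reconstruction; the real helper's output is not shown here.
    print("Vocabulary Size: {}".format(len(train.vocab)))
    print("x_train: {}, x_test: {}".format(x_train.shape, x_test.shape))
    print("y_train: {}, y_test: {}".format(y_train.shape, y_test.shape))
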
