Commit 2495b6c

Comments Cleanup
1 parent ba2d621 commit 2495b6c

6 files changed, +19 -19 lines changed

baseline.py

Lines changed: 2 additions & 2 deletions
@@ -32,10 +32,10 @@
 y_train = train.labels
 y_test = test.labels

-# Print information about the dataset.
+# Print information about the dataset
 utils.print_data_info(train, x_train, x_test, y_train, y_test)

-# To print for results.csv.
+# To print for results.csv
 data_str = "{{format: 'word2ind', vocab_size: {}}}".format(len(train.vocab))

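Aside: the doubled braces in the data_str line are str.format's escape for literal braces, so the template renders as a single-braced record. For a hypothetical vocabulary of 10,000 words:

    >>> "{{format: 'word2ind', vocab_size: {}}}".format(10000)
    "{format: 'word2ind', vocab_size: 10000}"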

cnn_ykim_train.py

Lines changed: 4 additions & 4 deletions
@@ -80,17 +80,17 @@
 # Correct sequence length if padding was overriden in data.py
 seq_len = x_train.shape[1]

-# Construct reverse lookup vocabulary.
+# Construct reverse lookup vocabulary
 reverse_vocab = {w: i for i, w in enumerate(train.vocab)}

-# Process Google News word2vec file (in a memory-friendly way) and store relevant embeddings.
+# Process Google News word2vec file (in a memory-friendly way) and store relevant embeddings
 print("Loading pre-trained embeddings from {}...".format(embedding_file))
 embeddings = data.load_word2vec(embedding_file, reverse_vocab, embedding_dim)

-# Print information about the dataset.
+# Print information about the dataset
 utils.print_data_info(train, x_train, x_test, y_train, y_test)

-# To print for results.csv.
+# To print for results.csv
 data_str = "{{format: 'word2ind', vocab_size: {}}}".format(len(train.vocab))

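data.load_word2vec is called here but its body is not part of this commit. A minimal sketch of what a memory-friendly loader for the binary Google News file usually looks like follows; the signature mirrors the call above, but the body is an assumption, not the repository's implementation. The idea is to stream the file one word at a time and keep only the vectors for words present in reverse_vocab, seeking past the rest.

    import numpy as np

    def load_word2vec(path, reverse_vocab, embedding_dim):
        # Start from small random vectors so words missing from the
        # pre-trained file still get an embedding.
        embeddings = np.random.uniform(-0.25, 0.25,
                                       (len(reverse_vocab), embedding_dim))
        with open(path, "rb") as f:
            vocab_size, vector_size = map(int, f.readline().split())
            binary_len = np.dtype(np.float32).itemsize * vector_size
            for _ in range(vocab_size):
                # Read one word: bytes up to the separating space.
                chars = []
                while True:
                    ch = f.read(1)
                    if ch == b" ":
                        break
                    if ch != b"\n":  # some files newline-terminate entries
                        chars.append(ch)
                word = b"".join(chars).decode("utf-8", errors="ignore")
                if word in reverse_vocab:
                    # Keep this vector; it belongs to our vocabulary.
                    embeddings[reverse_vocab[word]] = np.frombuffer(
                        f.read(binary_len), dtype=np.float32)
                else:
                    # Seek past the vector without materializing it.
                    f.seek(binary_len, 1)
        return embeddings

Streaming with seek keeps peak memory at one vocabulary-sized matrix instead of the full ~3.4 GB Google News model.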

data.py

Lines changed: 6 additions & 6 deletions
@@ -54,11 +54,11 @@ def remove_short_documents(self, nwords, vocab="selected"):
         Remove documents that contain less than nwords.
         """
         if vocab is "selected":
-            # Word count with selected vocabulary.
+            # Word count with selected vocabulary
             wc = self.data_count.sum(axis=1)
             wc = np.squeeze(np.asarray(wc))
         elif vocab is "full":
-            # Word count with full vocabulary.
+            # Word count with full vocabulary
             wc = np.empty(len(self.documents), dtype=np.int)
             for i, doc in enumerate(self.documents):
                 wc[i] = len(doc.split())
@@ -96,20 +96,20 @@ def generate_word2ind(self, maxlen=None, padding="post", truncating="post"):
         Transforms documents to list of self.vocab indexes of the same length (i.e. maxlen). Do this at the
         very end.
         """
-        # Add "<UNK>" to vocabulary and create a reverse vocabulary lookup.
+        # Add "<UNK>" to vocabulary and create a reverse vocabulary lookup
         if self.vocab[-1] != "<UNK>":
             self.vocab = self.vocab + ["<UNK>"]
         reverse_vocab = {w: i for i, w in enumerate(self.vocab)}

-        # Tokenize all the documents using the CountVectorizer's analyzer.
+        # Tokenize all the documents using the CountVectorizer's analyzer
         analyzer = self.count_vectorizer.build_analyzer()
         tokenized_docs = np.array([analyzer(doc) for doc in self.documents])

-        # Transform documents from words to indexes using vocabulary.
+        # Transform documents from words to indexes using vocabulary
         sequences = np.array([[reverse_vocab[w] for w in tokens if w in reverse_vocab]
                               for tokens in tokenized_docs])

-        # Truncate or pad sequences to match maxlen. Adapted from tflearn.data_utils.pad_sequences.
+        # Truncate or pad sequences to match maxlen (adapted from tflearn.data_utils.pad_sequences)
         lengths = [len(s) for s in sequences]
         num_samples = len(sequences)
         if maxlen is None:
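Two notes on the code above. First, vocab is "selected" tests object identity, not equality; it happens to pass under CPython's interning of short string literals, but vocab == "selected" is the robust comparison. Second, the hunk cuts off right where the padding logic begins; a sketch of the truncate-or-pad step the comment credits to tflearn.data_utils.pad_sequences (assumed behavior, not the exact continuation of data.py):

    import numpy as np

    def pad_sequences(sequences, maxlen=None, padding="post",
                      truncating="post", value=0):
        # Bring every index sequence to a common length maxlen.
        lengths = [len(s) for s in sequences]
        if maxlen is None:
            maxlen = max(lengths)
        x = np.full((len(sequences), maxlen), value, dtype=np.int64)
        for i, s in enumerate(sequences):
            if not len(s):
                continue
            # Drop tokens from the front ("pre") or the back ("post").
            trunc = s[-maxlen:] if truncating == "pre" else s[:maxlen]
            # Write the kept tokens at the start ("post" padding) or the end ("pre").
            if padding == "post":
                x[i, :len(trunc)] = trunc
            else:
                x[i, -len(trunc):] = trunc
        return x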

graph_cnn.py

Lines changed: 1 addition & 1 deletion
@@ -212,7 +212,7 @@ def graph_max_pool(self, x, p):
         return x

     def filter_in_fourier(self, x, L, K, F_out, U, W):
-        # TODO: B x F x V would avoid the permutations
+        # TODO: B x F x V would avoid the permutations?
         B, V, F_in = x.get_shape()
         B, V, F_in = int(B), int(V), int(F_in)
         x = tf.transpose(x, perm=[1, 2, 0])  # V x F_in x B
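The TODO asks whether a batch-first layout would avoid the transpose/reshape dance. As a NumPy sketch of that idea, with shapes assumed from the surrounding code (x: B x V x F_in graph signals, U: the V x V Laplacian eigenvector basis, W: one F_in x F_out filter per Fourier mode, i.e. V x F_in x F_out), einsum can keep the batch axis in place throughout:

    import numpy as np

    def filter_in_fourier_batch_first(x, U, W):
        # Forward graph Fourier transform: project each signal onto the eigenbasis.
        x_hat = np.einsum("vu,bvf->buf", U, x)        # B x V x F_in
        # Filter: one learned F_in x F_out matrix per Fourier mode u.
        y_hat = np.einsum("buf,ufg->bug", x_hat, W)   # B x V x F_out
        # Inverse transform back to the vertex domain.
        return np.einsum("vu,bug->bvg", U, y_hat)     # B x V x F_out

No permutation of the batch axis is needed; whether this beats the matmul-based version in practice depends on the backend's einsum implementation.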

graph_cnn_train.py

Lines changed: 4 additions & 4 deletions
@@ -91,17 +91,17 @@
 y_train = train.labels
 y_test = test.labels

-# Construct reverse lookup vocabulary.
+# Construct reverse lookup vocabulary
 reverse_vocab = {w: i for i, w in enumerate(train.vocab)}

-# Process Google News word2vec file (in a memory-friendly way) and store relevant embeddings.
+# Process Google News word2vec file (in a memory-friendly way) and store relevant embeddings
 print("Loading pre-trained embeddings from {}...".format(embedding_file))
 embeddings = data.load_word2vec(embedding_file, reverse_vocab, embedding_dim)

-# Print information about the dataset.
+# Print information about the dataset
 utils.print_data_info(train, x_train, x_test, y_train, y_test)

-# To print for results.csv.
+# To print for results.csv
 data_str = "{{format: 'word2ind', vocab_size: {}}}".format(len(train.vocab))


mlp_train.py

Lines changed: 2 additions & 2 deletions
@@ -67,10 +67,10 @@
 y_train = train.labels
 y_test = test.labels

-# Print information about the dataset.
+# Print information about the dataset
 utils.print_data_info(train, x_train, x_test, y_train, y_test)

-# To print for results.csv.
+# To print for results.csv
 data_str = "{{format: 'word2ind', vocab_size: {}}}".format(len(train.vocab))

