Comments Cleanup
SuyashLakhotia committed Jan 22, 2018
1 parent ba2d621 commit 2495b6c
Showing 6 changed files with 19 additions and 19 deletions.
4 changes: 2 additions & 2 deletions baseline.py
@@ -32,10 +32,10 @@
 y_train = train.labels
 y_test = test.labels

-# Print information about the dataset.
+# Print information about the dataset
 utils.print_data_info(train, x_train, x_test, y_train, y_test)

-# To print for results.csv.
+# To print for results.csv
 data_str = "{{format: 'word2ind', vocab_size: {}}}".format(len(train.vocab))
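
A side note on the data_str template kept in context above: in Python's str.format, doubled braces render as literal braces and only the bare {} is a placeholder. A minimal sketch with an assumed vocabulary size of 10000:

# Doubled braces escape to literal "{" / "}"; {} is the only placeholder.
data_str = "{{format: 'word2ind', vocab_size: {}}}".format(10000)
print(data_str)  # {format: 'word2ind', vocab_size: 10000}
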
8 changes: 4 additions & 4 deletions cnn_ykim_train.py
@@ -80,17 +80,17 @@
 # Correct sequence length if padding was overriden in data.py
 seq_len = x_train.shape[1]

-# Construct reverse lookup vocabulary.
+# Construct reverse lookup vocabulary
 reverse_vocab = {w: i for i, w in enumerate(train.vocab)}

-# Process Google News word2vec file (in a memory-friendly way) and store relevant embeddings.
+# Process Google News word2vec file (in a memory-friendly way) and store relevant embeddings
 print("Loading pre-trained embeddings from {}...".format(embedding_file))
 embeddings = data.load_word2vec(embedding_file, reverse_vocab, embedding_dim)

-# Print information about the dataset.
+# Print information about the dataset
 utils.print_data_info(train, x_train, x_test, y_train, y_test)

-# To print for results.csv.
+# To print for results.csv
 data_str = "{{format: 'word2ind', vocab_size: {}}}".format(len(train.vocab))
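
The comment above refers to data.load_word2vec, which this diff does not show. A common memory-friendly pattern, sketched here purely as an assumption about what that helper does, is to stream the binary GoogleNews file one word at a time and keep only the vectors whose words appear in reverse_vocab:

import numpy as np

def load_word2vec_sketch(path, reverse_vocab, embedding_dim):
    # Words absent from the file keep small random vectors (assumed init).
    embeddings = np.random.uniform(-0.25, 0.25, (len(reverse_vocab), embedding_dim))
    with open(path, "rb") as f:
        vocab_size, vector_size = map(int, f.readline().split())
        binary_len = np.dtype(np.float32).itemsize * vector_size
        for _ in range(vocab_size):
            word = b""
            while True:  # words are space-terminated in the binary format
                ch = f.read(1)
                if ch == b" ":
                    break
                if ch != b"\n":  # some entries are newline-prefixed
                    word += ch
            vector = np.frombuffer(f.read(binary_len), dtype=np.float32)
            w = word.decode("utf-8", errors="ignore")
            if w in reverse_vocab:
                embeddings[reverse_vocab[w]] = vector
    return embeddings
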
12 changes: 6 additions & 6 deletions data.py
@@ -54,11 +54,11 @@ def remove_short_documents(self, nwords, vocab="selected"):
         Remove documents that contain less than nwords.
         """
         if vocab is "selected":
-            # Word count with selected vocabulary.
+            # Word count with selected vocabulary
             wc = self.data_count.sum(axis=1)
             wc = np.squeeze(np.asarray(wc))
         elif vocab is "full":
-            # Word count with full vocabulary.
+            # Word count with full vocabulary
             wc = np.empty(len(self.documents), dtype=np.int)
             for i, doc in enumerate(self.documents):
                 wc[i] = len(doc.split())
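
For the "selected" branch above, data_count is evidently the document-term count matrix, so a row sum gives each document's word count over the kept vocabulary. A tiny illustration with made-up counts:

import numpy as np
from scipy.sparse import csr_matrix

# Rows are documents, columns are vocabulary words (illustrative values).
data_count = csr_matrix(np.array([[2, 0, 1],
                                  [0, 0, 0]]))
wc = np.squeeze(np.asarray(data_count.sum(axis=1)))
print(wc)  # [3 0]; the second document is removed for any nwords >= 1
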
@@ -96,20 +96,20 @@ def generate_word2ind(self, maxlen=None, padding="post", truncating="post"):
         Transforms documents to list of self.vocab indexes of the same length (i.e. maxlen). Do this at the
         very end.
         """
-        # Add "<UNK>" to vocabulary and create a reverse vocabulary lookup.
+        # Add "<UNK>" to vocabulary and create a reverse vocabulary lookup
         if self.vocab[-1] != "<UNK>":
             self.vocab = self.vocab + ["<UNK>"]
         reverse_vocab = {w: i for i, w in enumerate(self.vocab)}

-        # Tokenize all the documents using the CountVectorizer's analyzer.
+        # Tokenize all the documents using the CountVectorizer's analyzer
         analyzer = self.count_vectorizer.build_analyzer()
         tokenized_docs = np.array([analyzer(doc) for doc in self.documents])

-        # Transform documents from words to indexes using vocabulary.
+        # Transform documents from words to indexes using vocabulary
        sequences = np.array([[reverse_vocab[w] for w in tokens if w in reverse_vocab]
                              for tokens in tokenized_docs])

-        # Truncate or pad sequences to match maxlen. Adapted from tflearn.data_utils.pad_sequences.
+        # Truncate or pad sequences to match maxlen (adapted from tflearn.data_utils.pad_sequences)
         lengths = [len(s) for s in sequences]
         num_samples = len(sequences)
         if maxlen is None:
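
The truncate-or-pad step above is adapted from tflearn.data_utils.pad_sequences. This minimal reconstruction (mine, not the repository's exact code) shows the padding="post", truncating="post" behavior used here:

import numpy as np

def pad_post(sequences, maxlen, value=0):
    # value=0 assumes index 0 can serve as padding.
    x = np.full((len(sequences), maxlen), value, dtype=np.int32)
    for i, s in enumerate(sequences):
        trunc = s[:maxlen]         # truncating="post": drop tokens past maxlen
        x[i, :len(trunc)] = trunc  # padding="post": padding fills the tail
    return x

print(pad_post([[3, 1, 4], [1, 5, 9, 2, 6]], maxlen=4))
# [[3 1 4 0]
#  [1 5 9 2]]
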
2 changes: 1 addition & 1 deletion graph_cnn.py
@@ -212,7 +212,7 @@ def graph_max_pool(self, x, p):
         return x

     def filter_in_fourier(self, x, L, K, F_out, U, W):
-        # TODO: B x F x V would avoid the permutations
+        # TODO: B x F x V would avoid the permutations?
         B, V, F_in = x.get_shape()
         B, V, F_in = int(B), int(V), int(F_in)
         x = tf.transpose(x, perm=[1, 2, 0])  # V x F_in x B
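
On the TODO above: filter_in_fourier permutes x to V x F_in x B so the graph Fourier transform becomes a plain matmul with U. Purely as an illustration of that filtering step (numpy stand-ins, not the repository's TensorFlow code), an einsum formulation sidesteps the permutations altogether:

import numpy as np

B, V, F_in, F_out = 2, 5, 3, 4
x = np.random.randn(B, V, F_in)              # activations, B x V x F_in
U = np.linalg.qr(np.random.randn(V, V))[0]   # stand-in graph Fourier basis
W = np.random.randn(V, F_in, F_out)          # spectral filter coefficients

x_hat = np.einsum("vu,bvf->buf", U, x)       # Fourier transform: U^T x
y_hat = np.einsum("buf,ufo->buo", x_hat, W)  # filter every frequency
y = np.einsum("vu,buo->bvo", U, y_hat)       # inverse transform: U y_hat
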
8 changes: 4 additions & 4 deletions graph_cnn_train.py
@@ -91,17 +91,17 @@
 y_train = train.labels
 y_test = test.labels

-# Construct reverse lookup vocabulary.
+# Construct reverse lookup vocabulary
 reverse_vocab = {w: i for i, w in enumerate(train.vocab)}

-# Process Google News word2vec file (in a memory-friendly way) and store relevant embeddings.
+# Process Google News word2vec file (in a memory-friendly way) and store relevant embeddings
 print("Loading pre-trained embeddings from {}...".format(embedding_file))
 embeddings = data.load_word2vec(embedding_file, reverse_vocab, embedding_dim)

-# Print information about the dataset.
+# Print information about the dataset
 utils.print_data_info(train, x_train, x_test, y_train, y_test)

-# To print for results.csv.
+# To print for results.csv
 data_str = "{{format: 'word2ind', vocab_size: {}}}".format(len(train.vocab))
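
The reverse lookup built above maps each word to its vocabulary index; tokens are later converted the same way data.py's generate_word2ind does, silently dropping out-of-vocabulary words. A toy example with a made-up vocabulary:

vocab = ["graph", "cnn", "text", "<UNK>"]
reverse_vocab = {w: i for i, w in enumerate(vocab)}

tokens = ["text", "cnn", "kernel"]
indices = [reverse_vocab[w] for w in tokens if w in reverse_vocab]
print(indices)  # [2, 1]; "kernel" is out-of-vocabulary and dropped
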
4 changes: 2 additions & 2 deletions mlp_train.py
@@ -67,10 +67,10 @@
 y_train = train.labels
 y_test = test.labels

-# Print information about the dataset.
+# Print information about the dataset
 utils.print_data_info(train, x_train, x_test, y_train, y_test)

-# To print for results.csv.
+# To print for results.csv
 data_str = "{{format: 'word2ind', vocab_size: {}}}".format(len(train.vocab))
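
utils.print_data_info itself is not part of this diff; the hypothetical stand-in below only suggests the kind of summary such a helper prints (names and format are assumptions, not taken from the repository):

def print_data_info(train, x_train, x_test, y_train, y_test):
    # Hypothetical reconstruction; the real helper's output is not shown here.
    print("Vocabulary Size: {}".format(len(train.vocab)))
    print("x_train: {}, x_test: {}".format(x_train.shape, x_test.shape))
    print("y_train: {}, y_test: {}".format(y_train.shape, y_test.shape))
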
