Skip to content

Commit

Permalink
Copy .data_tfidf or .data_word2ind to .data
Browse files Browse the repository at this point in the history
  • Loading branch information
SuyashLakhotia committed Jan 14, 2018
1 parent 0dddf0c commit 5f5d000
Show file tree
Hide file tree
Showing 7 changed files with 16 additions and 12 deletions.
4 changes: 2 additions & 2 deletions baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
dataset = "20 Newsgroups"
train, test = data.load_dataset(dataset, out="tfidf", norm="l1")

-x_train = train.data_tfidf.astype(np.float32)
-x_test = test.data_tfidf.astype(np.float32)
+x_train = train.data.astype(np.float32)
+x_test = test.data.astype(np.float32)
y_train = train.labels
y_test = test.labels

Expand Down
4 changes: 2 additions & 2 deletions cnn_ykim_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@
dataset = "20 Newsgroups"
train, test = data.load_dataset(dataset, out="word2ind", maxlen=seq_len)

-x_train = train.data_word2ind.astype(np.int32)
-x_test = test.data_word2ind.astype(np.int32)
+x_train = train.data.astype(np.int32)
+x_test = test.data.astype(np.int32)
y_train = train.labels
y_test = test.labels

Expand Down
4 changes: 4 additions & 0 deletions data.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,10 @@ def preprocess_train(self, out, **params):

 if out == "tfidf":
     self.tfidf_normalize(**params) # transform count matrix into a normalized tf-idf matrix
+    self.data = self.data_tfidf
 elif out == "word2ind":
     self.generate_word2ind(**params) # transform documents to sequences of vocab indexes
+    self.data = self.data_word2ind

def preprocess_test(self, train_vocab, out, **params):
self.clean_text()
Expand All @@ -174,8 +176,10 @@ def preprocess_test(self, train_vocab, out, **params):

 if out == "tfidf":
     self.tfidf_normalize(**params)
+    self.data = self.data_tfidf
 elif out == "word2ind":
     self.generate_word2ind(**params)
+    self.data = self.data_word2ind


def load_dataset(dataset, out, **params):
Expand Down
4 changes: 2 additions & 2 deletions gcnn_fourier_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@
dataset = "20 Newsgroups"
train, test = data.load_dataset(dataset, out="tfidf", norm="l1")

-x_train = train.data_tfidf.astype(np.float32)
-x_test = test.data_tfidf.astype(np.float32)
+x_train = train.data.astype(np.float32)
+x_test = test.data.astype(np.float32)
y_train = train.labels
y_test = test.labels

Expand Down
4 changes: 2 additions & 2 deletions gcnn_mdeff_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@
dataset = "20 Newsgroups"
train, test = data.load_dataset(dataset, out="tfidf", norm="l1")

-x_train = train.data_tfidf.astype(np.float32)
-x_test = test.data_tfidf.astype(np.float32)
+x_train = train.data.astype(np.float32)
+x_test = test.data.astype(np.float32)
y_train = train.labels
y_test = test.labels

Expand Down
4 changes: 2 additions & 2 deletions gcnn_spline_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@
dataset = "20 Newsgroups"
train, test = data.load_dataset(dataset, out="tfidf", norm="l1")

-x_train = train.data_tfidf.astype(np.float32)
-x_test = test.data_tfidf.astype(np.float32)
+x_train = train.data.astype(np.float32)
+x_test = test.data.astype(np.float32)
y_train = train.labels
y_test = test.labels

Expand Down
4 changes: 2 additions & 2 deletions mlp_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@
dataset = "20 Newsgroups"
train, test = data.load_dataset(dataset, out="tfidf", norm="l1")

-x_train = train.data_tfidf.astype(np.float32)
-x_test = test.data_tfidf.astype(np.float32)
+x_train = train.data.astype(np.float32)
+x_test = test.data.astype(np.float32)
y_train = train.labels
y_test = test.labels

Expand Down

0 comments on commit 5f5d000

Please sign in to comment.