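Copy .data_tfidf or .data_word2ind to .data (the selected representation is assigned to a common `.data` attribute by reference, so the training scripts read `train.data` / `test.data` regardless of the `out` format)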

graphdeeplearning · Jan 14, 2018 · 5f5d000
1 parent 0dddf0c
commit 5f5d000
Show file tree

Hide file tree

Showing 7 changed files with 16 additions and 12 deletions.
diff --git a/baseline.py b/baseline.py
@@ -11,8 +11,8 @@
 dataset = "20 Newsgroups"
 train, test = data.load_dataset(dataset, out="tfidf", norm="l1")
 
-x_train = train.data_tfidf.astype(np.float32)
-x_test = test.data_tfidf.astype(np.float32)
+x_train = train.data.astype(np.float32)
+x_test = test.data.astype(np.float32)
 y_train = train.labels
 y_test = test.labels
 

diff --git a/cnn_ykim_train.py b/cnn_ykim_train.py
@@ -44,8 +44,8 @@
 dataset = "20 Newsgroups"
 train, test = data.load_dataset(dataset, out="word2ind", maxlen=seq_len)
 
-x_train = train.data_word2ind.astype(np.int32)
-x_test = test.data_word2ind.astype(np.int32)
+x_train = train.data.astype(np.int32)
+x_test = test.data.astype(np.int32)
 y_train = train.labels
 y_test = test.labels
 

diff --git a/data.py b/data.py
@@ -164,8 +164,10 @@ def preprocess_train(self, out, **params):
 
         if out == "tfidf":
             self.tfidf_normalize(**params)  # transform count matrix into a normalized tf-idf matrix
+            self.data = self.data_tfidf
         elif out == "word2ind":
             self.generate_word2ind(**params)  # transform documents to sequences of vocab indexes
+            self.data = self.data_word2ind
 
     def preprocess_test(self, train_vocab, out, **params):
         self.clean_text()
@@ -174,8 +176,10 @@ def preprocess_test(self, train_vocab, out, **params):
 
         if out == "tfidf":
             self.tfidf_normalize(**params)
+            self.data = self.data_tfidf
         elif out == "word2ind":
             self.generate_word2ind(**params)
+            self.data = self.data_word2ind
 
 
 def load_dataset(dataset, out, **params):

diff --git a/gcnn_fourier_train.py b/gcnn_fourier_train.py
@@ -50,8 +50,8 @@
 dataset = "20 Newsgroups"
 train, test = data.load_dataset(dataset, out="tfidf", norm="l1")
 
-x_train = train.data_tfidf.astype(np.float32)
-x_test = test.data_tfidf.astype(np.float32)
+x_train = train.data.astype(np.float32)
+x_test = test.data.astype(np.float32)
 y_train = train.labels
 y_test = test.labels
 

diff --git a/gcnn_mdeff_train.py b/gcnn_mdeff_train.py
@@ -51,8 +51,8 @@
 dataset = "20 Newsgroups"
 train, test = data.load_dataset(dataset, out="tfidf", norm="l1")
 
-x_train = train.data_tfidf.astype(np.float32)
-x_test = test.data_tfidf.astype(np.float32)
+x_train = train.data.astype(np.float32)
+x_test = test.data.astype(np.float32)
 y_train = train.labels
 y_test = test.labels
 

diff --git a/gcnn_spline_train.py b/gcnn_spline_train.py
@@ -51,8 +51,8 @@
 dataset = "20 Newsgroups"
 train, test = data.load_dataset(dataset, out="tfidf", norm="l1")
 
-x_train = train.data_tfidf.astype(np.float32)
-x_test = test.data_tfidf.astype(np.float32)
+x_train = train.data.astype(np.float32)
+x_test = test.data.astype(np.float32)
 y_train = train.labels
 y_test = test.labels
 

diff --git a/mlp_train.py b/mlp_train.py
@@ -38,8 +38,8 @@
 dataset = "20 Newsgroups"
 train, test = data.load_dataset(dataset, out="tfidf", norm="l1")
 
-x_train = train.data_tfidf.astype(np.float32)
-x_test = test.data_tfidf.astype(np.float32)
+x_train = train.data.astype(np.float32)
+x_test = test.data.astype(np.float32)
 y_train = train.labels
 y_test = test.labels