@@ -65,24 +65,34 @@ def cal_acc(labels, logits):
65
65
return acc .numpy ().item ()
66
66
67
67
68
def encode_onehot(dataset, labels):
  """Encode string class labels as one-hot vectors.

  Provides a mapping from string labels to integer indices for the
  supported citation datasets, then one-hot encodes `labels`.

  Args:
    dataset: Dataset name, either 'cora' or 'citeseer'; selects which
      label-to-index mapping is used.
    labels: Sized iterable of string class labels (e.g. a numpy array
      of the last column of the `.content` file).

  Returns:
    A float numpy array of shape (len(labels), num_classes) where row i
    is the one-hot encoding of labels[i].

  Raises:
    KeyError: If `dataset` is not a known dataset name, or a label is
      not present in the chosen mapping.
  """
  # Per-dataset mapping from string label to class index.
  label_index = {
      'cora': {
          'Case_Based': 0,
          'Genetic_Algorithms': 1,
          'Neural_Networks': 2,
          'Probabilistic_Methods': 3,
          'Reinforcement_Learning': 4,
          'Rule_Learning': 5,
          'Theory': 6
      },
      'citeseer': {
          'AI': 0,
          'IR': 1,
          'HCI': 2,
          'DB': 3,
          'ML': 4,
          'Agents': 5
      }
  }

  # Hoist the per-dataset mapping out of the loop: the original looked up
  # label_index[dataset] on every iteration.
  mapping = label_index[dataset]
  num_classes = len(mapping)

  # Convert to onehot label: one 1 per row at the label's class index.
  onehot_labels = np.zeros((len(labels), num_classes))
  # enumerate replaces the original's manual `idx = 0` / `idx += 1` counter.
  for row, label in enumerate(labels):
    onehot_labels[row, mapping[label]] = 1
  return onehot_labels
88
98
@@ -115,23 +125,30 @@ def sparse_matrix_to_tf_sparse_tensor(matrix):
115
125
116
126
117
127
def load_dataset (dataset , sparse_features , normalize_adj ):
118
- """Loads Cora dataset."""
128
+ """Loads dataset."""
119
129
dir_path = os .path .join ('data' , dataset )
120
130
content_path = os .path .join (dir_path , '{}.content' .format (dataset ))
121
131
citation_path = os .path .join (dir_path , '{}.cites' .format (dataset ))
122
132
123
133
content = np .genfromtxt (content_path , dtype = np .dtype (str ))
124
-
125
- idx = np .array (content [:, 0 ], dtype = np .int32 )
134
+ idx = np .array (content [:, 0 ])
126
135
features = sp .csr_matrix (content [:, 1 :- 1 ], dtype = np .float32 )
127
- labels = encode_onehot (content [:, - 1 ])
136
+ labels = encode_onehot (dataset , content [:, - 1 ])
128
137
129
138
# Dict which maps paper id to data id
130
139
idx_map = {j : i for i , j in enumerate (idx )}
131
- edges_unordered = np .genfromtxt (citation_path , dtype = np .int32 )
140
+ edges_unordered = np .genfromtxt (citation_path , dtype = np .dtype ( str ) )
132
141
edges = np .array (
133
142
list (map (idx_map .get , edges_unordered .flatten ())),
134
- dtype = np .int32 ).reshape (edges_unordered .shape )
143
+ dtype = np .dtype (str )).reshape (edges_unordered .shape )
144
+
145
+ # Delete relation which the nodes appear in cites but not in content
146
+ del_rel = []
147
+ for i , j in enumerate (edges ):
148
+ if j [0 ] == 'None' or j [1 ] == 'None' :
149
+ del_rel .append (i )
150
+ edges = np .delete (edges , del_rel , 0 )
151
+
135
152
adj = sp .coo_matrix ((np .ones (edges .shape [0 ]), (edges [:, 0 ], edges [:, 1 ])),
136
153
shape = (labels .shape [0 ], labels .shape [0 ]),
137
154
dtype = np .float32 )
@@ -145,13 +162,13 @@ def load_dataset(dataset, sparse_features, normalize_adj):
145
162
if normalize_adj :
146
163
adj = normalize_adj_matrix (adj )
147
164
148
- # 5% for train, 300 for validation, 1000 for test
149
165
idx_train = slice (140 )
150
166
idx_val = slice (200 , 500 )
151
167
idx_test = slice (500 , 1500 )
152
168
153
169
features = tf .convert_to_tensor (np .array (features .todense ()))
154
170
labels = tf .convert_to_tensor (np .where (labels )[1 ])
171
+
155
172
if sparse_features :
156
173
adj = sparse_matrix_to_tf_sparse_tensor (adj )
157
174
else :
0 commit comments