Debug and change of dataset splitting
Ioannis Chalkiadakis committed Aug 11, 2017
1 parent 82983c6 commit df71815
Showing 2 changed files with 14 additions and 13 deletions.
src/IMDB_dataset/imdb_preprocess.py: 8 additions, 6 deletions
@@ -165,18 +165,20 @@ def get_initial_embeddings_from_dictionary(n_words,embedding_dim,dictionary):
     return ebd_init


-def extract_features(filenames,seed,test_size,save_test,n_words,dictionary,embedding_dim):
+def extract_features(filenames_train_valid,filenames_test,seed,test_size,save_test,n_words,dictionary,embedding_dim):

-    random.shuffle(filenames)
+    random.shuffle(filenames_train_valid)
+    random.shuffle(filenames_test)

-    X_train, X_valid, y_train, y_valid = train_test_split(filenames, np.zeros(len(filenames)),test_size=2*test_size,random_state=seed)
+    X_train, X_valid, y_train, y_valid = train_test_split(filenames_train_valid, np.zeros(len(filenames_train_valid)),test_size=0.1,random_state=seed)
     filenames_train = X_train
     filenames_valid = X_valid

+    """
     X_valid, X_test, y_valid, y_test = train_test_split(filenames_valid, np.zeros(len(filenames_valid)),test_size=0.5,random_state=seed)
     filenames_valid = X_valid
     filenames_test = X_test

+    """
     embedding_initMat = None
     #embedding_initMat = get_initial_embeddings_from_dictionary(n_words,embedding_dim,dictionary)

@@ -273,9 +275,9 @@ def extract_labels(filenames_train,filenames_valid,filenames_test):



-def preprocess_IMDBdata(seed,filenames,n_words=None,dictionary=None,embedding_dim=300,test_size=0.1,save_test=None):
+def preprocess_IMDBdata(seed,filenames_train_valid,filenames_test,n_words=None,dictionary=None,embedding_dim=300,test_size=0.1,save_test=None):

-    trainX,validX,testX,filenames_train,filenames_valid,filenames_test,test_dict,test_dict_token,embedding_initMat = extract_features(filenames,seed,test_size,save_test,n_words,dictionary,embedding_dim)
+    trainX,validX,testX,filenames_train,filenames_valid,filenames_test,test_dict,test_dict_token,embedding_initMat = extract_features(filenames_train_valid,filenames_test,seed,test_size,save_test,n_words,dictionary,embedding_dim)
     # extract_features_w2v(filenames,seed,test_size,save_test=None)

     trainX,validX,testX,maxlen = pad_sequences(trainX, validX,testX, value=0.)
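In effect, the held-out test reviews are now passed into preprocess_IMDBdata as their own file list instead of being carved out of a single pool inside extract_features; only the train/validation boundary is still drawn by train_test_split, at a fixed 10% validation fraction (the old 2*test_size carve-out becomes a commented-out block). A minimal sketch of the new splitting behaviour, using hypothetical stand-in filenames rather than real IMDB review paths:

import random
import numpy as np
from sklearn.model_selection import train_test_split

seed = 42
random.seed(seed)

# Hypothetical stand-ins for the IMDB review paths.
filenames_train_valid = ["train/review_%03d.txt" % i for i in range(100)]
filenames_test = ["test/review_%03d.txt" % i for i in range(50)]

# As in the new extract_features: shuffle both pools independently.
random.shuffle(filenames_train_valid)
random.shuffle(filenames_test)

# Fixed 90/10 train/validation split; the test set is no longer
# split off here (the old 2*test_size carve-out is gone).
X_train, X_valid, _, _ = train_test_split(
    filenames_train_valid,
    np.zeros(len(filenames_train_valid)),
    test_size=0.1,
    random_state=seed,
)
print(len(X_train), len(X_valid), len(filenames_test))  # 90 10 50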
src/lrp.py: 6 additions, 7 deletions
@@ -199,7 +199,6 @@ def get_NeuronExcitingWords_dict(lstm_hidden_json,kkeys,k,save_dir,topN=5):
         if i not in NtoW_keys:
             NtoW[str(i)] = []

-    print(map(int,list(NtoW.keys()))) #########################

     with open(save_dir+re.sub('/', '_', k[-18:-4])+"_ActCells.json", 'w') as f:
         json.dump(NtoW, f)
@@ -350,22 +349,23 @@ def load_intermediate_outputs(input_filename,embedding_json,fc_json,lstm_hidden_
     lstm_cell = data_cell[input_filename]
     fc_out = data_fc[input_filename]
     embedding_output_data = data_ebd[input_filename]


+    T = embedding_output_data.shape
     d = lstm_cell.shape[1]

-    return fc_out,lstm_hidden,lstm_cell,embedding_output_data,d
+    return fc_out,lstm_hidden,lstm_cell,embedding_output_data,d,T



-def lrp_single_input(model,embedding_layer,n_words,layer_names,input_filename,single_input_data,data_token,eps,delta,fc_json,lstm_hidden_json,lstm_cell_json,ebd_json,dictionary,target_class,T,classes=2,lstm_actv1=expit,lstm_actv2=np.tanh,debug=False):
+def lrp_single_input(model,embedding_layer,n_words,layer_names,input_filename,single_input_data,data_token,eps,delta,fc_json,lstm_hidden_json,lstm_cell_json,ebd_json,dictionary,target_class,classes=2,lstm_actv1=expit,lstm_actv2=np.tanh,debug=False):


     with model.session.as_default():

         lrp_mask = np.zeros((classes))
         lrp_mask[target_class] = 1.0

-        fc_out,lstm_hidden,lstm_cell,embedding_output_data,d = load_intermediate_outputs(input_filename,ebd_json,fc_json,lstm_hidden_json,lstm_cell_json,layer_name=None)
+        fc_out,lstm_hidden,lstm_cell,embedding_output_data,d,T = load_intermediate_outputs(input_filename,ebd_json,fc_json,lstm_hidden_json,lstm_cell_json,layer_name=None)

         #LRP through fc layer
         fc_name = "fc"
@@ -407,10 +407,9 @@ def lrp_full(model,embedding_layer,n_words,input_filename,net_arch,net_arch_laye
         k = list(keys_test)[i]
         kkeys = list(data_test[k].keys())
         kdata = np.array(list(data_test[k].values()))
-        T = kdata.shape
         data_token = np.array(data_test_token[k])

-        lrp_input,lrp_fc,lstm_lrp_x,(lstm_lrp_h,lstm_lrp_g,lstm_lrp_c) = lrp_single_input(model,embedding_layer,n_words,net_arch_layers,k,kdata,data_token,eps,delta,fc_out_json,lstm_hidden_json,lstm_cell_json,ebd_json,dictionary,target_class=1,T=T,classes=2,lstm_actv1=expit,lstm_actv2=np.tanh,debug=debug)
+        lrp_input,lrp_fc,lstm_lrp_x,(lstm_lrp_h,lstm_lrp_g,lstm_lrp_c) = lrp_single_input(model,embedding_layer,n_words,net_arch_layers,k,kdata,data_token,eps,delta,fc_out_json,lstm_hidden_json,lstm_cell_json,ebd_json,dictionary,target_class=1,classes=2,lstm_actv1=expit,lstm_actv2=np.tanh,debug=debug)

         lrp_neurons = get_topLRP_cells(lrp_fc,k,save_dir,topN)
         reviewLRP_data = get_PosNegNeurons_dict(i,predictions,lrp_neurons)
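The lrp.py side of the commit is the matching plumbing change: the sequence shape T is now derived inside load_intermediate_outputs from the stored embedding output and returned to the caller, so lrp_full no longer computes it from kdata and lrp_single_input no longer takes it as a parameter (a stray debug print is also dropped). A minimal sketch of the relocated computation, with hypothetical array shapes and an illustrative helper name:

import numpy as np

# Illustration only: T comes from the stored embedding output inside
# load_intermediate_outputs instead of being computed in lrp_full and
# threaded through lrp_single_input. Shapes below are hypothetical.
def load_shapes(embedding_output_data, lstm_cell):
    T = embedding_output_data.shape   # full shape tuple of the embedded review
    d = lstm_cell.shape[1]            # LSTM hidden/cell dimension
    return d, T

ebd = np.zeros((80, 300))    # 80 tokens, 300-dim embeddings (hypothetical)
cell = np.zeros((80, 128))   # 80 timesteps, 128 LSTM cells (hypothetical)
d, T = load_shapes(ebd, cell)
print(d, T)  # 128 (80, 300)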
