Skip to content

Commit 395de3f

Browse files
committed
Successfully trained abuse and intent models
1 parent 19abf06 commit 395de3f

File tree

6 files changed

+28
-13
lines changed

6 files changed

+28
-13
lines changed

execution/analysis/embeddings/collect_intent_verbs.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,17 +12,17 @@
1212
destination_dir = base_dir / 'embeddings'
1313

1414
frame_info_path = data_dir / 'intent_frame.csv'
15-
english_mask = data_dir / 'english_mask.csv'
15+
# english_mask = data_dir / 'english_mask.csv'
1616

1717
desire_index = 1
1818
action_index = 2
1919

20-
check_existence([frame_info_path, model_path, english_mask])
20+
check_existence([frame_info_path, model_path])
2121
make_dir(destination_dir)
2222
print('Config complete.')
2323

24-
english_mask = load_vector(english_mask).astype(bool)
25-
intent_frames = read_csv(frame_info_path, header=None, keep_default_na=False).values[english_mask]
24+
# english_mask = load_vector(english_mask).astype(bool)
25+
intent_frames = read_csv(frame_info_path, header=None, keep_default_na=False).values
2626
print('Loaded data with shape', intent_frames.shape)
2727

2828
desire_verbs = get_verbs(intent_frames, desire_index)

execution/prediction/abusive_intent.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
from utilities.data_management import make_path, check_existence, open_w_pandas, get_model_path, vector_to_file, \
2-
make_dir
2+
make_dir, get_embedding_path
33
from model.networks import predict_abusive_intent
44
from config import dataset
55

66

7-
embedding_path = make_path('data/models/') / dataset / 'derived' / (dataset + '.bin')
7+
embedding_path = get_embedding_path()
88
processed_base = make_path('data/processed_data') / dataset / 'analysis'
99
context_path = processed_base / 'intent' / 'contexts.csv'
1010
predictions_base = processed_base / 'intent_abuse'

execution/training/abuse.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,24 @@
11
from model.networks import generate_abuse_network
22
from utilities.data_management import make_dir, make_path, open_w_pandas, check_existence, \
3-
get_model_path, load_vector, vector_to_file, split_sets
3+
get_model_path, load_vector, vector_to_file, split_sets, get_embedding_path
44
from fasttext import load_model
55
from model.layers.realtime_embedding import RealtimeEmbedding
66
from keras.callbacks import EarlyStopping
77
from config import dataset, max_tokens, training_verbosity, batch_size
8+
from time import time
89

910

1011
# Define paths
1112
abuse_weights_path = get_model_path('abuse')
12-
embedding_path = make_path('data/models/') / dataset / 'derived' / (dataset + '.bin')
13+
embedding_path = get_embedding_path()
1314
base_path = make_path('data/processed_data/') / dataset / 'analysis'
1415
data_path = make_path('data/prepared_data/abusive_data.csv')
16+
dest_dir = base_path / 'abuse'
1517

1618
# Check for files and make directories
1719
check_existence([embedding_path, data_path])
1820
make_dir(abuse_weights_path.parent)
21+
make_dir(dest_dir)
1922
print('Config complete.')
2023

2124
# Load embeddings and contexts
@@ -41,12 +44,24 @@
4144
training_steps = int(len(training_data) / batch_size) + 1
4245
validation_steps = int(len(testing_data) / batch_size) + 1
4346

44-
stopping_conditions = EarlyStopping(monitor='val_loss', patience=2, verbose=1, restore_best_weights=True)
45-
model.fit_generator(training, epochs=50, verbose=training_verbosity, callbacks=[stopping_conditions],
46-
validation_data=testing, shuffle=True)
47+
start = time()
4748

49+
stopping_conditions = EarlyStopping(monitor='val_loss', patience=3, verbose=1, restore_best_weights=True)
50+
history = model.fit_generator(training, epochs=50, verbose=training_verbosity, callbacks=[stopping_conditions],
51+
validation_data=testing, shuffle=True).history
52+
53+
training_time = time() - start
54+
print('Completed training in', training_time, 's')
55+
print('Training history', history)
4856

4957
evaluated_accuracy = model.evaluate_generator(testing, verbose=training_verbosity, steps=validation_steps)
5058
print('Model validation accuracy', evaluated_accuracy)
5159

5260
model.save_weights(str(abuse_weights_path))
61+
print('Completed training and saving abuse model.')
62+
63+
vector_to_file(training.data_source, dest_dir / 'training_data.csv')
64+
vector_to_file(training.labels, dest_dir / 'training_labels.csv')
65+
66+
vector_to_file(testing.data_source, dest_dir / 'testing_data.csv')
67+
vector_to_file(testing.labels, dest_dir / 'testing_labels.csv')

execution/training/intent.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
# Load embeddings and contexts
3131
embedding_model = load_model(str(embedding_path))
3232

33-
# english_mask = load_vector(english_mask_path).astype(bool)
3433
raw_contexts = open_w_pandas(context_path)['contexts'].values
3534
initial_labels = load_vector(initial_label_path)
3635
document_matrix = load_npz(document_matrix_path)

model/layers/realtime_embedding.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def __init__(self, embedding_model, data_source, labels=None, labels_in_progress
3333
self.working_initial_labels = self.original_initial_labels
3434
self.is_training = False
3535

36-
self.concrete_weight = 1.5
36+
self.concrete_weight = 2
3737
self.midpoint = 0.5
3838
self.uniform_weights = uniform_weights
3939
self.data_length = ceil(len(self.working_data_source) / batch_size)

utilities/data_management/io.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,7 @@ def output_abusive_intent(index_set, predictions, contexts, filename=None):
185185
'O': '%s',
186186
'U': '%s',
187187
'i': '%d',
188+
'b': '%d',
188189
'f': '%.6f'
189190
}
190191

0 commit comments

Comments (0)