This repository was archived by the owner on Jul 26, 2025. It is now read-only.

Commit 70bd45a

add detect

1 parent 1b0faf2 commit 70bd45a

6 files changed: +274 −47 lines changed

brnn_sequence_analyzer.py

Lines changed: 57 additions & 0 deletions

@@ -390,6 +390,63 @@ def predict(sequence, input_len, analyzer, nb_predictions=80,
     print "\n"
 
 
+def detect(sequence, input_len, analyzer, mapping='m2m', sentence_length=40):
+    """
+    Scan the given sequence to detect anomalies.
+
+    Arguments:
+        sequence: {list}, the original input sequence
+        input_len: {integer}, the number of unique id classes
+        analyzer: {SequenceAnalyzer}, the sequence analyzer
+        mapping: {string}, input to output mapping.
+            'o2o': one-to-one
+            'm2m': many-to-many
+        sentence_length: {integer}, the length of each sentence.
+    """
+    # sequence length
+    length = len(sequence)
+
+    # predicted probabilities for each id;
+    # the first sentence_length ids are assumed true
+    prob = [1] * sentence_length + [0] * (length - sentence_length)
+
+    # slide a window of sentence_length ids over the sequence
+    for start_index in xrange(length - sentence_length):
+        # seed sentence
+        X = sequence[start_index : start_index + sentence_length]
+        # print "X: " + ' '.join(str(s).ljust(4) for s in X)
+
+        # y_true: the ids that actually follow each position of the seed
+        # y_true = sequence[start_index + 1 : start_index + sentence_length + 1]
+        # print "y_true: " + ' '.join(str(s).ljust(4) for s in y_true)
+        y_next_true = sequence[start_index + sentence_length]
+
+        # format input: one-hot encode the seed sentence
+        seed = np.zeros((1, sentence_length, input_len))
+        for t in range(0, sentence_length):
+            seed[0, t, X[t]] = 1
+
+        # get predictions; verbose=0 means no logging
+        predictions = analyzer.model.predict(seed, verbose=0)[0]
+
+        # y_predicted
+        y_next_pred = 0
+        if mapping == 'o2o':
+            prob[start_index + sentence_length] = predictions[y_next_true]
+            y_next_pred = np.argmax(predictions)
+        elif mapping == 'm2m':
+            # next_sentence = [np.argmax(pred) for pred in predictions]
+            # y_next_pred = next_sentence[-1]
+            # print "y_pred: " + ' '.join(str(id_).ljust(4)
+            #                             for id_ in next_sentence)
+            y_next_pred = np.argmax(predictions[-1])
+            prob[start_index + sentence_length] = predictions[-1][y_next_true]
+
+    return prob
+
+
 def train(analyzer, train_sequence, val_sequence, input_len,
           batch_size=128, nb_epoch=50, nb_iterations=4,
           sentence_length=40, step=40, mapping='m2m'):
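
Since prob holds, for every position past the first window, the model's predicted probability of the id that actually occurred, a low value marks a likely anomaly. A minimal usage sketch, not part of the commit; the threshold value and variable names are illustrative assumptions:

# Sketch: flag ids whose predicted probability falls below a cutoff.
# Assumes `sequence`, `input_len` and `analyzer` are set up as in run().
prob = detect(sequence, input_len, analyzer, mapping='m2m', sentence_length=40)
threshold = 0.01  # hypothetical cutoff, to be tuned on known-normal data
anomalies = [i for i, p in enumerate(prob) if p < threshold]
print "anomalous positions: ", anomalies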

brnn_sequence_analyzer_gen.py

Lines changed: 57 additions & 0 deletions

@@ -447,6 +447,63 @@ def train(analyzer, train_data, nb_training_samples,
     analyzer.save_model("weights-after-iteration.hdf5")
 
 
+def detect(sequence, input_len, analyzer, mapping='m2m', sentence_length=40):
+    """
+    Scan the given sequence to detect anomalies.
+
+    Arguments:
+        sequence: {list}, the original input sequence
+        input_len: {integer}, the number of unique id classes
+        analyzer: {SequenceAnalyzer}, the sequence analyzer
+        mapping: {string}, input to output mapping.
+            'o2o': one-to-one
+            'm2m': many-to-many
+        sentence_length: {integer}, the length of each sentence.
+    """
+    # sequence length
+    length = len(sequence)
+
+    # predicted probabilities for each id;
+    # the first sentence_length ids are assumed true
+    prob = [1] * sentence_length + [0] * (length - sentence_length)
+
+    # slide a window of sentence_length ids over the sequence
+    for start_index in xrange(length - sentence_length):
+        # seed sentence
+        X = sequence[start_index : start_index + sentence_length]
+        # print "X: " + ' '.join(str(s).ljust(4) for s in X)
+
+        # y_true: the ids that actually follow each position of the seed
+        # y_true = sequence[start_index + 1 : start_index + sentence_length + 1]
+        # print "y_true: " + ' '.join(str(s).ljust(4) for s in y_true)
+        y_next_true = sequence[start_index + sentence_length]
+
+        # format input: one-hot encode the seed sentence
+        seed = np.zeros((1, sentence_length, input_len))
+        for t in range(0, sentence_length):
+            seed[0, t, X[t]] = 1
+
+        # get predictions; verbose=0 means no logging
+        predictions = analyzer.model.predict(seed, verbose=0)[0]
+
+        # y_predicted
+        y_next_pred = 0
+        if mapping == 'o2o':
+            prob[start_index + sentence_length] = predictions[y_next_true]
+            y_next_pred = np.argmax(predictions)
+        elif mapping == 'm2m':
+            # next_sentence = [np.argmax(pred) for pred in predictions]
+            # y_next_pred = next_sentence[-1]
+            # print "y_pred: " + ' '.join(str(id_).ljust(4)
+            #                             for id_ in next_sentence)
+            y_next_pred = np.argmax(predictions[-1])
+            prob[start_index + sentence_length] = predictions[-1][y_next_true]
+
+    return prob
+
+
 def run(hidden_len=512, batch_size=128, nb_batch=200, nb_epoch=50,
         nb_iterations=4, lr=0.001, validation_split=0.05, nb_predictions=20,
         mapping='m2m', sentence_length=80, step=80, mode='train'):
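
The seed tensor built inside detect is a one-hot encoding of the sliding window; a tiny self-contained sketch of that encoding with toy numbers, not taken from the commit:

import numpy as np

X = [3, 1, 0, 2]   # a window of ids (sentence_length = 4)
input_len = 5      # number of unique id classes
seed = np.zeros((1, len(X), input_len))
for t in range(len(X)):
    seed[0, t, X[t]] = 1
# seed[0] is now a 4x5 matrix with exactly one 1 per row,
# the (1, sentence_length, input_len) shape that predict() expects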

rnn_sequence_analyzer.py

Lines changed: 0 additions & 1 deletion

@@ -417,7 +417,6 @@ def train(analyzer, train_sequence, val_sequence, input_len,
     analyzer.save_model("weights-after-iteration.hdf5")
 
 
-
 def detect(sequence, input_len, analyzer, mapping='m2m', sentence_length=40):
     """
     Scan the given sequence to detect anomalies.

rnn_sequence_analyzer_gen.py

Lines changed: 44 additions & 44 deletions

@@ -54,7 +54,7 @@ def __init__(self, sentence_length, input_len, hidden_len, output_len):
 
     def build(self, layer='LSTM', mapping='m2m', nb_layers=2, dropout=0.2):
         """
-        Stacked LSTM with specified dropout rate (default 0.2), built with
+        Stacked RNN with specified dropout rate (default 0.2), built with
         softmax activation, cross entropy loss and rmsprop optimizer.
 
         Arguments:
@@ -122,7 +122,9 @@ class LAYER(GRU):
 
         self.model.add(Activation('softmax'))
 
-        self.model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
+        self.model.compile(loss='categorical_crossentropy',
+                           optimizer='rmsprop',
+                           metrics=['accuracy'])
 
     def save_model(self, filename):
         """
@@ -184,7 +186,7 @@ def on_epoch_end(self, epoch, logs={}):  # pylint: disable=W0102
         A method called at the end of every epoch.
 
         Arguments:
-            epoch: {integer}, the current epoch
+            epoch: {integer}, the current epoch.
             logs: {dictionary}, recording the training and validation
                 losses and accuracy of every epoch.
         """
@@ -383,6 +385,45 @@ def predict(sequence, input_len, analyzer, nb_predictions=80,
     print "\n"
 
 
+def train(analyzer, train_data, nb_training_samples,
+          val_data, nb_validation_samples,
+          nb_epoch=50, nb_iterations=4):
+    """
+    Trains the network.
+
+    Arguments:
+        analyzer: {SequenceAnalyzer}.
+        train_data: {generator}, yields training batches (X_train, y_train).
+        val_data: {generator}, yields validation batches (X_val, y_val).
+        nb_training_samples: {integer}, the number of training samples.
+        nb_validation_samples: {integer}, the number of validation samples.
+        nb_epoch: {integer}, the number of epochs per iteration.
+        nb_iterations: {integer}, the number of iterations.
+    """
+    for iteration in range(1, nb_iterations + 1):
+        print ""
+        print "------------------------ Start Training ------------------------"
+        print "Iteration: ", iteration
+        print "Number of epochs per iteration: ", nb_epoch
+
+        # history of losses and accuracy
+        history = History()
+
+        # save the model weights after each epoch
+        # if the validation loss decreases
+        checkpointer = ModelCheckpoint(filepath="weights.hdf5",
+                                       verbose=1, save_best_only=True)
+
+        # train the model with the data generator
+        analyzer.model.fit_generator(train_data,
+                                     samples_per_epoch=nb_training_samples,
+                                     nb_epoch=nb_epoch, verbose=1,
+                                     callbacks=[history, checkpointer],
+                                     validation_data=val_data,
+                                     nb_val_samples=nb_validation_samples)
+
+    analyzer.save_model("weights-after-iteration.hdf5")
+
 
 def detect(sequence, input_len, analyzer, mapping='m2m', sentence_length=40):
     """
@@ -441,47 +482,6 @@ def detect(sequence, input_len, analyzer, mapping='m2m', sentence_length=40):
     return prob
 
 
-def train(analyzer, train_data, nb_training_samples,
-          val_data, nb_validation_samples,
-          nb_epoch=50, nb_iterations=4):
-    """
-    Trains the network.
-
-    Arguments:
-        analyzer: {SequenceAnalyzer}.
-        train_data: {generator}, yields training batches (X_train, y_train).
-        val_data: {generator}, yields validation batches (X_val, y_val).
-        nb_training_samples: {integer}, the number of training samples.
-        nb_validation_samples: {integer}, the number of validation samples.
-        nb_epoch: {integer}, the number of epochs per iteration.
-        nb_iterations: {integer}, the number of iterations.
-    """
-    for iteration in range(1, nb_iterations + 1):
-        print ""
-        print "------------------------ Start Training ------------------------"
-        print "Iteration: ", iteration
-        print "Number of epochs per iteration: ", nb_epoch
-
-        # history of losses and accuracy
-        history = History()
-
-        # save the model weights after each epoch
-        # if the validation loss decreases
-        checkpointer = ModelCheckpoint(filepath="weights.hdf5",
-                                       verbose=1, save_best_only=True)
-
-        # train the model with the data generator
-        analyzer.model.fit_generator(train_data,
-                                     samples_per_epoch=nb_training_samples,
-                                     nb_epoch=nb_epoch, verbose=1,
-                                     callbacks=[history, checkpointer],
-                                     validation_data=val_data,
-                                     nb_val_samples=nb_validation_samples)
-
-    analyzer.save_model("weights-after-iteration.hdf5")
-
-
 def run(hidden_len=512, batch_size=128, nb_batch=200, nb_epoch=50,
         nb_iterations=4, lr=0.001, validation_split=0.05, nb_predictions=20,
         mapping='m2m', sentence_length=80, step=80, mode='train'):
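
fit_generator consumes generators that this commit does not show. A minimal sketch of one compatible shape, assuming one-hot inputs and a single next-id target per sample; the function name and sampling strategy are assumptions:

import numpy as np

def batch_generator(sequence, sentence_length, input_len, batch_size=128):
    # Hypothetical generator shaped for the Keras 1 fit_generator call
    # above: yields (X, y) batches indefinitely.
    while True:
        X = np.zeros((batch_size, sentence_length, input_len))
        y = np.zeros((batch_size, input_len))
        for i in range(batch_size):
            start = np.random.randint(0, len(sequence) - sentence_length)
            for t, id_ in enumerate(sequence[start:start + sentence_length]):
                X[i, t, id_] = 1
            y[i, sequence[start + sentence_length]] = 1
        yield X, y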

sequence_analyzer.py

Lines changed: 59 additions & 2 deletions

@@ -547,6 +547,63 @@ def train(analyzer, train_sequence, val_sequence, input_len,
     analyzer.save_model("weights-after-iteration.hdf5")
 
 
+def detect(sequence, input_len, analyzer, mapping='m2m', sentence_length=40):
+    """
+    Scan the given sequence to detect anomalies.
+
+    Arguments:
+        sequence: {list}, the original input sequence
+        input_len: {integer}, the number of unique id classes
+        analyzer: {SequenceAnalyzer}, the sequence analyzer
+        mapping: {string}, input to output mapping.
+            'o2o': one-to-one
+            'm2m': many-to-many
+        sentence_length: {integer}, the length of each sentence.
+    """
+    # sequence length
+    length = len(sequence)
+
+    # predicted probabilities for each id;
+    # the first sentence_length ids are assumed true
+    prob = [1] * sentence_length + [0] * (length - sentence_length)
+
+    # slide a window of sentence_length ids over the sequence
+    for start_index in xrange(length - sentence_length):
+        # seed sentence
+        X = sequence[start_index : start_index + sentence_length]
+        # print "X: " + ' '.join(str(s).ljust(4) for s in X)
+
+        # y_true: the ids that actually follow each position of the seed
+        # y_true = sequence[start_index + 1 : start_index + sentence_length + 1]
+        # print "y_true: " + ' '.join(str(s).ljust(4) for s in y_true)
+        y_next_true = sequence[start_index + sentence_length]
+
+        # format input: one-hot encode the seed sentence
+        seed = np.zeros((1, sentence_length, input_len))
+        for t in range(0, sentence_length):
+            seed[0, t, X[t]] = 1
+
+        # get predictions; verbose=0 means no logging
+        predictions = analyzer.model.predict(seed, verbose=0)[0]
+
+        # y_predicted
+        y_next_pred = 0
+        if mapping == 'o2o':
+            prob[start_index + sentence_length] = predictions[y_next_true]
+            y_next_pred = np.argmax(predictions)
+        elif mapping == 'm2m':
+            # next_sentence = [np.argmax(pred) for pred in predictions]
+            # y_next_pred = next_sentence[-1]
+            # print "y_pred: " + ' '.join(str(id_).ljust(4)
+            #                             for id_ in next_sentence)
+            y_next_pred = np.argmax(predictions[-1])
+            prob[start_index + sentence_length] = predictions[-1][y_next_true]
+
+    return prob
+
+
 def run(hidden_len=512, batch_size=128, nb_epoch=50, nb_iterations=4, lr=0.001,
         validation_split=0.05,  # pylint: disable=W0613
         nb_predictions=20, mapping='m2m', sentence_length=80, step=80,
@@ -592,10 +649,10 @@ def run(hidden_len=512, batch_size=128, nb_epoch=50, nb_iterations=4, lr=0.001,
     analyzer.build(layer='LSTM', mapping=mapping, nb_layers=2, dropout=0.2)
 
     # plot model
-    # rnn.plot_model()
+    # analyzer.plot_model()
 
     # load the previous model weights
-    # rnn.load_model("weightsf4-61.hdf5")
+    # analyzer.load_model("weightsf4-61.hdf5")
 
     # reset the learning rate
     if lr != 0.001:
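
The o2o/m2m branch in detect relies on the model's output shape differing by mapping; a self-contained sketch of the indexing it assumes, using random stand-in distributions (shapes inferred from the code, not stated in the commit):

import numpy as np

sentence_length, input_len, y_next_true = 40, 100, 7

# 'o2o': a single softmax over the next id -> shape (input_len,)
preds_o2o = np.random.dirichlet(np.ones(input_len))
print preds_o2o[y_next_true], np.argmax(preds_o2o)

# 'm2m': one softmax per timestep -> shape (sentence_length, input_len);
# the last row scores the id that follows the seed sentence
preds_m2m = np.random.dirichlet(np.ones(input_len), sentence_length)
print preds_m2m[-1][y_next_true], np.argmax(preds_m2m[-1])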

sequence_analyzer_gen.py

Lines changed: 57 additions & 0 deletions

@@ -553,6 +553,63 @@ def train(analyzer, train_data, nb_training_samples,
     analyzer.save_model("weights-after-iteration.hdf5")
 
 
+def detect(sequence, input_len, analyzer, mapping='m2m', sentence_length=40):
+    """
+    Scan the given sequence to detect anomalies.
+
+    Arguments:
+        sequence: {list}, the original input sequence
+        input_len: {integer}, the number of unique id classes
+        analyzer: {SequenceAnalyzer}, the sequence analyzer
+        mapping: {string}, input to output mapping.
+            'o2o': one-to-one
+            'm2m': many-to-many
+        sentence_length: {integer}, the length of each sentence.
+    """
+    # sequence length
+    length = len(sequence)
+
+    # predicted probabilities for each id;
+    # the first sentence_length ids are assumed true
+    prob = [1] * sentence_length + [0] * (length - sentence_length)
+
+    # slide a window of sentence_length ids over the sequence
+    for start_index in xrange(length - sentence_length):
+        # seed sentence
+        X = sequence[start_index : start_index + sentence_length]
+        # print "X: " + ' '.join(str(s).ljust(4) for s in X)
+
+        # y_true: the ids that actually follow each position of the seed
+        # y_true = sequence[start_index + 1 : start_index + sentence_length + 1]
+        # print "y_true: " + ' '.join(str(s).ljust(4) for s in y_true)
+        y_next_true = sequence[start_index + sentence_length]
+
+        # format input: one-hot encode the seed sentence
+        seed = np.zeros((1, sentence_length, input_len))
+        for t in range(0, sentence_length):
+            seed[0, t, X[t]] = 1
+
+        # get predictions; verbose=0 means no logging
+        predictions = analyzer.model.predict(seed, verbose=0)[0]
+
+        # y_predicted
+        y_next_pred = 0
+        if mapping == 'o2o':
+            prob[start_index + sentence_length] = predictions[y_next_true]
+            y_next_pred = np.argmax(predictions)
+        elif mapping == 'm2m':
+            # next_sentence = [np.argmax(pred) for pred in predictions]
+            # y_next_pred = next_sentence[-1]
+            # print "y_pred: " + ' '.join(str(id_).ljust(4)
+            #                             for id_ in next_sentence)
+            y_next_pred = np.argmax(predictions[-1])
+            prob[start_index + sentence_length] = predictions[-1][y_next_true]
+
+    return prob
+
+
 def run(hidden_len=512, batch_size=128, nb_batch=200, nb_epoch=50,
         nb_iterations=4, lr=0.001, validation_split=0.05, nb_predictions=20,
         mapping='m2m', sentence_length=80, step=80, mode='train'):
