-
Notifications
You must be signed in to change notification settings - Fork 0
/
LSTM.py
150 lines (127 loc) · 5.15 KB
/
LSTM.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy import API
import os
import json
filename='tweets.txt'
filepath='C:\Wingz\TrumpAIBot'
path = os.path.join(filepath, filename)
data = open(path,'r').read()
#data = data.lower()
#preprocess data
chars = sorted(list(set(data)))
char_to_int = {c:i for i,c in enumerate(chars)}
int_to_char = {i:c for i,c in enumerate(chars)}
#summarize stats
print 'Total chars in text: ' + str(len(data))
print 'Total unique chars: ' + str(len(chars))
#determine hyperparameters
seq_length = 15 #how many chars the sliding window covers
dataX = []
dataY = []
for i in range(0, len(data) - seq_length, 1):
seq_in = data[i: i + seq_length] #window of chars/data to predict off of
seq_out = data[i + seq_length] #actual output, one char
#append each list of input chars and its corresponding output char
dataX.append([char_to_int[char] for char in seq_in])
dataY.append(char_to_int[seq_out])
#summarize stats
print 'Total number of different training patterns: '+ str(len(dataX))
#reshape input data to [samples, time steps, features]. each sample is a 100x1 matrix
#and one hot encode the output variables
X = numpy.reshape(dataX, (len(dataX), seq_length, 1))
X = X / float(len(chars)) #normalize each integer to make learning easier, however not one-hot encoded
y = np_utils.to_categorical(dataY) #one hot enconde the output variable
#build LSTM model
model = Sequential()
model.add(LSTM(256, input_shape=(X.shape[1],X.shape[2]))) #256 memory cells
model.add(Dropout(0.2))
model.add(Dense(y.shape[1],activation='softmax')) #DENSE = standard feedforward layer, every layer connected (dense)
model.compile(loss='categorical_crossentropy', optimizer='adam')
#define the checkpoint
filepath="weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1,save_best_only=True, mode='min')
callbacks_list = [checkpoint]
#fit the model
model.fit(X, y, nb_epoch=20, batch_size = 128, callbacks=callbacks_list)
#load best weights
filename = ''
model.load_weights(filename)
model.compile(loss='categorical_crossentropy', optimizer='adam')
#twitter API information
#information for twitter REST API connection
keys = {
'ACCESS_TOKEN':'2314659403-gMRoUj8gowoiB3nocfbXwIA8JpJ4VxOUcYVXR44',
'ACCESS_SECRET':'EhfdMWiYFL2MeFUgYiid1jl22BJKSuCCBuQABdAPlnlBu',
'CONSUMER_KEY':'u5gk7UZvyGgcWfS7BEt2zYCQP',
'CONSUMER_SECRET':'r8zsmgKBpJEXt4wQcUBpajrX19zgyVkIvj657Ouh1mScCZTKTK'
}
temp = []
class StdOutListener(StreamListener):
def __init__(self):
self.input = []
self.output = []
def on_data(self, data):
#only use data where reply is to my bot
data = json.loads(data)
temp.append(data)
#print json.loads(data)['text']
tweet_input = []
if data['is_quote_status'] == False and data['retweeted'] == False:
text = data['text'][11:]
if len(text) < 140:
left_over = 140 - len(text)
for i in range(left_over):
text = text + ' '
print text
text = text[0:seq_length]
tweet_input = [char_to_int[char] for char in text]
print text
print tweet_input, len(tweet_input)
result = '@'
result = result + data['user']['screen_name'] + ' '
print result
for i in range((140-len(result))):
x = numpy.reshape(tweet_input, (1,seq_length, 1))
x = x / float(len(chars))
#print x
prediction = model.predict(x, verbose=0)
#print prediction
index = numpy.argmax(prediction) #grab indices of max arguments
print int_to_char[index]
result = result + int_to_char[index]
#seq_in = [int_to_char[value] for value in tweet_input]
#print result
#print tweet_input, len(tweet_input)
tweet_input.append(index)
tweet_input = tweet_input[1:len(tweet_input)] #predict off of new input +1 char
print tweet_input
print result
result = result[0:140]
api.update_status(result)
#print data
#push data into in put stream
#self.input = [] #clear input stream
#self.input = data
#for i in range(data):
# x = numpy.reshape(len(data), )
#predict chars from input stream
#reply with the predicted chars 140 chars or less to user
#return True
def on_error(self, status):
print status
if __name__ == '__main__':
l = StdOutListener()
auth = OAuthHandler(keys['CONSUMER_KEY'], keys['CONSUMER_SECRET'])
auth.set_access_token(keys['ACCESS_TOKEN'], keys['ACCESS_SECRET'])
stream = Stream(auth, l)
stream.filter(track=['ZengAIBot'])
api = API(auth)