
Commit 4562a2b

add new theano
1 parent dda0034 commit 4562a2b

File tree

3 files changed: +367 -1 lines changed


hmm_class/hmmc_theano2.py

Lines changed: 216 additions & 0 deletions
@@ -0,0 +1,216 @@
# https://deeplearningcourses.com/c/unsupervised-machine-learning-hidden-markov-models-in-python
# https://udemy.com/unsupervised-machine-learning-hidden-markov-models-in-python
# http://lazyprogrammer.me
# Continuous-observation HMM in Theano using gradient descent.

# This script differs from hmmc_theano.py in the following way:
# Instead of re-normalizing the parameters at each iteration,
# we make the parameters free to vary between -inf and +inf.
# We then use softmax to ensure the probabilities are positive and sum to 1.
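#
# A minimal sketch of the idea (illustrative only, not used by the code below):
# given any unconstrained vector z, softmax maps it to a valid distribution,
#   p_j = exp(z_j) / sum_k exp(z_k),
# so gradient descent can update z freely while p stays positive and sums to 1.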

from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future


import wave
import theano
import theano.tensor as T
import numpy as np
import matplotlib.pyplot as plt

# from theano.sandbox import solve # does not have gradient functionality
from generate_c import get_signals, big_init


class HMM:
    def __init__(self, M, K):
        self.M = M # number of hidden states
        self.K = K # number of Gaussians

    def fit(self, X, learning_rate=1e-2, max_iter=10):
        # train the HMM by gradient descent on the negative log-likelihood
        # (rather than Baum-Welch / expectation-maximization)

        N = len(X)
        D = X[0].shape[1] # assume each x is organized (T, D)

        pi0 = np.ones(self.M) # initial state distribution
        A0 = np.random.randn(self.M, self.M) # state transition matrix
        R0 = np.ones((self.M, self.K)) # mixture proportions
        mu0 = np.zeros((self.M, self.K, D))
        for i in range(self.M):
            for k in range(self.K):
                random_idx = np.random.choice(N)
                x = X[random_idx]
                random_time_idx = np.random.choice(len(x))
                mu0[i,k] = x[random_time_idx]
        sigma0 = np.random.randn(self.M, self.K, D, D)

        thx, cost = self.set(pi0, A0, R0, mu0, sigma0)

        pi_update = self.preSoftmaxPi - learning_rate*T.grad(cost, self.preSoftmaxPi)
        A_update = self.preSoftmaxA - learning_rate*T.grad(cost, self.preSoftmaxA)
        R_update = self.preSoftmaxR - learning_rate*T.grad(cost, self.preSoftmaxR)
        mu_update = self.mu - learning_rate*T.grad(cost, self.mu)
        sigma_update = self.sigmaFactor - learning_rate*T.grad(cost, self.sigmaFactor)

        updates = [
            (self.preSoftmaxPi, pi_update),
            (self.preSoftmaxA, A_update),
            (self.preSoftmaxR, R_update),
            (self.mu, mu_update),
            (self.sigmaFactor, sigma_update),
        ]

        train_op = theano.function(
            inputs=[thx],
            updates=updates,
        )

        costs = []
        for it in range(max_iter):
            print("it:", it)

            for n in range(N):
                # recomputing the full cost on every step is slow but simple
                c = -self.log_likelihood_multi(X).sum() # total negative log-likelihood
                print("c:", c)
                costs.append(c)
                train_op(X[n])

        plt.plot(costs)
        plt.show()

    def set(self, preSoftmaxPi, preSoftmaxA, preSoftmaxR, mu, sigmaFactor):
        self.preSoftmaxPi = theano.shared(preSoftmaxPi)
        self.preSoftmaxA = theano.shared(preSoftmaxA)
        self.preSoftmaxR = theano.shared(preSoftmaxR)
        self.mu = theano.shared(mu)
        self.sigmaFactor = theano.shared(sigmaFactor)
        M, K = preSoftmaxR.shape
        self.M = M
        self.K = K

        pi = T.nnet.softmax(self.preSoftmaxPi).flatten()
        A = T.nnet.softmax(self.preSoftmaxA)
        R = T.nnet.softmax(self.preSoftmaxR)


        D = self.mu.shape[2]
        twopiD = (2*np.pi)**D

        # set up theano variables and functions
        thx = T.matrix('X') # represents a TxD matrix of sequential observations
        def mvn_pdf(x, m, S):
            k = 1 / T.sqrt(twopiD * T.nlinalg.det(S))
            e = T.exp(-0.5*(x - m).T.dot(T.nlinalg.matrix_inverse(S).dot(x - m)))
            return k*e

        def gmm_pdf(x):
            def state_pdfs(xt):
                def component_pdf(j, xt):
                    Bj_t = 0
                    # j = T.cast(j, 'int32')
                    for k in range(self.K):
                        # k = int(k)
                        # a = R[j,k]
                        # b = mu[j,k]
                        # c = sigma[j,k]
                        # L L^T is symmetric positive semi-definite, so the
                        # covariance stays valid while sigmaFactor varies freely
                        L = self.sigmaFactor[j,k]
                        S = L.dot(L.T)
                        Bj_t += R[j,k] * mvn_pdf(xt, self.mu[j,k], S)
                    return Bj_t

                Bt, _ = theano.scan(
                    fn=component_pdf,
                    sequences=T.arange(self.M),
                    n_steps=self.M,
                    outputs_info=None,
                    non_sequences=[xt],
                )
                return Bt

            B, _ = theano.scan(
                fn=state_pdfs,
                sequences=x,
                n_steps=x.shape[0],
                outputs_info=None,
            )
            return B.T # shape M x T: B[j,t] = p(x(t) | state j)

        B = gmm_pdf(thx)
        # scale = T.zeros((thx.shape[0], 1), dtype=theano.config.floatX)
        # scale[0] = (self.pi*B[:,0]).sum()

        def recurrence(t, old_a, B):
            a = old_a.dot(A) * B[:, t]
            s = a.sum()
            return (a / s), s

        [alpha, scale], _ = theano.scan(
            fn=recurrence,
            sequences=T.arange(1, thx.shape[0]),
            outputs_info=[pi*B[:,0], None],
            n_steps=thx.shape[0]-1,
            non_sequences=[B],
        )
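
        # The product of the recorded scale factors equals P(x) (note that the
        # initial alpha, pi*B[:,0], is left unnormalized, which is what makes
        # this identity hold), so the cost below, -sum(log(scale)), is the
        # negative log-likelihood of the sequence.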
        cost = -T.log(scale).sum()
        self.cost_op = theano.function(
            inputs=[thx],
            outputs=cost,
        )
        return thx, cost

    def log_likelihood_multi(self, X):
        # cost_op returns the negative log-likelihood, so negate it here
        return np.array([-self.cost_op(x) for x in X])


def real_signal():
    spf = wave.open('helloworld.wav', 'r')

    # Extract raw audio from the wav file.
    # If you right-click on the file and go to "Get Info", you can see:
    # sampling rate = 16000 Hz
    # bits per sample = 16
    # The first is quantization in time
    # The second is quantization in amplitude
    # We also do this for images!
    # 2^16 = 65536 is how many different sound levels we have
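    # (so one second of this recording is 16000 samples, each a 16-bit integer)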
    signal = spf.readframes(-1)
    signal = np.frombuffer(signal, dtype=np.int16) # np.fromstring is deprecated
    Tn = len(signal) # number of samples (avoid shadowing theano.tensor as T)
    signal = (signal - signal.mean()) / signal.std()

    hmm = HMM(3, 3)
    # signal needs to be of shape N x T(n) x D
    hmm.fit(signal.reshape(1, Tn, 1), learning_rate=2e-7, max_iter=20)


def fake_signal():
    signals = get_signals()
    hmm = HMM(5, 3)
    hmm.fit(signals, max_iter=3)
    L = hmm.log_likelihood_multi(signals).sum()
    print("LL for fitted params:", L)

    # test with the actual params
    _, _, _, pi, A, R, mu, sigma = big_init()

    # turn these into their "pre-softmax" forms
    pi = np.log(pi)
    A = np.log(A)
    R = np.log(R)
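    # (taking the log works because softmax(log(p)) = p / p.sum(), which is p
    #  itself when p is already a valid distribution)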

    # decompose sigma using Cholesky factorization, since sigmaFactor is the
    # matrix L such that sigma = L L^T
    sigma = np.linalg.cholesky(sigma)

    hmm.set(pi, A, R, mu, sigma)
    L = hmm.log_likelihood_multi(signals).sum()
    print("LL for actual params:", L)


if __name__ == '__main__':
    # real_signal()
    fake_signal()

hmm_class/hmmd_tf.py

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@ def __init__(self, M):
    def set_session(self, session):
        self.session = session

-    def fit(self, X, learning_rate=0.001, max_iter=10, print_period=1):
+    def fit(self, X, max_iter=10, print_period=1):
        # train the HMM model using stochastic gradient descent

        N = len(X)

hmm_class/hmmd_theano2.py

Lines changed: 150 additions & 0 deletions
@@ -0,0 +1,150 @@
# https://deeplearningcourses.com/c/unsupervised-machine-learning-hidden-markov-models-in-python
# https://udemy.com/unsupervised-machine-learning-hidden-markov-models-in-python
# http://lazyprogrammer.me
# Discrete Hidden Markov Model (HMM) in Theano using gradient descent.

# This script differs from hmmd_theano.py in the following way:
# Instead of re-normalizing the parameters at each iteration,
# we make the parameters free to vary between -inf and +inf.
# We then use softmax to ensure the probabilities are positive and sum to 1.
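# For example (illustrative only), with M = 2 an unconstrained z = [0, 2]
# maps to softmax(z) = [1, e^2] / (1 + e^2), roughly [0.12, 0.88].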


from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future


import numpy as np
import theano
import theano.tensor as T
import matplotlib.pyplot as plt


class HMM:
    def __init__(self, M):
        self.M = M # number of hidden states

    def fit(self, X, learning_rate=0.001, max_iter=10, V=None, print_period=1):
        # train the HMM model using stochastic gradient descent
        # print "X to train:", X

        # determine V, the vocabulary size
        # assume observables are already integers from 0..V-1
        # X is a jagged array of observed sequences
        if V is None:
            V = max(max(x) for x in X) + 1
        N = len(X)
        print("number of train samples:", N)

        preSoftmaxPi0 = np.zeros(self.M) # initial state distribution
        preSoftmaxA0 = np.random.randn(self.M, self.M) # state transition matrix
        preSoftmaxB0 = np.random.randn(self.M, V) # output distribution

        thx, cost = self.set(preSoftmaxPi0, preSoftmaxA0, preSoftmaxB0)

        pi_update = self.preSoftmaxPi - learning_rate*T.grad(cost, self.preSoftmaxPi)
        A_update = self.preSoftmaxA - learning_rate*T.grad(cost, self.preSoftmaxA)
        B_update = self.preSoftmaxB - learning_rate*T.grad(cost, self.preSoftmaxB)

        updates = [
            (self.preSoftmaxPi, pi_update),
            (self.preSoftmaxA, A_update),
            (self.preSoftmaxB, B_update),
        ]

        train_op = theano.function(
            inputs=[thx],
            updates=updates,
            allow_input_downcast=True,
        )

        costs = []
        for it in range(max_iter):
            if it % print_period == 0:
                print("it:", it)

            for n in range(N):
                # this would of course be much faster if we didn't do this on
                # every iteration of the loop
                c = self.get_cost_multi(X).sum()
                costs.append(c)
                train_op(X[n])

        # print "A:", self.A.get_value()
        # print "B:", self.B.get_value()
        # print "pi:", self.pi.get_value()
        plt.plot(costs)
        plt.show()

    def get_cost(self, x):
        # returns -log P(x | model), i.e. the cost,
        # using the forward part of the forward-backward algorithm
        # print "getting cost for:", x
        return self.cost_op(x)

    def log_likelihood(self, x):
        return -self.cost_op(x)

    def get_cost_multi(self, X):
        return np.array([self.get_cost(x) for x in X])

    def set(self, preSoftmaxPi, preSoftmaxA, preSoftmaxB):
        self.preSoftmaxPi = theano.shared(preSoftmaxPi)
        self.preSoftmaxA = theano.shared(preSoftmaxA)
        self.preSoftmaxB = theano.shared(preSoftmaxB)

        pi = T.nnet.softmax(self.preSoftmaxPi).flatten()
        # softmax returns 1xD if input is a 1-D array of size D
        A = T.nnet.softmax(self.preSoftmaxA)
        B = T.nnet.softmax(self.preSoftmaxB)

        # define cost
        thx = T.ivector('thx')
        def recurrence(t, old_a, x):
            a = old_a.dot(A) * B[:, x[t]]
            s = a.sum()
            return (a / s), s

        [alpha, scale], _ = theano.scan(
            fn=recurrence,
            sequences=T.arange(1, thx.shape[0]),
            outputs_info=[pi*B[:,thx[0]], None],
            n_steps=thx.shape[0]-1,
            non_sequences=thx,
        )
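
        # As in hmmc_theano2.py, the product of the scale factors equals P(x),
        # so -sum(log(scale)) below is the negative log-likelihood.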
        cost = -T.log(scale).sum()
        self.cost_op = theano.function(
            inputs=[thx],
            outputs=cost,
            allow_input_downcast=True,
        )
        return thx, cost


def fit_coin():
    X = []
    for line in open('coin_data.txt'):
        # 1 for H, 0 for T
        x = [1 if e == 'H' else 0 for e in line.rstrip()]
        X.append(x)

    hmm = HMM(2)
    hmm.fit(X)
    # get_cost_multi returns negative log-likelihoods, so negate to report the LL
    L = -hmm.get_cost_multi(X).sum()
    print("LL with fitted params:", L)

    # try true values
    # remember these must be in their "pre-softmax" forms
    pi = np.log( np.array([0.5, 0.5]) )
    A = np.log( np.array([[0.1, 0.9], [0.8, 0.2]]) )
    B = np.log( np.array([[0.6, 0.4], [0.3, 0.7]]) )
    hmm.set(pi, A, B)
    L = -hmm.get_cost_multi(X).sum()
    print("LL with true params:", L)


if __name__ == '__main__':
    fit_coin()
