
Commit 63771d2

Merge pull request hildensia#5 from hildensia/mixture_observations

    Mixture observations

2 parents f18009b + e4a67dc

File tree: 5 files changed, +173 −136 lines

    bayesian_changepoint_detection/cy_offline.pyx (deleted, −123)
    bayesian_changepoint_detection/generate_data.py (new, +45)
    bayesian_changepoint_detection/offline_changepoint_detection.py (+51 −13)
    example.py (new, +43)
    xuan_motivating_example.py (new, +34)

bayesian_changepoint_detection/cy_offline.pyx (deleted, −123 lines)

bayesian_changepoint_detection/generate_data.py (new file, +45 lines)
@@ -0,0 +1,45 @@
+from __future__ import division
+import numpy as np
+
+def generate_normal_time_series(num, minl=50, maxl=1000):
+    data = np.array([], dtype=np.float64)
+    partition = np.random.randint(minl, maxl, num)
+    for p in partition:
+        mean = np.random.randn()*10
+        var = np.random.randn()*1
+        if var < 0:
+            var = var * -1
+        tdata = np.random.normal(mean, var, p)
+        data = np.concatenate((data, tdata))
+    return partition, np.atleast_2d(data).T
+
+def generate_multinormal_time_series(num, dim, minl=50, maxl=1000):
+    data = np.empty((1, dim), dtype=np.float64)
+    partition = np.random.randint(minl, maxl, num)
+    for p in partition:
+        mean = np.random.standard_normal(dim)*10
+        # Generate a random SPD matrix
+        A = np.random.standard_normal((dim, dim))
+        var = np.dot(A, A.T)
+
+        tdata = np.random.multivariate_normal(mean, var, p)
+        data = np.concatenate((data, tdata))
+    return partition, data[1:, :]
+
+def generate_xuan_motivating_example(minl=50, maxl=1000):
+    dim = 2
+    num = 3
+    partition = np.random.randint(minl, maxl, num)
+    mu = np.zeros(dim)
+    Sigma1 = np.asarray([[1.0, 0.75], [0.75, 1.0]])
+    data = np.random.multivariate_normal(mu, Sigma1, partition[0])
+    Sigma2 = np.asarray([[1.0, 0.0], [0.0, 1.0]])
+    data = np.concatenate((data, np.random.multivariate_normal(mu, Sigma2, partition[1])))
+    Sigma3 = np.asarray([[1.0, -0.75], [-0.75, 1.0]])
+    data = np.concatenate((data, np.random.multivariate_normal(mu, Sigma3, partition[2])))
+    return partition, data
+
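
These generators feed the example scripts further down in this diff. As a minimal sketch (not part of the commit) of calling one directly — the seed is an illustrative assumption for reproducibility:

    import numpy as np
    import bayesian_changepoint_detection.generate_data as gd

    np.random.seed(0)  # illustrative only, not in the commit
    partition, data = gd.generate_multinormal_time_series(num=3, dim=2, minl=50, maxl=100)
    print(partition)    # three segment lengths drawn from [50, 100)
    print(data.shape)   # (partition.sum(), 2)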

bayesian_changepoint_detection/offline_changepoint_detection.py (+51 −13 lines)
@@ -1,6 +1,6 @@
 from __future__ import division
 import numpy as np
-from scipy.special import gammaln
+from scipy.special import gammaln, multigammaln
 from scipy.misc import comb
 from decorator import decorator

@@ -39,12 +39,13 @@ def offline_changepoint_detection(data, prior_func,
     """Compute the likelihood of changepoints on data.

     Keyword arguments:
-    data -- the time series data
-    prior_func -- a function given the likelihood of a changepoint given the
-        distance to the last one
+    data -- the time series data
+    prior_func -- a function giving the prior likelihood of a changepoint given the distance to the last one
     observation_log_likelihood_function -- a function giving the log likelihood
         of a data part
-    P -- the likelihoods if pre-computed
+    truncate -- the cutoff probability 10^truncate below which computation of a changepoint's log likelihood is stopped
+
+    P -- the likelihoods if pre-computed
     """

     n = len(data)
@@ -65,9 +66,9 @@ def offline_changepoint_detection(data, prior_func,
     Q[n-1] = P[n-1, n-1]

     for t in reversed(range(n-1)):
-        P_next_cp = -np.inf  # == -log(0)
+        P_next_cp = -np.inf  # == log(0)
         for s in range(t, n-1):
-            P[t, s] = observation_log_likelihood_function(data, t, s + 1)
+            P[t, s] = observation_log_likelihood_function(data, t, s+1)

             # compute recursion
             summand = P[t, s] + Q[s + 1] + g[s + 1 - t]
@@ -82,7 +83,7 @@ def offline_changepoint_detection(data, prior_func,

         # (1 - G) is numerically stable until G becomes numerically 1
         if G[n-1-t] < -1e-15:  # exp(-1e-15) = .99999...
-            antiG = np.log(1 - np.exp(G[n-1-t]))
+            antiG = np.log(1 - np.exp(G[n-1-t]))
         else:
             # (1 - G) is approx. -log(G) for G close to 1
             antiG = np.log(-G[n-1-t])
@@ -108,28 +109,65 @@ def gaussian_obs_log_likelihood(data, t, s):
     s += 1
     n = s - t
     mean = data[t:s].sum(0) / n
-
+
     muT = (n * mean) / (1 + n)
     nuT = 1 + n
     alphaT = 1 + n / 2
     betaT = 1 + 0.5 * ((data[t:s] - mean) ** 2).sum(0) + ((n)/(1 + n)) * (mean**2 / 2)
     scale = (betaT*(nuT + 1))/(alphaT * nuT)
-
+
     # splitting the PDF of the Student's t distribution up is /much/ faster.
     # (~ factor 20) using sum over for loop is even more worthwhile
     prob = np.sum(np.log(1 + (data[t:s] - muT)**2/(nuT * scale)))
     lgA = gammaln((nuT + 1) / 2) - np.log(np.sqrt(np.pi * nuT * scale)) - gammaln(nuT/2)
-
+
     return np.sum(n * lgA - (nuT + 1)/2 * prob)

+def ifm_obs_log_likelihood(data, t, s):
+    '''Independent Features model from Xuan et al.'''
+    s += 1
+    n = s - t
+    x = data[t:s]
+    if len(x.shape) == 2:
+        d = x.shape[1]
+    else:
+        d = 1
+        x = np.atleast_2d(x).T
+
+    N0 = d  # weakest prior we can use and still have a proper prior
+    V0 = np.var(x)
+    Vn = V0 + (x**2).sum(0)
+
+    # sum over dimensions and return (section 3.1 of the Xuan paper):
+    return d*(-(n/2)*np.log(np.pi) + (N0/2)*np.log(V0) -
+              gammaln(N0/2) + gammaln((N0+n)/2)) - \
+        (((N0+n)/2)*np.log(Vn)).sum(0)
+
+def fullcov_obs_log_likelihood(data, t, s):
+    '''Full Covariance model from Xuan et al.'''
+    s += 1
+    n = s - t
+    x = data[t:s]
+    if len(x.shape) == 2:
+        dim = x.shape[1]
+    else:
+        dim = 1
+        x = np.atleast_2d(x).T
+
+    N0 = dim  # weakest prior we can use and still have a proper prior
+    V0 = np.var(x)*np.eye(dim)
+    Vn = V0 + np.array([np.outer(x[i], x[i].T) for i in range(x.shape[0])]).sum(0)
+
+    # section 3.2 of the Xuan paper:
+    return -(dim*n/2)*np.log(np.pi) + (N0/2)*np.linalg.slogdet(V0)[1] - \
+        multigammaln(N0/2, dim) + multigammaln((N0+n)/2, dim) - \
+        ((N0+n)/2)*np.linalg.slogdet(Vn)[1]

 def const_prior(r, l):
     return 1/(l)

-
 def geometric_prior(t, p):
     return p * ((1 - p) ** (t - 1))

-
 def neg_binominal_prior(t, k, p):
     return comb(t - k, k - 1) * p ** k * (1 - p) ** (t - k)
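
The two new likelihood models plug into offline_changepoint_detection interchangeably with the existing Gaussian one. A minimal usage sketch (not part of the commit; the synthetic two-segment series is an illustrative assumption), following the call pattern of the example.py script added below:

    import numpy as np
    from functools import partial
    import bayesian_changepoint_detection.offline_changepoint_detection as offcd

    # two segments with different means, as a column vector
    data = np.atleast_2d(np.concatenate([np.random.randn(60),
                                         np.random.randn(60) + 5])).T

    Q, P, Pcp = offcd.offline_changepoint_detection(
        data,
        partial(offcd.const_prior, l=len(data) + 1),  # uniform prior over distances
        offcd.ifm_obs_log_likelihood,                 # or fullcov_obs_log_likelihood
        truncate=-20)                                 # drop summands below 10**-20

    print(np.exp(Pcp).sum(0).argmax())  # index with the highest changepoint probability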

example.py (new file, +43 lines)
@@ -0,0 +1,43 @@
+from __future__ import division
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn
+
+import cProfile
+import bayesian_changepoint_detection.offline_changepoint_detection as offcd
+import bayesian_changepoint_detection.generate_data as gd
+from functools import partial
+
+if __name__ == '__main__':
+    show_plot = True
+    dim = 4
+    if dim == 1:
+        partition, data = gd.generate_normal_time_series(7, 50, 200)
+    else:
+        partition, data = gd.generate_multinormal_time_series(7, dim, 50, 200)
+    changes = np.cumsum(partition)
+
+    if show_plot:
+        fig, ax = plt.subplots(figsize=[16, 12])
+        for p in changes:
+            ax.plot([p, p], [np.min(data), np.max(data)], 'r')
+        for d in range(dim):
+            ax.plot(data[:, d])
+        plt.show()
+
+    #Q, P, Pcp = offcd.offline_changepoint_detection(data, partial(offcd.const_prior, l=(len(data)+1)), offcd.gaussian_obs_log_likelihood, truncate=-20)
+    #Q_ifm, P_ifm, Pcp_ifm = offcd.offline_changepoint_detection(data, partial(offcd.const_prior, l=(len(data)+1)), offcd.ifm_obs_log_likelihood, truncate=-20)
+    Q_full, P_full, Pcp_full = offcd.offline_changepoint_detection(data, partial(offcd.const_prior, l=(len(data)+1)), offcd.fullcov_obs_log_likelihood, truncate=-50)
+
+    if show_plot:
+        fig, ax = plt.subplots(figsize=[18, 16])
+        ax = fig.add_subplot(2, 1, 1)
+        for p in changes:
+            ax.plot([p, p], [np.min(data), np.max(data)], 'r')
+        for d in range(dim):
+            ax.plot(data[:, d])
+        ax = fig.add_subplot(2, 1, 2, sharex=ax)
+        ax.plot(np.exp(Pcp_full).sum(0))
+        plt.show()
+
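
The plotted quantity np.exp(Pcp_full).sum(0) is the marginal probability of a changepoint at each time index. A hypothetical post-processing step (the 0.5 threshold is an illustrative assumption, not from the commit):

    probs = np.exp(Pcp_full).sum(0)
    detected = np.where(probs > 0.5)[0]  # arbitrary illustrative threshold
    print(detected)  # indices flagged as changepoints; compare against `changes`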

xuan_motivating_example.py (new file, +34 lines)
@@ -0,0 +1,34 @@
+'''Example from Xiang Xuan's thesis, section 3.2'''
+from __future__ import division
+import numpy as np
+import matplotlib.pyplot as plt
+
+import bayesian_changepoint_detection.offline_changepoint_detection as offcd
+import bayesian_changepoint_detection.generate_data as gd
+from functools import partial
+
+if __name__ == '__main__':
+    show_plot = True
+
+    partition, data = gd.generate_xuan_motivating_example(50, 200)
+    changes = np.cumsum(partition)
+
+    Q_ifm, P_ifm, Pcp_ifm = offcd.offline_changepoint_detection(data, partial(offcd.const_prior, l=(len(data)+1)), offcd.ifm_obs_log_likelihood, truncate=-20)
+    Q_full, P_full, Pcp_full = offcd.offline_changepoint_detection(data, partial(offcd.const_prior, l=(len(data)+1)), offcd.fullcov_obs_log_likelihood, truncate=-20)
+
+    if show_plot:
+        fig, ax = plt.subplots(figsize=[18, 16])
+        ax = fig.add_subplot(3, 1, 1)
+        for p in changes:
+            ax.plot([p, p], [np.min(data), np.max(data)], 'r')
+        for d in range(2):
+            ax.plot(data[:, d])
+        plt.legend(['Raw data with original changepoints'])
+        ax1 = fig.add_subplot(3, 1, 2, sharex=ax)
+        ax1.plot(np.exp(Pcp_ifm).sum(0))
+        plt.legend(['Independent Features Model'])
+        ax2 = fig.add_subplot(3, 1, 3, sharex=ax)
+        ax2.plot(np.exp(Pcp_full).sum(0))
+        plt.legend(['Full Covariance Model'])
+        plt.show()
+
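
Why this example motivates the full-covariance model: the three covariance matrices in generate_xuan_motivating_example (Sigma1, Sigma2, Sigma3) all have unit marginal variances and differ only in their cross-correlation (+0.75, 0, −0.75). A model that treats the two dimensions independently therefore sees essentially identical segments, while the full covariance model should place high changepoint probability at both segment boundaries.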
