 from __future__ import division
 import numpy as np
 from scipy.special import gammaln
-from scipy.misc import comb
+from scipy.misc import comb, logsumexp
 from decorator import decorator


 def _dynamicProgramming(f, *args, **kwargs):
+    if f.data is None:
+        f.data = args[0]
+
+    if not np.array_equal(f.data, args[0]):
+        f.cache = {}
+        f.data = args[0]
+
     try:
         f.cache[args[1:3]]
     except KeyError:
@@ -15,10 +22,13 @@ def _dynamicProgramming(f, *args, **kwargs):

 def dynamicProgramming(f):
     f.cache = {}
+    f.data = None
     return decorator(_dynamicProgramming, f)


-def offline_changepoint_detection(data, prior_func, observation_log_likelihood_function, P=None, truncate=-np.inf):
+def offline_changepoint_detection(data, prior_func,
+                                  observation_log_likelihood_function,
+                                  truncate=-np.inf):
     """Compute the likelihood of changepoints on data.

     Keyword arguments:
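
Side note on the caching changes above (not part of the patch): the new f.data handling makes the memoization decorator drop its cache whenever a different data array is passed in. A generic sketch of that pattern, with illustrative names only:

import numpy as np

def cached_per_data(f):
    # memoize results per (t, s) key, invalidating when the data array changes
    cache = {}
    state = {"data": None}

    def wrapper(data, t, s):
        if state["data"] is None or not np.array_equal(state["data"], data):
            cache.clear()
            state["data"] = data
        key = (t, s)
        if key not in cache:
            cache[key] = f(data, t, s)
        return cache[key]

    return wrapper

@cached_per_data
def mean_of_segment(data, t, s):
    return data[t:s].mean()

x = np.random.randn(100)
mean_of_segment(x, 0, 10)      # computed
mean_of_segment(x, 0, 10)      # served from the cache
mean_of_segment(x * 2, 0, 10)  # different data -> cache cleared, recomputed
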
@@ -34,10 +44,8 @@ def offline_changepoint_detection(data, prior_func, observation_log_likelihood_f
     Q = np.zeros((n,))
     g = np.zeros((n,))
     G = np.zeros((n,))
-
-    if P is None:
-        P = np.ones((n,n))  # a log_likelihood won't become one
-
+    P = np.ones((n, n)) * -np.inf
+
     # save everything in log representation
     for t in range(n):
         g[t] = np.log(prior_func(t))
@@ -46,28 +54,24 @@ def offline_changepoint_detection(data, prior_func, observation_log_likelihood_f
         else:
             G[t] = np.logaddexp(G[t-1], g[t])

-    if P[n-1, n-1] == 1:
-        P[n-1, n-1] = observation_log_likelihood_function(data, n-1, n)
-
+    P[n-1, n-1] = observation_log_likelihood_function(data, n-1, n)
     Q[n-1] = P[n-1, n-1]

     for t in reversed(range(n-1)):
         P_next_cp = -np.inf  # == -log(0)
         for s in range(t, n-1):
-            # don't recompute log likelihoods already saved
-            if P[t, s] == 1:
-                P[t, s] = observation_log_likelihood_function(data, t, s+1)
-
+            P[t, s] = observation_log_likelihood_function(data, t, s+1)
+
             # compute recursion
             summand = P[t, s] + Q[s + 1] + g[s + 1 - t]
             P_next_cp = np.logaddexp(P_next_cp, summand)
-
-            # truncate sum to become approx. linear in time (see Fearnhead, 2006, eq. (3))
+
+            # truncate sum to become approx. linear in time (see
+            # Fearnhead, 2006, eq. (3))
             if summand - P_next_cp < truncate:
                 break
-
-        if P[t, n-1] == 1:
-            P[t, n-1] = observation_log_likelihood_function(data, t, n)
+
+        P[t, n-1] = observation_log_likelihood_function(data, t, n)

         # (1 - G) is numerical stable until G becomes numerically 1
         if G[n-1-t] < -1e-15:  # exp(-1e-15) = .99999...
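
Standalone illustration of the truncation test above, with made-up values: once a new summand falls far enough below the running log-sum, it no longer changes the total in any meaningful way, so the inner loop over s can stop early.

import numpy as np

P_next_cp = 0.0          # running log-sum, == log(1)
summand = np.log(1e-20)  # a vanishingly small new contribution
truncate = -40           # log-space threshold (made-up value)

P_next_cp = np.logaddexp(P_next_cp, summand)
# the update changed the running sum by ~1e-20, i.e. not at all in practice
print(summand - P_next_cp < truncate)  # True -> the loop would break here
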
@@ -80,21 +84,17 @@ def offline_changepoint_detection(data, prior_func, observation_log_likelihood_f

     Pcp = np.ones((n-1, n-1)) * -np.inf
     for t in range(n-1):
-        if P[0, t] == 1:
-            P[0, t] = observation_log_likelihood_function(data, 0, t+1)
-        Pcp[0, t] = P[0, t] + Q[t + 1] + g[t] - Q[0]
+        Pcp[0, t] = P[0, t] + Q[t + 1] + g[t] - Q[0]
     for j in range(1, n-1):
         for t in range(j, n-1):
-            for tau in range(j-1, t):
-                if P[tau+1, t] == 1:
-                    P[tau+1, t] = observation_log_likelihood_function(data, tau+1, t+1)
-                tmp_cond = Pcp[j-1, tau] + P[tau+1, t] + Q[t + 1] + g[t - tau] - Q[tau + 1]
-                Pcp[j, t] = np.logaddexp(Pcp[j, t], tmp_cond)
+            tmp_cond = Pcp[j-1, j-1:t] + P[j:t+1, t] + Q[t + 1] + g[0:t-j+1] - Q[j:t+1]
+            Pcp[j, t] = logsumexp(tmp_cond)

     return Q, P, Pcp

 @dynamicProgramming
 def gaussian_obs_log_likelihood(data, t, s):
+    s += 1
     n = s - t
     mean = data[t:s].sum(0) / n

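
Standalone note on the logsumexp change above: a single logsumexp over a vector equals an iterated np.logaddexp accumulation, which is what lets the explicit tau loop be replaced by slicing plus one reduction. A minimal check (the patch imports logsumexp from scipy.misc, as in older SciPy; current SciPy exposes it in scipy.special):

import numpy as np
from scipy.special import logsumexp

terms = np.log(np.array([0.1, 0.25, 0.4]))

acc = -np.inf                   # == log(0)
for x in terms:
    acc = np.logaddexp(acc, x)  # running log-sum, as in the old inner loop

assert np.allclose(acc, logsumexp(terms))  # both equal log(0.75)
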
@@ -104,8 +104,8 @@ def gaussian_obs_log_likelihood(data, t, s):
     betaT = 1 + 0.5 * ((data[t:s] - mean) ** 2).sum(0) + ((n)/(1 + n)) * (mean ** 2 / 2)
     scale = (betaT*(nuT + 1))/(alphaT * nuT)

-    # splitting the PDF of the student distribution up is /much/ faster. (~ factor 20)
-    # using sum over for loop is even more worthwhile
+    # splitting the PDF of the student distribution up is /much/ faster.
+    # (~ factor 20) using sum over for loop is even more worthwhile
     prob = np.sum(np.log(1 + (data[t:s] - muT)**2 / (nuT * scale)))
     lgA = gammaln((nuT + 1) / 2) - np.log(np.sqrt(np.pi * nuT * scale)) - gammaln(nuT/2)

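
Standalone check of the comment above about splitting up the Student-t PDF: the lgA and prob terms reassemble the Student-t log-pdf as computed by scipy.stats. The parameter values below are made up purely for the check.

import numpy as np
from scipy.stats import t as student_t
from scipy.special import gammaln

x = np.array([0.3, -1.2, 0.7])
muT, nuT, alphaT, betaT = 0.1, 4.0, 2.5, 1.8
scale = (betaT*(nuT + 1))/(alphaT * nuT)

prob = np.log(1 + (x - muT)**2 / (nuT * scale))
lgA = gammaln((nuT + 1) / 2) - np.log(np.sqrt(np.pi * nuT * scale)) - gammaln(nuT/2)

# lgA - (nuT + 1)/2 * prob is the pointwise Student-t log-density
assert np.allclose(lgA - (nuT + 1) / 2 * prob,
                   student_t.logpdf(x, nuT, loc=muT, scale=np.sqrt(scale)))
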
@@ -115,8 +115,10 @@ def gaussian_obs_log_likelihood(data, t, s):
 def const_prior(r, l):
     return 1/(l)

+
 def geometric_prior(t, p):
     return p * ((1 - p) ** (t - 1))

+
 def neg_binominal_prior(t, k, p):
     return comb(t - k, k - 1) * p ** k * (1 - p) ** (t - k)
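
For reference, a minimal usage sketch of the patched API, assuming the functions above live in an importable module (the module name below is hypothetical) and binding the constant prior with functools.partial:

from functools import partial
import numpy as np
import offline_changepoint_detection as offcd  # hypothetical import path

# toy data with a mean shift halfway through
data = np.concatenate([np.random.normal(0, 1, 50),
                       np.random.normal(5, 1, 50)])

Q, P, Pcp = offcd.offline_changepoint_detection(
    data,
    partial(offcd.const_prior, l=len(data) + 1),  # constant changepoint prior
    offcd.gaussian_obs_log_likelihood,
    truncate=-40)

# Pcp[j, t] is (roughly) the log probability that the j-th changepoint falls
# at position t; summing exp(Pcp) over j gives a per-position changepoint
# probability.
cp_prob = np.exp(Pcp).sum(0)
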