|
| 1 | +import numpy as np |
| 2 | +from scipy.stats import dirichlet |
| 3 | +from scipy.special import psi, polygamma, gammaln |
| 4 | + |
eps = 1e-100               # tiny offset added to observations so log() never sees an exact zero
max_iter = 10              # default number of fixed-point / Newton iterations
converge_criteria = 0.001  # stop when sum(alpha) moves by less than this between updates
| 8 | + |
| 9 | +def parameter_estimation(theta, old_alpha): |
| 10 | + """Estimating a dirichlet parameter given a set of multinomial parameters. |
| 11 | + |
| 12 | + Parameters |
| 13 | + ---------- |
| 14 | +
|
| 15 | + theta: a set of multinomial, N x K matrix (N = # of observation, K = dimension of dirichlet) |
| 16 | + old_alpha: initial guess on the dirichlet parameter (K-dim) |
| 17 | + """ |
| 18 | + |
| 19 | + log_p_bar = np.mean(np.log(theta), 0) #sufficient statistics |
| 20 | + |
| 21 | + for j in xrange(max_iter): |
| 22 | + digamma_alpha = psi(np.sum(old_alpha)) + log_p_bar |
| 23 | + old_alpha = np.exp(digamma_alpha) + 0.5 |
| 24 | + old_alpha[old_alpha<0.6] = 1.0/(- digamma_alpha[old_alpha<0.6] + psi(1.)) |
| 25 | + |
| 26 | + for i in xrange(max_iter): |
| 27 | + new_alpha = old_alpha - (psi(old_alpha)-digamma_alpha)/(polygamma(1,old_alpha)) |
| 28 | + old_alpha = new_alpha |
| 29 | + |
| 30 | + return new_alpha |
| 31 | + |
def collapsed_parameter_estimation(z, alpha, max_iter=30, tol=0.001):
    """Estimate a Dirichlet parameter in the collapsed sampling environment of Dir-Mult.

    Uses Minka's fixed-point update for the Polya (Dirichlet-multinomial)
    likelihood, updating one component at a time in random order until the
    total concentration stops moving by more than ``tol``, or ``max_iter``
    sweeps have been made.

    Parameters
    ----------
    z : ndarray
        Assignment counts, N x K matrix.
    alpha : ndarray
        K-dim initial guess on the Dirichlet parameter; updated IN PLACE
        and also returned.
    max_iter : int, optional
        Maximum number of full sweeps over the K components (default 30,
        same as the original hard-coded value).
    tol : float, optional
        Convergence threshold on the change of sum(alpha) (default
        matches the module-level converge_criteria of 0.001).

    Returns
    -------
    ndarray
        The (in-place updated) K-dim parameter estimate.
    """
    N, K = z.shape
    z_sum = np.sum(z, 1)  # per-observation total counts, used in the denominator

    converge = False
    cur_iter = 0

    # BUG FIX: the original condition was
    #     while not converge or (cur_iter <= max_iter):
    # which never stops on the iteration cap when convergence is not
    # reached (an infinite loop for non-converging inputs). It must be `and`.
    while not converge and cur_iter <= max_iter:

        # Random update order decorrelates the coordinate-wise sweeps.
        for ti in np.random.permutation(K):
            alpha_sum = np.sum(alpha)

            # Minka's fixed-point ratio, vectorized over the N observations
            # (psi accepts arrays; this replaces the original Python loop).
            numerator = np.sum(psi(z[:, ti] + alpha[ti]) - psi(alpha[ti]))
            denominator = np.sum(psi(z_sum + alpha_sum) - psi(alpha_sum))

            # Guard: with all-zero counts the denominator is 0 and the
            # original code silently produced NaN; skip this component.
            if denominator == 0:
                continue

            old_val = alpha[ti]
            alpha[ti] = alpha[ti] * (numerator / denominator)

            # Reject a non-positive update (e.g. from zero counts in column ti).
            if alpha[ti] <= 0:
                alpha[ti] = old_val

            # Original convergence criterion: stop as soon as a single
            # component update barely moves the total concentration.
            new_sum = np.sum(alpha)
            if np.abs(alpha_sum - new_sum) < tol:
                converge = True
                break

        cur_iter += 1

    return alpha
| 79 | + |
def test_parameter_estimation():
    """Smoke test: estimate alpha from Dirichlet draws and compare log-likelihoods.

    Draws N observations from a ground-truth Dirichlet, fits alpha with
    parameter_estimation, and prints the data log-likelihood under the
    true, initial, and estimated parameters.
    """
    N = 100  # number of observations
    K = 50   # dimension of the Dirichlet

    # Ground-truth alpha: random total concentration times a random
    # direction on the simplex.
    _alpha = np.random.gamma(1, 1) * np.random.dirichlet([1.] * K)

    # Draw N samples from Dir(_alpha); eps keeps log() finite on exact
    # zeros, then renormalize each row to compensate for the added eps.
    obs = np.random.dirichlet(_alpha, size=N) + eps
    obs /= np.sum(obs, 1)[:, np.newaxis]

    initial_alpha = np.ones(K)  # first guess on alpha

    est_alpha = parameter_estimation(obs, initial_alpha)

    g_ll = 0  # log-likelihood under the ground-truth parameter
    b_ll = 0  # log-likelihood under the initial guess
    ll = 0    # log-likelihood under the estimated parameter
    for i in range(N):
        g_ll += dirichlet.logpdf(obs[i], _alpha)
        b_ll += dirichlet.logpdf(obs[i], initial_alpha)
        ll += dirichlet.logpdf(obs[i], est_alpha)

    # Parenthesized single-argument print works under both Python 2 and 3
    # (the original py2-only `print x` statements broke on Python 3).
    print('Test with parameter estimation')
    print('likelihood p(obs|_alpha) = %.3f' % g_ll)
    print('likelihood p(obs|initial_alpha) = %.3f' % b_ll)
    print('likelihood p(obs|estimate_alpha) = %.3f' % ll)
    print('likelihood difference = %.3f' % (g_ll - ll))
| 107 | + |
| 108 | + |
def test_collapsed_parameter_estimation():
    """Smoke test: estimate alpha from count data and compare Polya log-likelihoods.

    Generates N rows of multinomial counts whose probabilities are drawn
    from a ground-truth Dirichlet, fits alpha with
    collapsed_parameter_estimation, and prints the Dirichlet-multinomial
    log-likelihood under the true and estimated parameters.
    """
    N = 100  # number of observations
    K = 50   # dimension of the Dirichlet
    W = 200  # number of multinomial trials for each observation

    # Ground-truth alpha: random scale times a random simplex direction.
    _alpha = np.random.gamma(1, 1) * np.random.dirichlet([1.] * K)

    # Each row: counts from Mult(W, theta) with theta ~ Dir(_alpha).
    obs = np.zeros([N, K])
    for i in range(N):
        theta = np.random.dirichlet(_alpha)
        obs[i] = np.random.multinomial(W, theta)

    alpha = np.ones(K)
    alpha = collapsed_parameter_estimation(obs, alpha)

    # Dirichlet-multinomial (Polya) log-likelihood, up to the multinomial
    # coefficient (constant in alpha, so it cancels in the comparison).
    g_ll = 0
    ll = 0
    for i in range(N):
        g_ll += gammaln(np.sum(_alpha)) - np.sum(gammaln(_alpha)) \
            + np.sum(gammaln(obs[i] + _alpha)) - gammaln(np.sum(obs[i] + _alpha))
        ll += gammaln(np.sum(alpha)) - np.sum(gammaln(alpha)) \
            + np.sum(gammaln(obs[i] + alpha)) - gammaln(np.sum(obs[i] + alpha))

    # Parenthesized single-argument print works under both Python 2 and 3.
    print('Test with collapsed sampling optimizer')
    print('likelihood p(obs|_alpha) = %.3f' % g_ll)
    print('likelihood p(obs|alpha) = %.3f' % ll)
    print('likelihood difference = %.3f' % (g_ll - ll))
| 136 | + |
| 137 | + |
if __name__ == '__main__':
    # Run both smoke tests; each prints log-likelihood comparisons.
    test_parameter_estimation()
    test_collapsed_parameter_estimation()
0 commit comments