forked from jmcmahan/LinVer-Matlab
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Distance matrix energy test and MCMC tweaks
- Added the distance-matrix-based energy distance code - Tweaked the proposal matrix initialization of the MCMC algorithm - Acceptance rate check passed out of MCMC routine - Beta fit with uniform prior and uncorrelated noise tested and appears to work
- Loading branch information
Showing
8 changed files
with
184 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
function e = EnSt2DistMat(ind1, ind2, dmat) | ||
% Compute the energy distance statistic using the pre-computed distance | ||
% matrix. The "ind1" and "ind2" should be permutations of indices of the | ||
% pooled sample, each of the size of the two samples being tested. That is, | ||
% X has some size N1, Y has some size N2, the pooled sample has some size | ||
% N = N1 + N2, so ind1 is some permutation of 1:N of size N1, and ind2 are | ||
% the remaining indices not chosen. | ||
|
||
n1 = length(ind1); | ||
n2 = length(ind2); | ||
|
||
N = n1 + n2; | ||
|
||
diff1 = 0; | ||
diff2 = 0; | ||
diff3 = 0; | ||
|
||
for i = 1:n1 | ||
for j = 1:n2 | ||
diff1 = diff1 + dist_from_dmat(ind1(i), ind2(j), dmat, N); | ||
end | ||
end | ||
|
||
for i = 1:n1 | ||
for j = 1:n1 | ||
diff2 = diff2 + dist_from_dmat(ind1(i), ind1(j), dmat, N); | ||
end | ||
end | ||
|
||
for i = 1:n2 | ||
for j = 1:n2 | ||
diff3 = diff3 + dist_from_dmat(ind2(i), ind2(j), dmat, N); | ||
end | ||
end | ||
|
||
e = (diff1*2/(n1*n2) - diff2/n1^2 - diff3/n2^2) * n1*n2 / N; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
function d = dist_from_dmat(i, j, dmat, N) | ||
% Grab the distance from distance matrix "dmat", which is a vector storing | ||
% the triangular elements of the pairwise distance matrix. | ||
|
||
% Always have i < j | ||
if j > i | ||
k = i; | ||
i = j; | ||
j = k; | ||
end | ||
|
||
|
||
% This formula just converts the i,j index into the vector format we've | ||
% used. | ||
|
||
d = dmat(i + (j-1)*N - (j-1)*j/2); | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
function [dmat] = dist_matrix(X, Y) | ||
% Compute the distance matrix for pooled samples X and Y for use | ||
% with the energy distance code. dmat is a vector holding the | ||
% triangular portions of the distance matrix. | ||
% X is d by n1, Y is d by n2, with d being the dimension, n1,n2 being | ||
% the number of samples. | ||
|
||
[d, N1] = size(X); | ||
N2 = size(Y,2); | ||
|
||
N = N1 + N2; | ||
|
||
dmat = zeros((N+1)*N/2, 1); | ||
|
||
Z = [X, Y]; | ||
k = 1; | ||
for i = 1:N | ||
for j=i:N | ||
dmat(k) = norm(Z(:,i) - Z(:,j)); | ||
k = k + 1; | ||
end | ||
end | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
function sample = draw_posterior_sample(param, post, M) | ||
% draw_posterior_sample(param, post, N) - Draw N samples from the posterior | ||
% defined by post for the problem defined by param. | ||
|
||
if strcmp(param.unknowns, 'beta') | ||
L = chol(post.sigma2 / param.lambda); | ||
sample = zeros(M, param.Nbeta); | ||
for j = 1:M | ||
sample(j, :) = post.mu2' + randn(1, param.Nbeta)*L; | ||
end | ||
elseif strcmp(param.unknowns, 'beta_lambda') | ||
elseif strcmp(param.unknowns, 'beta_lambda_phi') | ||
end | ||
|
||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
function [reject, pval, estat, B] = energy_dist_test_dmat(s1, s2, alpha, B) | ||
% Do the two-sample energy statistic test for equality of distributions. | ||
% s1 = d-by-N1 set of N1 samples of d-dimensional variables | ||
% s2 = d-by-N2 set of N2 samples of d-dimensional variables | ||
% alpha = significance level to use. The hypothesis that the distributions | ||
% are equal is rejected if more than 100*(1-alpha) percent of | ||
% the permutation samples from the pooled data are smaller than | ||
% the observed value. | ||
% B = number of permutation trials to use for approximating the | ||
% distribution of the test statistic. If not provided, is | ||
% automatically computed from alpha. | ||
% | ||
% The test compares the data in s1 to the data in s2 to see if the samples | ||
% appear to come from the same distribution. | ||
% | ||
% reject = True if rejected at alpha significance level, False otherwise. | ||
% i.e., if true, the data supports the notion that s1 and s2 are | ||
% from different distributions at significance level alpha. | ||
% If false, it is undetermined. | ||
% pval = ratio of permutation samples with energy distance greater than or | ||
% equal to the observed energy distance (includes the observed). | ||
% estat = vector of energy statistics for the B+1 permutation samples | ||
% (includes the non-permuted s1, s2) | ||
|
||
|
||
if nargin < 4 | ||
B = 5*ceil(1/alpha)-1; | ||
end | ||
|
||
if 1/(B+1) >= alpha | ||
disp('Warning: Choice of B may not be appropriate for desired signifcance, alpha'); | ||
end | ||
|
||
% Note, need the same d for s1 and s2. | ||
[d, n1] = size(s1); | ||
[d, n2] = size(s2); | ||
|
||
dmat = dist_matrix(s1, s2); | ||
|
||
N = n1+n2; | ||
|
||
pool = [s1, s2]; | ||
|
||
estat = zeros(B+1, 1); | ||
%eobs = EnSt2(s1, s2); | ||
id1 = 1:n1; | ||
id2 = (n1+1):(n1+n2); | ||
eobs = EnSt2DistMat(id1, id2, dmat); | ||
estat(1) = eobs; | ||
|
||
for j = 2:B+1 | ||
|
||
%idx = randsample(N, N); | ||
idx = randperm(N); | ||
%x = pool(:, idx(1:n1)); | ||
%y = pool(:, idx(n1+1:end)); | ||
%estat(j) = EnSt2(x, y); | ||
|
||
% The distance-matrix-based version used the permuted indices of the pooled | ||
% sample, not the samples themselves. | ||
x = idx(1:n1); | ||
y = idx(n1+1:end); | ||
estat(j) = EnSt2DistMat(x, y, dmat); | ||
end | ||
|
||
pval = sum(eobs < estat) / (B+1); | ||
|
||
if alpha > pval | ||
reject = true; | ||
else | ||
reject = false; | ||
end | ||
|
||
end | ||
|