forked from jmcmahan/LinVer-Matlab
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added code for the energy statistic test
- Loading branch information
Showing
2 changed files
with
113 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
function [e] = EnSt2(X, Y) | ||
%EnSt2 Energy statistic for samples x and y | ||
% Computes the energy statistic for samples X and Y which are | ||
% d by n1 and d by n2 matrices, respectively containing n1, n2 | ||
% observations of the d-dimensional variables. | ||
|
||
[d, N1] = size(X); | ||
N2 = size(Y,2); | ||
n = N1 + N2; | ||
|
||
% Implementation will always assume x has more samples | ||
if N1 < N2 | ||
x = Y; | ||
n1 = N2; | ||
y = X; | ||
n2 = N1; | ||
else | ||
x = X; | ||
n1 = N1; | ||
y = Y; | ||
n2 = N2; | ||
end | ||
|
||
|
||
diff1 = 0; | ||
diff2 = 0; | ||
diff3 = 0; | ||
% All pairwise subtractions implemented by circularly shifting | ||
for j = 1 : n1 | ||
xs = circshift(x, [0 j]); | ||
diff1 = diff1 + sumdists(xs(:,1:n2), y); | ||
diff2 = diff2 + sumdists(xs, x); | ||
end | ||
|
||
for j = 1 : n2 | ||
ys = circshift(y, [0 j]); | ||
diff3 = diff3 + sumdists(ys, y); | ||
end | ||
|
||
e = (diff1*2/(n1*n2) - diff2/n1^2 - diff3/n2^2) * n1*n2 / n; | ||
|
||
end | ||
|
||
|
||
|
||
function v = sumdists(x, y) | ||
% Computes the sum of the distances between the given sets of | ||
% observations. The number of observations (i.e., number of columns in | ||
% this code) must be equal. | ||
v = sum(sqrt(sum(abs(x - y).^2,1))); | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
function [reject, pval, estat, B] = energy_dist_test(s1, s2, alpha, B) | ||
% Do the two-sample energy statistic test for equality of distributions. | ||
% s1 = d-by-N1 set of N1 samples of d-dimensional variables | ||
% s2 = d-by-N2 set of N2 samples of d-dimensional variables | ||
% alpha = significance level to use. The hypothesis that the distributions | ||
% are equal is rejected if more than 100*(1-alpha) percent of | ||
% the permutation samples from the pooled data are smaller than | ||
% the observed value. | ||
% B = number of permutation trials to use for approximating the | ||
% distribution of the test statistic. If not provided, is | ||
% automatically computed from alpha. | ||
% | ||
% The test compares the data in s1 to the data in s2 to see if the samples | ||
% appear to come from the same distribution. | ||
% | ||
% reject = True if rejected at alpha significance level, False otherwise. | ||
% i.e., if true, the data supports the notion that s1 and s2 are | ||
% from different distributions at significance level alpha. | ||
% If false, it is undetermined. | ||
% pval = ratio of permutation samples with energy distance greater than or | ||
% equal to the observed energy distance (includes the observed). | ||
% estat = vector of energy statistics for the B+1 permutation samples | ||
% (includes the non-permuted s1, s2) | ||
|
||
|
||
if nargin < 4 | ||
B = 5*ceil(1/alpha)-1; | ||
end | ||
|
||
if 1/(B+1) >= alpha | ||
disp('Warning: Choice of B may not be appropriate for desired signifcance, alpha'); | ||
end | ||
|
||
% Note, need the same d for s1 and s2. | ||
[d, n1] = size(s1); | ||
[d, n2] = size(s2); | ||
|
||
N = n1+n2; | ||
|
||
pool = [s1, s2]; | ||
|
||
estat = zeros(B+1, 1); | ||
eobs = EnSt2(s1, s2); | ||
estat(1) = eobs; | ||
|
||
for j = 2:B+1 | ||
idx = randsample(N, N); | ||
x = pool(:, idx(1:n1)); | ||
y = pool(:, idx(n1+1:end)); | ||
estat(j) = EnSt2(x, y); | ||
end | ||
|
||
pval = sum(eobs < estat) / (B+1); | ||
|
||
if alpha > pval | ||
reject = true; | ||
else | ||
reject = false; | ||
end | ||
|
||
end | ||
|