|
defmodule Statistics.Tests.TTest do
  @moduledoc """
  Student's t test.

  Provides a one-sample test against a given population mean and an
  independent two-sample test that assumes equal variances. Both
  functions return a map holding the _t_ statistic and the two-sided
  _p_ value.
  """

  import Statistics
  import Statistics.Math
  alias Statistics.Distributions.T

  @typedoc "Result of a t test: the _t_ statistic and its two-sided _p_ value."
  @type result :: %{t: float(), p: number()}

  # NOTE(review): the example values documented below are consistent with
  # `stdev/1` and `variance/1` being population (divide-by-n) estimators,
  # whereas the textbook t statistic uses the sample (n - 1) estimate.
  # The numeric behaviour is deliberately left unchanged here; confirm
  # against the `Statistics` module which one is intended.

  @doc """
  A two-sided test for the null hypothesis that the
  expected value (mean) of a sample of independent
  observations `list` is equal to the given population mean, `popmean`.

  The test uses `length(list) - 1` degrees of freedom.

  Returns a map with the _t_ statistic under `:t` and the _p_ value
  under `:p`.

  Raises `ArithmeticError` if the standard error evaluates to zero
  (for example, if `stdev/1` returns zero for the sample).

  ## Example

      iex> Statistics.Tests.TTest.one_sample([1,2,3,2,1], 3)
      %{p: 0.023206570788795993, t: -3.585685828003181}

  """
  @spec one_sample([number()], number()) :: result()
  def one_sample(list, popmean) do
    # length/1 walks the whole list, so compute it only once.
    n = length(list)
    df = n - 1
    t = (mean(list) - popmean) / (stdev(list) / sqrt(n))
    %{t: t, p: get_t_prob(t, df)}
  end

  @doc """
  A two-sided test for the null hypothesis that the
  mean of `list1` is equal to the mean of `list2`.

  The variance of the lists should be equal but the
  sample size of each list can be different.

  The test uses `length(list1) + length(list2) - 2` degrees of freedom.

  Returns a map with the _t_ statistic under `:t` and the _p_ value
  under `:p`.

  Raises `ArithmeticError` if the pooled standard error evaluates to zero.

  ## Example

      iex> Statistics.Tests.TTest.ind_samples([1,2,3,2,1], [3,2,4,3,5])
      %{p: 0.022802155958137702, t: -2.82842712474619}

      iex> Statistics.Tests.TTest.ind_samples([1,2,3,2,1], [3,2,4,3,5,4,5,6])
      %{p: 0.0044530673387188, t: -3.5858542135407596}

  """
  @spec ind_samples([number()], [number()]) :: result()
  def ind_samples(list1, list2) do
    # length/1 walks the whole list, so compute each size only once.
    n1 = length(list1)
    n2 = length(list2)
    df = n1 + n2 - 2

    {sp, sz} = pooled_terms(list1, list2, n1, n2, df)

    t = (mean(list1) - mean(list2)) / (sp * sz)
    %{t: t, p: get_t_prob(t, df)}
  end

  # Returns `{sp, sz}`: the pooled standard deviation and the
  # sample-size factor whose product is the denominator of the
  # two-sample t statistic.
  #
  # First clause: both samples have the same size (the repeated `n`
  # in the head only matches equal sizes), so the variances are
  # averaged without weighting.
  defp pooled_terms(list1, list2, n, n, _df) do
    sp = sqrt((variance(list1) + variance(list2)) / 2)
    sz = sqrt(2 / n)
    {sp, sz}
  end

  # Second clause: unequal sizes, so each variance is weighted by its
  # sample size minus one before pooling over the degrees of freedom.
  defp pooled_terms(list1, list2, n1, n2, df) do
    adj_var1 = (n1 - 1) * variance(list1)
    adj_var2 = (n2 - 1) * variance(list2)
    sp = sqrt((adj_var1 + adj_var2) / df)
    sz = sqrt(1 / n1 + 1 / n2)
    {sp, sz}
  end

  # Converts a t statistic into a two-sided p value.
  #
  # `T.cdf(df)` returns a function which is applied to `t`; presumably
  # it is the cumulative distribution function of the t distribution
  # with `df` degrees of freedom.
  defp get_t_prob(t, df) do
    c = T.cdf(df).(t)

    # Take the tail on the same side as the statistic.
    tail = if t < 0.0, do: c, else: 1 - c

    # Two-sided test: double the tail, capping the result at 1.0.
    if tail < 0.5, do: 2 * tail, else: 1.0
  end
end
0 commit comments