|
| 1 | +# From the course: Bayesin Machine Learning in Python: A/B Testing |
| 2 | +# https://www.udemy.com/bayesian-machine-learning-in-python-ab-testing |
| 3 | +import numpy as np |
| 4 | +import matplotlib.pyplot as plt |
| 5 | +from scipy.stats import chi2, chi2_contingency |
| 6 | + |
| 7 | +# contingency table |
| 8 | +# click no click |
| 9 | +#------------------------------ |
| 10 | +# ad A | a b |
| 11 | +# ad B | c d |
| 12 | +# |
| 13 | +# chi^2 = (ad - bc)^2 (a + b + c + d) / [ (a + b)(c + d)(a + c)(b + d)] |
| 14 | +# degrees of freedom = (#cols - 1) x (#rows - 1) = (2 - 1)(2 - 1) = 1 |
| 15 | + |
| 16 | +# short example |
| 17 | + |
| 18 | +# T = np.array([[36, 14], [30, 25]]) |
| 19 | +# c2 = np.linalg.det(T)**2 * T.sum() / ( T[0].sum()*T[1].sum()*T[:,0].sum()*T[:,1].sum() ) |
| 20 | +# p_value = 1 - chi2.cdf(x=c2, df=1) |
| 21 | + |
| 22 | +# equivalent: |
| 23 | +# (36-31.429)**2/31.429+(14-18.571)**2/18.571 + (30-34.571)**2/34.571 + (25-20.429)**2/20.429 |
| 24 | + |
| 25 | + |
| 26 | +class DataGenerator: |
| 27 | + def __init__(self, p1, p2): |
| 28 | + self.p1 = p1 |
| 29 | + self.p2 = p2 |
| 30 | + |
| 31 | + def next(self): |
| 32 | + click1 = 1 if (np.random.random() < self.p1) else 0 |
| 33 | + click2 = 1 if (np.random.random() < self.p2) else 0 |
| 34 | + return click1, click2 |
| 35 | + |
| 36 | + |
| 37 | +def get_p_value(T): |
| 38 | + # same as scipy.stats.chi2_contingency(T, correction=False) |
| 39 | + det = T[0,0]*T[1,1] - T[0,1]*T[1,0] |
| 40 | + c2 = float(det) / T[0].sum() * det / T[1].sum() * T.sum() / T[:,0].sum() / T[:,1].sum() |
| 41 | + p = 1 - chi2.cdf(x=c2, df=1) |
| 42 | + return p |
| 43 | + |
| 44 | + |
| 45 | +def run_experiment(p1, p2, N): |
| 46 | + data = DataGenerator(p1, p2) |
| 47 | + p_values = np.empty(N) |
| 48 | + T = np.zeros((2, 2)).astype(np.float32) |
| 49 | + for i in xrange(N): |
| 50 | + c1, c2 = data.next() |
| 51 | + T[0,c1] += 1 |
| 52 | + T[1,c2] += 1 |
| 53 | + # ignore the first 10 values |
| 54 | + if i < 10: |
| 55 | + p_values[i] = None |
| 56 | + else: |
| 57 | + p_values[i] = get_p_value(T) |
| 58 | + plt.plot(p_values) |
| 59 | + plt.plot(np.ones(N)*0.05) |
| 60 | + plt.show() |
| 61 | + |
| 62 | +run_experiment(0.1, 0.11, 20000) |
0 commit comments