Commit 0da3fb1

bayesian ab testing
1 parent eff49a4 commit 0da3fb1

5 files changed: +210 -0 lines changed

ab_testing/bayesian_bandit.py

+67
@@ -0,0 +1,67 @@
# From the course: Bayesian Machine Learning in Python: A/B Testing
# https://www.udemy.com/bayesian-machine-learning-in-python-ab-testing
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import beta


NUM_TRIALS = 2000
BANDIT_PROBABILITIES = [0.2, 0.5, 0.75]


class Bandit(object):
    def __init__(self, p):
        self.p = p  # true win rate
        self.a = 1  # Beta prior parameters: Beta(1, 1) is the uniform prior
        self.b = 1

    def pull(self):
        # simulate one play: win with probability p
        return np.random.random() < self.p

    def sample(self):
        # draw from the current Beta(a, b) posterior
        return np.random.beta(self.a, self.b)

    def update(self, x):
        # conjugate update: a success increments a, a failure increments b
        self.a += x
        self.b += 1 - x


def plot(bandits, trial):
    x = np.linspace(0, 1, 200)
    for b in bandits:
        y = beta.pdf(x, b.a, b.b)
        plt.plot(x, y, label="real p: %.4f" % b.p)
    plt.title("Bandit distributions after %s trials" % trial)
    plt.legend()
    plt.show()


def experiment():
    bandits = [Bandit(p) for p in BANDIT_PROBABILITIES]

    sample_points = [5, 10, 20, 50, 100, 200, 500, 1000, 1500, 1999]
    for i in range(NUM_TRIALS):

        # take a sample from each bandit
        bestb = None
        maxsample = -1
        allsamples = []  # collect these just to print for debugging
        for b in bandits:
            sample = b.sample()
            allsamples.append("%.4f" % sample)
            if sample > maxsample:
                maxsample = sample
                bestb = b
        if i in sample_points:
            print("current samples: %s" % allsamples)
            plot(bandits, i)

        # pull the arm for the bandit with the largest sample
        x = bestb.pull()

        # update the distribution for the bandit whose arm we just pulled
        bestb.update(x)


if __name__ == "__main__":
    experiment()
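
The Bandit class above is driven entirely by the conjugate Beta-Bernoulli update. As a minimal sketch (not part of the commit), one can feed a single Bandit a few hand-picked outcomes and inspect the posterior it ends up with; the outcome list is made up for illustration, and the script is assumed to sit next to bayesian_bandit.py:

# hand-feed a few made-up outcomes to one Bandit and inspect its Beta posterior
from bayesian_bandit import Bandit

bandit = Bandit(0.5)                  # the true rate only matters for pull()
for outcome in [1, 0, 1, 1, 0, 1]:    # pretend results of six pulls
    bandit.update(outcome)            # success: a += 1, failure: b += 1

# the posterior is Beta(a, b); its mean is a / (a + b)
print("a =", bandit.a, "b =", bandit.b)
print("posterior mean: %.3f" % (bandit.a / (bandit.a + bandit.b)))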

ab_testing/chisquare.py

+62
@@ -0,0 +1,62 @@
# From the course: Bayesian Machine Learning in Python: A/B Testing
# https://www.udemy.com/bayesian-machine-learning-in-python-ab-testing
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import chi2, chi2_contingency

# contingency table
#              click   no click
# ------------------------------
# ad A |         a        b
# ad B |         c        d
#
# chi^2 = (ad - bc)^2 (a + b + c + d) / [ (a + b)(c + d)(a + c)(b + d) ]
# degrees of freedom = (#cols - 1) x (#rows - 1) = (2 - 1)(2 - 1) = 1

# short example

# T = np.array([[36, 14], [30, 25]])
# c2 = np.linalg.det(T)**2 * T.sum() / ( T[0].sum()*T[1].sum()*T[:,0].sum()*T[:,1].sum() )
# p_value = 1 - chi2.cdf(x=c2, df=1)

# equivalent:
# (36-31.429)**2/31.429 + (14-18.571)**2/18.571 + (30-34.571)**2/34.571 + (25-20.429)**2/20.429


class DataGenerator:
    def __init__(self, p1, p2):
        self.p1 = p1
        self.p2 = p2

    def next(self):
        # generate one click outcome per ad
        click1 = 1 if (np.random.random() < self.p1) else 0
        click2 = 1 if (np.random.random() < self.p2) else 0
        return click1, click2


def get_p_value(T):
    # same as scipy.stats.chi2_contingency(T, correction=False)
    det = T[0,0]*T[1,1] - T[0,1]*T[1,0]
    c2 = float(det) / T[0].sum() * det / T[1].sum() * T.sum() / T[:,0].sum() / T[:,1].sum()
    p = 1 - chi2.cdf(x=c2, df=1)
    return p


def run_experiment(p1, p2, N):
    data = DataGenerator(p1, p2)
    p_values = np.empty(N)
    T = np.zeros((2, 2)).astype(np.float32)
    for i in range(N):
        c1, c2 = data.next()
        # update the contingency table with this round's outcomes
        T[0,c1] += 1
        T[1,c2] += 1
        # ignore the first 10 values
        if i < 10:
            p_values[i] = None
        else:
            p_values[i] = get_p_value(T)
    plt.plot(p_values)
    plt.plot(np.ones(N)*0.05)
    plt.show()


run_experiment(0.1, 0.11, 20000)
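
The "short example" in the comments can be checked directly against SciPy. Below is a minimal sketch (not part of the commit) that evaluates the 2x2 formula on that table and compares it with chi2_contingency with the Yates correction turned off, which is what get_p_value is meant to match:

# verify the 2x2 chi-square formula from the comments against SciPy
import numpy as np
from scipy.stats import chi2, chi2_contingency

T = np.array([[36, 14], [30, 25]], dtype=float)
det = T[0, 0] * T[1, 1] - T[0, 1] * T[1, 0]   # ad - bc
c2 = det**2 * T.sum() / (T[0].sum() * T[1].sum() * T[:, 0].sum() * T[:, 1].sum())
p_manual = 1 - chi2.cdf(c2, df=1)

c2_scipy, p_scipy, dof, expected = chi2_contingency(T, correction=False)
print("manual:", c2, p_manual)    # should match the SciPy values below
print("scipy: ", c2_scipy, p_scipy)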

ab_testing/convergence.py

+33
@@ -0,0 +1,33 @@
# From the course: Bayesian Machine Learning in Python: A/B Testing
# https://www.udemy.com/bayesian-machine-learning-in-python-ab-testing
import matplotlib.pyplot as plt
import numpy as np
from bayesian_bandit import Bandit


def run_experiment(p1, p2, p3, N):
    bandits = [Bandit(p1), Bandit(p2), Bandit(p3)]

    data = np.empty(N)

    for i in range(N):
        # Thompson sampling: play the bandit with the largest posterior sample
        j = np.argmax([b.sample() for b in bandits])
        x = bandits[j].pull()
        bandits[j].update(x)

        # for the plot
        data[i] = x
    cumulative_average_ctr = np.cumsum(data) / (np.arange(N) + 1)

    # plot moving average ctr
    plt.plot(cumulative_average_ctr)
    plt.plot(np.ones(N)*p1)
    plt.plot(np.ones(N)*p2)
    plt.plot(np.ones(N)*p3)
    plt.ylim((0, 1))
    plt.xscale('log')
    plt.show()


run_experiment(0.2, 0.25, 0.3, 100000)
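
run_experiment only plots the cumulative click-through rate, so it does not show how the pulls are allocated. As a small sketch (not in this commit), a hypothetical count_pulls helper reusing the same Bandit class makes the allocation visible; with Thompson sampling, most pulls should end up on the highest-probability arm:

# count how often Thompson sampling plays each arm
# (hypothetical helper, reusing the Bandit class from bayesian_bandit.py)
import numpy as np
from bayesian_bandit import Bandit

def count_pulls(probabilities, N):
    bandits = [Bandit(p) for p in probabilities]
    counts = np.zeros(len(bandits), dtype=int)
    for _ in range(N):
        j = np.argmax([b.sample() for b in bandits])  # Thompson sampling step
        x = bandits[j].pull()
        bandits[j].update(x)
        counts[j] += 1
    return counts

print(count_pulls([0.2, 0.25, 0.3], 10000))  # most pulls should go to the 0.3 arm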

ab_testing/demo.py

+26
@@ -0,0 +1,26 @@
# From the course: Bayesian Machine Learning in Python: A/B Testing
# https://www.udemy.com/bayesian-machine-learning-in-python-ab-testing
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import beta

def plot(a, b, trial, ctr):
    x = np.linspace(0, 1, 200)
    y = beta.pdf(x, a, b)
    mean = float(a) / (a + b)
    plt.plot(x, y)
    plt.title("Distributions after %s trials, true rate = %.1f, mean = %.2f" % (trial, ctr, mean))
    plt.show()

true_ctr = 0.3
a, b = 1, 1  # beta parameters (uniform prior)
show = [0, 5, 10, 25, 50, 100, 200, 300, 500, 700, 1000, 1500]
for t in range(1501):
    coin_toss_result = (np.random.random() < true_ctr)
    if coin_toss_result:
        a += 1
    else:
        b += 1

    if t in show:
        plot(a, b, t+1, true_ctr)
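
Besides the posterior mean shown in the plot title, the same Beta(a, b) posterior gives an interval estimate (a credible interval) in closed form. A minimal sketch (not part of the commit), with illustrative counts chosen to match true_ctr = 0.3:

# a 95% interval from a Beta(a, b) posterior (illustrative counts: 450 of 1500)
from scipy.stats import beta

a, b = 1 + 450, 1 + 1050              # prior Beta(1, 1) plus made-up outcomes
lower, upper = beta.ppf([0.025, 0.975], a, b)
print("posterior mean: %.3f" % (a / (a + b)))
print("95%% credible interval: (%.3f, %.3f)" % (lower, upper))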

ab_testing/ttest.py

+22
@@ -0,0 +1,22 @@
# From the course: Bayesian Machine Learning in Python: A/B Testing
# https://www.udemy.com/bayesian-machine-learning-in-python-ab-testing
import numpy as np
from scipy import stats

# generate data
N = 10
a = np.random.randn(N) + 2  # mean 2, variance 1
b = np.random.randn(N)      # mean 0, variance 1

# roll your own t-test:
var_a = a.var(ddof=1)  # unbiased estimator, divide by N-1 instead of N
var_b = b.var(ddof=1)
s = np.sqrt( (var_a + var_b) / 2 )  # pooled standard deviation (equal sample sizes)
t = (a.mean() - b.mean()) / (s * np.sqrt(2.0/N))  # t-statistic
df = 2*N - 2  # degrees of freedom
p = 1 - stats.t.cdf(t, df=df)  # one-sided test p-value
print("t:\t", t, "p:\t", 2*p)  # two-sided test p-value

# built-in t-test:
t2, p2 = stats.ttest_ind(a, b)
print("t2:\t", t2, "p2:\t", p2)
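
The roll-your-own statistic above pools the two sample variances, which matches the default equal_var=True behaviour of stats.ttest_ind. As a brief sketch (not in the commit), Welch's variant drops the equal-variance assumption:

# Welch's t-test: same comparison without assuming equal variances
import numpy as np
from scipy import stats

N = 10
a = np.random.randn(N) + 2   # mean 2, variance 1
b = np.random.randn(N)       # mean 0, variance 1
t3, p3 = stats.ttest_ind(a, b, equal_var=False)
print("Welch t:", t3, "p:", p3)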
