forked from lazyprogrammer/machine_learning_examples
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathoptimistic_starter.py
71 lines (52 loc) · 1.77 KB
/
optimistic_starter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# From the course: Bayesian Machine Learning in Python: A/B Testing
# https://deeplearningcourses.com/c/bayesian-machine-learning-in-python-ab-testing
# https://www.udemy.com/bayesian-machine-learning-in-python-ab-testing
from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future
import matplotlib.pyplot as plt
import numpy as np
# Number of arm pulls performed in one run of experiment().
NUM_TRIALS = 10000
# NOTE(review): EPS is not referenced anywhere in the visible code; presumably
# a leftover from an epsilon-greedy variant -- confirm before removing.
EPS = 0.1
# True win rate of each bandit; experiment() builds one Bandit per entry.
BANDIT_PROBABILITIES = [0.2, 0.5, 0.75]
class Bandit:
    """A Bernoulli slot-machine arm tracked with an optimistic initial value.

    The running estimate of the win rate starts at ``initial_estimate``,
    which should be larger than any reward the arm can actually pay out
    (rewards here are 0 or 1).  A purely greedy agent is then forced to try
    every arm: each pull drags that arm's inflated estimate down toward its
    true mean, so under-explored arms keep looking attractive.
    """

    def __init__(self, p, initial_estimate=5.0):
        """Create an arm.

        Args:
            p: the true win rate, i.e. the probability that pull() pays 1.
            initial_estimate: optimistic starting value for the estimated
                win rate; must exceed the maximum possible reward for the
                optimism to drive exploration.
        """
        # p: the win rate
        self.p = p
        self.p_estimate = float(initial_estimate)
        # The optimistic value is counted as one pseudo-sample, so N starts
        # at 1.  Starting at 0 would make the first real reward overwrite
        # the optimistic estimate completely and defeat its purpose.
        self.N = 1

    def pull(self):
        """Draw a reward: truthy (1) with probability ``p``, else falsy (0)."""
        # draw a 1 with probability p
        return np.random.random() < self.p

    def update(self, x):
        """Fold the observed reward ``x`` into the running mean estimate.

        Args:
            x: the reward obtained from the latest pull (0/1 or bool).
        """
        self.N += 1
        # Incremental mean: new = old + (x - old) / N, which is algebraically
        # the same as ((N - 1) * old + x) / N.  float() keeps the arithmetic
        # well defined when x is a NumPy boolean.
        self.p_estimate += (float(x) - self.p_estimate) / self.N
def experiment():
    """Run a greedy multi-armed bandit experiment with optimistic initial values.

    Builds one Bandit per entry of BANDIT_PROBABILITIES, performs NUM_TRIALS
    pulls while always choosing the arm with the highest current estimate,
    prints summary statistics, and plots the running win rate against the
    best achievable win rate.
    """
    bandits = [Bandit(p) for p in BANDIT_PROBABILITIES]

    rewards = np.zeros(NUM_TRIALS)
    for i in range(NUM_TRIALS):
        # use optimistic initial values to select the next bandit:
        # purely greedy choice -- exploration comes from the inflated
        # starting estimates, not from random arm selection.
        j = np.argmax([b.p_estimate for b in bandits])

        # pull the arm for the bandit with the largest estimate
        x = bandits[j].pull()

        # update rewards log
        rewards[i] = x

        # update the estimate for the bandit whose arm we just pulled
        bandits[j].update(x)

    # print mean estimates for each bandit
    for b in bandits:
        print("mean estimate:", b.p_estimate)

    # print total reward
    print("total reward earned:", rewards.sum())
    print("overall win rate:", rewards.sum() / NUM_TRIALS)
    print("num times selected each bandit:", [b.N for b in bandits])

    # plot the results: running win rate after each trial, with a flat
    # reference line at the best true win rate among the bandits
    cumulative_rewards = np.cumsum(rewards)
    win_rates = cumulative_rewards / (np.arange(NUM_TRIALS) + 1)
    plt.ylim([0, 1])
    plt.plot(win_rates)
    plt.plot(np.ones(NUM_TRIALS)*np.max(BANDIT_PROBABILITIES))
    plt.show()
# Run the experiment only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    experiment()