forked from AllenDowney/BayesMadeSimple
-
Notifications
You must be signed in to change notification settings - Fork 0
/
volunteer.py
147 lines (107 loc) · 3.58 KB
/
volunteer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
"""This file contains code used in "Think Stats",
by Allen B. Downey, available from greenteapress.com
Copyright 2013 Allen B. Downey
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
"""
from __future__ import print_function, division
import thinkbayes
import thinkplot
import numpy
"""
Problem: students sign up to participate in a community service
project. Some fraction, q, of the students who sign up actually
participate, and of those some fraction, r, report back.
Given a sample of students who sign up and the number who report
back, we can estimate the product q*r, but don't learn much about
q and r separately.
If we can get a smaller sample of students where we know who
participated and who reported, we can use that to improve the
estimates of q and r.
And we can use that to compute the posterior distribution of the
number of students who participated.
"""
def _binomial_kernel(p, yes, no):
    """Computes the binomial likelihood of a set of yes/no outcomes.

    The binomial coefficient is omitted: it does not depend on p, so it
    is the same for every hypothesis and does not change their relative
    likelihoods.

    p: probability of a single "yes"
    yes: number of "yes" outcomes
    no: number of "no" outcomes

    returns: p**yes * (1-p)**no

    raises: ValueError if either count is negative, which happens when
            a sub-count exceeds the count it was drawn from
    """
    if yes < 0 or no < 0:
        # a negative exponent would give likelihoods above 1, or a
        # division by zero when p is exactly 0 or 1
        raise ValueError(
            'counts are inconsistent: yes=%r, no=%r' % (yes, no))
    return p**yes * (1-p)**no


class Volunteer(thinkbayes.Suite):
    """Suite of hypotheses about the pair (q, r).

    q is the fraction of students who sign up that participate;
    r is the fraction of participants who report back.
    """

    def Likelihood(self, data, hypo):
        """Computes the likelihood of the data under the hypothesis.

        Dispatches on the length of data.

        hypo: pair of (q, r)
        data: either (signed up, reported) or
              (signed up, participated, reported)

        raises: ValueError if data has neither 2 nor 3 elements
        """
        if len(data) == 2:
            return self.Likelihood1(data, hypo)
        if len(data) == 3:
            return self.Likelihood2(data, hypo)
        raise ValueError(
            'data must have 2 or 3 elements, got %d' % len(data))

    def Likelihood1(self, data, hypo):
        """Computes the likelihood of the data under the hypothesis.

        Only the number who reported is known, so the data inform the
        product q*r: a student reports only if they both participate
        and report back.

        hypo: pair of (q, r)
        data: tuple (signed up, reported)
        """
        q, r = hypo
        signed_up, reported = data
        return _binomial_kernel(q * r, reported, signed_up - reported)

    def Likelihood2(self, data, hypo):
        """Computes the likelihood of the data under the hypothesis.

        Participation and reporting are both observed, so q and r are
        each scored with their own binomial term.

        hypo: pair of (q, r)
        data: tuple (signed up, participated, reported)
        """
        q, r = hypo
        signed_up, participated, reported = data

        # participation among those who signed up depends on q only
        like_q = _binomial_kernel(q, participated, signed_up - participated)

        # reporting among those who participated depends on r only
        like_r = _binomial_kernel(r, reported, participated - reported)

        return like_q * like_r
def MarginalDistribution(suite, index):
    """Builds the marginal distribution of a single parameter.

    Walks every (hypothesis, probability) pair in the suite and adds
    the probability to the entry for the hypothesis's value at the
    given position.

    suite: Suite whose hypotheses can be indexed by position
    index: position of the parameter to keep

    returns: Pmf
    """
    marginal = thinkbayes.Pmf()
    for hypo, weight in suite.Items():
        value = hypo[index]
        marginal.Incr(value, weight)
    return marginal
def MarginalProduct(suite):
    """Builds the distribution of the product of the two parameters.

    Walks every (hypothesis, probability) pair in the suite and adds
    the probability to the entry for q*r.

    suite: Suite whose hypotheses are pairs (q, r)

    returns: Pmf
    """
    product_pmf = thinkbayes.Pmf()
    for hypo, weight in suite.Items():
        q, r = hypo
        product_pmf.Incr(q * r, weight)
    return product_pmf
def main():
    """Updates a Volunteer suite with both samples and plots q and r.

    The hypotheses are every pair (q, r) on a grid of 101 evenly
    spaced values from 0 to 1.  The marginal distributions of q and r
    are drawn with thinkplot and saved under the root name
    'volunteer1' in png format.
    """
    grid = numpy.linspace(0, 1, 101)
    hypos = [(q, r) for q in grid for r in grid]
    suite = Volunteer(hypos)

    # larger sample: only the number who signed up and the number who
    # reported are known
    suite.Update((140, 50))

    # smaller sample: signed up, participated, and reported are all
    # known
    suite.Update((5, 3, 1))

    # compute both marginals before plotting either one
    # (the product q*r is available from MarginalProduct(suite) and
    # could be passed to thinkplot.Pmf in the same way)
    marginals = [MarginalDistribution(suite, i) for i in (0, 1)]
    for pmf, name in zip(marginals, ('q', 'r')):
        thinkplot.Pmf(pmf, label=name)

    save_options = dict(
        root='volunteer1',
        xlabel='fraction participating/reporting',
        ylabel='PMF',
        formats=['png'],
    )
    thinkplot.Save(**save_options)
# run the analysis only when this file is executed as a script, so
# that importing it has no side effects
if __name__ == '__main__':
    main()