Skip to content

Commit b1b38d4

Browse files
committed
added M/Z bias sims
1 parent db96bec commit b1b38d4

File tree

2 files changed

+328
-0
lines changed

2 files changed

+328
-0
lines changed

Chapter16.ipynb

Lines changed: 319 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,319 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# Chapter 16: Difficulties of Unconfoundedness in Observational Studies for Causal Effects"
8+
]
9+
},
10+
{
11+
"cell_type": "code",
12+
"execution_count": 1,
13+
"metadata": {},
14+
"outputs": [],
15+
"source": [
16+
"import numpy as np\n",
17+
"import pandas as pd\n",
18+
"import scipy as sp\n",
19+
"import statsmodels.api as sm\n",
20+
"import statsmodels.formula.api as smf\n",
21+
"# viz\n",
22+
"import matplotlib\n",
23+
"import matplotlib.pyplot as plt\n",
24+
"import seaborn as sns\n",
25+
"font = {'family' : 'IBM Plex Sans Condensed',\n",
26+
" 'weight' : 'normal',\n",
27+
" 'size' : 10}\n",
28+
"plt.rc('font', **font)\n",
29+
"plt.rcParams['figure.figsize'] = (10, 10)\n",
30+
"%matplotlib inline\n",
31+
"\n",
32+
"from utils import *\n"
33+
]
34+
},
35+
{
36+
"cell_type": "code",
37+
"execution_count": 2,
38+
"metadata": {},
39+
"outputs": [
40+
{
41+
"data": {
42+
"text/html": [
43+
"<div>\n",
44+
"<style scoped>\n",
45+
" .dataframe tbody tr th:only-of-type {\n",
46+
" vertical-align: middle;\n",
47+
" }\n",
48+
"\n",
49+
" .dataframe tbody tr th {\n",
50+
" vertical-align: top;\n",
51+
" }\n",
52+
"\n",
53+
" .dataframe thead th {\n",
54+
" text-align: right;\n",
55+
" }\n",
56+
"</style>\n",
57+
"<table border=\"1\" class=\"dataframe\">\n",
58+
" <thead>\n",
59+
" <tr style=\"text-align: right;\">\n",
60+
" <th></th>\n",
61+
" <th>U1</th>\n",
62+
" <th>U2</th>\n",
63+
" <th>X</th>\n",
64+
" <th>Y</th>\n",
65+
" </tr>\n",
66+
" </thead>\n",
67+
" <tbody>\n",
68+
" <tr>\n",
69+
" <th>0</th>\n",
70+
" <td>0.868786</td>\n",
71+
" <td>3.271211</td>\n",
72+
" <td>2.870715</td>\n",
73+
" <td>2.776858</td>\n",
74+
" </tr>\n",
75+
" <tr>\n",
76+
" <th>1</th>\n",
77+
" <td>0.102776</td>\n",
78+
" <td>-1.424613</td>\n",
79+
" <td>-0.343647</td>\n",
80+
" <td>-2.323463</td>\n",
81+
" </tr>\n",
82+
" <tr>\n",
83+
" <th>2</th>\n",
84+
" <td>-0.473300</td>\n",
85+
" <td>-0.808196</td>\n",
86+
" <td>-2.437951</td>\n",
87+
" <td>-0.117681</td>\n",
88+
" </tr>\n",
89+
" <tr>\n",
90+
" <th>3</th>\n",
91+
" <td>-0.524105</td>\n",
92+
" <td>-0.641949</td>\n",
93+
" <td>-0.149231</td>\n",
94+
" <td>-0.537228</td>\n",
95+
" </tr>\n",
96+
" <tr>\n",
97+
" <th>4</th>\n",
98+
" <td>-0.183823</td>\n",
99+
" <td>0.540470</td>\n",
100+
" <td>-0.029903</td>\n",
101+
" <td>1.374849</td>\n",
102+
" </tr>\n",
103+
" </tbody>\n",
104+
"</table>\n",
105+
"</div>"
106+
],
107+
"text/plain": [
108+
" U1 U2 X Y\n",
109+
"0 0.868786 3.271211 2.870715 2.776858\n",
110+
"1 0.102776 -1.424613 -0.343647 -2.323463\n",
111+
"2 -0.473300 -0.808196 -2.437951 -0.117681\n",
112+
"3 -0.524105 -0.641949 -0.149231 -0.537228\n",
113+
"4 -0.183823 0.540470 -0.029903 1.374849"
114+
]
115+
},
116+
"execution_count": 2,
117+
"metadata": {},
118+
"output_type": "execute_result"
119+
}
120+
],
121+
"source": [
122+
"n = int(1e6)\n",
123+
"df = simulate(\n",
124+
" U1 = lambda: np.random.normal(size = n),\n",
125+
" U2 = lambda: np.random.normal(size = n),\n",
126+
" X = lambda U1, U2: U1 + U2 + np.random.normal(size=n),\n",
127+
" Y = lambda U2: U2 + np.random.normal(size=n),\n",
128+
" )\n",
129+
"\n",
130+
"df.head()\n"
131+
]
132+
},
133+
{
134+
"cell_type": "markdown",
135+
"metadata": {},
136+
"source": [
137+
"## M-bias"
138+
]
139+
},
140+
{
141+
"cell_type": "markdown",
142+
"metadata": {},
143+
"source": [
144+
"### continuous treatment\n"
145+
]
146+
},
147+
{
148+
"cell_type": "code",
149+
"execution_count": 3,
150+
"metadata": {},
151+
"outputs": [
152+
{
153+
"data": {
154+
"text/plain": [
155+
"(-0.0005873841517624718, -0.19992316827000112)"
156+
]
157+
},
158+
"execution_count": 3,
159+
"metadata": {},
160+
"output_type": "execute_result"
161+
}
162+
],
163+
"source": [
164+
"\n",
165+
"df['Z'] = df.U1 + np.random.normal(size=n)\n",
166+
"\n",
167+
"smf.ols(\"Y ~ Z\", df).fit().params[1], smf.ols(\"Y ~ Z + X\", df).fit().params[1]\n"
168+
]
169+
},
170+
{
171+
"cell_type": "markdown",
172+
"metadata": {},
173+
"source": [
174+
"### binary treatment"
175+
]
176+
},
177+
{
178+
"cell_type": "code",
179+
"execution_count": 4,
180+
"metadata": {},
181+
"outputs": [
182+
{
183+
"data": {
184+
"text/plain": [
185+
"(0.0010055221102859783, -0.4154833890606614)"
186+
]
187+
},
188+
"execution_count": 4,
189+
"metadata": {},
190+
"output_type": "execute_result"
191+
}
192+
],
193+
"source": [
194+
"df['Z'] = df.Z >= 0\n",
195+
"\n",
196+
"smf.ols(\"Y ~ Z\", df).fit().params[1], smf.ols(\"Y ~ Z + X\", df).fit().params[1]\n"
197+
]
198+
},
199+
{
200+
"cell_type": "markdown",
201+
"metadata": {},
202+
"source": [
203+
"## Z-bias"
204+
]
205+
},
206+
{
207+
"cell_type": "code",
208+
"execution_count": 5,
209+
"metadata": {},
210+
"outputs": [],
211+
"source": [
212+
"n = int(1e6)\n",
213+
"df = simulate(\n",
214+
" U = lambda: np.random.normal(size = n),\n",
215+
" X = lambda: np.random.normal(size = n),\n",
216+
" Z = lambda X, U: X + U + np.random.normal(size=n),\n",
217+
" Y = lambda U: U + np.random.normal(size=n),\n",
218+
" )\n"
219+
]
220+
},
221+
{
222+
"cell_type": "code",
223+
"execution_count": 7,
224+
"metadata": {},
225+
"outputs": [
226+
{
227+
"data": {
228+
"text/plain": [
229+
"(0.33315108130802534, 0.4997989461297992)"
230+
]
231+
},
232+
"execution_count": 7,
233+
"metadata": {},
234+
"output_type": "execute_result"
235+
}
236+
],
237+
"source": [
238+
"smf.ols(\"Y ~ Z\", df).fit().params[1], smf.ols(\"Y ~ Z + X\", df).fit().params[1]\n"
239+
]
240+
},
241+
{
242+
"cell_type": "markdown",
243+
"metadata": {},
244+
"source": [
245+
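"Z has no causal effect on Y in this simulation, so both coefficients are pure bias from the unobserved confounder U. Adjusting for X, which affects only Z, makes the bias larger (about 0.50 versus about 0.33 unadjusted)."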
246+
]
247+
},
248+
{
249+
"cell_type": "markdown",
250+
"metadata": {},
251+
"source": [
252+
"### stronger association between X and Z"
253+
]
254+
},
255+
{
256+
"cell_type": "code",
257+
"execution_count": 8,
258+
"metadata": {},
259+
"outputs": [
260+
{
261+
"data": {
262+
"text/plain": [
263+
"(0.16699612964603475, 0.4998217107196198)"
264+
]
265+
},
266+
"execution_count": 8,
267+
"metadata": {},
268+
"output_type": "execute_result"
269+
}
270+
],
271+
"source": [
272+
"df['Z'] = 2 * df.X + df.U + np.random.normal(size=n)\n",
273+
"smf.ols(\"Y ~ Z\", df).fit().params[1], smf.ols(\"Y ~ Z + X\", df).fit().params[1]\n"
274+
]
275+
},
276+
{
277+
"cell_type": "code",
278+
"execution_count": 9,
279+
"metadata": {},
280+
"outputs": [
281+
{
282+
"data": {
283+
"text/plain": [
284+
"(0.00990024072283937, 0.500804991941852)"
285+
]
286+
},
287+
"execution_count": 9,
288+
"metadata": {},
289+
"output_type": "execute_result"
290+
}
291+
],
292+
"source": [
293+
"df['Z'] = 10 * df.X + df.U + np.random.normal(size=n)\n",
294+
"smf.ols(\"Y ~ Z\", df).fit().params[1], smf.ols(\"Y ~ Z + X\", df).fit().params[1]\n"
295+
]
296+
}
297+
],
298+
"metadata": {
299+
"kernelspec": {
300+
"display_name": "metrics",
301+
"language": "python",
302+
"name": "python3"
303+
},
304+
"language_info": {
305+
"codemirror_mode": {
306+
"name": "ipython",
307+
"version": 3
308+
},
309+
"file_extension": ".py",
310+
"mimetype": "text/x-python",
311+
"name": "python",
312+
"nbconvert_exporter": "python",
313+
"pygments_lexer": "ipython3",
314+
"version": "3.11.5"
315+
}
316+
},
317+
"nbformat": 4,
318+
"nbformat_minor": 2
319+
}

utils.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
import numpy as np
2+
import pandas as pd
3+
4+
def simulate(**kwargs):
    """Build a DataFrame by evaluating column generators in keyword order.

    Each keyword names an output column and maps to a callable. The names of
    the callable's declared parameters select previously generated columns,
    which are passed to it by keyword; its return value becomes the new column.

    Args:
        **kwargs: column name -> callable. A callable may only name columns
            that appear earlier in the call, because columns are generated in
            the order the keywords are given.

    Returns:
        pandas.DataFrame with one column per keyword, in call order.

    Raises:
        KeyError: if a callable declares a parameter whose name is not an
            already generated column.
    """
    values = {}
    for name, func in kwargs.items():
        code = func.__code__
        # co_varnames lists the parameters first and the body's local
        # variables after them; keep only the declared parameters so that a
        # generator with locals is not asked for columns it never requested.
        n_params = code.co_argcount + code.co_kwonlyargcount
        inputs = {arg: values[arg] for arg in code.co_varnames[:n_params]}
        values[name] = func(**inputs)
    return pd.DataFrame(values)

0 commit comments

Comments
 (0)