Skip to content

Commit b763d59

Browse files
committed
regression adjustment from chap 6
1 parent 09f00ba commit b763d59

File tree

1 file changed

+240
-0
lines changed

1 file changed

+240
-0
lines changed

Chapter06.ipynb

Lines changed: 240 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,240 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# Chapter 6: Rerandomization and Regression Adjustment"
8+
]
9+
},
10+
{
11+
"cell_type": "code",
12+
"execution_count": 2,
13+
"metadata": {},
14+
"outputs": [],
15+
"source": [
16+
"import numpy as np\n",
17+
"import pandas as pd\n",
18+
"import scipy as sp\n",
19+
"import statsmodels.api as sm\n",
20+
"import statsmodels.formula.api as smf\n",
21+
"# viz\n",
22+
"import matplotlib\n",
23+
"import matplotlib.pyplot as plt\n",
24+
"import seaborn as sns\n",
25+
"font = {'family' : 'IBM Plex Sans Condensed',\n",
26+
" 'weight' : 'normal',\n",
27+
" 'size' : 10}\n",
28+
"plt.rc('font', **font)\n",
29+
"plt.rcParams['figure.figsize'] = (6, 6)\n",
30+
"%matplotlib inline\n",
31+
"%config InlineBackend.figure_format = 'retina'\n"
32+
]
33+
},
34+
{
35+
"cell_type": "markdown",
36+
"metadata": {},
37+
"source": [
38+
"## Regression Adjustment"
39+
]
40+
},
41+
{
42+
"cell_type": "code",
43+
"execution_count": 11,
44+
"metadata": {},
45+
"outputs": [
46+
{
47+
"name": "stderr",
48+
"output_type": "stream",
49+
"text": [
50+
"/tmp/ipykernel_129775/576707837.py:3: SettingWithCopyWarning: \n",
51+
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
52+
"Try using .loc[row_indexer,col_indexer] = value instead\n",
53+
"\n",
54+
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
55+
" angrist2['y'] = angrist2.GPA_year1.fillna(angrist2.GPA_year1.mean())\n"
56+
]
57+
}
58+
],
59+
"source": [
60+
"angrist = pd.read_stata(\"star.dta\")\n",
61+
"angrist2 = angrist.query(\"control == 1 | sfsp == 1\")\n",
62+
"angrist2['y'] = angrist2.GPA_year1.fillna(angrist2.GPA_year1.mean())\n"
63+
]
64+
},
65+
{
66+
"cell_type": "code",
67+
"execution_count": 20,
68+
"metadata": {},
69+
"outputs": [],
70+
"source": [
71+
"y, z, x = angrist2.y.values, angrist2.sfsp.values, angrist2.loc[:, ['female', 'gpa0']].values\n"
72+
]
73+
},
74+
{
75+
"cell_type": "markdown",
76+
"metadata": {},
77+
"source": [
78+
"### unadjusted regression"
79+
]
80+
},
81+
{
82+
"cell_type": "code",
83+
"execution_count": 37,
84+
"metadata": {},
85+
"outputs": [],
86+
"source": [
87+
"restable = (sm.OLS(y, sm.add_constant(z)).fit(cov_type='HC2').summary().tables[1].as_html())\n",
88+
"unadj_res = pd.read_html(restable, header=0, index_col=0)[0].iloc[1, 0:4]\n"
89+
]
90+
},
91+
{
92+
"cell_type": "markdown",
93+
"metadata": {},
94+
"source": [
95+
"### adjusted (Lin 2013) regression"
96+
]
97+
},
98+
{
99+
"cell_type": "code",
100+
"execution_count": 38,
101+
"metadata": {},
102+
"outputs": [],
103+
"source": [
104+
"# standardize x\n",
105+
"x = (x - x.mean(axis = 0))/x.std(axis = 0)\n"
106+
]
107+
},
108+
{
109+
"cell_type": "code",
110+
"execution_count": 39,
111+
"metadata": {},
112+
"outputs": [],
113+
"source": [
114+
"restable = (sm.OLS(y,\n",
115+
" sm.add_constant(\n",
116+
" np.c_[z, x, z[:,np.newaxis]*x]\n",
117+
" ))\n",
118+
" .fit(cov_type='HC2').summary().tables[1].as_html()\n",
119+
")\n",
120+
"lin_res = pd.read_html(restable, header=0, index_col=0)[0].iloc[1,0:4]\n"
121+
]
122+
},
123+
{
124+
"cell_type": "code",
125+
"execution_count": 43,
126+
"metadata": {},
127+
"outputs": [
128+
{
129+
"data": {
130+
"text/html": [
131+
"<div>\n",
132+
"<style scoped>\n",
133+
" .dataframe tbody tr th:only-of-type {\n",
134+
" vertical-align: middle;\n",
135+
" }\n",
136+
"\n",
137+
" .dataframe tbody tr th {\n",
138+
" vertical-align: top;\n",
139+
" }\n",
140+
"\n",
141+
" .dataframe thead th {\n",
142+
" text-align: right;\n",
143+
" }\n",
144+
"</style>\n",
145+
"<table border=\"1\" class=\"dataframe\">\n",
146+
" <thead>\n",
147+
" <tr style=\"text-align: right;\">\n",
148+
" <th></th>\n",
149+
" <th>coef</th>\n",
150+
" <th>se</th>\n",
151+
" <th>t</th>\n",
152+
" <th>p</th>\n",
153+
" </tr>\n",
154+
" </thead>\n",
155+
" <tbody>\n",
156+
" <tr>\n",
157+
" <th>unadjusted</th>\n",
158+
" <td>0.0518</td>\n",
159+
" <td>0.078</td>\n",
160+
" <td>0.669</td>\n",
161+
" <td>0.504</td>\n",
162+
" </tr>\n",
163+
" <tr>\n",
164+
" <th>adjusted</th>\n",
165+
" <td>0.0682</td>\n",
166+
" <td>0.074</td>\n",
167+
" <td>0.925</td>\n",
168+
" <td>0.355</td>\n",
169+
" </tr>\n",
170+
" </tbody>\n",
171+
"</table>\n",
172+
"</div>"
173+
],
174+
"text/plain": [
175+
" coef se t p\n",
176+
"unadjusted 0.0518 0.078 0.669 0.504\n",
177+
"adjusted 0.0682 0.074 0.925 0.355"
178+
]
179+
},
180+
"execution_count": 43,
181+
"metadata": {},
182+
"output_type": "execute_result"
183+
}
184+
],
185+
"source": [
186+
"pd.DataFrame(np.c_[unadj_res, lin_res].T,\n",
187+
" columns=['coef', 'se', 't', 'p'],\n",
188+
" index=['unadjusted', 'adjusted']\n",
189+
")\n"
190+
]
191+
},
192+
{
193+
"cell_type": "markdown",
194+
"metadata": {},
195+
"source": [
196+
"## Rerandomization simulation"
197+
]
198+
},
199+
{
200+
"cell_type": "markdown",
201+
"metadata": {},
202+
"source": [
203+
"TBD"
204+
]
205+
},
206+
{
207+
"cell_type": "code",
208+
"execution_count": null,
209+
"metadata": {},
210+
"outputs": [],
211+
"source": [
212+
"def Mahalanobis2(z, x):\n",
213+
" x1 = x[z == 1, :]\n",
214+
" x0 = x[z == 0, :]\n",
215+
" n0, n1 = x0.shape[0], x1.shape[0]\n"
216+
]
217+
}
218+
],
219+
"metadata": {
220+
"kernelspec": {
221+
"display_name": "econometrics",
222+
"language": "python",
223+
"name": "python3"
224+
},
225+
"language_info": {
226+
"codemirror_mode": {
227+
"name": "ipython",
228+
"version": 3
229+
},
230+
"file_extension": ".py",
231+
"mimetype": "text/x-python",
232+
"name": "python",
233+
"nbconvert_exporter": "python",
234+
"pygments_lexer": "ipython3",
235+
"version": "3.9.13"
236+
}
237+
},
238+
"nbformat": 4,
239+
"nbformat_minor": 2
240+
}

0 commit comments

Comments
 (0)