regression adjustment from chap 6

apoorvalal · apoorvalal · commit b763d5939e32 · 2023-10-07T18:04:40.000-07:00
diff --git a/Chapter06.ipynb b/Chapter06.ipynb
@@ -0,0 +1,240 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Chapter 6: Rerandomization and Regression Adjustment"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import scipy as sp\n",
+    "import statsmodels.api as sm\n",
+    "import statsmodels.formula.api as smf\n",
+    "# viz\n",
+    "import matplotlib\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns\n",
+    "# default font for every figure in this notebook\n",
+    "font = dict(family='IBM Plex Sans Condensed', weight='normal', size=10)\n",
+    "plt.rc('font', **font)\n",
+    "plt.rcParams['figure.figsize'] = (6, 6)\n",
+    "%matplotlib inline\n",
+    "%config InlineBackend.figure_format = 'retina'\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Regression Adjustment"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "angrist = pd.read_stata(\"star.dta\")\n",
+    "# keep only the control and sfsp arms; .copy() gives an independent frame so the\n",
+    "# column assignment below does not trigger SettingWithCopyWarning\n",
+    "angrist2 = angrist.query(\"control == 1 | sfsp == 1\").copy()\n",
+    "# outcome: GPA_year1 with missing values replaced by the mean of the observed values\n",
+    "angrist2['y'] = angrist2.GPA_year1.fillna(angrist2.GPA_year1.mean())\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# outcome, treatment indicator and covariate matrix as plain arrays\n",
+    "y = angrist2.y.values\n",
+    "z = angrist2.sfsp.values\n",
+    "x = angrist2[['female', 'gpa0']].values\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### unadjusted regression"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read the estimates directly from the fitted results instead of rendering the\n",
+    "# summary to HTML and re-parsing it: keeps full precision and needs no HTML parser.\n",
+    "unadj_fit = sm.OLS(y, sm.add_constant(z)).fit(cov_type='HC2')\n",
+    "# position 0 is the added constant, position 1 is the treatment indicator z\n",
+    "unadj_res = pd.Series(\n",
+    "    [unadj_fit.params[1], unadj_fit.bse[1], unadj_fit.tvalues[1], unadj_fit.pvalues[1]],\n",
+    "    index=['coef', 'se', 't', 'p'],\n",
+    ")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### adjusted (Lin 2013) regression"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# centre each covariate column and divide by its standard deviation (numpy default, ddof=0)\n",
+    "x_mean, x_sd = x.mean(axis=0), x.std(axis=0)\n",
+    "x = (x - x_mean) / x_sd\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Design matrix: constant, z, the covariates x, and the z-by-x interactions.\n",
+    "lin_design = sm.add_constant(np.c_[z, x, z[:, np.newaxis] * x])\n",
+    "lin_fit = sm.OLS(y, lin_design).fit(cov_type='HC2')\n",
+    "# Read the z coefficient (position 1, right after the constant) directly from the\n",
+    "# fitted results instead of re-parsing the HTML summary, which rounds the values.\n",
+    "lin_res = pd.Series(\n",
+    "    [lin_fit.params[1], lin_fit.bse[1], lin_fit.tvalues[1], lin_fit.pvalues[1]],\n",
+    "    index=['coef', 'se', 't', 'p'],\n",
+    ")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>coef</th>\n",
+       "      <th>se</th>\n",
+       "      <th>t</th>\n",
+       "      <th>p</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>unadjusted</th>\n",
+       "      <td>0.0518</td>\n",
+       "      <td>0.078</td>\n",
+       "      <td>0.669</td>\n",
+       "      <td>0.504</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>adjusted</th>\n",
+       "      <td>0.0682</td>\n",
+       "      <td>0.074</td>\n",
+       "      <td>0.925</td>\n",
+       "      <td>0.355</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "              coef     se      t      p\n",
+       "unadjusted  0.0518  0.078  0.669  0.504\n",
+       "adjusted    0.0682  0.074  0.925  0.355"
+      ]
+     },
+     "execution_count": 43,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# one row per estimator, one column per reported statistic\n",
+    "pd.DataFrame(\n",
+    "    np.vstack([unadj_res, lin_res]),\n",
+    "    index=['unadjusted', 'adjusted'],\n",
+    "    columns=['coef', 'se', 't', 'p'],\n",
+    ")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Rerandomization simulation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "TBD"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def Mahalanobis2(z, x):\n",
+    "    \"\"\"Mahalanobis distance between the treated and control covariate means.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    z : 1-D array of 0/1 treatment indicators, one entry per row of x.\n",
+    "    x : 2-D array of covariates (rows are units, columns are covariates).\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    float : diff.T @ inv(V) @ diff, where diff is the difference in covariate means\n",
+    "    (treated minus control) and V = (n1 + n0) / (n1 * n0) * cov(x).\n",
+    "    \"\"\"\n",
+    "    x1 = x[z == 1, :]\n",
+    "    x0 = x[z == 0, :]\n",
+    "    n0, n1 = x0.shape[0], x1.shape[0]\n",
+    "    diff = x1.mean(axis=0) - x0.mean(axis=0)\n",
+    "    # np.cov returns a 0-d array for a single covariate; force a matrix for solve()\n",
+    "    cov_diff = (n1 + n0) / (n1 * n0) * np.atleast_2d(np.cov(x, rowvar=False))\n",
+    "    # solve() instead of an explicit inverse for numerical stability\n",
+    "    return float(diff @ np.linalg.solve(cov_diff, diff))\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "econometrics",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}