finished part 2

apoorvalal · apoorvalal · commit 0dbe89ecbd0a · 2023-10-21T10:01:36.000-07:00
diff --git a/Chapter09.ipynb b/Chapter09.ipynb
@@ -0,0 +1,207 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Chapter 9: Bridging Finite and Super-population Causal Inference"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from joblib import Parallel, delayed\n",
+    "\n",
+    "import numpy as np\n",
+    "import statsmodels.api as sm\n",
+    "\n",
+    "np.random.seed(42)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def linestimator(Z, Y, X):\n",
+    "    \"\"\"Lin's fully interacted OLS estimator of the average treatment effect.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    Z : array-like, shape (n,)\n",
+    "        Binary treatment indicator.\n",
+    "    Y : array-like, shape (n,)\n",
+    "        Observed outcome.\n",
+    "    X : array-like, shape (n, p) or (n,)\n",
+    "        Covariates; a 1-D input is treated as a single covariate (p = 1).\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    est : float\n",
+    "        Coefficient on Z in the regression of Y on (1, Z, X, Z * X).\n",
+    "    se_ehw : float\n",
+    "        HC2 heteroskedasticity-robust standard error of `est`.\n",
+    "    se_super : float\n",
+    "        Standard error after adding the super-population correction.\n",
+    "    \"\"\"\n",
+    "    Z, Y = np.asarray(Z), np.asarray(Y)\n",
+    "    X = np.asarray(X, dtype=float)\n",
+    "    X = X.reshape(len(X), -1)  # promote a 1-D covariate vector to an (n, 1) matrix\n",
+    "    # centre (and scale) the covariates so the coefficient on Z estimates the ATE\n",
+    "    X = (X - X.mean(axis=0)) / X.std(axis=0)\n",
+    "    n, p = X.shape\n",
+    "    # fully interacted OLS: columns are [1, Z, X, Z * X]\n",
+    "    Xmat = np.c_[sm.add_constant(Z), X, Z.reshape(-1, 1) * X]\n",
+    "    m = sm.OLS(Y, Xmat).fit(cov_type=\"HC2\")\n",
+    "    est, vehw = m.params[1], m.bse[1]**2\n",
+    "    # super-population correction\n",
+    "    inter = m.params[-p:]  # (β_1 - β_0) term - last p elements of coef\n",
+    "    # np.cov collapses to a 0-d array when p == 1, which `@` rejects,\n",
+    "    # so force a (p, p) matrix\n",
+    "    Sigma = np.atleast_2d(np.cov(X, rowvar=False))\n",
+    "    # (β_1 - β_0)' Σ (β_1 - β_0) / n\n",
+    "    superCorr = np.sum(inter * (Sigma @ inter)) / n\n",
+    "    vsuper = vehw + superCorr\n",
+    "    return est, np.sqrt(vehw), np.sqrt(vsuper)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def onerepl(*args):\n",
+    "    \"\"\"Simulate one experiment of size n = 500 and apply `linestimator`.\n",
+    "\n",
+    "    Both potential outcomes are quadratic in one of two standard-normal\n",
+    "    covariates and have the same mean, so the true average treatment effect\n",
+    "    is 0. Treatment is assigned by independent Bernoulli(0.6) draws.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    *args\n",
+    "        Optional. When given, args[0] seeds a private random generator for\n",
+    "        this replicate, which keeps parallel runs reproducible: joblib worker\n",
+    "        processes do not reliably inherit the notebook's `np.random.seed`.\n",
+    "        With no arguments the global `np.random` state is used.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    tuple\n",
+    "        (estimate, EHW SE, super-population SE) as returned by `linestimator`.\n",
+    "    \"\"\"\n",
+    "    # Generator and the np.random module expose the same normal/uniform/binomial calls\n",
+    "    rng = np.random.default_rng(args[0]) if args else np.random\n",
+    "    n = 500\n",
+    "    X = rng.normal(0, 1, n*2).reshape(n, 2)\n",
+    "    Y0 = X[:, 0] + X[:, 0]**2 + rng.uniform(-.5, .5, n)\n",
+    "    Y1 = X[:, 1] + X[:, 1]**2 + rng.uniform(-1, 1, n)\n",
+    "    Z = rng.binomial(1, .6, n)\n",
+    "    Y = Y0 * (1 - Z) + Y1 * Z  # observed outcome\n",
+    "    return linestimator(Z, Y, X)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(0.052230404017171474, 0.02176516995732536, 0.026679530224550992)"
+      ]
+     },
+     "execution_count": 39,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "onerepl()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Monte Carlo: nrep replicates spread over k parallel jobs; the replicate index\n",
+    "# is handed to onerepl as its positional argument\n",
+    "nrep, k = 2000, 8\n",
+    "results = Parallel(n_jobs=k)(map(delayed(onerepl), range(nrep)))\n",
+    "# one row per replicate: (estimate, EHW SE, super-population SE)\n",
+    "simres = np.vstack(results)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(0.007213145049286639, 0.01843410232910921, 0.022661715589192874)"
+      ]
+     },
+     "execution_count": 41,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# mean point estimate (= bias, since the true ATE is 0), mean estimated EHW SE,\n",
+    "# mean estimated super-population SE\n",
+    "# NOTE(review): the saved output (~0.018, ~0.023) matches the squared empirical SD\n",
+    "# (~0.15**2) shown in the next cell, so it appears to predate the np.sqrt in\n",
+    "# linestimator -- re-run the notebook to refresh the stored outputs\n",
+    "tuple(simres[:, j].mean() for j in range(3))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.15129734780104556"
+      ]
+     },
+     "execution_count": 42,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# empirical SD of the point estimates across replicates\n",
+    "# (Monte Carlo benchmark for the estimated standard errors)\n",
+    "np.std(simres[:, 0])\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.1795"
+      ]
+     },
+     "execution_count": 43,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# EHW coverage: share of replicates whose interval est +/- 1.96 * SE contains\n",
+    "# the true ATE of 0 (endpoints of opposite sign <=> their product is <= 0)\n",
+    "((simres[:, 0] - 1.96 * simres[:, 1]) * (simres[:, 0] + 1.96 * simres[:, 1]) <= 0).mean()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.218"
+      ]
+     },
+     "execution_count": 44,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# super-population coverage: share of replicates whose interval est +/- 1.96 * SE\n",
+    "# contains the true ATE of 0 (endpoints of opposite sign <=> their product is <= 0)\n",
+    "((simres[:, 0] - 1.96 * simres[:, 2]) * (simres[:, 0] + 1.96 * simres[:, 2]) <= 0).mean()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "econometrics",
+   "language": "python",
+   "name": "econometrics"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}