Created using Colaboratory

sandipanpaul21 · sandipanpaul21 · commit 6521113186ba · 2020-09-02T17:39:41.000+05:30
diff --git a/07_Simple_and_Multiple_Regression.ipynb b/07_Simple_and_Multiple_Regression.ipynb
@@ -6,7 +6,7 @@
       "name": "07 Simple and Multiple Regression.ipynb",
       "provenance": [],
       "collapsed_sections": [],
-      "authorship_tag": "ABX9TyNhGP0GF9c1lVdgMRwuSiRi",
+      "authorship_tag": "ABX9TyM4ImDR63Bml//QYzj8HH0p",
       "include_colab_link": true
     },
     "kernelspec": {
@@ -2881,11 +2881,259 @@
         }
       ]
     },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "NVhGdOXIsOGa",
+        "colab_type": "code",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 89
+        },
+        "outputId": "b0996ed2-74da-4cf0-ca23-6ea1a0920589"
+      },
+      "source": [
+        "base_model_properties_2"
+      ],
+      "execution_count": 51,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "[('Jarque-Bera', 472.99120337658803),\n",
+              " ('Chi^2 two-tail prob.', 1.9555331946581146e-103),\n",
+              " ('Skew', 1.6859804491070154),\n",
+              " ('Kurtosis', 7.5494043489051235)]"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          },
+          "execution_count": 51
+        }
+      ]
+    },
     {
       "cell_type": "code",
       "metadata": {
         "id": "Hb228jU0nmJE",
         "colab_type": "code",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 1000
+        },
+        "outputId": "13f57e62-5df8-4630-edb2-855fdae0977e"
+      },
+      "source": [
+        "# Overall comparison of the four fitted regression models.\n",
+        "# Each entry is (label, fitted results object, Jarque-Bera property list).\n",
+        "# The property lists for Model 2 / Model 3 are the ones stored under the\n",
+        "# names base_model_properties_2 / base_model_properties_3.\n",
+        "comparison_models = [\n",
+        "    (\"Base Model\", base_model, base_model_properties_1),\n",
+        "    (\"Model 1\", Model_1, model_properties_1),\n",
+        "    (\"Model 2\", Model_2, base_model_properties_2),\n",
+        "    (\"Model 3\", Model_3, base_model_properties_3),\n",
+        "]\n",
+        "\n",
+        "print(\"ALL MODEL COMPARISON\")\n",
+        "\n",
+        "# Normality of the residuals\n",
+        "# Jarque-Bera test: a small statistic goes with a large p-value, and\n",
+        "# p < 0.05 means normality of the residuals is rejected.\n",
+        "print(\"Normality of the residuals\")\n",
+        "print(\"Jarque-Bera test : \")\n",
+        "print(\"JB, ideal is close to 0\")\n",
+        "print(\"Probability(JB), ideal is greater than 0.05 (less than 0.05 rejects normality)\")\n",
+        "for model_label, fitted_model, jb_properties in comparison_models:\n",
+        "    print(model_label)\n",
+        "    print(jb_properties[0])\n",
+        "    print(jb_properties[1])\n",
+        "\n",
+        "# Skewness and Kurtosis\n",
+        "# The property lists carry ('Skew', value) and ('Kurtosis', value) after the\n",
+        "# two Jarque-Bera entries; slicing avoids an IndexError on a shorter list.\n",
+        "print(\"\\n\")\n",
+        "print(\"Skewness and Kurtosis of Residual\")\n",
+        "print(\"Skewness, ideal is -1 to 1\")\n",
+        "print(\"Kurtosis, ideal value is 3\")\n",
+        "for model_label, fitted_model, jb_properties in comparison_models:\n",
+        "    print(model_label)\n",
+        "    for shape_property in jb_properties[2:4]:\n",
+        "        print(shape_property)\n",
+        "\n",
+        "# Omni test: same reading as Jarque-Bera (p < 0.05 rejects normality).\n",
+        "# `name` and `test` are kept as globals so later cells see the same values\n",
+        "# as before (the Model 3 result is the last one assigned).\n",
+        "print(\"\\n\")\n",
+        "print(\"Normality of the Residual also\")\n",
+        "print(\"Omni Test : \")\n",
+        "print(\"Omni, ideal is close to 0\")\n",
+        "print(\"Probability(Omni), ideal is greater than 0.05 (less than 0.05 rejects normality)\")\n",
+        "name = ['Chi^2', 'Two-tail probability']\n",
+        "for model_label, fitted_model, jb_properties in comparison_models:\n",
+        "    print(model_label)\n",
+        "    test = sms.omni_normtest(fitted_model.resid)\n",
+        "    print(pd.DataFrame(lzip(name, test),columns = ['Test','Values']).round(2))\n",
+        "\n",
+        "# Multicollinearity: condition number of each model's design matrix\n",
+        "print(\"\\n\")\n",
+        "print(\"Multicollinearity\")\n",
+        "print(\"Ideal Condition Number will be less than 30\")\n",
+        "for model_label, fitted_model, jb_properties in comparison_models:\n",
+        "    print(f\"{model_label}, Condition number:\", np.linalg.cond(fitted_model.model.exog).round(2))\n",
+        "\n",
+        "# Adjusted R Square\n",
+        "print(\"\\n\")\n",
+        "print(\"Model Performance\")\n",
+        "print(\"Adjusted R Square, ideal is close to 1\")\n",
+        "for model_label, fitted_model, jb_properties in comparison_models:\n",
+        "    print(f\"{model_label}, Adjusted R Square : \", fitted_model.rsquared_adj.round(2))\n",
+        "\n",
+        "# F Statistics\n",
+        "print(\"\\n\")\n",
+        "print(\"Significance of Independent Variables\")\n",
+        "print(\"Probability(F Stat), less than 0.05 suggests independent variables are important\")\n",
+        "for model_label, fitted_model, jb_properties in comparison_models:\n",
+        "    print(f\"{model_label}, Probability of F Statistics : \", fitted_model.f_pvalue.round(2))\n",
+        "\n",
+        "# Durbin Watson Test\n",
+        "# Durbin-Watson measures first-order autocorrelation of the residuals (not\n",
+        "# homoscedasticity). It ranges from 0 to 4 and 2 means no autocorrelation.\n",
+        "# Computed from the residuals instead of hard-coded so it cannot go stale.\n",
+        "print(\"\\n\")\n",
+        "print(\"Autocorrelation of Error\")\n",
+        "print(\"Durbin Watson, ideal is close to 2 (roughly 1.5 to 2.5)\")\n",
+        "for model_label, fitted_model, jb_properties in comparison_models:\n",
+        "    print(f\"{model_label}, Durbin Watson Value :\", sms.durbin_watson(fitted_model.resid).round(2))"
+      ],
+      "execution_count": 56,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "ALL MODEL COMPARISION\n",
+            "Normality of the residuals\n",
+            "Jarque-Bera test : \n",
+            "JB, ideal is close to 0\n",
+            "Probability(JB), less than 0.05\n",
+            "Base Model\n",
+            "('Jarque-Bera', 3084.310178901069)\n",
+            "('Chi^2 two-tail prob.', 0.0)\n",
+            "Model 1\n",
+            "('Jarque-Bera', 2125.548489196517)\n",
+            "('Chi^2 two-tail prob.', 0.0)\n",
+            "Model 2\n",
+            "('Jarque-Bera', 472.99120337658803)\n",
+            "('Chi^2 two-tail prob.', 1.9555331946581146e-103)\n",
+            "Model 3\n",
+            "('Jarque-Bera', 452.9473484300142)\n",
+            "('Chi^2 two-tail prob.', 4.402840898839639e-99)\n",
+            "\n",
+            "\n",
+            "Skewness and Kurtosis of Residual\n",
+            "Skewness, ideal is -1 to 1\n",
+            "Kurtosis, ideal value is 3\n",
+            "\n",
+            "\n",
+            "Normality of the Residual also\n",
+            "Omni Test : \n",
+            "Omni, ideal is close to 0\n",
+            "Probability(Omni), less than 0.05\n",
+            "Base Model\n",
+            "                   Test  Values\n",
+            "0                 Chi^2  277.01\n",
+            "1  Two-tail probability    0.00\n",
+            "Model 1\n",
+            "                   Test  Values\n",
+            "0                 Chi^2  195.55\n",
+            "1  Two-tail probability    0.00\n",
+            "Model 2\n",
+            "                   Test  Values\n",
+            "0                 Chi^2  134.24\n",
+            "1  Two-tail probability    0.00\n",
+            "Model 3\n",
+            "                   Test  Values\n",
+            "0                 Chi^2  128.77\n",
+            "1  Two-tail probability    0.00\n",
+            "\n",
+            "\n",
+            "Multicollinearity\n",
+            "Ideal Condition Number will be less than 30\n",
+            "Base Model, Condition number: 8125.62\n",
+            "Model 1, Condition number: 8118.28\n",
+            "Model 2, Condition number: 103.55\n",
+            "Model 3, Condition number: 89.45\n",
+            "\n",
+            "\n",
+            "Model Performance\n",
+            "Adjusted R Square, ideal is close to 1\n",
+            "Base Model, Adjusted R Square :  0.95\n",
+            "Model 1, Adjusted R Square :  0.95\n",
+            "Model 2, Adjusted R Square :  0.91\n",
+            "Model 3, Adjusted R Square :  0.91\n",
+            "\n",
+            "\n",
+            "Significance of Independent Variables\n",
+            "Probability(F Stat), less than 0.05 suggests independent variables are important\n",
+            "Base Model, Probability of F Statistics :  0.0\n",
+            "Model 1, Probability of F Statistics :  0.0\n",
+            "Model 2, Probability of F Statistics :  0.0\n",
+            "Model 3, Probability of F Statistics :  0.0\n",
+            "\n",
+            "\n",
+            "Homoscedasticity of Error\n",
+            "Durbin Watson, ideal is between 1 to 2\n",
+            "Base Model, Durbin Watson Value : 0.78\n",
+            "Model 1, Durbin Watson Value : 0.78\n",
+            "Model 2, Durbin Watson Value : 2.1\n",
+            "Model 3, Durbin Watson Value : 2.07\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "_cnBy2F8rhjS",
+        "colab_type": "code",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 35
+        },
+        "outputId": "93662ec1-8635-4a90-b678-1c075b6af894"
+      },
+      "source": [
+        "base_model_properties_1[0]"
+      ],
+      "execution_count": 50,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "('Jarque-Bera', 3084.310178901069)"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          },
+          "execution_count": 50
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "6UmVmM89r7Ro",
+        "colab_type": "code",
         "colab": {}
       },
       "source": [