Created using Colaboratory

sandipanpaul21 · sandipanpaul21 · commit 43d362fc466d · 2020-09-03T18:50:11.000+05:30
diff --git a/08_Logistic_Regression.ipynb b/08_Logistic_Regression.ipynb
@@ -5,7 +5,7 @@
     "colab": {
       "name": "08 Logistic Regression.ipynb",
       "provenance": [],
-      "authorship_tag": "ABX9TyOu3EEwuHsR/3JTtbVCA2NW",
+      "authorship_tag": "ABX9TyNd8trmiuHbcgUjcCZGui3z",
       "include_colab_link": true
     },
     "kernelspec": {
@@ -668,7 +668,7 @@
       "source": [
         "# Model Prediction\n",
         "\n",
-        "print(\"Sample Prediction\")\n",
+        "print(\"Sample Prediction of Model 1\")\n",
         "pred = result.predict(X_test)\n",
         "model_prediction = pd.DataFrame(pred.round(2),columns = ['Prediction'])\n",
         "model_prediction['temp'] = 'temp'\n",
@@ -724,23 +724,164 @@
       "metadata": {
         "id": "dhcskdb5BCiU",
         "colab_type": "code",
-        "colab": {}
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 593
+        },
+        "outputId": "ecdbf054-6db5-476c-a9af-d566378d8785"
       },
       "source": [
-        ""
+        "# Model 2: fit a logit model on the complete set of independent variables\n",
+        "X_train, X_test, y_train, y_test = train_test_split(Independent_Variable_Base_Set, Dependent_Variable, test_size=0.3, random_state=21)\n",
+        "logit_model = sm.Logit(y_train, X_train)\n",
+        "result = logit_model.fit(method='bfgs')\n",
+        "print(result.summary2())\n",
+        "\n",
+        "# Fit statistics for Model 2, rounded to two decimals for the report printed below\n",
+        "r_square_2 = result.prsquared.round(2)\n",
+        "base_model_aic_2 = result.aic.round(2)\n",
+        "base_model_bic_2 = result.bic.round(2)\n",
+        "\n",
+        "print(\"Model 2 Summary\")\n",
+        "print(\"Iteration suggests how many loop model did to perform the fit\")\n",
+        "print(\"Iterations : 22\")  # hard-coded from the optimizer log; update if the fit changes\n",
+        "print(\"Pseudo R Square suggests overall effect size (ideal value is close to 1)\")\n",
+        "print(\"Model 2, MacFadden Pseudo R Square : \", r_square_2)\n",
+        "print(\"AIC compares Goodness of Fit, Lower AIC better is the Model\")\n",
+        "print(\"Model 2, AIC  :\", base_model_aic_2)\n",
+        "print(\"BIC also work same as AIC, Lower BIC better is the Model\")\n",
+        "print(\"Model 2, BIC :\", base_model_bic_2)"
       ],
-      "execution_count": 10,
-      "outputs": []
+      "execution_count": 14,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "Optimization terminated successfully.\n",
+            "         Current function value: 0.000001\n",
+            "         Iterations: 22\n",
+            "         Function evaluations: 24\n",
+            "         Gradient evaluations: 24\n",
+            "                          Results: Logit\n",
+            "===================================================================\n",
+            "Model:               Logit            Pseudo R-squared: 1.000      \n",
+            "Dependent Variable:  species          AIC:              8.0002     \n",
+            "Date:                2020-09-03 13:17 BIC:              16.9941    \n",
+            "No. Observations:    70               Log-Likelihood:   -8.2981e-05\n",
+            "Df Model:            3                LL-Null:          -47.487    \n",
+            "Df Residuals:        66               LLR p-value:      1.8711e-20 \n",
+            "Converged:           1.0000           Scale:            1.0000     \n",
+            "-------------------------------------------------------------------\n",
+            "               Coef.   Std.Err.    z    P>|z|    [0.025     0.975] \n",
+            "-------------------------------------------------------------------\n",
+            "sepallengthcm -2.6981 1485.3731 -0.0018 0.9986 -2913.9759 2908.5796\n",
+            "sepalwidthcm  -8.8019 2565.4623 -0.0034 0.9973 -5037.0157 5019.4118\n",
+            "petallengthcm 12.9770 1215.7072  0.0107 0.9915 -2369.7654 2395.7193\n",
+            "petalwidthcm   5.7296 2155.0836  0.0027 0.9979 -4218.1565 4229.6158\n",
+            "===================================================================\n",
+            "\n",
+            "Model 2 Summary\n",
+            "Iteration suggests how many loop model did to perform the fit\n",
+            "Iterations : 22\n",
+            "Pseudo R Square suggests overall effect size (ideal value is close to 1)\n",
+            "Model 2, MacFadden Pseudo R Square :  1.0\n",
+            "AIC compares Goodness of Fit, Lower AIC better is the Model\n",
+            "Model 2, AIC  : 8.0\n",
+            "BIC also work same as AIC, Lower BIC better is the Model\n",
+            "Model 2, BIC : 16.99\n"
+          ],
+          "name": "stdout"
+        }
+      ]
     },
     {
       "cell_type": "code",
       "metadata": {
         "id": "v67NCxRZGCXx",
         "colab_type": "code",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 341
+        },
+        "outputId": "f6a2cb25-02da-4349-d9a8-39fbd619ecca"
+      },
+      "source": [
+        "# Model Prediction\n",
+        "\n",
+        "print(\"Sample Prediction of Model 2\")\n",
+        "pred = result.predict(X_test)\n",
+        "model_prediction = pd.DataFrame(pred.round(2),columns = ['Prediction'])\n",
+        "model_prediction['temp'] = 'temp'  # placeholder column; kept so the printed sample table is unchanged\n",
+        "# Threshold the unrounded probabilities: a score of 0.504 rounds to 0.5 and would be misclassified as 0\n",
+        "model_prediction['Final_Class'] = np.where(pred > 0.5,1,0)\n",
+        "print(model_prediction.head())\n",
+        "y_pred = model_prediction['Final_Class'].tolist()\n",
+        "model_2_accuracy = accuracy_score(y_test,y_pred).round(2)\n",
+        "print(\"\\nModel Performance\")\n",
+        "print(\"Model 2, Accuracy :\",model_2_accuracy)\n",
+        "model_2_precision = precision_score(y_test,y_pred).round(2)\n",
+        "print(\"Model 2, Precision :\",model_2_precision)\n",
+        "model_2_recall = recall_score(y_test,y_pred).round(2)\n",
+        "print(\"Model 2, Recall :\",model_2_recall)\n",
+        "model_2_fscore = f1_score(y_test,y_pred).round(2)\n",
+        "print(\"Model 2, F1 Score :\",model_2_fscore)\n",
+        "model_2_roc = roc_auc_score(y_test,pred)  # AUC ranks scores, so pass probabilities rather than 0/1 labels\n",
+        "print(\"Model 2, AUC :\",model_2_roc)\n",
+        "print(\"\\nConfusion Matrix, Model 2\")\n",
+        "model_2_cm = confusion_matrix(y_test,y_pred)\n",
+        "print(model_2_cm)"
+      ],
+      "execution_count": 15,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "Sample Prediction of Model 2\n",
+            "    Prediction  temp  Final_Class\n",
+            "23         0.0  temp            0\n",
+            "81         1.0  temp            1\n",
+            "85         1.0  temp            1\n",
+            "34         0.0  temp            0\n",
+            "62         1.0  temp            1\n",
+            "\n",
+            "Model Performance\n",
+            "Model 2, Accuracy : 1.0\n",
+            "Model 2, Precision : 1.0\n",
+            "Model 2, Recall : 1.0\n",
+            "Model 2, F1 Score : 1.0\n",
+            "Model 2, AUC : 1.0\n",
+            "\n",
+            "Confusion Matrix, Model 2\n",
+            "[[21  0]\n",
+            " [ 0  9]]\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "ca71zTREHpbi",
+        "colab_type": "code",
         "colab": {}
       },
       "source": [
-        ""
+        "# Model Comparison\n",
+        "# Reuse the metrics stored by the Model 1 and Model 2 prediction cells. y_pred now\n",
+        "# holds Model 2 predictions, so recomputing here would overwrite Model 1's scores.\n",
+        "# NOTE(review): assumes the Model 1 cell defined model_1_* the way Model 2 does - confirm.\n",
+        "\n",
+        "model_comparison = pd.DataFrame({\n",
+        "    'Model 1': [model_1_accuracy, model_1_precision, model_1_recall, model_1_fscore, model_1_roc],\n",
+        "    'Model 2': [model_2_accuracy, model_2_precision, model_2_recall, model_2_fscore, model_2_roc],\n",
+        "}, index = ['Accuracy', 'Precision', 'Recall', 'F1 Score', 'AUC'])\n",
+        "print(model_comparison)\n",
+        "\n",
+        "print(\"\\nConfusion Matrix, Model 1\")\n",
+        "print(model_1_cm)\n",
+        "print(\"\\nConfusion Matrix, Model 2\")\n",
+        "print(model_2_cm)"
       ],
       "execution_count": null,
       "outputs": []