|
5 | 5 | "colab": {
|
6 | 6 | "name": "08 Logistic Regression.ipynb",
|
7 | 7 | "provenance": [],
|
8 |
| - "authorship_tag": "ABX9TyOu3EEwuHsR/3JTtbVCA2NW", |
| 8 | + "authorship_tag": "ABX9TyNd8trmiuHbcgUjcCZGui3z", |
9 | 9 | "include_colab_link": true
|
10 | 10 | },
|
11 | 11 | "kernelspec": {
|
|
668 | 668 | "source": [
|
669 | 669 | "# Model Prediction\n",
|
670 | 670 | "\n",
|
671 |
| - "print(\"Sample Prediction\")\n", |
| 671 | + "print(\"Sample Prediction of Model 1\")\n", |
672 | 672 | "pred = result.predict(X_test)\n",
|
673 | 673 | "model_prediction = pd.DataFrame(pred.round(2),columns = ['Prediction'])\n",
|
674 | 674 | "model_prediction['temp'] = 'temp'\n",
|
|
724 | 724 | "metadata": {
|
725 | 725 | "id": "dhcskdb5BCiU",
|
726 | 726 | "colab_type": "code",
|
727 |
| - "colab": {} |
| 727 | + "colab": { |
| 728 | + "base_uri": "https://localhost:8080/", |
| 729 | + "height": 593 |
| 730 | + }, |
| 731 | + "outputId": "ecdbf054-6db5-476c-a9af-d566378d8785" |
728 | 732 | },
|
729 | 733 | "source": [
|
730 |
| - "" |
| 734 | + "# Let's build the model with all variables\n", |
| 735 | + "X_train, X_test, y_train, y_test = train_test_split(Independent_Variable_Base_Set,Dependent_Variable,test_size = 0.3,random_state = 21)\n", |
| 736 | + "logit_model = sm.Logit(y_train,X_train)\n", |
| 737 | + "result = logit_model.fit(method='bfgs')\n", |
| 738 | + "print(result.summary2())\n", |
| 739 | + "\n", |
| 740 | + "# Model Summary\n", |
| 741 | + "\n", |
| 742 | + "print(\"Model 2 Summary\")\n", |
| 743 | + "print(\"Iteration suggests how many loop model did to perform the fit\")\n", |
| 744 | + "print(\"Iterations : 22\")\n", |
| 745 | + "r_square_2 = result.prsquared.round(2)\n", |
| 746 | + "print(\"Pseudo R Square suggests overall effect size (ideal value is close to 1)\")\n", |
| 747 | + "print(\"Model 2, MacFadden Pseudo R Square : \",r_square_2)\n", |
| 748 | + "base_model_aic_2 = result.aic.round(2)\n", |
| 749 | + "print(\"AIC compares Goodness of Fit, Lower AIC better is the Model\")\n", |
| 750 | + "print(\"Model 2, AIC :\",base_model_aic_2)\n", |
| 751 | + "base_model_bic_2 = result.bic.round(2)\n", |
| 752 | + "print(\"BIC also work same as AIC, Lower BIC better is the Model\")\n", |
| 753 | + "print(\"Model 2, BIC :\",base_model_bic_2)" |
731 | 754 | ],
|
732 |
| - "execution_count": 10, |
733 |
| - "outputs": [] |
| 755 | + "execution_count": 14, |
| 756 | + "outputs": [ |
| 757 | + { |
| 758 | + "output_type": "stream", |
| 759 | + "text": [ |
| 760 | + "Optimization terminated successfully.\n", |
| 761 | + " Current function value: 0.000001\n", |
| 762 | + " Iterations: 22\n", |
| 763 | + " Function evaluations: 24\n", |
| 764 | + " Gradient evaluations: 24\n", |
| 765 | + " Results: Logit\n", |
| 766 | + "===================================================================\n", |
| 767 | + "Model: Logit Pseudo R-squared: 1.000 \n", |
| 768 | + "Dependent Variable: species AIC: 8.0002 \n", |
| 769 | + "Date: 2020-09-03 13:17 BIC: 16.9941 \n", |
| 770 | + "No. Observations: 70 Log-Likelihood: -8.2981e-05\n", |
| 771 | + "Df Model: 3 LL-Null: -47.487 \n", |
| 772 | + "Df Residuals: 66 LLR p-value: 1.8711e-20 \n", |
| 773 | + "Converged: 1.0000 Scale: 1.0000 \n", |
| 774 | + "-------------------------------------------------------------------\n", |
| 775 | + " Coef. Std.Err. z P>|z| [0.025 0.975] \n", |
| 776 | + "-------------------------------------------------------------------\n", |
| 777 | + "sepallengthcm -2.6981 1485.3731 -0.0018 0.9986 -2913.9759 2908.5796\n", |
| 778 | + "sepalwidthcm -8.8019 2565.4623 -0.0034 0.9973 -5037.0157 5019.4118\n", |
| 779 | + "petallengthcm 12.9770 1215.7072 0.0107 0.9915 -2369.7654 2395.7193\n", |
| 780 | + "petalwidthcm 5.7296 2155.0836 0.0027 0.9979 -4218.1565 4229.6158\n", |
| 781 | + "===================================================================\n", |
| 782 | + "\n", |
| 783 | + "Model 2 Summary\n", |
| 784 | + "Iteration suggests how many loop model did to perform the fit\n", |
| 785 | + "Iterations : 22\n", |
| 786 | + "Pseudo R Square suggests overall effect size (ideal value is close to 1)\n", |
| 787 | + "Model 2, MacFadden Pseudo R Square : 1.0\n", |
| 788 | + "AIC compares Goodness of Fit, Lower AIC better is the Model\n", |
| 789 | + "Model 2, AIC : 8.0\n", |
| 790 | + "BIC also work same as AIC, Lower BIC better is the Model\n", |
| 791 | + "Model 2, BIC : 16.99\n" |
| 792 | + ], |
| 793 | + "name": "stdout" |
| 794 | + } |
| 795 | + ] |
734 | 796 | },
|
735 | 797 | {
|
736 | 798 | "cell_type": "code",
|
737 | 799 | "metadata": {
|
738 | 800 | "id": "v67NCxRZGCXx",
|
739 | 801 | "colab_type": "code",
|
| 802 | + "colab": { |
| 803 | + "base_uri": "https://localhost:8080/", |
| 804 | + "height": 341 |
| 805 | + }, |
| 806 | + "outputId": "f6a2cb25-02da-4349-d9a8-39fbd619ecca" |
| 807 | + }, |
| 808 | + "source": [ |
| 809 | + "# Model Prediction\n", |
| 810 | + "\n", |
| 811 | + "print(\"Sample Prediction of Model 2\")\n", |
| 812 | + "pred = result.predict(X_test)\n", |
| 813 | + "model_prediction = pd.DataFrame(pred.round(2),columns = ['Prediction'])\n", |
| 814 | + "model_prediction['temp'] = 'temp'\n", |
| 815 | + "model_prediction['Final_Class'] = np.where(model_prediction['Prediction'] > 0.5,1,0)\n", |
| 816 | + "print(model_prediction.head())\n", |
| 817 | + "temp = model_prediction.groupby('temp')['Final_Class'].apply(list)\n", |
| 818 | + "y_pred = temp.loc['temp']\n", |
| 819 | + "model_2_accuracy = accuracy_score(y_test,y_pred).round(2)\n", |
| 820 | + "print(\"\\nModel Performance\")\n", |
| 821 | + "print(\"Model 2, Accuracy :\",model_2_accuracy)\n", |
| 822 | + "model_2_precision = precision_score(y_test,y_pred).round(2)\n", |
| 823 | + "print(\"Model 2, Precision :\",model_2_precision)\n", |
| 824 | + "model_2_recall = recall_score(y_test,y_pred).round(2)\n", |
| 825 | + "print(\"Model 2, Recall :\",model_2_recall)\n", |
| 826 | + "model_2_fscore = f1_score(y_test,y_pred).round(2)\n", |
| 827 | + "print(\"Model 2, F1 Score :\",model_2_fscore)\n", |
| 828 | + "model_2_roc = roc_auc_score(y_test,y_pred)\n", |
| 829 | + "print(\"Model 2, AUC :\",model_2_roc)\n", |
| 830 | + "print(\"\\nConfusion Matrix, Model 2\")\n", |
| 831 | + "model_2_cm = confusion_matrix(y_test,y_pred)\n", |
| 832 | + "print(model_2_cm)" |
| 833 | + ], |
| 834 | + "execution_count": 15, |
| 835 | + "outputs": [ |
| 836 | + { |
| 837 | + "output_type": "stream", |
| 838 | + "text": [ |
| 839 | + "Sample Prediction of Model 2\n", |
| 840 | + " Prediction temp Final_Class\n", |
| 841 | + "23 0.0 temp 0\n", |
| 842 | + "81 1.0 temp 1\n", |
| 843 | + "85 1.0 temp 1\n", |
| 844 | + "34 0.0 temp 0\n", |
| 845 | + "62 1.0 temp 1\n", |
| 846 | + "\n", |
| 847 | + "Model Performance\n", |
| 848 | + "Model 2, Accuracy : 1.0\n", |
| 849 | + "Model 2, Precision : 1.0\n", |
| 850 | + "Model 2, Recall : 1.0\n", |
| 851 | + "Model 2, F1 Score : 1.0\n", |
| 852 | + "Model 2, AUC : 1.0\n", |
| 853 | + "\n", |
| 854 | + "Confusion Matrix, Model 2\n", |
| 855 | + "[[21 0]\n", |
| 856 | + " [ 0 9]]\n" |
| 857 | + ], |
| 858 | + "name": "stdout" |
| 859 | + } |
| 860 | + ] |
| 861 | + }, |
| 862 | + { |
| 863 | + "cell_type": "code", |
| 864 | + "metadata": { |
| 865 | + "id": "ca71zTREHpbi", |
| 866 | + "colab_type": "code", |
740 | 867 | "colab": {}
|
741 | 868 | },
|
742 | 869 | "source": [
|
743 |
| - "" |
| 870 | + "# Model Comparison\n", |
| 871 | + "\n", |
| 872 | + "\n", |
| 873 | + "print(\"Model 1, Accuracy :\",model_1_accuracy)\n", |
| 874 | + "model_1_precision = precision_score(y_test,y_pred).round(2)\n", |
| 875 | + "print(\"Model 1, Precision :\",model_1_precision)\n", |
| 876 | + "model_1_recall = recall_score(y_test,y_pred).round(2)\n", |
| 877 | + "print(\"Model 1, Recall :\",model_1_recall)\n", |
| 878 | + "model_1_fscore = f1_score(y_test,y_pred).round(2)\n", |
| 879 | + "print(\"Model 1, F1 Score :\",model_1_fscore)\n", |
| 880 | + "model_1_roc = roc_auc_score(y_test,y_pred)\n", |
| 881 | + "print(\"Model 1, AUC :\",model_1_roc)\n", |
| 882 | + "print(\"\\nConfusion Matrix, Model 1\")\n", |
| 883 | + "model_1_cm = confusion_matrix(y_test,y_pred)\n", |
| 884 | + "print(model_1_cm)" |
744 | 885 | ],
|
745 | 886 | "execution_count": null,
|
746 | 887 | "outputs": []
|
|
0 commit comments