Skip to content

Commit 030bd0e

Browse files
Created using Colaboratory
1 parent d901714 commit 030bd0e

File tree

1 file changed

+181
-24
lines changed

1 file changed

+181
-24
lines changed

08_Logistic_Regression.ipynb

Lines changed: 181 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"colab": {
66
"name": "08 Logistic Regression.ipynb",
77
"provenance": [],
8-
"authorship_tag": "ABX9TyNmC97wOqe43KRJd+bMnG4a",
8+
"authorship_tag": "ABX9TyOzdICGm/7utO3jg6uo4fdn",
99
"include_colab_link": true
1010
},
1111
"kernelspec": {
@@ -33,15 +33,21 @@
3333
"base_uri": "https://localhost:8080/",
3434
"height": 73
3535
},
36-
"outputId": "488a4dcc-a320-41aa-d30d-18001ff28526"
36+
"outputId": "b05857ae-b7ca-4eeb-af64-ee339d5216a8"
3737
},
3838
"source": [
3939
"# Libraries\n",
4040
"from sklearn import datasets\n",
4141
"import pandas as pd\n",
42-
"import statsmodels.api as sm"
42+
"import statsmodels.api as sm\n",
43+
"from sklearn.linear_model import LogisticRegression\n",
44+
"from sklearn import metrics\n",
45+
"from sklearn.model_selection import train_test_split\n",
46+
"import numpy as np\n",
47+
"from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score\n",
48+
"from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score"
4349
],
44-
"execution_count": 10,
50+
"execution_count": 1,
4551
"outputs": [
4652
{
4753
"output_type": "stream",
@@ -62,7 +68,7 @@
6268
"base_uri": "https://localhost:8080/",
6369
"height": 204
6470
},
65-
"outputId": "1ab9c9da-03ea-4454-e837-fa364133db44"
71+
"outputId": "bc5937b1-e238-4776-e45e-b02333be145d"
6672
},
6773
"source": [
6874
"# IRIS Dataset\n",
@@ -79,7 +85,7 @@
7985
"iris.columns = iris.columns.str.replace(\")\",\"\")\n",
8086
"iris.head()"
8187
],
82-
"execution_count": 3,
88+
"execution_count": 2,
8389
"outputs": [
8490
{
8591
"output_type": "execute_result",
@@ -167,7 +173,7 @@
167173
"metadata": {
168174
"tags": []
169175
},
170-
"execution_count": 3
176+
"execution_count": 2
171177
}
172178
]
173179
},
@@ -180,13 +186,13 @@
180186
"base_uri": "https://localhost:8080/",
181187
"height": 71
182188
},
183-
"outputId": "3d834a1c-9e56-4aec-abe7-c380280abb99"
189+
"outputId": "4b1fcb62-60bb-4180-9b09-a053ebcde243"
184190
},
185191
"source": [
186192
"# Target Column Distribution\n",
187193
"iris['species'].value_counts()"
188194
],
189-
"execution_count": 4,
195+
"execution_count": 3,
190196
"outputs": [
191197
{
192198
"output_type": "execute_result",
@@ -200,7 +206,7 @@
200206
"metadata": {
201207
"tags": []
202208
},
203-
"execution_count": 4
209+
"execution_count": 3
204210
}
205211
]
206212
},
@@ -213,13 +219,13 @@
213219
"base_uri": "https://localhost:8080/",
214220
"height": 142
215221
},
216-
"outputId": "9e9e1c39-ebe4-473b-dfb1-d6dfc94c75ce"
222+
"outputId": "4d927c5f-d629-4f5b-be62-d548b75eb025"
217223
},
218224
"source": [
219225
"# Mean of each independent column, grouped by the dependent column (species)\n",
220226
"iris.groupby('species').mean().round(2)"
221227
],
222-
"execution_count": 5,
228+
"execution_count": 4,
223229
"outputs": [
224230
{
225231
"output_type": "execute_result",
@@ -285,7 +291,7 @@
285291
"metadata": {
286292
"tags": []
287293
},
288-
"execution_count": 5
294+
"execution_count": 4
289295
}
290296
]
291297
},
@@ -298,14 +304,14 @@
298304
"base_uri": "https://localhost:8080/",
299305
"height": 204
300306
},
301-
"outputId": "5260ca48-0b2f-4400-e29c-7137ecab6959"
307+
"outputId": "10749ad7-318a-4caf-b8d1-59a5439dfc23"
302308
},
303309
"source": [
304310
"# Independent Variables\n",
305311
"Independent_Variable_Base_Set = iris[iris.columns[0:4]]\n",
306312
"Independent_Variable_Base_Set.head()"
307313
],
308-
"execution_count": 6,
314+
"execution_count": 5,
309315
"outputs": [
310316
{
311317
"output_type": "execute_result",
@@ -387,7 +393,7 @@
387393
"metadata": {
388394
"tags": []
389395
},
390-
"execution_count": 6
396+
"execution_count": 5
391397
}
392398
]
393399
},
@@ -400,14 +406,14 @@
400406
"base_uri": "https://localhost:8080/",
401407
"height": 204
402408
},
403-
"outputId": "19e38ebb-d128-4087-8583-bf3a75fef110"
409+
"outputId": "48a3efc3-1f7a-49bc-cf99-57875155d2b3"
404410
},
405411
"source": [
406412
"# Dependent Variable\n",
407413
"Dependent_Variable = iris[iris.columns[-1:iris.columns.size]]\n",
408414
"Dependent_Variable.head()"
409415
],
410-
"execution_count": 9,
416+
"execution_count": 6,
411417
"outputs": [
412418
{
413419
"output_type": "execute_result",
@@ -471,7 +477,7 @@
471477
"metadata": {
472478
"tags": []
473479
},
474-
"execution_count": 9
480+
"execution_count": 6
475481
}
476482
]
477483
},
@@ -484,7 +490,7 @@
484490
"base_uri": "https://localhost:8080/",
485491
"height": 431
486492
},
487-
"outputId": "54db6207-45cd-4270-e0c9-e3dde41afa82"
493+
"outputId": "e1db0f39-4317-4e06-b6b3-477716d48e15"
488494
},
489495
"source": [
490496
"# Fitting Logistic Model \n",
@@ -494,7 +500,7 @@
494500
"result = logit_model.fit(method = 'bfgs')\n",
495501
"print(result.summary2())"
496502
],
497-
"execution_count": 12,
503+
"execution_count": 7,
498504
"outputs": [
499505
{
500506
"output_type": "stream",
@@ -508,7 +514,7 @@
508514
"===================================================================\n",
509515
"Model: Logit Pseudo R-squared: 1.000 \n",
510516
"Dependent Variable: species AIC: 8.0002 \n",
511-
"Date: 2020-09-03 11:50 BIC: 18.4209 \n",
517+
"Date: 2020-09-03 13:11 BIC: 18.4209 \n",
512518
"No. Observations: 100 Log-Likelihood: -0.00011118\n",
513519
"Df Model: 3 LL-Null: -69.315 \n",
514520
"Df Residuals: 96 LLR p-value: 7.4648e-30 \n",
@@ -536,7 +542,7 @@
536542
"base_uri": "https://localhost:8080/",
537543
"height": 179
538544
},
539-
"outputId": "b9612ca6-26d7-4877-dffb-a28619558c19"
545+
"outputId": "736bc19c-e30a-45d8-8eed-bb39dea0365e"
540546
},
541547
"source": [
542548
"# Model Summary\n",
@@ -554,7 +560,7 @@
554560
"print(\"BIC also work same as AIC, Lower BIC better is the Model\")\n",
555561
"print(\"Base Model, BIC :\",base_model_bic)"
556562
],
557-
"execution_count": 22,
563+
"execution_count": 8,
558564
"outputs": [
559565
{
560566
"output_type": "stream",
@@ -578,6 +584,157 @@
578584
"metadata": {
579585
"id": "IdcrKXSXzxZY",
580586
"colab_type": "code",
587+
"colab": {
588+
"base_uri": "https://localhost:8080/",
589+
"height": 539
590+
},
591+
"outputId": "73ef4b6f-f34d-4ea0-8c0f-3879eda08c43"
592+
},
593+
"source": [
594+
"# Split the dataset into train and test sets, then fit a logistic model\n",
595+
"\n",
596+
"# Let's start by selecting a single independent variable\n",
597+
"Independent_Variable_Set_v1 = iris[iris.columns[0:1]]\n",
598+
"X_train, X_test, y_train, y_test = train_test_split(Independent_Variable_Set_v1,Dependent_Variable,test_size = 0.3,random_state = 21)\n",
599+
"logit_model = sm.Logit(y_train,X_train)\n",
600+
"result = logit_model.fit(method='bfgs')\n",
601+
"print(result.summary2())\n",
602+
"\n",
603+
"# Model Summary\n",
604+
"\n",
605+
"print(\"Model 1 Summary\")\n",
606+
"print(\"Iteration suggests how many loop model did to perform the fit\")\n",
607+
"print(\"Iterations : 3\")\n",
608+
"r_square_1 = result.prsquared.round(2)\n",
609+
"print(\"Pseudo R Square suggests overall effect size (ideal value is close to 1)\")\n",
610+
"print(\"Model 1, MacFadden Pseudo R Square : \",r_square_1)\n",
611+
"base_model_aic_1 = result.aic.round(2)\n",
612+
"print(\"AIC compares Goodness of Fit, Lower AIC better is the Model\")\n",
613+
"print(\"Model 1, AIC :\",base_model_aic_1)\n",
614+
"base_model_bic_1 = result.bic.round(2)\n",
615+
"print(\"BIC also work same as AIC, Lower BIC better is the Model\")\n",
616+
"print(\"Model 1, BIC :\",base_model_bic_1)"
617+
],
618+
"execution_count": 9,
619+
"outputs": [
620+
{
621+
"output_type": "stream",
622+
"text": [
623+
"Optimization terminated successfully.\n",
624+
" Current function value: 0.662428\n",
625+
" Iterations: 3\n",
626+
" Function evaluations: 5\n",
627+
" Gradient evaluations: 5\n",
628+
" Results: Logit\n",
629+
"==============================================================\n",
630+
"Model: Logit Pseudo R-squared: 0.024 \n",
631+
"Dependent Variable: species AIC: 94.7399\n",
632+
"Date: 2020-09-03 13:11 BIC: 96.9884\n",
633+
"No. Observations: 70 Log-Likelihood: -46.370\n",
634+
"Df Model: 0 LL-Null: -47.487\n",
635+
"Df Residuals: 69 LLR p-value: nan \n",
636+
"Converged: 1.0000 Scale: 1.0000 \n",
637+
"---------------------------------------------------------------\n",
638+
" Coef. Std.Err. z P>|z| [0.025 0.975]\n",
639+
"---------------------------------------------------------------\n",
640+
"sepallengthcm 0.0902 0.0442 2.0399 0.0414 0.0035 0.1769\n",
641+
"==============================================================\n",
642+
"\n",
643+
"Model 1 Summary\n",
644+
"Iteration suggests how many loop model did to perform the fit\n",
645+
"Iterations : 3\n",
646+
"Pseudo R Square suggests overall effect size (ideal value is close to 1)\n",
647+
"Model 1, MacFadden Pseudo R Square : 0.02\n",
648+
"AIC compares Goodness of Fit, Lower AIC better is the Model\n",
649+
"Model 1, AIC : 94.74\n",
650+
"BIC also work same as AIC, Lower BIC better is the Model\n",
651+
"Model 1, BIC : 96.99\n"
652+
],
653+
"name": "stdout"
654+
}
655+
]
656+
},
657+
{
658+
"cell_type": "code",
659+
"metadata": {
660+
"id": "QwGV0dfc5Tq0",
661+
"colab_type": "code",
662+
"colab": {
663+
"base_uri": "https://localhost:8080/",
664+
"height": 323
665+
},
666+
"outputId": "0ec5d4c8-56ff-4b9c-997b-1c61cff2886e"
667+
},
668+
"source": [
669+
"# Model Prediction\n",
670+
"\n",
671+
"pred = result.predict(X_test)\n",
672+
"model_prediction = pd.DataFrame(pred.round(2),columns = ['Prediction'])\n",
673+
"model_prediction['temp'] = 'temp'\n",
674+
"model_prediction['Final_Class'] = np.where(model_prediction['Prediction'] > 0.5,1,0)\n",
675+
"print(model_prediction.head())\n",
676+
"temp = model_prediction.groupby('temp')['Final_Class'].apply(list)\n",
677+
"y_pred = temp.loc['temp']\n",
678+
"model_1_accuracy = accuracy_score(y_test,y_pred).round(2)\n",
679+
"print(\"\\nModel Performance\")\n",
680+
"print(\"Model 1, Accuracy :\",model_1_accuracy)\n",
681+
"model_1_precision = precision_score(y_test,y_pred).round(2)\n",
682+
"print(\"Model 1, Precision :\",model_1_precision)\n",
683+
"model_1_recall = recall_score(y_test,y_pred).round(2)\n",
684+
"print(\"Model 1, Recall :\",model_1_recall)\n",
685+
"model_1_fscore = f1_score(y_test,y_pred).round(2)\n",
686+
"print(\"Model 1, F1 Score :\",model_1_fscore)\n",
687+
"model_1_roc = roc_auc_score(y_test,y_pred)\n",
688+
"print(\"Model 1, AUC :\",model_1_roc)\n",
689+
"print(\"\\nConfusion Matrix Model 1\")\n",
690+
"model_1_cm = confusion_matrix(y_test,y_pred)\n",
691+
"print(model_1_cm)"
692+
],
693+
"execution_count": 10,
694+
"outputs": [
695+
{
696+
"output_type": "stream",
697+
"text": [
698+
" Prediction temp Final_Class\n",
699+
"23 0.61 temp 1\n",
700+
"81 0.62 temp 1\n",
701+
"85 0.63 temp 1\n",
702+
"34 0.61 temp 1\n",
703+
"62 0.63 temp 1\n",
704+
"\n",
705+
"Model Performance\n",
706+
"Model 1, Accuracy : 0.3\n",
707+
"Model 1, Precision : 0.3\n",
708+
"Model 1, Recall : 1.0\n",
709+
"Model 1, F1 Score : 0.46\n",
710+
"Model 1, AUC : 0.5\n",
711+
"\n",
712+
"Confusion Matrix Model 1\n",
713+
"[[ 0 21]\n",
714+
" [ 0 9]]\n"
715+
],
716+
"name": "stdout"
717+
}
718+
]
719+
},
720+
{
721+
"cell_type": "code",
722+
"metadata": {
723+
"id": "dhcskdb5BCiU",
724+
"colab_type": "code",
725+
"colab": {}
726+
},
727+
"source": [
728+
""
729+
],
730+
"execution_count": 10,
731+
"outputs": []
732+
},
733+
{
734+
"cell_type": "code",
735+
"metadata": {
736+
"id": "v67NCxRZGCXx",
737+
"colab_type": "code",
581738
"colab": {}
582739
},
583740
"source": [

0 commit comments

Comments
 (0)