|
34 | 34 | "base_uri": "https://localhost:8080/",
|
35 | 35 | "height": 73
|
36 | 36 | },
|
37 |
| - "outputId": "a3ee0a12-0104-4c4c-b4ae-58c4d1790fde" |
| 37 | + "outputId": "b942fff1-46a4-493e-fee0-55b910ae3173" |
38 | 38 | },
|
39 | 39 | "source": [
|
40 | 40 | "# Libraries \n",
|
|
67 | 67 | "base_uri": "https://localhost:8080/",
|
68 | 68 | "height": 204
|
69 | 69 | },
|
70 |
| - "outputId": "28170068-8b0a-41d0-c232-52ad620b6004" |
| 70 | + "outputId": "35052345-2dba-4323-a36a-1f0c33c64279" |
71 | 71 | },
|
72 | 72 | "source": [
|
73 | 73 | "# Load the Datasets (For indepth analysis please refer to Part O1 All About Datasets)\n",
|
|
236 | 236 | "base_uri": "https://localhost:8080/",
|
237 | 237 | "height": 395
|
238 | 238 | },
|
239 |
| - "outputId": "c0ba3c2a-5d1b-4fcd-b45b-345f8ab0f690" |
| 239 | + "outputId": "e3a6efdc-4e96-41de-86ad-50ccabb17b41" |
240 | 240 | },
|
241 | 241 | "source": [
|
242 | 242 | "# Dataset overall Information\n",
|
|
282 | 282 | "base_uri": "https://localhost:8080/",
|
283 | 283 | "height": 107
|
284 | 284 | },
|
285 |
| - "outputId": "5d67a37d-833e-4520-e101-ad5403a54b30" |
| 285 | + "outputId": "349a9591-0a95-4e8a-a892-bad733ee880e" |
286 | 286 | },
|
287 | 287 | "source": [
|
288 | 288 | "# Let set the BASE MODEL on which we will improve\n",
|
|
318 | 318 | "base_uri": "https://localhost:8080/",
|
319 | 319 | "height": 688
|
320 | 320 | },
|
321 |
| - "outputId": "625eb866-7bfd-4aec-e722-d6b968cc4d0a" |
| 321 | + "outputId": "864677de-26f1-4b44-cbf5-d665de057d4f" |
322 | 322 | },
|
323 | 323 | "source": [
|
324 | 324 | "# Base Model\n",
|
|
349 | 349 | " <th>Date:</th> <td>Wed, 02 Sep 2020</td> <th> Prob (F-statistic):</th> <td>2.38e-320</td>\n",
|
350 | 350 | "</tr>\n",
|
351 | 351 | "<tr>\n",
|
352 |
| - " <th>Time:</th> <td>11:15:35</td> <th> Log-Likelihood: </th> <td> -1556.1</td> \n", |
| 352 | + " <th>Time:</th> <td>11:18:47</td> <th> Log-Likelihood: </th> <td> -1556.1</td> \n", |
353 | 353 | "</tr>\n",
|
354 | 354 | "<tr>\n",
|
355 | 355 | " <th>No. Observations:</th> <td> 506</td> <th> AIC: </th> <td> 3136.</td> \n",
|
|
429 | 429 | "Model: OLS Adj. R-squared (uncentered): 0.953\n",
|
430 | 430 | "Method: Least Squares F-statistic: 846.6\n",
|
431 | 431 | "Date: Wed, 02 Sep 2020 Prob (F-statistic): 2.38e-320\n",
|
432 |
| - "Time: 11:15:35 Log-Likelihood: -1556.1\n", |
| 432 | + "Time: 11:18:47 Log-Likelihood: -1556.1\n", |
433 | 433 | "No. Observations: 506 AIC: 3136.\n",
|
434 | 434 | "Df Residuals: 494 BIC: 3187.\n",
|
435 | 435 | "Df Model: 12 \n",
|
|
479 | 479 | "base_uri": "https://localhost:8080/",
|
480 | 480 | "height": 755
|
481 | 481 | },
|
482 |
| - "outputId": "a909a773-e369-40bf-edad-6c72f05aa72b" |
| 482 | + "outputId": "60b78f0e-9956-4d18-b995-b5a0882a5d58" |
483 | 483 | },
|
484 | 484 | "source": [
|
485 | 485 | "# Model Properties\n",
|
|
667 | 667 | "base_uri": "https://localhost:8080/",
|
668 | 668 | "height": 89
|
669 | 669 | },
|
670 |
| - "outputId": "1c03ab7c-c0fe-48f9-ab84-b87a2dd2dc55" |
| 670 | + "outputId": "76569343-32db-4b52-9b67-33305321afc4" |
671 | 671 | },
|
672 | 672 | "source": [
|
673 | 673 | "# Let test the Base Model\n",
|
|
703 | 703 | "base_uri": "https://localhost:8080/",
|
704 | 704 | "height": 688
|
705 | 705 | },
|
706 |
| - "outputId": "29556eda-af03-43fc-e4c9-577b78a47df1" |
| 706 | + "outputId": "d1d85ad3-f047-48d1-ebab-f923ec7febaa" |
707 | 707 | },
|
708 | 708 | "source": [
|
709 | 709 | "# Base Model\n",
|
|
731 | 731 | " <th>Date:</th> <td>Wed, 02 Sep 2020</td> <th> Prob (F-statistic):</th> <td>1.06e-223</td>\n",
|
732 | 732 | "</tr>\n",
|
733 | 733 | "<tr>\n",
|
734 |
| - " <th>Time:</th> <td>11:15:35</td> <th> Log-Likelihood: </th> <td> -1078.0</td> \n", |
| 734 | + " <th>Time:</th> <td>11:18:48</td> <th> Log-Likelihood: </th> <td> -1078.0</td> \n", |
735 | 735 | "</tr>\n",
|
736 | 736 | "<tr>\n",
|
737 | 737 | " <th>No. Observations:</th> <td> 354</td> <th> AIC: </th> <td> 2180.</td> \n",
|
|
811 | 811 | "Model: OLS Adj. R-squared (uncentered): 0.954\n",
|
812 | 812 | "Method: Least Squares F-statistic: 619.5\n",
|
813 | 813 | "Date: Wed, 02 Sep 2020 Prob (F-statistic): 1.06e-223\n",
|
814 |
| - "Time: 11:15:35 Log-Likelihood: -1078.0\n", |
| 814 | + "Time: 11:18:48 Log-Likelihood: -1078.0\n", |
815 | 815 | "No. Observations: 354 AIC: 2180.\n",
|
816 | 816 | "Df Residuals: 342 BIC: 2226.\n",
|
817 | 817 | "Df Model: 12 \n",
|
|
861 | 861 | "base_uri": "https://localhost:8080/",
|
862 | 862 | "height": 89
|
863 | 863 | },
|
864 |
| - "outputId": "14e5ec3d-d30a-4dde-f243-697d9b857e6b" |
| 864 | + "outputId": "7e5f7f5c-c2b6-4044-e3df-bfb5c09fe978" |
865 | 865 | },
|
866 | 866 | "source": [
|
867 | 867 | "# Base Model Test and Metrics\n",
|
|
910 | 910 | "base_uri": "https://localhost:8080/",
|
911 | 911 | "height": 668
|
912 | 912 | },
|
913 |
| - "outputId": "5948c239-5cc0-4c5a-d270-ae4b03535302" |
| 913 | + "outputId": "72050626-3b7d-484b-b626-c24febbd8ff6" |
914 | 914 | },
|
915 | 915 | "source": [
|
916 | 916 | "# Model 2, let do some data cleaning\n",
|
|
930 | 930 | "output_type": "execute_result",
|
931 | 931 | "data": {
|
932 | 932 | "text/plain": [
|
933 |
| - "<matplotlib.axes._subplots.AxesSubplot at 0x7fc1a2aacef0>" |
| 933 | + "<matplotlib.axes._subplots.AxesSubplot at 0x7fa4ec497eb8>" |
934 | 934 | ]
|
935 | 935 | },
|
936 | 936 | "metadata": {
|
|
962 | 962 | "base_uri": "https://localhost:8080/",
|
963 | 963 | "height": 421
|
964 | 964 | },
|
965 |
| - "outputId": "a5d701eb-ab8c-4b3f-e649-ce78d8d5099e" |
| 965 | + "outputId": "393561f7-f1eb-4b04-ac15-27e05e88db26" |
966 | 966 | },
|
967 | 967 | "source": [
|
968 | 968 | "# Filtering Highly Positive or Negative Correlated Values\n",
|
|
1228 | 1228 | "base_uri": "https://localhost:8080/",
|
1229 | 1229 | "height": 89
|
1230 | 1230 | },
|
1231 |
| - "outputId": "80b3e50b-abb0-4d11-c254-b1a7bb73e7ad" |
| 1231 | + "outputId": "58adb35d-61b0-4d33-d4f6-fffdfe530224" |
1232 | 1232 | },
|
1233 | 1233 | "source": [
|
1234 | 1234 | "# Find index of feature columns with correlation greater than 0.75\n",
|
|
1304 | 1304 | "base_uri": "https://localhost:8080/",
|
1305 | 1305 | "height": 517
|
1306 | 1306 | },
|
1307 |
| - "outputId": "a9a4751a-d915-4276-f53d-7fd6c6ff014e" |
| 1307 | + "outputId": "3be53026-e6b3-4f46-a6d6-ee74b7a8546e" |
1308 | 1308 | },
|
1309 | 1309 | "source": [
|
1310 | 1310 | "# Now check Outlier in the data\n",
|
|
1365 | 1365 | "base_uri": "https://localhost:8080/",
|
1366 | 1366 | "height": 535
|
1367 | 1367 | },
|
1368 |
| - "outputId": "efc04a52-9e44-4af5-ddfe-1161bfeedfe1" |
| 1368 | + "outputId": "cf78a8dc-8237-40c4-b64f-e817e009a684" |
1369 | 1369 | },
|
1370 | 1370 | "source": [
|
1371 | 1371 | "# ZN Column\n",
|
|
1403 | 1403 | "output_type": "execute_result",
|
1404 | 1404 | "data": {
|
1405 | 1405 | "text/plain": [
|
1406 |
| - "<matplotlib.axes._subplots.AxesSubplot at 0x7fc1a2aac780>" |
| 1406 | + "<matplotlib.axes._subplots.AxesSubplot at 0x7fa4e7320080>" |
1407 | 1407 | ]
|
1408 | 1408 | },
|
1409 | 1409 | "metadata": {
|
|
1435 | 1435 | "base_uri": "https://localhost:8080/",
|
1436 | 1436 | "height": 535
|
1437 | 1437 | },
|
1438 |
| - "outputId": "c9505075-8b9a-40d6-b26b-088afe6a838b" |
| 1438 | + "outputId": "49ff2c16-77f8-462b-9beb-6dd165edcb63" |
1439 | 1439 | },
|
1440 | 1440 | "source": [
|
1441 | 1441 | "# CRIM Column\n",
|
|
1472 | 1472 | "output_type": "execute_result",
|
1473 | 1473 | "data": {
|
1474 | 1474 | "text/plain": [
|
1475 |
| - "<matplotlib.axes._subplots.AxesSubplot at 0x7fc19f604fd0>" |
| 1475 | + "<matplotlib.axes._subplots.AxesSubplot at 0x7fa4e72acba8>" |
1476 | 1476 | ]
|
1477 | 1477 | },
|
1478 | 1478 | "metadata": {
|
|
1504 | 1504 | "base_uri": "https://localhost:8080/",
|
1505 | 1505 | "height": 517
|
1506 | 1506 | },
|
1507 |
| - "outputId": "912f850a-3176-47fb-e4fe-a889b62be5c3" |
| 1507 | + "outputId": "29fcab85-2354-4406-8497-89ee77dc4fe1" |
1508 | 1508 | },
|
1509 | 1509 | "source": [
|
1510 | 1510 | "# CHAS Column\n",
|
|
1563 | 1563 | "base_uri": "https://localhost:8080/",
|
1564 | 1564 | "height": 535
|
1565 | 1565 | },
|
1566 |
| - "outputId": "a2c2de83-cf0c-4871-c589-f2c180d05ac2" |
| 1566 | + "outputId": "46ae1502-1750-4dfd-d2f0-faca0788c0b5" |
1567 | 1567 | },
|
1568 | 1568 | "source": [
|
1569 | 1569 | "# AGE Column \n",
|
|
1600 | 1600 | "output_type": "execute_result",
|
1601 | 1601 | "data": {
|
1602 | 1602 | "text/plain": [
|
1603 |
| - "<matplotlib.axes._subplots.AxesSubplot at 0x7fc19d80a860>" |
| 1603 | + "<matplotlib.axes._subplots.AxesSubplot at 0x7fa4e71f9fd0>" |
1604 | 1604 | ]
|
1605 | 1605 | },
|
1606 | 1606 | "metadata": {
|
|
1632 | 1632 | "base_uri": "https://localhost:8080/",
|
1633 | 1633 | "height": 517
|
1634 | 1634 | },
|
1635 |
| - "outputId": "f2f76a7c-e752-4c57-e572-a5173ec04a56" |
| 1635 | + "outputId": "408fe42d-abb0-4361-b60e-10fd52f0b156" |
1636 | 1636 | },
|
1637 | 1637 | "source": [
|
1638 | 1638 | "# DIS Column\n",
|
|
1691 | 1691 | "base_uri": "https://localhost:8080/",
|
1692 | 1692 | "height": 535
|
1693 | 1693 | },
|
1694 |
| - "outputId": "34d2f9a5-fff0-4d19-914f-66d4e8994e2e" |
| 1694 | + "outputId": "051626d0-b0bc-40cd-d446-6bc8e53d4da5" |
1695 | 1695 | },
|
1696 | 1696 | "source": [
|
1697 | 1697 | "# RAD Column \n",
|
|
1728 | 1728 | "output_type": "execute_result",
|
1729 | 1729 | "data": {
|
1730 | 1730 | "text/plain": [
|
1731 |
| - "<matplotlib.axes._subplots.AxesSubplot at 0x7fc19d76a4a8>" |
| 1731 | + "<matplotlib.axes._subplots.AxesSubplot at 0x7fa4e71599e8>" |
1732 | 1732 | ]
|
1733 | 1733 | },
|
1734 | 1734 | "metadata": {
|
|
1760 | 1760 | "base_uri": "https://localhost:8080/",
|
1761 | 1761 | "height": 517
|
1762 | 1762 | },
|
1763 |
| - "outputId": "86733563-e5c5-4ec4-88ef-844ba5cec7a6" |
| 1763 | + "outputId": "24bc009c-15cd-4fbd-9b6d-3ba75ce51561" |
1764 | 1764 | },
|
1765 | 1765 | "source": [
|
1766 | 1766 | "# B Column\n",
|
|
1819 | 1819 | "base_uri": "https://localhost:8080/",
|
1820 | 1820 | "height": 266
|
1821 | 1821 | },
|
1822 |
| - "outputId": "5c5e6409-3cc3-4108-d3f1-0561b3f19b48" |
| 1822 | + "outputId": "21451197-0d54-4fe3-aab1-7b42ef5e6cd3" |
1823 | 1823 | },
|
1824 | 1824 | "source": [
|
1825 | 1825 | "# Lets check Skewness in the Data\n",
|
|
1919 | 1919 | "base_uri": "https://localhost:8080/",
|
1920 | 1920 | "height": 143
|
1921 | 1921 | },
|
1922 |
| - "outputId": "b271b85d-1eac-4be1-93f3-c6b71cb21f24" |
| 1922 | + "outputId": "45014745-cddc-4761-b371-47b455fe4a58" |
1923 | 1923 | },
|
1924 | 1924 | "source": [
|
1925 | 1925 | "# Lets remove Skewness one by one\n",
|
|
1974 | 1974 | "base_uri": "https://localhost:8080/",
|
1975 | 1975 | "height": 181
|
1976 | 1976 | },
|
1977 |
| - "outputId": "d788a1e6-5981-458e-8502-1c5b7c68be15" |
| 1977 | + "outputId": "81cf6594-e560-4da9-cbc8-916dbb421812" |
1978 | 1978 | },
|
1979 | 1979 | "source": [
|
1980 | 1980 | "# ZN Variable\n",
|
|
2030 | 2030 | "base_uri": "https://localhost:8080/",
|
2031 | 2031 | "height": 143
|
2032 | 2032 | },
|
2033 |
| - "outputId": "361702f4-29b1-431f-947a-c3ccadcc5053" |
| 2033 | + "outputId": "bbc21ebe-22e3-4f62-d8cf-f3f2a18968cd" |
2034 | 2034 | },
|
2035 | 2035 | "source": [
|
2036 | 2036 | "# B Variable\n",
|
|
2084 | 2084 | "base_uri": "https://localhost:8080/",
|
2085 | 2085 | "height": 204
|
2086 | 2086 | },
|
2087 |
| - "outputId": "0e3a12f1-f55e-4c9e-c511-0dd0aac61f36" |
| 2087 | + "outputId": "0d27725b-4371-4ab9-ea94-617c90ef6169" |
2088 | 2088 | },
|
2089 | 2089 | "source": [
|
2090 | 2090 | "# Removing variables (raw variables) which are transformed and keep one set also (no removing)\n",
|
|
2210 | 2210 | "base_uri": "https://localhost:8080/",
|
2211 | 2211 | "height": 549
|
2212 | 2212 | },
|
2213 |
| - "outputId": "9c382304-e0b3-4d83-9a65-3dffe61fed16" |
| 2213 | + "outputId": "ed95ef32-583d-4138-caa0-32296fba66d7" |
2214 | 2214 | },
|
2215 | 2215 | "source": [
|
2216 | 2216 | "# Now lets train the model\n",
|
|
2253 | 2253 | " <th>Date:</th> <td>Wed, 02 Sep 2020</td> <th> Prob (F-statistic):</th> <td>8.87e-181</td>\n",
|
2254 | 2254 | "</tr>\n",
|
2255 | 2255 | "<tr>\n",
|
2256 |
| - " <th>Time:</th> <td>11:15:38</td> <th> Log-Likelihood: </th> <td> -1196.2</td> \n", |
| 2256 | + " <th>Time:</th> <td>11:18:51</td> <th> Log-Likelihood: </th> <td> -1196.2</td> \n", |
2257 | 2257 | "</tr>\n",
|
2258 | 2258 | "<tr>\n",
|
2259 | 2259 | " <th>No. Observations:</th> <td> 354</td> <th> AIC: </th> <td> 2406.</td> \n",
|
|
2318 | 2318 | "Model: OLS Adj. R-squared (uncentered): 0.912\n",
|
2319 | 2319 | "Method: Least Squares F-statistic: 528.2\n",
|
2320 | 2320 | "Date: Wed, 02 Sep 2020 Prob (F-statistic): 8.87e-181\n",
|
2321 |
| - "Time: 11:15:38 Log-Likelihood: -1196.2\n", |
| 2321 | + "Time: 11:18:51 Log-Likelihood: -1196.2\n", |
2322 | 2322 | "No. Observations: 354 AIC: 2406.\n",
|
2323 | 2323 | "Df Residuals: 347 BIC: 2434.\n",
|
2324 | 2324 | "Df Model: 7 \n",
|
|
2361 | 2361 | "base_uri": "https://localhost:8080/",
|
2362 | 2362 | "height": 89
|
2363 | 2363 | },
|
2364 |
| - "outputId": "7722cf1c-6236-4b70-867e-46acdff4de25" |
| 2364 | + "outputId": "62c653ac-c601-4e2c-8350-b787e4c009d3" |
2365 | 2365 | },
|
2366 | 2366 | "source": [
|
2367 | 2367 | "# Prediction of Model 2 \n",
|
|
2397 | 2397 | "base_uri": "https://localhost:8080/",
|
2398 | 2398 | "height": 507
|
2399 | 2399 | },
|
2400 |
| - "outputId": "4b45c80b-7cc1-4260-8a9f-8b51107ea8f9" |
| 2400 | + "outputId": "ab66b478-3e63-406d-bcd5-0b958375ff33" |
2401 | 2401 | },
|
2402 | 2402 | "source": [
|
2403 | 2403 | "# Model 3\n",
|
|
2443 | 2443 | " <th>Date:</th> <td>Wed, 02 Sep 2020</td> <th> Prob (F-statistic):</th> <td>5.60e-182</td>\n",
|
2444 | 2444 | "</tr>\n",
|
2445 | 2445 | "<tr>\n",
|
2446 |
| - " <th>Time:</th> <td>11:15:38</td> <th> Log-Likelihood: </th> <td> -1200.1</td> \n", |
| 2446 | + " <th>Time:</th> <td>11:18:51</td> <th> Log-Likelihood: </th> <td> -1200.1</td> \n", |
2447 | 2447 | "</tr>\n",
|
2448 | 2448 | "<tr>\n",
|
2449 | 2449 | " <th>No. Observations:</th> <td> 354</td> <th> AIC: </th> <td> 2410.</td> \n",
|
|
2502 | 2502 | "Model: OLS Adj. R-squared (uncentered): 0.911\n",
|
2503 | 2503 | "Method: Least Squares F-statistic: 726.0\n",
|
2504 | 2504 | "Date: Wed, 02 Sep 2020 Prob (F-statistic): 5.60e-182\n",
|
2505 |
| - "Time: 11:15:38 Log-Likelihood: -1200.1\n", |
| 2505 | + "Time: 11:18:51 Log-Likelihood: -1200.1\n", |
2506 | 2506 | "No. Observations: 354 AIC: 2410.\n",
|
2507 | 2507 | "Df Residuals: 349 BIC: 2430.\n",
|
2508 | 2508 | "Df Model: 5 \n",
|
|
2543 | 2543 | "base_uri": "https://localhost:8080/",
|
2544 | 2544 | "height": 89
|
2545 | 2545 | },
|
2546 |
| - "outputId": "d230f68a-87e8-40ac-e5d8-4a2e487d5802" |
| 2546 | + "outputId": "92f764cc-3387-4a8f-c22d-dea8f934f75e" |
2547 | 2547 | },
|
2548 | 2548 | "source": [
|
2549 | 2549 | "# Prediction of Model 3\n",
|
|
0 commit comments