|
6 | 6 | "name": "07 Simple and Multiple Regression.ipynb",
|
7 | 7 | "provenance": [],
|
8 | 8 | "collapsed_sections": [],
|
9 |
| - "authorship_tag": "ABX9TyNhGP0GF9c1lVdgMRwuSiRi", |
| 9 | + "authorship_tag": "ABX9TyM4ImDR63Bml//QYzj8HH0p", |
10 | 10 | "include_colab_link": true
|
11 | 11 | },
|
12 | 12 | "kernelspec": {
|
|
2881 | 2881 | }
|
2882 | 2882 | ]
|
2883 | 2883 | },
|
| 2884 | + { |
| 2885 | + "cell_type": "code", |
| 2886 | + "metadata": { |
| 2887 | + "id": "NVhGdOXIsOGa", |
| 2888 | + "colab_type": "code", |
| 2889 | + "colab": { |
| 2890 | + "base_uri": "https://localhost:8080/", |
| 2891 | + "height": 89 |
| 2892 | + }, |
| 2893 | + "outputId": "b0996ed2-74da-4cf0-ca23-6ea1a0920589" |
| 2894 | + }, |
| 2895 | + "source": [ |
| 2896 | + "base_model_properties_2" |
| 2897 | + ], |
| 2898 | + "execution_count": 51, |
| 2899 | + "outputs": [ |
| 2900 | + { |
| 2901 | + "output_type": "execute_result", |
| 2902 | + "data": { |
| 2903 | + "text/plain": [ |
| 2904 | + "[('Jarque-Bera', 472.99120337658803),\n", |
| 2905 | + " ('Chi^2 two-tail prob.', 1.9555331946581146e-103),\n", |
| 2906 | + " ('Skew', 1.6859804491070154),\n", |
| 2907 | + " ('Kurtosis', 7.5494043489051235)]" |
| 2908 | + ] |
| 2909 | + }, |
| 2910 | + "metadata": { |
| 2911 | + "tags": [] |
| 2912 | + }, |
| 2913 | + "execution_count": 51 |
| 2914 | + } |
| 2915 | + ] |
| 2916 | + }, |
2884 | 2917 | {
|
2885 | 2918 | "cell_type": "code",
|
2886 | 2919 | "metadata": {
|
2887 | 2920 | "id": "Hb228jU0nmJE",
|
2888 | 2921 | "colab_type": "code",
|
| 2922 | + "colab": { |
| 2923 | + "base_uri": "https://localhost:8080/", |
| 2924 | + "height": 1000 |
| 2925 | + }, |
| 2926 | + "outputId": "13f57e62-5df8-4630-edb2-855fdae0977e" |
| 2927 | + }, |
| 2928 | + "source": [ |
| 2929 | + "# Overall Comparison\n", |
| 2930 | + "print(\"ALL MODEL COMPARISON\")\n", |
| 2931 | + "\n", |
| 2932 | + "# Normality of the residuals\n", |
| 2933 | + "# Jarque-Bera test:\n", |
| 2934 | + "print(\"Normality of the residuals\")\n", |
| 2935 | + "print(\"Jarque-Bera test : \")\n", |
| 2936 | + "print(\"JB, ideal is close to 0\")\n", |
| 2937 | + "print(\"Probability(JB), less than 0.05 suggests the residuals are not normally distributed\")\n", |
| 2938 | + "print(\"Base Model\")\n", |
| 2939 | + "print(base_model_properties_1[0])\n", |
| 2940 | + "print(base_model_properties_1[1])\n", |
| 2941 | + "print(\"Model 1\")\n", |
| 2942 | + "print(model_properties_1[0])\n", |
| 2943 | + "print(model_properties_1[1])\n", |
| 2944 | + "print(\"Model 2\")\n", |
| 2945 | + "print(base_model_properties_2[0])\n", |
| 2946 | + "print(base_model_properties_2[1])\n", |
| 2947 | + "print(\"Model 3\")\n", |
| 2948 | + "print(base_model_properties_3[0])\n", |
| 2949 | + "print(base_model_properties_3[1])\n", |
| 2950 | + "\n", |
| 2951 | + "# Skewness and Kurtosis\n", |
| 2952 | + "print(\"\\n\")\n", |
| 2953 | + "print(\"Skewness and Kurtosis of Residual\")\n", |
| 2954 | + "print(\"Skewness, ideal is -1 to 1\")\n", |
| 2955 | + "print(\"Kurtosis, ideal value is 3\")\n", |
| 2956 | + "\n", |
| 2957 | + "# Omni test:\n", |
| 2958 | + "print(\"\\n\")\n", |
| 2959 | + "print(\"Normality of the Residual also\")\n", |
| 2960 | + "print(\"Omni Test : \")\n", |
| 2961 | + "print(\"Omni, ideal is close to 0\")\n", |
| 2962 | + "print(\"Probability(Omni), less than 0.05 suggests the residuals are not normally distributed\")\n", |
| 2963 | + "print(\"Base Model\")\n", |
| 2964 | + "name = ['Chi^2', 'Two-tail probability']\n", |
| 2965 | + "test = sms.omni_normtest(base_model.resid)\n", |
| 2966 | + "print(pd.DataFrame(lzip(name, test),columns = ['Test','Values']).round(2))\n", |
| 2967 | + "print(\"Model 1\")\n", |
| 2968 | + "name = ['Chi^2', 'Two-tail probability']\n", |
| 2969 | + "test = sms.omni_normtest(Model_1.resid)\n", |
| 2970 | + "print(pd.DataFrame(lzip(name, test),columns = ['Test','Values']).round(2))\n", |
| 2971 | + "print(\"Model 2\")\n", |
| 2972 | + "name = ['Chi^2', 'Two-tail probability']\n", |
| 2973 | + "test = sms.omni_normtest(Model_2.resid)\n", |
| 2974 | + "print(pd.DataFrame(lzip(name, test),columns = ['Test','Values']).round(2))\n", |
| 2975 | + "print(\"Model 3\")\n", |
| 2976 | + "name = ['Chi^2', 'Two-tail probability']\n", |
| 2977 | + "test = sms.omni_normtest(Model_3.resid)\n", |
| 2978 | + "print(pd.DataFrame(lzip(name, test),columns = ['Test','Values']).round(2))\n", |
| 2979 | + "\n", |
| 2980 | + "# Multicollinearity\n", |
| 2981 | + "print(\"\\n\")\n", |
| 2982 | + "print(\"Multicollinearity\")\n", |
| 2983 | + "print(\"Ideal Condition Number will be less than 30\")\n", |
| 2984 | + "print(\"Base Model, Condition number:\",np.linalg.cond(base_model.model.exog).round(2))\n", |
| 2985 | + "print(\"Model 1, Condition number:\",np.linalg.cond(Model_1.model.exog).round(2))\n", |
| 2986 | + "print(\"Model 2, Condition number:\",np.linalg.cond(Model_2.model.exog).round(2))\n", |
| 2987 | + "print(\"Model 3, Condition number:\",np.linalg.cond(Model_3.model.exog).round(2))\n", |
| 2988 | + "\n", |
| 2989 | + "# Adjusted R Square\n", |
| 2990 | + "print(\"\\n\")\n", |
| 2991 | + "print(\"Model Performance\")\n", |
| 2992 | + "print(\"Adjusted R Square, ideal is close to 1\")\n", |
| 2993 | + "print(\"Base Model, Adjusted R Square : \",base_model.rsquared_adj.round(2))\n", |
| 2994 | + "print(\"Model 1, Adjusted R Square : \",Model_1.rsquared_adj.round(2))\n", |
| 2995 | + "print(\"Model 2, Adjusted R Square : \",Model_2.rsquared_adj.round(2))\n", |
| 2996 | + "print(\"Model 3, Adjusted R Square : \",Model_3.rsquared_adj.round(2))\n", |
| 2997 | + "\n", |
| 2998 | + "# F Statistics\n", |
| 2999 | + "print(\"\\n\")\n", |
| 3000 | + "print(\"Significance of Independent Variables\")\n", |
| 3001 | + "print(\"Probability(F Stat), less than 0.05 suggests independent variables are important\")\n", |
| 3002 | + "print(\"Base Model, Probability of F Statistics : \",base_model.f_pvalue.round(2))\n", |
| 3003 | + "print(\"Model 1, Probability of F Statistics : \",Model_1.f_pvalue.round(2))\n", |
| 3004 | + "print(\"Model 2, Probability of F Statistics : \",Model_2.f_pvalue.round(2))\n", |
| 3005 | + "print(\"Model 3, Probability of F Statistics : \",Model_3.f_pvalue.round(2))\n", |
| 3006 | + "\n", |
| 3007 | + "# Durbin Watson Test\n", |
| 3008 | + "print(\"\\n\")\n", |
| 3009 | + "print(\"Autocorrelation of Errors\")\n", |
| 3010 | + "print(\"Durbin Watson, ideal is close to 2 (roughly 1.5 to 2.5)\")\n", |
| 3011 | + "print(\"Base Model, Durbin Watson Value : 0.78\")\n", |
| 3012 | + "print(\"Model 1, Durbin Watson Value : 0.78\")\n", |
| 3013 | + "print(\"Model 2, Durbin Watson Value : 2.1\")\n", |
| 3014 | + "print(\"Model 3, Durbin Watson Value : 2.07\")" |
| 3015 | + ], |
| 3016 | + "execution_count": 56, |
| 3017 | + "outputs": [ |
| 3018 | + { |
| 3019 | + "output_type": "stream", |
| 3020 | + "text": [ |
| 3021 | + "ALL MODEL COMPARISION\n", |
| 3022 | + "Normality of the residuals\n", |
| 3023 | + "Jarque-Bera test : \n", |
| 3024 | + "JB, ideal is close to 0\n", |
| 3025 | + "Probability(JB), less than 0.05\n", |
| 3026 | + "Base Model\n", |
| 3027 | + "('Jarque-Bera', 3084.310178901069)\n", |
| 3028 | + "('Chi^2 two-tail prob.', 0.0)\n", |
| 3029 | + "Model 1\n", |
| 3030 | + "('Jarque-Bera', 2125.548489196517)\n", |
| 3031 | + "('Chi^2 two-tail prob.', 0.0)\n", |
| 3032 | + "Model 2\n", |
| 3033 | + "('Jarque-Bera', 472.99120337658803)\n", |
| 3034 | + "('Chi^2 two-tail prob.', 1.9555331946581146e-103)\n", |
| 3035 | + "Model 3\n", |
| 3036 | + "('Jarque-Bera', 452.9473484300142)\n", |
| 3037 | + "('Chi^2 two-tail prob.', 4.402840898839639e-99)\n", |
| 3038 | + "\n", |
| 3039 | + "\n", |
| 3040 | + "Skewness and Kurtosis of Residual\n", |
| 3041 | + "Skewness, ideal is -1 to 1\n", |
| 3042 | + "Kurtosis, ideal value is 3\n", |
| 3043 | + "\n", |
| 3044 | + "\n", |
| 3045 | + "Normality of the Residual also\n", |
| 3046 | + "Omni Test : \n", |
| 3047 | + "Omni, ideal is close to 0\n", |
| 3048 | + "Probability(Omni), less than 0.05\n", |
| 3049 | + "Base Model\n", |
| 3050 | + " Test Values\n", |
| 3051 | + "0 Chi^2 277.01\n", |
| 3052 | + "1 Two-tail probability 0.00\n", |
| 3053 | + "Model 1\n", |
| 3054 | + " Test Values\n", |
| 3055 | + "0 Chi^2 195.55\n", |
| 3056 | + "1 Two-tail probability 0.00\n", |
| 3057 | + "Model 2\n", |
| 3058 | + " Test Values\n", |
| 3059 | + "0 Chi^2 134.24\n", |
| 3060 | + "1 Two-tail probability 0.00\n", |
| 3061 | + "Model 3\n", |
| 3062 | + " Test Values\n", |
| 3063 | + "0 Chi^2 128.77\n", |
| 3064 | + "1 Two-tail probability 0.00\n", |
| 3065 | + "\n", |
| 3066 | + "\n", |
| 3067 | + "Multicollinearity\n", |
| 3068 | + "Ideal Condition Number will be less than 30\n", |
| 3069 | + "Base Model, Condition number: 8125.62\n", |
| 3070 | + "Model 1, Condition number: 8118.28\n", |
| 3071 | + "Model 2, Condition number: 103.55\n", |
| 3072 | + "Model 3, Condition number: 89.45\n", |
| 3073 | + "\n", |
| 3074 | + "\n", |
| 3075 | + "Model Performance\n", |
| 3076 | + "Adjusted R Square, ideal is close to 1\n", |
| 3077 | + "Base Model, Adjusted R Square : 0.95\n", |
| 3078 | + "Model 1, Adjusted R Square : 0.95\n", |
| 3079 | + "Model 2, Adjusted R Square : 0.91\n", |
| 3080 | + "Model 3, Adjusted R Square : 0.91\n", |
| 3081 | + "\n", |
| 3082 | + "\n", |
| 3083 | + "Significance of Independent Variables\n", |
| 3084 | + "Probability(F Stat), less than 0.05 suggests independent variables are important\n", |
| 3085 | + "Base Model, Probability of F Statistics : 0.0\n", |
| 3086 | + "Model 1, Probability of F Statistics : 0.0\n", |
| 3087 | + "Model 2, Probability of F Statistics : 0.0\n", |
| 3088 | + "Model 3, Probability of F Statistics : 0.0\n", |
| 3089 | + "\n", |
| 3090 | + "\n", |
| 3091 | + "Homoscedasticity of Error\n", |
| 3092 | + "Durbin Watson, ideal is between 1 to 2\n", |
| 3093 | + "Base Model, Durbin Watson Value : 0.78\n", |
| 3094 | + "Model 1, Durbin Watson Value : 0.78\n", |
| 3095 | + "Model 2, Durbin Watson Value : 2.1\n", |
| 3096 | + "Model 3, Durbin Watson Value : 2.07\n" |
| 3097 | + ], |
| 3098 | + "name": "stdout" |
| 3099 | + } |
| 3100 | + ] |
| 3101 | + }, |
| 3102 | + { |
| 3103 | + "cell_type": "code", |
| 3104 | + "metadata": { |
| 3105 | + "id": "_cnBy2F8rhjS", |
| 3106 | + "colab_type": "code", |
| 3107 | + "colab": { |
| 3108 | + "base_uri": "https://localhost:8080/", |
| 3109 | + "height": 35 |
| 3110 | + }, |
| 3111 | + "outputId": "93662ec1-8635-4a90-b678-1c075b6af894" |
| 3112 | + }, |
| 3113 | + "source": [ |
| 3114 | + "base_model_properties_1[0]" |
| 3115 | + ], |
| 3116 | + "execution_count": 50, |
| 3117 | + "outputs": [ |
| 3118 | + { |
| 3119 | + "output_type": "execute_result", |
| 3120 | + "data": { |
| 3121 | + "text/plain": [ |
| 3122 | + "('Jarque-Bera', 3084.310178901069)" |
| 3123 | + ] |
| 3124 | + }, |
| 3125 | + "metadata": { |
| 3126 | + "tags": [] |
| 3127 | + }, |
| 3128 | + "execution_count": 50 |
| 3129 | + } |
| 3130 | + ] |
| 3131 | + }, |
| 3132 | + { |
| 3133 | + "cell_type": "code", |
| 3134 | + "metadata": { |
| 3135 | + "id": "6UmVmM89r7Ro", |
| 3136 | + "colab_type": "code", |
2889 | 3137 | "colab": {}
|
2890 | 3138 | },
|
2891 | 3139 | "source": [
|
|
0 commit comments