Merge pull request #30 from kitrak-rev/main

diffrxction · web-flow · commit b72b9460d9e1 · 2022-10-13T18:50:59.000+05:30
Added Root Mean Square Error code and readme
diff --git a/MLSelfImplementedAlgos/Error Metrics/Root Mean Squared Error/DocumentationRMSE.md b/MLSelfImplementedAlgos/Error Metrics/Root Mean Squared Error/DocumentationRMSE.md
@@ -0,0 +1,13 @@
+# Root Mean Squared Error
+
+The root mean squared error (RMSE) tells you how close a regression line is to a set of points. It does this by taking the distances from the points to the regression line (these distances are the “errors”), squaring them, averaging the squares, and taking the square root of that average. The squaring is necessary to remove any negative signs. It also gives more weight to larger differences. Taking the square root keeps the final value from growing very large and expresses the error in the same units as the target. It’s called the root [mean](https://www.statisticshowto.com/mean/) squared error because you’re finding the root of the average of a set of squared errors. The lower the RMSE, the better the forecast.
+
+![image](https://www.gstatic.com/education/formulas2/472522532/en/root_mean_square_deviation.svg)
+
+The calculations for the root mean squared error are similar to the standard deviation. To find the RMSE, take the observed value, subtract the predicted value, and square that difference. Repeat that for all observations. Then, sum all of those squared values and divide by the number of observations. Finally, take the square root of the value obtained in the previous step.
+
+For example, in regression the root mean squared error represents the square root of the average squared residual.
+
+![image](https://user-images.githubusercontent.com/78155475/194711594-67ecd6cb-d9f9-42dc-b47f-3b154d4aff2d.png)
+
+As the data points fall closer to the regression line, the model has less error, decreasing the RMSE. A model with less error produces more precise predictions.
diff --git a/MLSelfImplementedAlgos/Error Metrics/Root Mean Squared Error/Root Mean Squared Error.ipynb b/MLSelfImplementedAlgos/Error Metrics/Root Mean Squared Error/Root Mean Squared Error.ipynb
@@ -0,0 +1,163 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "collapsed_sections": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "Uvsgr8sQd12g",
+        "outputId": "461cad3e-f1b9-4e1b-f5f4-8b0eacb92914"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "X-shape:  (442, 10) Y-shape:  (442,)\n",
+            "        age       sex       bmi        bp        s1        s2        s3  \\\n",
+            "0  0.038076  0.050680  0.061696  0.021872 -0.044223 -0.034821 -0.043401   \n",
+            "1 -0.001882 -0.044642 -0.051474 -0.026328 -0.008449 -0.019163  0.074412   \n",
+            "2  0.085299  0.050680  0.044451 -0.005671 -0.045599 -0.034194 -0.032356   \n",
+            "3 -0.089063 -0.044642 -0.011595 -0.036656  0.012191  0.024991 -0.036038   \n",
+            "4  0.005383 -0.044642 -0.036385  0.021872  0.003935  0.015596  0.008142   \n",
+            "\n",
+            "         s4        s5        s6  \n",
+            "0 -0.002592  0.019908 -0.017646  \n",
+            "1 -0.039493 -0.068330 -0.092204  \n",
+            "2 -0.002592  0.002864 -0.025930  \n",
+            "3  0.034309  0.022692 -0.009362  \n",
+            "4 -0.002592 -0.031991 -0.046641  \n",
+            "0    151.0\n",
+            "1     75.0\n",
+            "2    141.0\n",
+            "3    206.0\n",
+            "4    135.0\n",
+            "Name: target, dtype: float64\n"
+          ]
+        }
+      ],
+      "source": [
+        "import pandas as pd\n",
+        "import numpy as np\n",
+        "from sklearn.model_selection import train_test_split\n",
+        "from sklearn import datasets\n",
+        "from sklearn.linear_model import LinearRegression\n",
+        "# Load the diabetes data as a feature DataFrame X and a target Series y, then preview both.\n",
+        "X, y = datasets.load_diabetes(return_X_y=True, as_frame=True)\n",
+        "lr = LinearRegression()\n",
+        "print(\"X-shape: \", X.shape, \"Y-shape: \", y.shape)\n",
+        "print(X.head(), y.head(), sep=\"\\n\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# About the data"
+      ],
+      "metadata": {
+        "id": "ODGG7Yxw5nLw"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "The dataset above has 10 feature columns (age, sex, bmi, bp, and s1–s6) describing different aspects of diabetes; the target is a numerical measure of the progression of the disease."
+      ],
+      "metadata": {
+        "id": "whGXfcci5vpI"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Hold out 20% of the rows for testing; random_state=42 keeps the split reproducible.\n",
+        "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
+        "y_pred = lr.fit(X_train, y_train).predict(X_test)"
+      ],
+      "metadata": {
+        "id": "fHPyY8Pq3dGl"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "**Root Mean Squared Error**"
+      ],
+      "metadata": {
+        "id": "PTKQ59QO3hzB"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "RMSE = np.sqrt(np.square(y_test - y_pred).mean())  # root of the mean of the squared residuals\n",
+        "print(RMSE)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "cL09sEN93lqv",
+        "outputId": "cdc9e846-d327-4119-f0f4-e0406a7d31d1"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "53.853256984914395\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Cross-check with scikit-learn: take the root of the MSE ourselves, because `squared=False` was removed in scikit-learn 1.6.\n",
+        "from sklearn.metrics import mean_squared_error\n",
+        "print(np.sqrt(mean_squared_error(y_test, y_pred)))\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "Q60ANYm13zKt",
+        "outputId": "32177f76-f79f-443f-aea7-bd875ca2a60f"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "53.853256984914395\n"
+          ]
+        }
+      ]
+    }
+  ]
+}