KamiLimu1 · root458 · Mar 6, 2023
diff --git a/Decision Trees.ipynb b/Decision Trees.ipynb
diff --git a/Random Forests.ipynb b/Random Forests.ipynb
@@ -0,0 +1,306 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# <font color='#31394d'> Random Forests Practice Exercise </font>"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In this exercise we're going to use the famous <a href=\"https://archive.ics.uci.edu/ml/datasets/iris\" target=\"_blank\">Iris dataset</a> to determine the species of iris using a random forest classifier. Begin by importing the necessary libraries and loading the Iris dataset from sklearn."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns\n",
+    "sns.set(rc={'figure.figsize':(6,6)}) \n",
+    "import warnings\n",
+    "warnings.simplefilter(\"ignore\")\n",
+    "\n",
+    "%matplotlib inline\n",
+    "\n",
+    "from sklearn import datasets\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "from sklearn.model_selection import cross_val_score\n",
+    "from sklearn.model_selection import cross_validate\n",
+    "# `SCORERS` was deprecated in scikit-learn 1.1 and removed in 1.3. Fall back to\n",
+    "# `get_scorer_names` (the replacement for listing valid `scoring=` strings) so\n",
+    "# this cell still runs after Restart & Run All on a current environment.\n",
+    "try:\n",
+    "    from sklearn.metrics import SCORERS\n",
+    "except ImportError:\n",
+    "    from sklearn.metrics import get_scorer_names"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>sepal length (cm)</th>\n",
+       "      <th>sepal width (cm)</th>\n",
+       "      <th>petal length (cm)</th>\n",
+       "      <th>petal width (cm)</th>\n",
+       "      <th>target</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>5.1</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>1.4</td>\n",
+       "      <td>0.2</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>4.9</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>1.4</td>\n",
+       "      <td>0.2</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>4.7</td>\n",
+       "      <td>3.2</td>\n",
+       "      <td>1.3</td>\n",
+       "      <td>0.2</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4.6</td>\n",
+       "      <td>3.1</td>\n",
+       "      <td>1.5</td>\n",
+       "      <td>0.2</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>5.0</td>\n",
+       "      <td>3.6</td>\n",
+       "      <td>1.4</td>\n",
+       "      <td>0.2</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \\\n",
+       "0                5.1               3.5                1.4               0.2   \n",
+       "1                4.9               3.0                1.4               0.2   \n",
+       "2                4.7               3.2                1.3               0.2   \n",
+       "3                4.6               3.1                1.5               0.2   \n",
+       "4                5.0               3.6                1.4               0.2   \n",
+       "\n",
+       "   target  \n",
+       "0       0  \n",
+       "1       0  \n",
+       "2       0  \n",
+       "3       0  \n",
+       "4       0  "
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data = datasets.load_iris()\n",
+    "\n",
+    "# Tabular view used only for inspection: feature columns plus the integer class label\n",
+    "iris = pd.DataFrame(data.data, columns=data.feature_names).assign(target=data.target)\n",
+    "iris.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "🚀 <font color='#D9C4B1'>Exercise: </font> Build a random forest classifier, train and evaluate it using cross-validation. You can use the functions below."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>0</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>fit_time</th>\n",
+       "      <td>0.504849</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>score_time</th>\n",
+       "      <td>0.031602</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>test_score</th>\n",
+       "      <td>0.960000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>train_score</th>\n",
+       "      <td>1.000000</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                    0\n",
+       "fit_time     0.504849\n",
+       "score_time   0.031602\n",
+       "test_score   0.960000\n",
+       "train_score  1.000000"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Feature matrix and class labels taken from the loaded Iris bunch\n",
+    "X, y = data.data, data.target\n",
+    "\n",
+    "# Baseline forest: 50 trees with a fixed random_state\n",
+    "rfc = RandomForestClassifier(\n",
+    "    n_estimators=50,\n",
+    "    random_state=42,\n",
+    ")\n",
+    "\n",
+    "def evaluate_model(estimator, scoring='accuracy', cv=10):\n",
+    "    \"\"\"Cross-validate `estimator` and return the mean of every reported metric.\n",
+    "\n",
+    "    Uses the notebook-level `X` and `y`, so the cell defining them must have\n",
+    "    been run first.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    estimator : the unfitted model passed to `cross_validate`.\n",
+    "    scoring : forwarded to `cross_validate`; defaults to 'accuracy'.\n",
+    "    cv : forwarded to `cross_validate`; defaults to 10.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    dict mapping each `cross_validate` result key (fit/score times, test and\n",
+    "    train scores) to the mean of its absolute per-fold values.\n",
+    "    \"\"\"\n",
+    "    cv_results = cross_validate(estimator, X, y, scoring=scoring, n_jobs=-1, cv=cv, return_train_score=True)\n",
+    "    # abs() before averaging keeps every reported value non-negative\n",
+    "    # (presumably for negated `neg_*` scorers; it is a no-op for accuracy).\n",
+    "    return pd.DataFrame(cv_results).abs().mean().to_dict()\n",
+    "\n",
+    "# Evaluate\n",
+    "results = evaluate_model(rfc)\n",
+    "\n",
+    "def display_results(results):\n",
+    "    results_df  = pd.DataFrame(results, index=[0]).T\n",
+    "    results_cols = results_df.columns\n",
+    "    for col in results_df:\n",
+    "        results_df[col] = results_df[col].apply(np.mean)\n",
+    "    return results_df\n",
+    "\n",
+    "# Display results\n",
+    "display_results(results)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "🚀 <font color='#D9C4B1'>Exercise: </font> Adjust the hyperparameters (e.g. number of trees). Does model performance decrease or increase? "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Best hyperparameters:  {'n_estimators': 200}\n",
+      "Best accuracy score:  0.9666666666666666\n"
+     ]
+    }
+   ],
+   "source": [
+    "from sklearn.model_selection import GridSearchCV\n",
+    "\n",
+    "# Candidate forest sizes to compare\n",
+    "param_grid = {'n_estimators': [50, 100, 200, 300, 400, 500]}\n",
+    "\n",
+    "# Fresh forest; the search supplies n_estimators for each candidate\n",
+    "rfc = RandomForestClassifier(random_state=42)\n",
+    "\n",
+    "# Build the 10-fold, accuracy-scored search and fit it on the full data in one\n",
+    "# step (fit() returns the fitted search object itself)\n",
+    "grid_search = GridSearchCV(\n",
+    "    rfc,\n",
+    "    param_grid=param_grid,\n",
+    "    scoring='accuracy',\n",
+    "    n_jobs=-1,\n",
+    "    cv=10,\n",
+    ").fit(X, y)\n",
+    "\n",
+    "# Report the winning setting and its mean cross-validated accuracy\n",
+    "print(\"Best hyperparameters: \", grid_search.best_params_)\n",
+    "print(\"Best accuracy score: \", grid_search.best_score_)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}