-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
7c8087c
commit d888826
Showing
10 changed files
with
219,640 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,345 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Assignment 2\n", | ||
"Classify the email using the binary classification method. Email Spam detection has two states:\n", | ||
"a) Normal State – Not Spam,\n", | ||
"b) Abnormal State – Spam.\n", | ||
"Use K-Nearest Neighbors and Support Vector Machine for classification.\n", | ||
"Analyze their performance.\n", | ||
"Dataset link: The emails.csv dataset on the Kaggle https://www.kaggle.com/datasets/balaka18/email-spam-classification-dataset-csv" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import pandas as pd\n", | ||
"import numpy as np\n", | ||
"from sklearn.model_selection import train_test_split\n", | ||
"from sklearn.svm import SVC\n", | ||
"from sklearn.metrics import accuracy_score\n", | ||
"from sklearn.neighbors import KNeighborsClassifier" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/html": [ | ||
"<div>\n", | ||
"<style scoped>\n", | ||
" .dataframe tbody tr th:only-of-type {\n", | ||
" vertical-align: middle;\n", | ||
" }\n", | ||
"\n", | ||
" .dataframe tbody tr th {\n", | ||
" vertical-align: top;\n", | ||
" }\n", | ||
"\n", | ||
" .dataframe thead th {\n", | ||
" text-align: right;\n", | ||
" }\n", | ||
"</style>\n", | ||
"<table border=\"1\" class=\"dataframe\">\n", | ||
" <thead>\n", | ||
" <tr style=\"text-align: right;\">\n", | ||
" <th></th>\n", | ||
" <th>Email No.</th>\n", | ||
" <th>the</th>\n", | ||
" <th>to</th>\n", | ||
" <th>ect</th>\n", | ||
" <th>and</th>\n", | ||
" <th>for</th>\n", | ||
" <th>of</th>\n", | ||
" <th>a</th>\n", | ||
" <th>you</th>\n", | ||
" <th>hou</th>\n", | ||
" <th>...</th>\n", | ||
" <th>connevey</th>\n", | ||
" <th>jay</th>\n", | ||
" <th>valued</th>\n", | ||
" <th>lay</th>\n", | ||
" <th>infrastructure</th>\n", | ||
" <th>military</th>\n", | ||
" <th>allowing</th>\n", | ||
" <th>ff</th>\n", | ||
" <th>dry</th>\n", | ||
" <th>Prediction</th>\n", | ||
" </tr>\n", | ||
" </thead>\n", | ||
" <tbody>\n", | ||
" <tr>\n", | ||
" <th>0</th>\n", | ||
" <td>Email 1</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>1</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>2</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>...</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>1</th>\n", | ||
" <td>Email 2</td>\n", | ||
" <td>8</td>\n", | ||
" <td>13</td>\n", | ||
" <td>24</td>\n", | ||
" <td>6</td>\n", | ||
" <td>6</td>\n", | ||
" <td>2</td>\n", | ||
" <td>102</td>\n", | ||
" <td>1</td>\n", | ||
" <td>27</td>\n", | ||
" <td>...</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>1</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>2</th>\n", | ||
" <td>Email 3</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>1</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>8</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>...</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>3</th>\n", | ||
" <td>Email 4</td>\n", | ||
" <td>0</td>\n", | ||
" <td>5</td>\n", | ||
" <td>22</td>\n", | ||
" <td>0</td>\n", | ||
" <td>5</td>\n", | ||
" <td>1</td>\n", | ||
" <td>51</td>\n", | ||
" <td>2</td>\n", | ||
" <td>10</td>\n", | ||
" <td>...</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>4</th>\n", | ||
" <td>Email 5</td>\n", | ||
" <td>7</td>\n", | ||
" <td>6</td>\n", | ||
" <td>17</td>\n", | ||
" <td>1</td>\n", | ||
" <td>5</td>\n", | ||
" <td>2</td>\n", | ||
" <td>57</td>\n", | ||
" <td>0</td>\n", | ||
" <td>9</td>\n", | ||
" <td>...</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" <td>1</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0</td>\n", | ||
" </tr>\n", | ||
" </tbody>\n", | ||
"</table>\n", | ||
"<p>5 rows × 3002 columns</p>\n", | ||
"</div>" | ||
], | ||
"text/plain": [ | ||
" Email No. the to ect and for of a you hou ... connevey jay \\\n", | ||
"0 Email 1 0 0 1 0 0 0 2 0 0 ... 0 0 \n", | ||
"1 Email 2 8 13 24 6 6 2 102 1 27 ... 0 0 \n", | ||
"2 Email 3 0 0 1 0 0 0 8 0 0 ... 0 0 \n", | ||
"3 Email 4 0 5 22 0 5 1 51 2 10 ... 0 0 \n", | ||
"4 Email 5 7 6 17 1 5 2 57 0 9 ... 0 0 \n", | ||
"\n", | ||
" valued lay infrastructure military allowing ff dry Prediction \n", | ||
"0 0 0 0 0 0 0 0 0 \n", | ||
"1 0 0 0 0 0 1 0 0 \n", | ||
"2 0 0 0 0 0 0 0 0 \n", | ||
"3 0 0 0 0 0 0 0 0 \n", | ||
"4 0 0 0 0 0 1 0 0 \n", | ||
"\n", | ||
"[5 rows x 3002 columns]" | ||
] | ||
}, | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"df = pd.read_csv(\"./datasets/emails.csv\", nrows=5000)\n", | ||
"df.head()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"Email No. 0\n", | ||
"the 0\n", | ||
"to 0\n", | ||
"ect 0\n", | ||
"and 0\n", | ||
" ..\n", | ||
"military 0\n", | ||
"allowing 0\n", | ||
"ff 0\n", | ||
"dry 0\n", | ||
"Prediction 0\n", | ||
"Length: 3002, dtype: int64" | ||
] | ||
}, | ||
"execution_count": 6, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"df.isnull().sum()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"X = df.drop([\"Email No.\", \"Prediction\"], axis=1)\n", | ||
"Y = df[\"Prediction\"]" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 8, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"train_x, test_x, train_y, test_y = train_test_split(X, Y, test_size = 0.25, random_state = 1)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 9, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Accuracy Score for KNN: 0.8728\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"knn = KNeighborsClassifier(n_neighbors=7)\n", | ||
"knn.fit(train_x, train_y)\n", | ||
"print(\"Accuracy Score for KNN:\", knn.score(test_x, test_y))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 10, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Accuracy Score for SVC: 0.9168\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"svc = SVC(C=1.0, kernel='rbf', gamma='auto')\n", | ||
"svc.fit(train_x, train_y)\n", | ||
"y_pred2 = svc.predict(test_x)\n", | ||
"print(\"Accuracy Score for SVC:\", accuracy_score(y_pred2, test_y))" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.7" | ||
}, | ||
"orig_nbformat": 4 | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Oops, something went wrong.