From 807a170dd736881097ee733f097fa96d99511ec1 Mon Sep 17 00:00:00 2001 From: NITHIN999999 <71773426+NITHIN999999@users.noreply.github.com> Date: Sat, 10 Oct 2020 08:19:41 +0530 Subject: [PATCH] Add files via upload --- Python Project-2_Movie lens Research.ipynb | 1329 ++++++++++++++++++++ 1 file changed, 1329 insertions(+) create mode 100644 Python Project-2_Movie lens Research.ipynb diff --git a/Python Project-2_Movie lens Research.ipynb b/Python Project-2_Movie lens Research.ipynb new file mode 100644 index 0000000..814027f --- /dev/null +++ b/Python Project-2_Movie lens Research.ipynb @@ -0,0 +1,1329 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'C:\\\\Users\\\\NITHIN\\\\Documents\\\\02)Data Science with Python\\\\Data-Science-with-Python-Project-2'" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pwd" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + ":2: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators (separators > 1 char and different from '\\s+' are interpreted as regex); you can avoid this warning by specifying engine='python'.\n", + " movies = pd.read_csv(\"C:\\\\Users\\\\NITHIN\\\\Documents\\\\02)Data Science with Python\\\\Data-Science-with-Python-Project-2\\\\movies.dat\", sep=\"::\", names=['MovieID', 'Title', 'Genres'] )\n" + ] + } + ], + "source": [ + "#Input movies dataset\n", + "movies = pd.read_csv(\"C:\\\\Users\\\\NITHIN\\\\Documents\\\\02)Data Science with Python\\\\Data-Science-with-Python-Project-2\\\\movies.dat\", sep=\"::\", names=['MovieID', 'Title', 'Genres'] )" + ] + }, + { + 
"cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MovieIDTitleGenres
01Toy Story (1995)Animation|Children's|Comedy
12Jumanji (1995)Adventure|Children's|Fantasy
23Grumpier Old Men (1995)Comedy|Romance
34Waiting to Exhale (1995)Comedy|Drama
45Father of the Bride Part II (1995)Comedy
\n", + "
" + ], + "text/plain": [ + " MovieID Title Genres\n", + "0 1 Toy Story (1995) Animation|Children's|Comedy\n", + "1 2 Jumanji (1995) Adventure|Children's|Fantasy\n", + "2 3 Grumpier Old Men (1995) Comedy|Romance\n", + "3 4 Waiting to Exhale (1995) Comedy|Drama\n", + "4 5 Father of the Bride Part II (1995) Comedy" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Read the sample movies dataset\n", + "movies.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + ":2: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators (separators > 1 char and different from '\\s+' are interpreted as regex); you can avoid this warning by specifying engine='python'.\n", + " ratings = pd.read_csv(\"C:\\\\Users\\\\NITHIN\\\\Documents\\\\02)Data Science with Python\\\\Data-Science-with-Python-Project-2\\\\ratings.dat\", sep=\"::\", names=['UserID', 'MovieID', 'Rating', 'Timestamp'] )\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
UserIDMovieIDRatingTimestamp
0111935978300760
116613978302109
219143978301968
3134084978300275
4123555978824291
\n", + "
" + ], + "text/plain": [ + " UserID MovieID Rating Timestamp\n", + "0 1 1193 5 978300760\n", + "1 1 661 3 978302109\n", + "2 1 914 3 978301968\n", + "3 1 3408 4 978300275\n", + "4 1 2355 5 978824291" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Input ratings dataset\n", + "ratings = pd.read_csv(\"C:\\\\Users\\\\NITHIN\\\\Documents\\\\02)Data Science with Python\\\\Data-Science-with-Python-Project-2\\\\ratings.dat\", sep=\"::\", names=['UserID', 'MovieID', 'Rating', 'Timestamp'] )\n", + "\n", + "#Read the sample ratings dataset\n", + "ratings.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + ":2: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators (separators > 1 char and different from '\\s+' are interpreted as regex); you can avoid this warning by specifying engine='python'.\n", + " users = pd.read_csv(\"C:\\\\Users\\\\NITHIN\\\\Documents\\\\02)Data Science with Python\\\\Data-Science-with-Python-Project-2\\\\users.dat\", sep=\"::\", names=['UserID', 'Gender', 'Age', 'Occupation', 'Zip-code'] )\n" + ] + } + ], + "source": [ + "#Input users dataset\n", + "users = pd.read_csv(\"C:\\\\Users\\\\NITHIN\\\\Documents\\\\02)Data Science with Python\\\\Data-Science-with-Python-Project-2\\\\users.dat\", sep=\"::\", names=['UserID', 'Gender', 'Age', 'Occupation', 'Zip-code'] )" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
UserIDGenderAgeOccupationZip-code
01F11048067
12M561670072
23M251555117
34M45702460
45M252055455
\n", + "
" + ], + "text/plain": [ + " UserID Gender Age Occupation Zip-code\n", + "0 1 F 1 10 48067\n", + "1 2 M 56 16 70072\n", + "2 3 M 25 15 55117\n", + "3 4 M 45 7 02460\n", + "4 5 M 25 20 55455" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Read the sample users dataset\n", + "users.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MovieIDTitleUserIDAgeGenderOccupationRating
01193One Flew Over the Cuckoo's Nest (1975)11F105
1661James and the Giant Peach (1996)11F103
2914My Fair Lady (1964)11F103
33408Erin Brockovich (2000)11F104
42355Bug's Life, A (1998)11F105
\n", + "
" + ], + "text/plain": [ + " MovieID Title UserID Age Gender \\\n", + "0 1193 One Flew Over the Cuckoo's Nest (1975) 1 1 F \n", + "1 661 James and the Giant Peach (1996) 1 1 F \n", + "2 914 My Fair Lady (1964) 1 1 F \n", + "3 3408 Erin Brockovich (2000) 1 1 F \n", + "4 2355 Bug's Life, A (1998) 1 1 F \n", + "\n", + " Occupation Rating \n", + "0 10 5 \n", + "1 10 3 \n", + "2 10 3 \n", + "3 10 4 \n", + "4 10 5 " + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Merge the ratings and users with movieID and UserID\n", + "ratings_user = pd.merge(ratings,users, on=['UserID'])\n", + "ratings_movie = pd.merge(ratings,movies, on=['MovieID'])\n", + "\n", + "master_data = pd.merge(ratings_user,ratings_movie,\n", + " on=['UserID', 'MovieID', 'Rating'])[['MovieID', 'Title', 'UserID', 'Age', 'Gender', 'Occupation', \"Rating\"]]\n", + "\n", + "master_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "#User age distribution\n", + "import matplotlib.pyplot as plt\n", + "\n", + "users['Age'].hist(bins=50)\n", + "plt.xlabel('Age')\n", + "plt.ylabel('Population')\n", + "plt.show" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Age\n", + "1 112\n", + "18 448\n", + "25 790\n", + "35 423\n", + "45 143\n", + "50 108\n", + "56 53\n", + "Name: MovieID, dtype: int64" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "#User rating of the movie “Toy Story”\n", + "\n", + "res = master_data[master_data.Title == \"Toy Story (1995)\"]\n", + "\n", + "plt.plot(res.groupby(\"Age\")[\"MovieID\"].count(),'--bo')\n", + "res.groupby(\"Age\")[\"MovieID\"].count()" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "#Top 25 movies by viewership rating\n", + "\n", + "res = master_data.groupby(\"Title\").size().sort_values(ascending=False)[:25]\n", + "plt.ylabel(\"Title\")\n", + "plt.xlabel(\"Viewership Count\")\n", + "res.plot(kind=\"barh\")\n", + "#res" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MovieIDTitleUserIDAgeGenderOccupationRating
4406671258Shining, The (1980)269625M74
4406681270Back to the Future (1985)269625M72
4406691617L.A. Confidential (1997)269625M74
4406701625Game, The (1997)269625M74
4406711644I Know What You Did Last Summer (1997)269625M72
4406721645Devil's Advocate, The (1997)269625M74
4406731805Wild Things (1998)269625M74
4406741892Perfect Murder, A (1998)269625M74
440675800Lone Star (1996)269625M75
4406762338I Still Know What You Did Last Summer (1998)269625M72
4406771711Midnight in the Garden of Good and Evil (1997)269625M74
4406783176Talented Mr. Ripley, The (1999)269625M74
4406792389Psycho (1998)269625M74
4406801589Cop Land (1997)269625M73
4406812713Lake Placid (1999)269625M71
4406823386JFK (1991)269625M71
4406831783Palmetto (1998)269625M74
440684350Client, The (1994)269625M73
4406851092Basic Instinct (1992)269625M74
4406861097E.T. the Extra-Terrestrial (1982)269625M73
\n", + "
" + ], + "text/plain": [ + " MovieID Title UserID Age \\\n", + "440667 1258 Shining, The (1980) 2696 25 \n", + "440668 1270 Back to the Future (1985) 2696 25 \n", + "440669 1617 L.A. Confidential (1997) 2696 25 \n", + "440670 1625 Game, The (1997) 2696 25 \n", + "440671 1644 I Know What You Did Last Summer (1997) 2696 25 \n", + "440672 1645 Devil's Advocate, The (1997) 2696 25 \n", + "440673 1805 Wild Things (1998) 2696 25 \n", + "440674 1892 Perfect Murder, A (1998) 2696 25 \n", + "440675 800 Lone Star (1996) 2696 25 \n", + "440676 2338 I Still Know What You Did Last Summer (1998) 2696 25 \n", + "440677 1711 Midnight in the Garden of Good and Evil (1997) 2696 25 \n", + "440678 3176 Talented Mr. Ripley, The (1999) 2696 25 \n", + "440679 2389 Psycho (1998) 2696 25 \n", + "440680 1589 Cop Land (1997) 2696 25 \n", + "440681 2713 Lake Placid (1999) 2696 25 \n", + "440682 3386 JFK (1991) 2696 25 \n", + "440683 1783 Palmetto (1998) 2696 25 \n", + "440684 350 Client, The (1994) 2696 25 \n", + "440685 1092 Basic Instinct (1992) 2696 25 \n", + "440686 1097 E.T. the Extra-Terrestrial (1982) 2696 25 \n", + "\n", + " Gender Occupation Rating \n", + "440667 M 7 4 \n", + "440668 M 7 2 \n", + "440669 M 7 4 \n", + "440670 M 7 4 \n", + "440671 M 7 2 \n", + "440672 M 7 4 \n", + "440673 M 7 4 \n", + "440674 M 7 4 \n", + "440675 M 7 5 \n", + "440676 M 7 2 \n", + "440677 M 7 4 \n", + "440678 M 7 4 \n", + "440679 M 7 4 \n", + "440680 M 7 3 \n", + "440681 M 7 1 \n", + "440682 M 7 1 \n", + "440683 M 7 4 \n", + "440684 M 7 3 \n", + "440685 M 7 4 \n", + "440686 M 7 3 " + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "#Find the ratings for all the movies reviewed by a particular user with user id = 2696\n", + "\n", + "res = master_data[master_data.UserID == 2696]\n", + "\n", + "plt.scatter(y=res.Title, x=res.Rating)\n", + "\n", + "res" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AnimationChildren'sComedyAdventureFantasyRomanceDramaActionCrimeThriller...Sci-FiDocumentaryWarMusicalMysteryFilm-NoirWesternGenderAgeRating
00000001000...0000000F15
11100000000...0001000F13
20000010000...0001000F13
30000001000...0000000F14
41110000000...0000000F15
\n", + "

5 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " Animation Children's Comedy Adventure Fantasy Romance Drama Action Crime \\\n", + "0 0 0 0 0 0 0 1 0 0 \n", + "1 1 1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 1 0 0 0 \n", + "3 0 0 0 0 0 0 1 0 0 \n", + "4 1 1 1 0 0 0 0 0 0 \n", + "\n", + " Thriller ... Sci-Fi Documentary War Musical Mystery Film-Noir Western \\\n", + "0 0 ... 0 0 0 0 0 0 0 \n", + "1 0 ... 0 0 0 1 0 0 0 \n", + "2 0 ... 0 0 0 1 0 0 0 \n", + "3 0 ... 0 0 0 0 0 0 0 \n", + "4 0 ... 0 0 0 0 0 0 0 \n", + "\n", + " Gender Age Rating \n", + "0 F 1 5 \n", + "1 F 1 3 \n", + "2 F 1 3 \n", + "3 F 1 4 \n", + "4 F 1 5 \n", + "\n", + "[5 rows x 21 columns]" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Feature Engineering\n", + "\n", + "val = movies.Genres.str.split(\"|\")\n", + "\n", + "res_col = []\n", + "for v in val:\n", + " for i in v:\n", + " if i not in res_col:\n", + " res_col.append(i)\n", + "\n", + "res_col.append(\"Gender\")\n", + "res_col.append(\"Age\")\n", + "res_col.append(\"Rating\")\n", + "\n", + "df = pd.DataFrame(columns=res_col)\n", + "\n", + "res = master_data.merge(movies, on = ['MovieID'], how=\"left\")[[\"Genres\",\"Rating\",\"Gender\", \"Age\"]]\n", + "\n", + "for index, row in res.head(20000).iterrows():\n", + " tmp = row.Genres.split(\"|\") \n", + " \n", + " for i in tmp:\n", + " # print(i)\n", + " df.loc[index,i] = 1\n", + " df.loc[index,\"Gender\"] = res.loc[index,\"Gender\"]\n", + " df.loc[index,\"Age\"] = res.loc[index,\"Age\"]\n", + " df.loc[index,\"Rating\"] = res.loc[index,\"Rating\"]\n", + " \n", + "# var = res.loc[index, \"Rating\"]\n", + "# if var == 1:\n", + "# df.loc[index,\"Rating\"] = \"one\" \n", + "# elif var == 2:\n", + "# df.loc[index,\"Rating\"] = \"two\"\n", + "# elif var == 3:\n", + "# df.loc[index,\"Rating\"] = \"three\"\n", + "# elif var == 4:\n", + "# df.loc[index,\"Rating\"] = \"four\"\n", + "# else:\n", + "# df.loc[index,\"Rating\"] = \"five\"\n", + " \n", + " 
df.loc[index,df.columns[~df.columns.isin(tmp+[\"Gender\",\"Rating\",\"Age\"])]] = 0\n", + "\n", + "df.head()\n", + " \n", + "\n", + "#df.loc[i,\"Animation\"] = 1\n", + "\n", + "#df" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\NITHIN\\anaconda3\\lib\\site-packages\\pandas\\core\\generic.py:5303: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " self[name] = value\n" + ] + } + ], + "source": [ + "\n", + "from sklearn import datasets \n", + "from sklearn.metrics import confusion_matrix \n", + "from sklearn.model_selection import train_test_split \n", + "from sklearn.preprocessing import LabelEncoder\n", + "\n", + "X = df[df.columns[~df.columns.isin([\"Rating\"])]]\n", + "y = df.Rating\n", + "\n", + "# dividing X, y into train and test data \n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 0) \n", + "\n", + "number = LabelEncoder()\n", + "X_train.Gender = number.fit_transform(X_train[\"Gender\"].astype(\"str\"))\n", + "X_test.Gender = number.fit_transform(X_test[\"Gender\"].astype(\"str\"))\n", + "y_train = number.fit_transform(y_train.astype(\"int\"))\n", + "y_test = number.fit_transform(y_test.astype(\"int\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.34" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#SVM\n", + "\n", + "from sklearn.svm import SVC \n", + "svm_model_linear = SVC(kernel = 'linear', C = 1).fit(X_train, y_train) \n", + "svm_predictions = svm_model_linear.predict(X_test) \n", 
+ " \n", + "# model accuracy for X_test \n", + "accuracy = svm_model_linear.score(X_test, y_test) \n", + " \n", + "# creating a confusion matrix \n", + "cm = confusion_matrix(y_test, svm_predictions) \n", + "accuracy\n", + "#cm" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.3102" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#KNN\n", + "\n", + "from sklearn.neighbors import KNeighborsClassifier \n", + "knn = KNeighborsClassifier(n_neighbors = 7).fit(X_train, y_train) \n", + " \n", + "# accuracy on X_test \n", + "accuracy = knn.score(X_test, y_test) \n", + " \n", + "# creating a confusion matrix \n", + "knn_predictions = knn.predict(X_test) \n", + "cm = confusion_matrix(y_test, knn_predictions) \n", + "\n", + "accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.2788" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Naive Bayes classifier \n", + "\n", + "from sklearn.naive_bayes import GaussianNB \n", + "gnb = GaussianNB().fit(X_train, y_train) \n", + "gnb_predictions = gnb.predict(X_test) \n", + " \n", + "# accuracy on X_test \n", + "accuracy = gnb.score(X_test, y_test) \n", + " \n", + "# creating a confusion matrix \n", + "cm = confusion_matrix(y_test, gnb_predictions) \n", + "\n", + "accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 
"3.8.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}