Skip to content

Commit

Permalink
Add Map Reduce on Movies Dataset Implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
avs8687 committed Oct 25, 2018
1 parent 30db528 commit b296cf2
Show file tree
Hide file tree
Showing 3 changed files with 104,841 additions and 0 deletions.
32 changes: 32 additions & 0 deletions Data Analytics/Map Reduce/mapreduce.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""
Author : Ajinkya Sonawane
"""
import pandas as pd

# Load the ratings CSV and keep only its first 1000 rows (all columns).
data = pd.read_csv('ratings_small.csv').iloc[:1000, :]

def find(movieId, frame=None):
    """Return every rating recorded for one movie.

    Args:
        movieId: Value to match against the 'movieId' column.
        frame: Optional table with 'movieId' and 'rating' columns to search.
            Defaults to the module-level ``data`` table.

    Returns:
        A list of the matching 'rating' values in row order; empty when no
        row matches.
    """
    if frame is None:
        frame = data
    # A boolean mask selects all matching rows in one vectorized step rather
    # than walking the table row by row with iterrows().
    matches = frame.loc[frame['movieId'] == movieId, 'rating']
    return matches.tolist()

def process(frame=None):
    """Compute and print the average rating of every movie (map, then reduce).

    Map step: collect each rating under its movie id in a single pass over
    the table. Reduce step: collapse each movie's ratings to their mean.

    Args:
        frame: Optional table with 'movieId' and 'rating' columns. Defaults
            to the module-level ``data`` table.

    Returns:
        A dict mapping each movie id to its average rating. One line per
        movie is also printed as ``id : [ratings] --> average``.
    """
    if frame is None:
        frame = data

    # Map the ratings with same movie id. One pass over the rows replaces a
    # full-table scan per unique movie id.
    mapped_values = {}
    for movie_id, rating in zip(frame['movieId'].tolist(),
                                frame['rating'].tolist()):
        mapped_values.setdefault(movie_id, []).append(rating)

    # Reduce the mapped values by taking the average of the ratings. Every
    # group holds at least one rating, so the division is always defined.
    reduced_values = {}
    for movie_id, ratings in mapped_values.items():
        reduced_values[movie_id] = sum(ratings) / len(ratings)
        print(movie_id, ':', ratings, '-->', reduced_values[movie_id])

    return reduced_values

# Run the analysis only when this file is executed as a script, so that
# importing the module does not trigger the computation and printing.
if __name__ == "__main__":
    process()
Loading

0 comments on commit b296cf2

Please sign in to comment.