Add MapReduce Implementation for the Movies Dataset

dpavloff · Oct 25, 2018 · b296cf2 · b296cf2
1 parent 30db528
commit b296cf2
Show file tree

Hide file tree

Showing 3 changed files with 104,841 additions and 0 deletions.
diff --git a/Data Analytics/Map Reduce/mapreduce.py b/Data Analytics/Map Reduce/mapreduce.py
@@ -0,0 +1,32 @@
+"""
+    Author : Ajinkya Sonawane
+"""
+import pandas as pd
+
+# Load the ratings CSV and keep only its first 1000 rows; the functions below
+# read this module-level frame.
+data = pd.read_csv('ratings_small.csv').iloc[:1000, :]
+
+def find(movieId):
+    """Return every rating recorded for one movie (the "map" step).
+
+    Reads the module-level ``data`` DataFrame through its ``movieId`` and
+    ``rating`` columns.
+
+    Args:
+        movieId: Identifier compared for equality against ``data['movieId']``.
+
+    Returns:
+        list: Ratings of the matching rows, in row order; an empty list when
+        no row matches.
+    """
+    # A vectorized boolean mask replaces the per-row ``iterrows`` loop, which
+    # built a Series for every row on every call. ``data`` is only read here,
+    # so no ``global`` declaration is needed.
+    matches = data['movieId'] == movieId
+    return data.loc[matches, 'rating'].tolist()
+
+def process():
+    """Compute and print the average rating of every movie in ``data``.
+
+    Map step: group the ``rating`` values of the module-level ``data``
+    DataFrame by ``movieId``. Reduce step: average each group. One line per
+    movie is printed in the form ``<movieId> : <ratings> --> <average>``.
+
+    Returns:
+        None
+    """
+    # Map: a single grouping pass collects the ratings of every movie instead
+    # of rescanning the whole frame once per distinct movie id. groupby drops
+    # rows whose movieId is missing, so no group is ever empty and the
+    # division below cannot see a zero length.
+    mapped_values = {
+        movie_id: ratings.tolist()
+        for movie_id, ratings in data.groupby('movieId')['rating']
+    }
+
+    # Reduce: average each movie's ratings. Movies are reported in ascending
+    # movieId order because groupby sorts its keys.
+    reduced_values = {}
+    for movie_id, ratings in mapped_values.items():
+        reduced_values[movie_id] = sum(ratings) / len(ratings)
+        print(movie_id, ':', ratings, '-->', reduced_values[movie_id])
+
+# Run the map/reduce pass over the loaded ratings as soon as the module loads.
+process()