Skip to content

Commit e68eca2

Browse files
committed
評価の別れた映画を抽出した
1 parent 99450e8 commit e68eca2

File tree

1 file changed

+53
-0
lines changed

1 file changed

+53
-0
lines changed

ch02/movies_evalution.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# -*- coding: utf-8 -*-
2+
3+
import pandas as pd
4+
import study
5+
6+
def main():
    """Load the MovieLens tables, join them, and print three rating reports.

    The three ``.dat`` files listed in ``metadata`` are read from
    ``study.DATA_DIR + 'ch02/movielens/'`` and merged into a single frame.
    Only titles with at least 250 ratings are considered. The function
    prints, in order:

    1. the 10 titles with the highest mean rating in the ``'F'`` column,
    2. the 15 titles with the smallest ``M - F`` mean-rating difference,
    3. the 10 titles with the largest standard deviation of ratings.

    Nothing is returned; all results go to standard output.
    """
    metadata = {
        'users': {
            'names': ['user_id', 'gender', 'age', 'occupation', 'zip'],
            'dat': 'users.dat',
        },
        'ratings': {
            'names': ['user_id', 'movie_id', 'rating', 'timestamp'],
            'dat': 'ratings.dat',
        },
        'movies': {
            'names': ['movie_id', 'title', 'genre'],
            'dat': 'movies.dat',
        },
    }

    # Data loading: one DataFrame per entry in ``metadata``.
    tables = {
        key: read_table(
            study.DATA_DIR + 'ch02/movielens/' + spec['dat'], spec['names']
        )
        for key, spec in metadata.items()
    }

    # Join on the shared column names:
    # ratings.user_id = users.user_id, ratings.movie_id = movies.movie_id
    data = pd.merge(
        pd.merge(tables['ratings'], tables['users']), tables['movies']
    )

    # Mean rating per title split by gender, restricted to the titles with
    # the most ratings, then ranked by the mean rating given by women.
    # ``index=``/``columns=`` replace the removed ``rows=``/``cols=`` keywords.
    mean_ratings = data.pivot_table(
        values='rating', index='title', columns='gender', aggfunc='mean'
    )
    ratings_by_title = data.groupby('title').size()
    active_titles = ratings_by_title.index[ratings_by_title >= 250]
    # ``.loc`` replaces the removed ``.ix`` indexer (label-based selection).
    mean_ratings = mean_ratings.loc[active_titles]
    # ``sort_values(by=...)`` replaces the removed ``sort_index(by=...)``.
    top_female_ratings = mean_ratings.sort_values(by='F', ascending=False)
    print(top_female_ratings['F'][:10])

    # Row-wise difference between the male and female mean ratings; sorting
    # ascending puts the titles women rated higher than men first.
    mean_ratings['diff'] = mean_ratings['M'] - mean_ratings['F']
    sorted_by_diff = mean_ratings.sort_values(by='diff')
    print(sorted_by_diff[:15])

    # Top 10 movies on which ratings were most divided (highest std. dev.).
    ratings_std_by_title = data.groupby('title')['rating'].std()
    ratings_std_by_title = ratings_std_by_title.loc[active_titles]
    # ``Series.sort_values`` replaces the removed ``Series.order``.
    print(ratings_std_by_title.sort_values(ascending=False)[:10])
48+
49+
def read_table(file_path, names, sep='::', header=None):
    """Read a delimited text file into a DataFrame with explicit column names.

    Args:
        file_path: Path of the file to read.
        names: Column names to assign to the parsed columns.
        sep: Field separator passed to pandas (default ``'::'``).
        header: Row number to use as the header, or None when the file has
            no header row (the default).

    Returns:
        The DataFrame produced by ``pd.read_table``.
    """
    options = {}
    # The default C parser only handles single-character separators; for
    # anything longer pandas falls back to the Python engine and emits a
    # ParserWarning. Request that engine explicitly so the default '::'
    # separator parses without the warning.
    if sep is not None and len(sep) > 1:
        options['engine'] = 'python'
    return pd.read_table(
        file_path, sep=sep, header=header, names=names, **options
    )
51+
52+
if __name__ == '__main__':
    # main() prints its own reports and has no return value, so printing
    # the call result would only emit a stray "None" after the output.
    main()

0 commit comments

Comments
 (0)