Skip to content

Commit 5eb0ccd

Browse files
committed
pandasを使ってTOP10を集計した
1 parent fb0d556 commit 5eb0ccd

File tree

1 file changed

+16
-9
lines changed

1 file changed

+16
-9
lines changed

ch02/shorturl_analysis.py

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
from collections import defaultdict, Counter
33
import json
44
import os
5+
from pandas import DataFrame
6+
57

68
def main():
79
"""
@@ -14,12 +16,18 @@ def main():
1416

1517
time_zones = [r['tz'] for r in records if 'tz' in r]
1618

17-
tz_counts = get_counts(time_zones)
19+
# u'pandasのデータフレームを使用
20+
frame = DataFrame(records)
21+
print list(frame.columns.values)
22+
23+
# u'列にアクセスして、Noneを置き換える
24+
clean_tz = frame['tz'].fillna('Missing')
25+
# clean_tz == '' builds a boolean mask: True for each row whose value is ''
26+
clean_tz[clean_tz == ''] = 'Unknown'
27+
# u'それぞれの数を集計し、TOP10を表示
28+
tz_counts = clean_tz.value_counts()
1829

19-
return [
20-
top_counts(tz_counts),
21-
Counter(tz_counts).most_common(10),
22-
]
30+
return tz_counts[:10]
2331

2432
def get_counts(sequence):
2533
counts = defaultdict(int) # values initialize to zero
@@ -29,14 +37,13 @@ def get_counts(sequence):
2937
return counts
3038

3139
def top_counts(count_dict, n=10):
    """Return the ``n`` largest ``(count, key)`` pairs, smallest first.

    :param count_dict: mapping of key -> count.
    :param n: maximum number of pairs to return; ``n <= 0`` yields ``[]``.
    :return: list of ``(count, key)`` tuples sorted ascending, so the pair
        with the highest count is the last element.
    """
    if n <= 0:
        # A plain counts[-0:] slice is counts[0:], i.e. the whole list, so
        # asking for zero entries must be handled explicitly.
        return []

    counts = sorted((count, tz) for tz, count in count_dict.items())
    return counts[-n:]
4044

45+
def top_counts_by_counter(count_dict, n=10):
    """Return the result of ``Counter(count_dict).most_common(n)``.

    :param count_dict: mapping of key -> count, wrapped in a ``Counter``.
    :param n: number of entries requested from ``most_common``.
    """
    tally = Counter(count_dict)
    ranked = tally.most_common(n)
    return ranked
47+
4148
# Run the analysis and print its result when executed as a script.
if __name__ == '__main__':
    print(main())

0 commit comments

Comments
 (0)