2
2
from collections import defaultdict , Counter
3
3
import json
4
4
import os
5
+ from pandas import DataFrame
6
+
5
7
6
8
def main ():
7
9
"""
@@ -14,12 +16,18 @@ def main():
14
16
15
17
time_zones = [r ['tz' ] for r in records if 'tz' in r ]
16
18
17
- tz_counts = get_counts (time_zones )
19
+ # Use a pandas DataFrame
20
+ frame = DataFrame (records )
21
+ print list (frame .columns .values )
22
+
23
+ # Access the column and replace missing values (None)
24
+ clean_tz = frame ['tz' ].fillna ('Missing' )
25
+ # clean_tz == '' yields a boolean mask: True for each row whose value is ''
26
+ clean_tz [clean_tz == '' ] = 'Unknown'
27
+ # Tally the count of each value and show the top 10
28
+ tz_counts = clean_tz .value_counts ()
18
29
19
- return [
20
- top_counts (tz_counts ),
21
- Counter (tz_counts ).most_common (10 ),
22
- ]
30
+ return tz_counts [:10 ]
23
31
24
32
def get_counts (sequence ):
25
33
counts = defaultdict (int ) # values initialize to zero
@@ -29,14 +37,13 @@ def get_counts(sequence):
29
37
return counts
30
38
31
39
def top_counts(count_dict, n=10):
    """Return the ``n`` entries of ``count_dict`` with the highest counts.

    :param count_dict: mapping of key to its count.
    :param n: maximum number of entries to return (default 10).
    :return: list of ``(count, key)`` tuples sorted in ascending order, so
        the entry with the largest count is last. Empty when ``n <= 0``.
    """
    if n <= 0:
        # ``counts[-0:]`` is ``counts[0:]`` and would return every entry
        # instead of none, so handle the non-positive case explicitly.
        return []

    # Put the count first so tuples sort by count (ties broken by key).
    counts = sorted((count, tz) for tz, count in count_dict.items())
    return counts[-n:]
40
44
45
def top_counts_by_counter(count_dict, n=10):
    """Return the ``n`` most common entries of ``count_dict`` via ``Counter``.

    :param count_dict: mapping of key to its count.
    :param n: number of entries to return (default 10).
    :return: list of ``(key, count)`` tuples ordered from the highest count
        to the lowest. Note the tuple order is ``(key, count)``, the reverse
        of what ``top_counts`` produces.
    """
    return Counter(count_dict).most_common(n)
47
+
41
48
if __name__ == '__main__':
    # Script entry point: run main() and print whatever it returns.
    # The parenthesized single-argument form behaves the same as a print
    # statement on Python 2 and as a function call on Python 3.
    print(main())
0 commit comments