-
Notifications
You must be signed in to change notification settings - Fork 19
/
activehours.py
executable file
·139 lines (118 loc) · 4.87 KB
/
activehours.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#!/usr/bin/env python3
"""
A program to plot the activity of a chat over 24 hours
"""
import argparse
from json import loads
from datetime import date,timedelta,datetime
from os import path
from collections import defaultdict
import matplotlib.pyplot as plt
from sys import maxsize
def extract_info(event):
    """
    Pull the calendar date, the hour of day and the text length out of one
    chat event.

    event must provide a POSIX timestamp under 'date' and a sized value
    (normally the message string) under 'text'. Date and hour are both
    obtained with fromtimestamp() without a tz argument, so they are expressed
    in the local timezone of the machine running the script.

    Returns a (date, hour, length) tuple.
    """
    stamp = event['date']
    hour_of_day = datetime.fromtimestamp(stamp).hour
    day = date.fromtimestamp(stamp)
    return day, hour_of_day, len(event['text'])
def make_ddict_in_range(json_file, start, end):
    """
    Return a defaultdict(int) mapping hour of day to activity in a date range.

    json_file: an iterable of lines, each holding one JSON-encoded event
        (for example an open chat log file). It is read one line at a time,
        so the whole log is never held in memory.
    start, end: datetime.date bounds of the range; both ends are inclusive.

    Only events that have a 'text' field and whose date lies inside the range
    are counted. The value stored for an hour is the total number of
    characters of text sent during that hour, summed over every day in the
    range.
    """
    counter = defaultdict(int)
    for line in json_file:
        if not line.strip():
            # a blank line (e.g. a trailing newline at the end of the log) is
            # not valid JSON and would make loads() raise, so skip it
            continue
        event = loads(line)
        if 'text' not in event:
            # events without a text field contribute nothing to the count
            continue
        msg_date, msg_hour, msg_length = extract_info(event)
        if start <= msg_date <= end:
            counter[msg_hour] += msg_length
    return counter
def parse_args():
    """
    Build the command line parser and parse the program's arguments.

    Returns the argparse.Namespace holding:
      file          -- path of the JSON chat log to analyse (required)
      output_folder -- folder to save the graph image in, or None when the
                       graph should be displayed instead
      figure_size   -- list of two ints (X and Y size), default [14, 8]
      date_range    -- 'YYYY-MM-DD YYYY-MM-DD' string; the default range is
                       wide enough that no message is filtered out
    """
    parser = argparse.ArgumentParser(
        description="Visualise the most active times of day in a Telegram chat")
    # argparse lists every flag under "optional arguments", even required
    # ones, so required flags get their own group in the help output:
    # https://stackoverflow.com/questions/24180527/argparse-required-arguments-listed-under-optional-arguments
    required = parser.add_argument_group('required arguments')
    required.add_argument(
        '-f', '--file',
        help='path to the json file (chat log) to analyse.',
        required=True,
    )
    # NOTE: adjacent string literals are concatenated as-is, so every piece
    # of a multi-part help text except the last must end with a space.
    parser.add_argument(
        '-o', '--output-folder',
        help='the folder to save the activity graph image in. '
             'Using this option will make the graph not display on screen.',
    )
    parser.add_argument(
        '-s', '--figure-size',
        help='the size of the figure shown or saved (X and Y size). '
             'Choose an appropriate value for your screen size. Default 14 8.',
        nargs=2, type=int, default=[14, 8],
    )
    parser.add_argument(
        '-d', '--date-range',
        help='the range of dates you want to look at data between. '
             'Must be in format YYYY-MM-DD YYYY-MM-DD with the first date '
             'the start of the range, and the second the end. Example: '
             "-d '2017-05-15 2017-11-20'. Make sure you don't put a day "
             'that is too high for the month eg 30th February.',
        # hopefully no chatlogs contain these dates :p
        default="1000-01-01 4017-01-01",
    )
    return parser.parse_args()
def save_figure(folder, filename):
    """
    Save the current matplotlib figure as a PNG file inside folder.

    folder: the directory the image is written to.
    filename: name of the analysed chat log (without extension); the image is
        called "Active hours in <filename>.png".

    When filename is longer than 200 characters the user is asked on the
    console for a replacement name, which is then used as given.
    """
    name_is_reasonable = len(filename) <= 200
    if name_is_reasonable:
        figname = "Active hours in {}".format(filename)
    else:
        # a name this long is likely to cause issues when saving, so let the
        # user pick something shorter
        figname = input(
            "This graph is going to have a very long file name. Please enter a custom name(no need to add an extension): ")
    target = "{}/{}.png".format(folder, figname)
    plt.savefig(target)
def annotate_figure(filename):
    """
    Put the title, the axis labels and the hour ticks on the current figure.

    filename: name of the analysed chat log, shown in the title.

    The x axis is fixed to the range 0-24 and gets one tick per hour, placed
    half a unit to the right of the hour value and labelled 0 to 23.
    """
    hours = range(24)
    tick_positions = [hour + 0.5 for hour in hours]

    plt.title("Active hours in {}".format(filename))
    plt.ylabel("Activity level (chars)", size=14)
    # NOTE: this function does no timezone handling; the hours shown are in
    # whatever timezone was used when the messages were grouped by hour
    plt.xlabel("Hour of the day", size=14)
    axes = plt.gca()
    axes.set_xlim([0, 24])
    plt.xticks(tick_positions, hours)
def get_dates(arg_dates):
    """
    Parse a 'YYYY-MM-DD YYYY-MM-DD' string into a (start_date, end_date)
    tuple of datetime.date objects.

    arg_dates: the two dates separated by whitespace, start first.

    Exits the program (raises SystemExit carrying an explanatory message,
    which gives a non-zero exit status) when the string does not contain
    exactly two dates or when either of them is not a valid YYYY-MM-DD date.
    """
    daterange = arg_dates.split()
    if len(daterange) != 2:
        # covers a missing separator, a missing second date ("2017-05-15 ")
        # and stray extra values, none of which can be interpreted safely
        raise SystemExit(
            "You must give exactly two dates (start and end) separated by a space")
    try:
        start_date, end_date = [
            datetime.strptime(text, "%Y-%m-%d").date() for text in daterange]
    except ValueError as err:
        # e.g. wrong format, or a day that does not exist such as 30th February
        raise SystemExit(
            "Invalid date in range '{}': {}".format(arg_dates, err))
    return (start_date, end_date)
def main():
    """
    main function: read the arguments, count the activity per hour of day in
    the chat log and plot it as a bar chart, which is either saved to the
    requested folder or displayed in a window.
    """
    args = parse_args()
    filepath = args.file
    savefolder = args.output_folder
    figure_size = args.figure_size
    start_date, end_date = get_dates(args.date_range)
    # name of the log file without folder or extension, used for the title
    filename = path.splitext(path.split(filepath)[-1])[0]

    # JSON text is UTF-8, so do not depend on the platform's default encoding
    with open(filepath, 'r', encoding='utf-8') as jsonfile:
        chat_counter = make_ddict_in_range(
            jsonfile, start_date, end_date)

    if not chat_counter:
        print("No text messages found between {} and {}".format(
            start_date, end_date))

    plt.figure(figsize=figure_size)
    # Always plot all 24 hours: hours without activity get a zero-height bar,
    # and an empty counter no longer results in plt.bar() being called
    # without arguments.
    hours = range(24)
    activity = [chat_counter.get(hour, 0) for hour in hours]
    # align='edge' starts each bar at its hour so that it lies under the tick
    # annotate_figure() puts at hour + 0.5 and inside the 0-24 x range
    # (the default, 'center', would centre the bars on the hour itself).
    plt.bar(hours, activity, align='edge')
    annotate_figure(filename)

    if savefolder is not None:
        # if there is a given folder to save the figure in, save it there
        save_figure(savefolder, filename)
    else:
        # if a save folder was not specified, just open a window to display graph
        plt.show()
# Run the program only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()