Skip to content

Commit 16f3fe7

Browse files
committed
Initial commit
0 parents  commit 16f3fe7

11 files changed

+1542
-0
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
*.py[cod]

README.txt

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# HTTP access log monitor
2+
3+
## Application:
4+
* Monitors a w3c-formatted HTTP access log and reports on the stats in the console.
5+
* It can either tail a log file and update the stats every 10s (configurable), or run once and display the stats
6+
* In tail mode there's an alarm for having lots of hits in the last 2 mins.
7+
* There's also a very basic Apache access log emulator to test the Monitor program in a realistic scenario
8+
9+
10+
## Design TODO:
11+
* Separate out display logic into its own class rather than having a long output_to_console function in Monitor
12+
For now it's ok since the UI is very basic, but any more complexity would require this
13+
14+
* Put the parsing on a separate thread so we could aggregate and parse simultaneously:
15+
The parser could push records to a queue from which the Monitor would grab and process them
16+
17+
* The alerting should probably be less sensitive to movements around the threshold;
18+
perhaps have a timer to regulate how frequently alerts can be generated
19+
Also keep them in a limited size queue so the total number of alerts is contained
20+
21+
* Currently everything runs in-memory. For very large log files it would be beneficial
22+
to parse and process in batches, and dump old processed records to disk (or erase), since they
23+
aren't especially useful. Only the counters and the most recent 2 mins hold
24+
useful data, and these are lightweight.
25+
26+
* Implement a more flexible and extensive custom counting class. For now, collections.Counter
27+
works great but any more complex counting (like top users per section) would be cumbersome.
28+
29+
* Finally, in terms of features, it would be nice to tabulate the results, and have an export option.
30+
Naturally there are many more stats to be displayed, and custom alerting rules for a variety of fields would be great.

__init__.py

Whitespace-only changes.

apache_http_log_emulator.py

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
"""
2+
:created: 15 Jan 2016
3+
:author: bleerodgers blr@nw3weather.co.uk
4+
"""
5+
6+
import random
7+
8+
from datetime import datetime
9+
10+
11+
class ApacheLogEmulator(object):
    """
    Very basic emulator for logging http traffic from an Apache web server.

    Every generated record is a single line made of nine space-separated
    fields: host, ident, authuser, date, request, status, bytes, referrer and
    user agent. Records are appended to the file at ``log_path``.

    The code deliberately sticks to constructs available on both Python 2
    and Python 3 (``range``, ``str.format``).
    """

    # Pools the random field values are drawn from.
    _HOSTS = (
        '192.168.1.52',
        '10.10.10.10',
        '22.22.2.222',
        '255.25.55.25',
        '16.2.33.45',
        # NOTE(review): not a valid IPv4 address (octets above 255). Kept
        # unchanged from the original data set -- confirm it is intentional.
        '1.22.333.444',
    )
    _METHODS = ('GET', 'PUT', 'POST', 'DELETE', 'HEAD')
    _PROTOCOL_VERSIONS = ('1.0', '1.1', '2')
    _PAGES = ('edit', 'make', 'scrap', 'grab')
    _QUERY_STRINGS = ('?randomise', '')
    _SECTIONS = 'ABCDEFGHIJKLMNO'  # single-letter path segments, chr(65)..chr(79)
    _STATUSES = (
        100, 101,
        200, 201, 206,
        301, 302, 304,
        401, 403, 404, 418, 451,
        500, 502, 503,
    )

    _ROOT_WEIGHT = 0.1   # approximate share of requests that target plain "/"
    _MAX_DEPTH = 5       # maximum number of section segments before the page
    _MAX_BYTES = 10000   # upper bound (inclusive) of the random response size
    _TIMESTAMP_FORMAT = '%d/%b/%Y:%H:%M:%S'

    def __init__(self, log_path):
        """
        :param log_path: path of the log file that records are appended to
        """
        self.log_path = log_path

    def emulate(self, count):
        """
        Generate ``count`` random records and append them to the log file.

        All records are built first and written with a single file open, so a
        batch lands in the log in one go. A ``count`` of zero (or less) writes
        nothing.

        :param count: number of records to generate
        """
        lines = [self._get_record() for _ in range(count)]
        self._write_lines(lines)

    def _get_record(self):
        """
        Build one newline-terminated log record.

        :return: the record as a string, fields separated by single spaces
        """
        fields = [
            self._random_ip(),                               # host
            '-',                                             # ident
            '-',                                             # authuser
            self._current_date(),                            # date
            self._random_request(),                          # request
            self._random_status(),                           # status
            str(random.randint(0, self._MAX_BYTES)),         # bytes
            '"http://www.google.com"',                       # referrer
            '"Mozilla/5.0 (compatible; ApacheLogBot/0.1;)"'  # user agent string
        ]
        return ' '.join(fields) + '\n'

    def _write_lines(self, data):
        """
        Append the given lines to the log file.

        :param data: iterable of strings, each already ending in a newline
        """
        with open(self.log_path, 'a') as f:
            f.writelines(data)

    def _random_ip(self):
        """
        :return: a host address picked at random from the fixed pool
        """
        return random.choice(self._HOSTS)

    def _current_date(self):
        """
        Return the current time as a bracketed log timestamp.

        The timestamp is labelled with a ``+0000`` offset, so it has to be
        rendered from UTC; formatting local time here would mislabel every
        record on hosts whose clock is not set to UTC.

        :return: string such as ``[15/Jan/2016:13:45:02 +0000]``
        """
        # Function-scope import, matching how main() pulls in its helpers.
        import time

        stamp = time.strftime(self._TIMESTAMP_FORMAT, time.gmtime())
        return '[{0} +0000]'.format(stamp)

    def _random_request(self):
        """
        Build a random, double-quoted request line.

        Roughly ``_ROOT_WEIGHT`` of the requests target the site root ``/``;
        the rest target a page (optionally with a query string) nested under
        one to ``_MAX_DEPTH`` single-letter sections.

        :return: string such as ``"GET /A/F/edit.html?randomise HTTP/1.1"``
        """
        method = random.choice(self._METHODS)
        protocol = 'HTTP/' + random.choice(self._PROTOCOL_VERSIONS)

        url = '/'
        if random.random() > self._ROOT_WEIGHT:
            depth = random.randint(1, self._MAX_DEPTH)
            segments = [random.choice(self._SECTIONS) for _ in range(depth)]
            segments.append('{0}.html{1}'.format(
                random.choice(self._PAGES),
                random.choice(self._QUERY_STRINGS),
            ))
            url += '/'.join(segments)

        return '"{0}"'.format(' '.join((method, url, protocol)))

    def _random_status(self):
        """
        :return: an HTTP status code from the fixed pool, as a string
        """
        return str(random.choice(self._STATUSES))
86+
87+
88+
def main():
    """
    Command-line entry point: append batches of random records forever.

    Parses ``--path``, ``--frequency`` and ``--count``, then loops without
    end: pick a batch size uniformly from ``[0, count]``, append that many
    records to the log file and sleep for ``frequency`` seconds. The loop has
    no exit condition; stop it by interrupting the process.
    """
    import time
    import argparse

    parser = argparse.ArgumentParser(description='Apache http access log emulator')
    parser.add_argument('-p', '--path', default='apache_log.txt',
                        help='path to write to [apache_log.txt]')
    # Real argparse defaults instead of ``value or fallback``: an explicit 0
    # on the command line is honoured rather than silently replaced.
    parser.add_argument('-f', '--frequency', type=float, default=3.3,
                        help='how often to write records, in s [3.3]')
    parser.add_argument('-c', '--count', type=int, default=1000,
                        help='max count of records to write [1000]')

    args = parser.parse_args()

    # Reject values that would otherwise blow up later inside randint()/sleep()
    # with a bare traceback.
    if args.frequency < 0:
        parser.error('--frequency must not be negative')
    if args.count < 0:
        parser.error('--count must not be negative')

    # An empty --path still falls back to the default file name.
    emulator = ApacheLogEmulator(args.path or 'apache_log.txt')

    while True:
        count = random.randint(0, args.count)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Generating {0} random http requests'.format(count))
        emulator.emulate(count)
        time.sleep(args.frequency)


if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)