Skip to content
This repository has been archived by the owner on Aug 8, 2024. It is now read-only.

add TRACE method #28

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 26 additions & 3 deletions apache_log_parser/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import re
from datetime import datetime, tzinfo, timedelta
from six.moves.urllib.parse import urlparse, parse_qs, parse_qsl

import user_agents

Expand Down Expand Up @@ -37,12 +38,34 @@ def make_regex(format_template):

def extra_request_from_first_line(matched_strings):
    """Break the raw request line into method, URL, HTTP version and URL parts.

    Reads ``matched_strings['request_first_line']`` and returns a new dict.

    If the line does not start with one of the recognised HTTP methods, the
    dict contains only ``request_first_line`` plus empty strings for
    ``request_method``, ``request_url`` and ``request_http_ver``.

    Otherwise the dict contains those four keys (``request_http_ver`` is
    ``None`` when the line carries no ``HTTP/1.0`` / ``HTTP/1.1`` suffix), the
    ``request_url_*`` components produced by ``urlparse``, and three views of
    the query string: ``request_url_query_dict`` (``parse_qs``),
    ``request_url_query_list`` (``parse_qsl``) and
    ``request_url_query_simple_dict`` (last value wins for repeated keys).

    Malformed URLs never raise: if ``urlparse`` rejects the URL, all components
    are reported as empty, and if only the port is invalid,
    ``request_url_port`` is ``None``.
    """
    first_line = matched_strings['request_first_line']
    # Raw string so "\s" reaches the regex engine untouched; the dot in the
    # HTTP version is escaped so only "1.0" and "1.1" are accepted.
    match = re.match(
        r"^(?P<method>GET|HEAD|POST|OPTIONS|PUT|CONNECT|PATCH|PROPFIND|DELETE|TRACE)"
        r"\s?(?P<url>.{,10000}?)(\s+HTTP/(?P<http_ver>1\.[01]))?$",
        first_line,
    )
    if match is None:
        # Possibly garbage, ignore it
        return {
            'request_first_line': first_line,
            'request_method': '',
            'request_url': '',
            'request_http_ver': '',
        }

    groups = match.groupdict()
    url = groups['url']
    results = {
        'request_first_line': first_line,
        'request_method': groups['method'],
        'request_url': url,
        'request_http_ver': groups['http_ver'],
    }

    # Parse the URL. Log lines are untrusted input: urlparse raises ValueError
    # for some malformed hosts (e.g. an unbalanced "["), in which case every
    # component falls back to its empty value.
    try:
        parsed_url = urlparse(url)
    except ValueError:
        parsed_url = urlparse('')

    # The port is validated lazily on attribute access and raises ValueError
    # for non-numeric or out-of-range values.
    try:
        port = parsed_url.port
    except ValueError:
        port = None

    results.update({
        'request_url_scheme': parsed_url.scheme,
        'request_url_netloc': parsed_url.netloc,
        'request_url_path': parsed_url.path,
        'request_url_query': parsed_url.query,
        'request_url_fragment': parsed_url.fragment,
        'request_url_username': parsed_url.username,
        'request_url_password': parsed_url.password,
        'request_url_hostname': parsed_url.hostname,
        'request_url_port': port,
    })

    # Parse the query string
    query_pairs = parse_qsl(parsed_url.query)
    results.update({
        'request_url_query_dict': parse_qs(parsed_url.query),
        'request_url_query_list': query_pairs,
        'request_url_query_simple_dict': dict(query_pairs),
    })
    return results

def parse_user_agent(matched_strings):
Expand Down Expand Up @@ -135,7 +158,7 @@ def format_time(matched_strings):
}

# Dotted-quad IPv4 address; digit ranges are not validated.
IPv4_ADDR_REGEX = r'(?:\d{1,3}\.){3}\d{1,3}'
# IPv6 address: 2-7 colon-terminated (possibly empty) hex groups followed by
# either an embedded IPv4 address or a final hex group. The final hex group may
# be empty so that addresses ending in "::" (e.g. "2607:5300:60:2c74::") match;
# the IPv4 alternative is tried first so its leading digits are not consumed as
# a hex group.
IPv6_ADDR_REGEX = r"([0-9A-Fa-f]{0,4}:){2,7}((" + IPv4_ADDR_REGEX + r")|[0-9A-Fa-f]{0,4})"
# Either address family.
IP_ADDR_REGEX = "(" + IPv4_ADDR_REGEX + "|" + IPv6_ADDR_REGEX + ")"

FORMAT_STRINGS = [
Expand Down
2 changes: 1 addition & 1 deletion apache_log_parser/_version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
"""Store the version info so that setup.py and __init__ can access it. """
# Superseded value: the assignment on the next line rebinds __version__.
__version__ = "1.6.2.dev"
# Effective version string once both assignments have run.
__version__ = "1.7.0"
45 changes: 36 additions & 9 deletions apache_log_parser/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
import unittest
import apache_log_parser
import datetime
import doctest
import os.path

class ApacheLogParserTestCase(unittest.TestCase):
maxDiff = None
Expand Down Expand Up @@ -35,6 +33,12 @@ def test_pr8(self):
'time_us': '363701', 'num_keepalives': '0', 'request_first_line': 'GET /mypage/this/that?stuff=all HTTP/1.1',
'pid': '18572', 'response_bytes_clf': '5129', 'request_header_user_agent__os__family': u'Windows 7',
'request_url': '/mypage/this/that?stuff=all', 'request_http_ver': '1.1',
'request_url_fragment': '', 'request_url_hostname': None,
'request_url_netloc': '', 'request_url_password': None,
'request_url_path': '/mypage/this/that', 'request_url_port': None,
'request_url_query': 'stuff=all', 'request_url_query_dict': {'stuff': ['all']}, 'request_url_query_list': [('stuff', 'all')],
'request_url_query_simple_dict': {'stuff': 'all'},
'request_url_scheme': '', 'request_url_username': None,
'request_header_referer': '-', 'server_name': 'mysite.co.uk', 'request_header_user_agent__is_mobile': False,
'request_header_user_agent__browser__version_string': '37.0.2062',
'request_header_user_agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.120 Safari/537.36',
Expand All @@ -59,6 +63,12 @@ def test_pr8(self):
'pid': '25572', 'request_first_line': 'GET /Class/method/ HTTP/1.1', 'request_method': 'GET',
'server_port': '80', 'response_bytes_clf': '2266', 'server_name2': 'othersite',
'request_url': '/Class/method/',
'request_url_fragment': '', 'request_url_hostname': None,
'request_url_netloc': '', 'request_url_password': None,
'request_url_path': '/Class/method/', 'request_url_port': None,
'request_url_query': '', 'request_url_query_dict': {}, 'request_url_query_simple_dict': {},
'request_url_query_list': [], 'request_url_scheme': '',
'request_url_username': None,
'env_unique_id': 'VHiZx6wQGCMAAEiBE8kAAAAA:VHiZx6wQGiMAAGPkBnMAAAAH:VHiZx6wQGiMAAGPkBnMAAAAH',
'remote_ip': '192.168.1.100'})

Expand All @@ -79,6 +89,12 @@ def test_issue9(self):
'response_bytes_clf': '344',
'server_name2': 'blah.foo.com',
'request_url': '/content_images/3/American-University-in-Cairo-AUC.jpeg.jpg',
'request_url_fragment': '', 'request_url_hostname': None,
'request_url_netloc': '', 'request_url_password': None,
'request_url_path': '/content_images/3/American-University-in-Cairo-AUC.jpeg.jpg',
'request_url_port': None, 'request_url_query': '',
'request_url_query_dict': {}, 'request_url_query_list': [], 'request_url_query_simple_dict': {},
'request_url_scheme': '', 'request_url_username': None,
'remote_host': '10.1.1.1',
'time_received': '[08/Mar/2015:18:06:58 -0400]',
'time_received_datetimeobj': datetime.datetime(2015, 3, 8, 18, 6, 58),
Expand Down Expand Up @@ -126,13 +142,24 @@ def test_issue12_nonnum_status(self):
self.assertNotEqual(log_data1, None)
self.assertEqual(log_data1['status'], '-')

def test_issue10_ipv6(self):
    # A line whose %a field holds an IPv6 address must parse; previously this
    # test only checked that no exception was raised.
    parser = apache_log_parser.make_parser("%h %a %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"")
    sample1 = '10.178.98.112 2607:5300:60:2c74:: - - [24/Mar/2015:16:40:45 -0400] "GET /category/blog/page/3 HTTP/1.0" 200 41207 "-" "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/534.30 (KHTML, like Gecko) Ubuntu/10.10 Chromium/12.0.742.112 Chrome/12.0.742.112 Safari/534.30"'
    log_data1 = parser(sample1)
    self.assertNotEqual(log_data1, None)
    # The fields after the address must still line up with the format.
    self.assertEqual(log_data1['request_url'], '/category/blog/page/3')

def test_doctest_readme(self):
    # doctest.testfile() reports failing examples through its return value
    # rather than by raising, so the result has to be checked explicitly.
    results = doctest.testfile("../README.md")
    self.assertEqual(results.failed, 0)
def test_parsed_url(self):
    # Parse one request line with a multi-valued query string and check every
    # URL-derived key against its expected value.
    log_format = "%h %v %V %l %u %t %r %>s %b %{Referer}i %{User-agent}i"
    line = "10.1.1.1 T1 blah.foo.com - - [08/Mar/2015:18:06:58 -0400] GET /content_images/3/American-University-in-Cairo-AUC.jpeg.jpg?loc=12,23&loc=4,5&query=route&car=yes HTTP/1.1 404 344 http://www.google.ie AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.76 Safari/537.36"
    parsed = apache_log_parser.Parser(log_format).parse(line)

    expected_by_key = [
        ('request_url', '/content_images/3/American-University-in-Cairo-AUC.jpeg.jpg?loc=12,23&loc=4,5&query=route&car=yes'),
        ('request_url_scheme', ''),
        ('request_url_fragment', ''),
        ('request_url_hostname', None),
        ('request_url_netloc', ''),
        ('request_url_password', None),
        ('request_url_path', '/content_images/3/American-University-in-Cairo-AUC.jpeg.jpg'),
        ('request_url_port', None),
        ('request_url_query', 'loc=12,23&loc=4,5&query=route&car=yes'),
        ('request_url_query_dict', {'car': ['yes'], 'query': ['route'], 'loc': ['12,23', '4,5']}),
        ('request_url_query_list', [('loc', '12,23'), ('loc', '4,5'), ('query', 'route'), ('car', 'yes')]),
        ('request_url_query_simple_dict', {'car': 'yes', 'query': 'route', 'loc': '4,5'}),
        ('request_url_username', None),
    ]
    for key, expected in expected_by_key:
        self.assertEqual(parsed[key], expected)



Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
packages=['apache_log_parser'],
install_requires = [
'user-agents',
'six',
],
license = 'GPLv3+',
description = "Parse lines from an apache log file",
Expand Down