Skip to content

Commit 66cebe1

Browse files
committed
ENH: add Honeypot HTTP Scan feed to Shadowserver parser
1 parent 9d4baa8 commit 66cebe1

File tree

4 files changed

+197
-0
lines changed

4 files changed

+197
-0
lines changed

intelmq/bots/parsers/shadowserver/_config.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@
7878
7979
"""
8080
import re
81+
import base64
8182
from typing import Optional, Dict, Tuple, Any
8283

8384
import intelmq.lib.harmonization as harmonization
@@ -204,6 +205,18 @@ def convert_date_utc(value: str) -> Optional[str]:
204205
return harmonization.DateTime.sanitize(value + '+00:00')
205206

206207

208+
# Characters of the standard base64 alphabet, including the '=' padding sign.
# Used as a cheap pre-filter before attempting a strict decode.
_base64_alphabet = set('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=')


def maybe_base64(value: Optional[str]) -> Optional[str]:
    """
    Return *value* as base64 text, encoding it only when necessary.

    Parameters:
        value: field content that is either already base64-encoded or plain text

    Returns:
        None for empty/missing input, the unchanged input if it is
        well-formed base64, otherwise the base64 encoding of the input.
    """
    if not value:
        return None

    # An alphabet check alone is not sufficient: plain text such as "GET"
    # or malformed strings such as "a=b=" consist only of base64 characters
    # but are not base64. Require a length that is a multiple of four and a
    # successful strict decode before passing the value through untouched.
    if len(value) % 4 == 0 and set(value).issubset(_base64_alphabet):
        try:
            base64.b64decode(value, validate=True)
        except ValueError:  # binascii.Error is a subclass of ValueError
            pass
        else:
            return value

    return base64.b64encode(value.encode()).decode()
218+
219+
207220
# https://www.shadowserver.org/wiki/pmwiki.php/Services/Open-DB2
208221
open_db2_discovery_service = {
209222
'required_fields': [
@@ -2813,6 +2826,71 @@ def scan_exchange_identifier(field):
28132826
}
28142827
}
28152828

2829+
# https://www.shadowserver.org/what-we-do/network-reporting/honeypot-http-scanner-events/
2830+
honeypot_http_scan = {
    # Each entry is (IntelMQ key, Shadowserver CSV column[, converter[, ...]]).
    # A bare 'extra.' key is used for columns that have no dedicated
    # IntelMQ field name in this mapping.
    'required_fields': [
        ('time.source', 'timestamp', add_UTC_to_timestamp),
        ('source.ip', 'src_ip', validate_ip),
        ('source.port', 'src_port'),
    ],
    'optional_fields': [
        # --- scanning host ---
        ('source.asn', 'src_asn', invalidate_zero),
        ('source.geolocation.cc', 'src_geo'),
        ('source.geolocation.region', 'src_region'),
        ('source.geolocation.city', 'src_city'),
        ('source.reverse_dns', 'src_hostname'),
        ('extra.source.naics', 'src_naics', invalidate_zero),
        ('extra.source.sector', 'src_sector', validate_to_none),
        ('extra.', 'device_vendor', validate_to_none),
        ('extra.', 'device_type', validate_to_none),
        ('extra.', 'device_model', validate_to_none),
        # --- honeypot that received the request ---
        ('destination.ip', 'dst_ip', validate_ip),
        ('destination.port', 'dst_port'),
        ('destination.asn', 'dst_asn', invalidate_zero),
        ('destination.geolocation.cc', 'dst_geo'),
        ('destination.geolocation.region', 'dst_region'),
        ('destination.geolocation.city', 'dst_city'),
        ('destination.reverse_dns', 'dst_hostname'),
        ('extra.destination.naics', 'dst_naics', invalidate_zero),
        # The sector is a text column like src_sector, so it takes the same
        # converter; invalidate_zero is only used for numeric columns
        # (ASN, NAICS) in this mapping.
        ('extra.destination.sector', 'dst_sector', validate_to_none),
        # --- event classification ---
        ('extra.', 'public_source', validate_to_none),
        ('malware.name', 'infection'),
        ('extra.', 'family', validate_to_none),
        ('extra.', 'tag', validate_to_none),
        ('extra.', 'application', validate_to_none),
        ('extra.', 'version', validate_to_none),
        ('extra.', 'event_id', validate_to_none),
        ('extra.', 'pattern', validate_to_none),
        # --- HTTP request details ---
        ('destination.url', 'http_url', convert_http_host_and_url, True),
        ('user_agent', 'http_agent', validate_to_none),
        ('extra.method', 'http_request_method', validate_to_none),
        ('extra.', 'url_scheme', validate_to_none),
        ('extra.', 'session_tags', validate_to_none),
        # --- vulnerability / threat metadata ---
        ('extra.', 'vulnerability_enum', validate_to_none),
        ('extra.', 'vulnerability_id', validate_to_none),
        ('extra.', 'vulnerability_class', validate_to_none),
        ('extra.', 'vulnerability_score', validate_to_none),
        ('extra.', 'vulnerability_severity', validate_to_none),
        ('extra.', 'vulnerability_version', validate_to_none),
        ('extra.', 'threat_framework', validate_to_none),
        ('extra.', 'threat_tactic_id', validate_to_none),
        ('extra.', 'threat_technique_id', validate_to_none),
        ('extra.', 'target_vendor', validate_to_none),
        ('extra.', 'target_product', validate_to_none),
        ('extra.', 'target_class', validate_to_none),
        # --- payload ---
        ('extra.', 'file_md5', validate_to_none),
        ('extra.', 'file_sha256', validate_to_none),
        # Raw request/body are passed through maybe_base64.
        ('extra.', 'request_raw', maybe_base64),
        ('extra.', 'body_raw', maybe_base64),
    ],
    # Values set on every event produced from this feed.
    'constant_fields': {
        'classification.identifier': 'honeypot-http-scan',
        'classification.taxonomy': 'other',
        'classification.type': 'other',
        'protocol.application': 'http',
    },
}
2893+
28162894
mapping = (
28172895
# feed name, file name, function
28182896
('Accessible-ADB', 'scan_adb', accessible_adb),
@@ -2845,6 +2923,7 @@ def scan_exchange_identifier(field):
28452923
('Honeypot-Amplification-DDoS-Events', 'event4_honeypot_ddos_amp', honeypot_ddos_amp),
28462924
('Honeypot-Brute-Force-Events', 'event4_honeypot_brute_force', honeypot_brute_force),
28472925
('Honeypot-Darknet', 'event4_honeypot_darknet', event4_honeypot_darknet),
2926+
('Honeypot-HTTP-Scan', 'event4_honeypot_http_scan', honeypot_http_scan),
28482927
('ICS-Scanners', 'hp_ics_scan', ics_scanners),
28492928
('IPv6-Sinkhole-HTTP-Drone', 'sinkhole6_http', ipv6_sinkhole_http_drone), # legacy (replaced by event46_sinkhole_http)
28502929
('IP-Spoofer-Events', 'event4_ip_spoofer', event4_ip_spoofer),
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
# SPDX-FileCopyrightText: 2021 Mikk Margus Möll <mikk@cert.ee>
2+
#
3+
# SPDX-License-Identifier: AGPL-3.0-or-later
4+
5+
# -*- coding: utf-8 -*-
6+
7+
import os
8+
import unittest
9+
10+
import intelmq.lib.test as test
11+
import intelmq.lib.utils as utils
12+
from intelmq.bots.parsers.shadowserver.parser import ShadowserverParserBot
13+
14+
# Location of the CSV fixture, resolved relative to this test module.
_FIXTURE_PATH = os.path.join(os.path.dirname(__file__),
                             'testdata/event4_honeypot_http_scan.csv')

# Whole fixture as one string (used as the report payload) ...
with open(_FIXTURE_PATH) as fixture:
    EXAMPLE_FILE = fixture.read()

# ... and split per line: index 0 is the CSV header, 1.. are the data rows.
EXAMPLE_LINES = EXAMPLE_FILE.splitlines()
18+
19+
# Input message handed to the bot: the complete CSV fixture wrapped in a Report.
EXAMPLE_REPORT = {
    '__type': 'Report',
    'feed.name': 'Honeypot-HTTP-Scan',
    'time.observation': '2021-08-01T12:00:00+00:00',
    'extra.file_name': '2021-08-01-event4_honeypot_http_scan-test-test.csv',
    'raw': utils.base64_encode(EXAMPLE_FILE),
}
26+
def _raw_for(row: int) -> str:
    """Return the base64 'raw' value for a data row: CSV header plus that row."""
    return utils.base64_encode('\n'.join((EXAMPLE_LINES[0], EXAMPLE_LINES[row])))


# Fields that are identical in every expected event of this fixture.
_SHARED_EVENT_FIELDS = {
    '__type': 'Event',
    'feed.name': 'Honeypot-HTTP-Scan',
    'classification.identifier': 'honeypot-http-scan',
    'classification.taxonomy': 'other',
    'classification.type': 'other',
    'malware.name': 'http-scan',
    'protocol.application': 'http',
    'extra.protocol': 'tcp',
    'extra.public_source': 'CAPRICA-EU',
    'source.geolocation.cc': 'EE',
    'source.geolocation.city': 'TALLINN',
    'source.geolocation.region': 'HARJUMAA',
    'time.observation': '2021-08-01T12:00:00+00:00',
}

# Expected events, one per data row of the CSV fixture, in file order.
EVENTS = [
    # Row 1: request_raw is plain text in the fixture and is expected base64-encoded.
    {
        **_SHARED_EVENT_FIELDS,
        'time.source': '2021-08-01T00:24:08+00:00',
        'source.ip': '191.23.45.67',
        'source.port': 36455,
        'source.asn': 1234,
        'source.reverse_dns': '191-23-45-67-host.example.com',
        'extra.source.naics': 518210,
        'extra.source.sector': 'Communications, Service Provider, and Hosting Service',
        'destination.ip': '109.87.65.43',
        'destination.port': 80,
        'destination.asn': 5678,
        'destination.geolocation.cc': 'UK',
        'destination.geolocation.region': 'WINDSOR AND MAIDENHEAD',
        'destination.geolocation.city': 'MAIDENHEAD',
        'extra.destination.naics': 518210,
        'extra.version': '3.1.3-dev',
        'extra.http_url': '/js/ueditor/wwwroot/way-board.cgi',
        'extra.request_raw': 'R0VUIC9qcy91ZWRpdG9yL3d3d3Jvb3Qvd2F5LWJvYXJkLmNnaSBIVFRQLzEuMHJuQWNjZXB0OiB0ZXh0L2h0bWwsYXBwbGljYXRpb24veGh0bWwreG1sLGFwcGxpY2F0aW9uL3htbDtxPTAuOSwqLyo7cT0wLjhybkFjY2VwdC1FbmNvZGluZzogZ3ppcCwgZGVmbGF0ZXJuQWNjZXB0LUxhbmd1YWdlOiBlbi1VUyxlbjtxPTAuNXJuQ29ubmVjdGlvbjogY2xvc2VybkRudDogMXJuSG9zdDogMTA5Ljg3LjY1LjQzcm5PcmlnaW46IGh0dHA6Ly8xMDkuODcuNjUuNDNyblJlZmVyZXI6IGh0dHA6Ly8xMDkuODcuNjUuNDMvcm5Vc2VyLUFnZW50OiBNb3ppbGxhLzUuMCAoV2luZG93cyBOVCA2LjE7IFdPVzY0KSBBcHBsZVdlYktpdC81MzcuMzYgKEtIVE1MLCBsaWtlIEdlY2tvKSBDaHJvbWUvNTMuMC4yNzg1LjEwNCBTYWZhcmkvNTM3LjM2IENvcmUvMS41My4zMDg0LjQwMCBRUUJyb3dzZXIvOS42LjExMzQ2LjQwMA==',
        'raw': _raw_for(1),
    },
    # Row 2: request_raw is already base64 in the fixture and is expected unchanged.
    {
        **_SHARED_EVENT_FIELDS,
        'time.source': '2021-08-01T05:21:59+00:00',
        'source.ip': '45.67.89.123',
        'source.port': 58610,
        'source.asn': 12345,
        'destination.ip': '82.41.20.10',
        'destination.port': 8080,
        'destination.asn': 23456,
        'destination.geolocation.cc': 'UA',
        'destination.geolocation.region': "KHARKIVS'KA OBLAST'",
        'destination.geolocation.city': 'KHARKIV',
        'extra.http_url': '/',
        'extra.method': 'GET',
        'extra.url_scheme': 'http',
        'extra.user_agent': 'Mozilla/5.0 (Windows NT 5.1; rv:9.0.1) Gecko/20100101 Firefox/9.0.1',
        'extra.request_raw': 'R0VUIC8gSFRUUC8xLjENCkhvc3Q6IDgyLjQxLjIwLjEwOjgwODANCkFjY2VwdDogdGV4dC9odG1sLGFwcGxpY2F0aW9uL3hodG1sK3htbCxhcHBsaWNhdGlvbi94bWw7cT0wLjksKi8qO3E9MC44DQpBY2NlcHQtRW5jb2Rpbmc6IGRlZmxhdGUsIGd6aXAsIGlkZW50aXR5DQpBY2NlcHQtTGFuZ3VhZ2U6IGVuLVVTO3E9MC42LGVuO3E9MC40DQpVc2VyLUFnZW50OiBNb3ppbGxhLzUuMCAoV2luZG93cyBOVCA1LjE7IHJ2OjkuMC4xKSBHZWNrby8yMDEwMDEwMSBGaXJlZm94LzkuMC4xDQoNCg==',
        'raw': _raw_for(2),
    },
]
93+
94+
95+
class TestShadowserverParserBot(test.BotTestCase, unittest.TestCase):
    """
    Run ShadowserverParserBot on the Honeypot-HTTP-Scan example report and
    compare its output messages with EVENTS.
    """

    @classmethod
    def set_bot(cls):
        """ Select the bot under test and the report it receives by default. """
        cls.default_input_message = EXAMPLE_REPORT
        cls.bot_reference = ShadowserverParserBot

    def test_event(self):
        """ Each output message must equal the expected event at the same index. """
        self.run_bot()
        for position, expected in enumerate(EVENTS):
            self.assertMessageEqual(position, expected)
110+
111+
112+
# Allow running this test module directly as a script.
if __name__ == '__main__':  # pragma: no cover
    unittest.main()
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
"timestamp","protocol","src_ip","src_port","src_asn","src_geo","src_region","src_city","src_hostname","src_naics","src_sector","device_vendor","device_type","device_model","dst_ip","dst_port","dst_asn","dst_geo","dst_region","dst_city","dst_hostname","dst_naics","dst_sector","public_source","infection","family","tag","application","version","event_id","pattern","http_url","http_agent","http_request_method","url_scheme","session_tags","vulnerability_enum","vulnerability_id","vulnerability_class","vulnerability_score","vulnerability_severity","vulnerability_version","threat_framework","threat_tactic_id","threat_technique_id","target_vendor","target_product","target_class","file_md5","file_sha256","request_raw","body_raw"
2+
"2021-08-01 00:24:08","tcp","191.23.45.67",36455,1234,"EE","HARJUMAA","TALLINN","191-23-45-67-host.example.com",518210,"Communications, Service Provider, and Hosting Service",,,,"109.87.65.43",80,5678,"UK","WINDSOR AND MAIDENHEAD","MAIDENHEAD",,518210,,"CAPRICA-EU","http-scan",,,,"3.1.3-dev",,"unknown","/js/ueditor/wwwroot/way-board.cgi",,,,,,,,,,,,,,,,,,,"GET /js/ueditor/wwwroot/way-board.cgi HTTP/1.0rnAccept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8rnAccept-Encoding: gzip, deflaternAccept-Language: en-US,en;q=0.5rnConnection: closernDnt: 1rnHost: 109.87.65.43rnOrigin: http://109.87.65.43rnReferer: http://109.87.65.43/rnUser-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.104 Safari/537.36 Core/1.53.3084.400 QQBrowser/9.6.11346.400",
3+
"2021-08-01 05:21:59","tcp","45.67.89.123",58610,12345,"EE","HARJUMAA","TALLINN",,,,,,,"82.41.20.10",8080,23456,"UA","KHARKIVS'KA OBLAST'","KHARKIV",,,,"CAPRICA-EU","http-scan",,,,,,,"/","Mozilla/5.0 (Windows NT 5.1; rv:9.0.1) Gecko/20100101 Firefox/9.0.1","GET","http",,,,,,,,,,,,,,,,"R0VUIC8gSFRUUC8xLjENCkhvc3Q6IDgyLjQxLjIwLjEwOjgwODANCkFjY2VwdDogdGV4dC9odG1sLGFwcGxpY2F0aW9uL3hodG1sK3htbCxhcHBsaWNhdGlvbi94bWw7cT0wLjksKi8qO3E9MC44DQpBY2NlcHQtRW5jb2Rpbmc6IGRlZmxhdGUsIGd6aXAsIGlkZW50aXR5DQpBY2NlcHQtTGFuZ3VhZ2U6IGVuLVVTO3E9MC42LGVuO3E9MC40DQpVc2VyLUFnZW50OiBNb3ppbGxhLzUuMCAoV2luZG93cyBOVCA1LjE7IHJ2OjkuMC4xKSBHZWNrby8yMDEwMDEwMSBGaXJlZm94LzkuMC4xDQoNCg==",
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
SPDX-FileCopyrightText: 2021 Mikk Margus Möll <mikk@cert.ee>
2+
SPDX-License-Identifier: AGPL-3.0-or-later

0 commit comments

Comments
 (0)