Skip to content

Commit 61c45ac

Browse files
authored
Merge pull request #2375 from gethvi/fix-2167
FIX: Updates Turris Greylist parser and feed URL
2 parents 6d4d889 + 79bb3d0 commit 61c45ac

File tree

7 files changed

+173
-63
lines changed

7 files changed

+173
-63
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ CHANGELOG
3737
- Removed duplicate mappings from the 'Spam-URL' report. (PR#2348)
3838
- `intelmq.bots.parsers.generic.parser_csv`: Changes `time_format` parameter to use new `TimeFormat` class (PR#2329 by Filip Pokorný).
3939
- `intelmq.bots.parsers.html_table.parser`: Changes `time_format` parameter to use new `TimeFormat` class (PR#2329 by Filip Pokorný).
40+
- `intelmq.bots.parsers.turris.parser`: Updated to the latest data format (issue #2167). (PR#2373 by Filip Pokorný).
4041

4142
#### Experts
4243
- `intelmq.bots.experts.sieve`:
Lines changed: 29 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,50 +1,47 @@
1-
# SPDX-FileCopyrightText: 2015 robcza
1+
# SPDX-FileCopyrightText: 2023 Filip Pokorný
22
#
33
# SPDX-License-Identifier: AGPL-3.0-or-later
44

5-
# -*- coding: utf-8 -*-
6-
import csv
7-
import io
8-
9-
from intelmq.lib import utils
105
from intelmq.lib.bot import ParserBot
116

7+
PORTS = {
8+
"ftp": 21,
9+
"telnet": 23,
10+
"http": 80
11+
# smtp uses both 25 and 587, therefore we can't say for certain
12+
}
13+
1214

1315
class TurrisGreylistParserBot(ParserBot):
1416
"""Parse the Turris Greylist feed"""
1517

16-
def process(self):
17-
report = self.receive_message()
18-
19-
columns = [
20-
"source.ip",
21-
"source.geolocation.cc",
22-
"event_description.text",
23-
"source.asn"
24-
]
25-
26-
headers = True
27-
raw_report = utils.base64_decode(report.get("raw"))
28-
raw_report = raw_report.translate({0: None})
29-
for row in csv.reader(io.StringIO(raw_report)):
30-
# ignore headers
31-
if headers:
32-
headers = False
33-
continue
18+
parse = ParserBot.parse_csv_dict
19+
recover_line = ParserBot.recover_line_csv_dict
20+
_ignore_lines_starting = ["#"]
21+
22+
def parse_line(self, line, report):
23+
24+
for tag in line.get("Tags", "").split(","):
3425

3526
event = self.new_event(report)
3627

37-
for key, value in zip(columns, row):
38-
if key == "__IGNORE__":
39-
continue
28+
if tag in ["smtp", "http", "ftp", "telnet"]:
29+
event.add("protocol.transport", "tcp")
30+
event.add("protocol.application", tag)
31+
event.add("classification.type", "brute-force")
32+
event.add("destination.port", PORTS.get(tag))
4033

41-
event.add(key, value)
34+
elif tag == "port_scan":
35+
event.add("classification.type", "scanner")
4236

43-
event.add('classification.type', 'scanner')
44-
event.add("raw", ",".join(row))
37+
else:
38+
# cases such as "haas", "haas_logged" and "haas_not_logged" come from CZ.NIC HaaS Feed (available in IntelMQ)
39+
# it's better to use that feed for this data (it's data from SSH honeypot)
40+
continue
4541

46-
self.send_message(event)
47-
self.acknowledge_message()
42+
event.add("raw", self.recover_line(line))
43+
event.add("source.ip", line.get("Address"))
44+
yield event
4845

4946

5047
BOT = TurrisGreylistParserBot

intelmq/etc/feeds.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -651,14 +651,14 @@ providers:
651651
collector:
652652
module: intelmq.bots.collectors.http.collector_http
653653
parameters:
654-
http_url: https://www.turris.cz/greylist-data/greylist-latest.csv
654+
http_url: https://view.sentinel.turris.cz/greylist-data/greylist-latest.csv
655655
rate_limit: 43200
656656
name: __FEED__
657657
provider: __PROVIDER__
658658
parser:
659659
module: intelmq.bots.parsers.turris.parser
660660
parameters:
661-
revision: 2018-01-20
661+
revision: 2023-06-13
662662
documentation: https://project.turris.cz/en/greylist
663663
public: true
664664
Greylist with PGP signature verification:

intelmq/lib/upgrades.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
'v301_deprecations',
3939
'v310_feed_changes',
4040
'v310_shadowserver_feednames',
41+
'v320_update_turris_greylist_url',
4142
]
4243

4344

@@ -862,6 +863,22 @@ def v310_feed_changes(configuration, harmonization, dry_run, **kwargs):
862863
return messages + ' Remove affected bots yourself.' if messages else None, configuration, harmonization
863864

864865

866+
def v320_update_turris_greylist_url(configuration, harmonization, dry_run, **kwargs):
    """
    Updates Turris Greylist feed URL.

    Every HTTP collector whose ``http_url`` starts with one of the retired
    Turris Greylist locations is re-pointed to the new Sentinel location.

    Parameters:
        configuration: mapping of bot IDs to bot definitions; matching
            entries are modified in place
        harmonization: returned untouched
        dry_run: not used by this function
        **kwargs: ignored

    Returns:
        tuple of (message or None, configuration, harmonization); the message
        is None when no bot was changed
    """
    new_url = "https://view.sentinel.turris.cz/greylist-data/greylist-latest.csv"
    # The feed definition shipped the www.turris.cz address; the project.turris.cz
    # address is kept as well so configurations using it are still migrated.
    old_urls = (
        "https://www.turris.cz/greylist-data/greylist-latest.csv",
        "https://project.turris.cz/greylist-data/greylist-latest.csv",
    )
    # The feed definition references the collector as "collector_http"; the
    # shorter name is accepted too for backwards compatibility.
    collector_modules = (
        "intelmq.bots.collectors.http.collector_http",
        "intelmq.bots.collectors.http.collector",
    )

    messages = []

    for bot in configuration.values():
        if bot.get("module") not in collector_modules:
            continue
        # "parameters" and "http_url" may be missing or explicitly null
        parameters = bot.get("parameters") or {}
        http_url = parameters.get("http_url")
        if isinstance(http_url, str) and http_url.startswith(old_urls):
            parameters["http_url"] = new_url
            messages.append("Turris Greylist feed URL updated.")

    return ' '.join(messages) if messages else None, configuration, harmonization
880+
881+
865882
UPGRADES = OrderedDict([
866883
((1, 0, 0, 'dev7'), (v100_dev7_modify_syntax,)),
867884
((1, 1, 0), (v110_shadowserver_feednames, v110_deprecations)),
@@ -887,6 +904,7 @@ def v310_feed_changes(configuration, harmonization, dry_run, **kwargs):
887904
((3, 0, 1), (v301_deprecations,)),
888905
((3, 0, 2), ()),
889906
((3, 1, 0), (v310_feed_changes, v310_shadowserver_feednames)),
907+
((3, 2, 0), (v320_update_turris_greylist_url,)),
890908
])
891909

892910
ALWAYS = (harmonization,)

intelmq/tests/bots/parsers/turris/greylist-latest.csv

Lines changed: 0 additions & 3 deletions
This file was deleted.

intelmq/tests/bots/parsers/turris/greylist-latest.csv.license

Lines changed: 0 additions & 2 deletions
This file was deleted.
Lines changed: 123 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,121 @@
1-
# SPDX-FileCopyrightText: 2015 Sebastian Wagner
1+
# SPDX-FileCopyrightText: 2023 Filip Pokorný
22
#
33
# SPDX-License-Identifier: AGPL-3.0-or-later
44

5-
# -*- coding: utf-8 -*-
6-
import base64
7-
import os
85
import unittest
96

107
import intelmq.lib.test as test
118
from intelmq.bots.parsers.turris.parser import TurrisGreylistParserBot
9+
from intelmq.lib import utils
1210

13-
with open(os.path.join(os.path.dirname(__file__), 'greylist-latest.csv'), 'rb') as fh:
14-
RAW = base64.b64encode(fh.read()).decode()
15-
16-
OUTPUT1 = {'__type': 'Event',
17-
'classification.type': 'scanner',
18-
'event_description.text': 'dns',
19-
'source.geolocation.cc': 'AU',
20-
'source.asn': 15169,
21-
'raw': 'MS4xLjEuMixBVSxkbnMsMTUxNjk=',
22-
'source.ip': '1.1.1.2'}
23-
OUTPUT2 = {'__type': 'Event',
24-
'classification.type': 'scanner',
25-
'event_description.text': 'telnet',
26-
'raw': 'MS4yMC45Ni4xNDIsVEgsdGVsbmV0LDU2MTIw',
27-
'source.geolocation.cc': 'TH',
28-
'source.asn': 56120,
29-
'source.ip': '1.20.96.142'}
11+
INPUT = """\
12+
# For the terms of use see https://view.sentinel.turris.cz/greylist-data/LICENSE.txt
13+
Address,Tags
14+
159.203.8.168,http
15+
103.155.105.100,"ftp,http"
16+
117.247.161.208,telnet
17+
103.185.234.2,telnet
18+
152.32.236.101,"ftp,http,port_scan,smtp,telnet"
19+
61.219.175.42,telnet
20+
"""
21+
22+
OUTPUT = [
23+
{
24+
"protocol.transport": "tcp",
25+
"protocol.application": "http",
26+
"classification.type": "brute-force",
27+
"destination.port": 80,
28+
"raw": "QWRkcmVzcyxUYWdzCjE1OS4yMDMuOC4xNjgsaHR0cA==",
29+
"source.ip": "159.203.8.168",
30+
"__type": "Event"
31+
},
32+
{
33+
"protocol.transport": "tcp",
34+
"protocol.application": "ftp",
35+
"classification.type": "brute-force",
36+
"destination.port": 21,
37+
"raw": "QWRkcmVzcyxUYWdzCjEwMy4xNTUuMTA1LjEwMCwiZnRwLGh0dHAi",
38+
"source.ip": "103.155.105.100",
39+
"__type": "Event"
40+
},
41+
{
42+
"protocol.transport": "tcp",
43+
"protocol.application": "http",
44+
"classification.type": "brute-force",
45+
"destination.port": 80,
46+
"raw": "QWRkcmVzcyxUYWdzCjEwMy4xNTUuMTA1LjEwMCwiZnRwLGh0dHAi",
47+
"source.ip": "103.155.105.100",
48+
"__type": "Event"
49+
},
50+
{
51+
"protocol.transport": "tcp",
52+
"protocol.application": "telnet",
53+
"classification.type": "brute-force",
54+
"destination.port": 23,
55+
"raw": "QWRkcmVzcyxUYWdzCjExNy4yNDcuMTYxLjIwOCx0ZWxuZXQ=",
56+
"source.ip": "117.247.161.208",
57+
"__type": "Event"
58+
},
59+
{
60+
"protocol.transport": "tcp",
61+
"protocol.application": "telnet",
62+
"classification.type": "brute-force",
63+
"destination.port": 23,
64+
"raw": "QWRkcmVzcyxUYWdzCjEwMy4xODUuMjM0LjIsdGVsbmV0",
65+
"source.ip": "103.185.234.2",
66+
"__type": "Event"
67+
},
68+
{
69+
"protocol.transport": "tcp",
70+
"protocol.application": "ftp",
71+
"classification.type": "brute-force",
72+
"destination.port": 21,
73+
"raw": "QWRkcmVzcyxUYWdzCjE1Mi4zMi4yMzYuMTAxLCJmdHAsaHR0cCxwb3J0X3NjYW4sc210cCx0ZWxuZXQi",
74+
"source.ip": "152.32.236.101",
75+
"__type": "Event"
76+
},
77+
{
78+
"protocol.transport": "tcp",
79+
"protocol.application": "http",
80+
"classification.type": "brute-force",
81+
"destination.port": 80,
82+
"raw": "QWRkcmVzcyxUYWdzCjE1Mi4zMi4yMzYuMTAxLCJmdHAsaHR0cCxwb3J0X3NjYW4sc210cCx0ZWxuZXQi",
83+
"source.ip": "152.32.236.101",
84+
"__type": "Event"
85+
},
86+
{
87+
"classification.type": "scanner",
88+
"raw": "QWRkcmVzcyxUYWdzCjE1Mi4zMi4yMzYuMTAxLCJmdHAsaHR0cCxwb3J0X3NjYW4sc210cCx0ZWxuZXQi",
89+
"source.ip": "152.32.236.101",
90+
"__type": "Event"
91+
},
92+
{
93+
"protocol.transport": "tcp",
94+
"protocol.application": "smtp",
95+
"classification.type": "brute-force",
96+
"raw": "QWRkcmVzcyxUYWdzCjE1Mi4zMi4yMzYuMTAxLCJmdHAsaHR0cCxwb3J0X3NjYW4sc210cCx0ZWxuZXQi",
97+
"source.ip": "152.32.236.101",
98+
"__type": "Event"
99+
},
100+
{
101+
"protocol.transport": "tcp",
102+
"protocol.application": "telnet",
103+
"classification.type": "brute-force",
104+
"destination.port": 23,
105+
"raw": "QWRkcmVzcyxUYWdzCjE1Mi4zMi4yMzYuMTAxLCJmdHAsaHR0cCxwb3J0X3NjYW4sc210cCx0ZWxuZXQi",
106+
"source.ip": "152.32.236.101",
107+
"__type": "Event"
108+
},
109+
{
110+
"protocol.transport": "tcp",
111+
"protocol.application": "telnet",
112+
"classification.type": "brute-force",
113+
"destination.port": 23,
114+
"raw": "QWRkcmVzcyxUYWdzCjYxLjIxOS4xNzUuNDIsdGVsbmV0",
115+
"source.ip": "61.219.175.42",
116+
"__type": "Event"
117+
}
118+
]
30119

31120

32121
class TestTurrisGreylistParserBot(test.BotTestCase, unittest.TestCase):
@@ -37,12 +126,22 @@ class TestTurrisGreylistParserBot(test.BotTestCase, unittest.TestCase):
37126
@classmethod
38127
def set_bot(cls):
39128
cls.bot_reference = TurrisGreylistParserBot
40-
cls.default_input_message = {'__type': 'Report', 'raw': RAW}
129+
cls.default_input_message = {'__type': 'Report', 'raw': utils.base64_encode(INPUT)}
41130

42131
def test_event(self):
43132
self.run_bot()
44-
self.assertMessageEqual(0, OUTPUT1)
45-
self.assertMessageEqual(1, OUTPUT2)
133+
self.assertMessageEqual(0, OUTPUT[0])
134+
self.assertMessageEqual(1, OUTPUT[1])
135+
self.assertMessageEqual(2, OUTPUT[2])
136+
self.assertMessageEqual(3, OUTPUT[3])
137+
self.assertMessageEqual(4, OUTPUT[4])
138+
self.assertMessageEqual(5, OUTPUT[5])
139+
self.assertMessageEqual(6, OUTPUT[6])
140+
self.assertMessageEqual(7, OUTPUT[7])
141+
self.assertMessageEqual(8, OUTPUT[8])
142+
self.assertMessageEqual(9, OUTPUT[9])
143+
self.assertMessageEqual(10, OUTPUT[10])
144+
46145

47146
if __name__ == '__main__': # pragma: no cover
48147
unittest.main()

0 commit comments

Comments
 (0)