Skip to content

Commit c75c21d

Browse files
postgresql output: escape all null bytes
While null bytes (`\0`, not SQL "NULL") in TEXT and JSON/JSONB fields are valid, data containing null bytes can cause trouble with some combinations of clients, servers, and their settings. To prevent unhandled errors and data that cannot be inserted into the database, all null bytes are escaped. Fixes #2203.
1 parent 469dc49 commit c75c21d

File tree

5 files changed

+50
-7
lines changed

5 files changed

+50
-7
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,8 @@ CHANGELOG
9494
- `intelmq.bots.outputs.templated_smtp.output`:
9595
- Add new function `from_json()` (which just calls `json.loads()` in the standard Python environment), meaning the Templated SMTP output bot can take strings containing JSON documents and do the formatting itself (PR#2120 by Karl-Johan Karlsson).
9696
- Lift restriction on requirement jinja2 < 3 (PR#2158 by Sebastian Wagner).
97+
- `intelmq.bots.outputs.sql`:
98+
- For PostgreSQL, escape null bytes in text to prevent "unsupported Unicode escape sequence" issues (PR#2223 by Sebastian Wagner, fixes #2203).
9799

98100
### Documentation
99101
- Feeds: Add documentation for newly supported dataplane feeds, see above (PR#2102 by Mikk Margus Möll).

docs/user/bots.rst

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4025,7 +4025,8 @@ The parameters marked with 'PostgreSQL' will be sent to libpq via psycopg2. Chec
40254025
* `sslmode`: PostgreSQL sslmode, can be `'disable'`, `'allow'`, `'prefer'` (default), `'require'`, `'verify-ca'` or `'verify-full'`. See postgresql docs: https://www.postgresql.org/docs/current/static/libpq-connect.html#libpq-connect-sslmode
40264026
* `table`: name of the database table into which events are to be inserted
40274027
4028-
**PostgreSQL**
4028+
PostgreSQL
4029+
~~~~~~~~~~
40294030
40304031
You have two basic choices to run PostgreSQL:
40314032
@@ -4080,7 +4081,13 @@ if the user `intelmq` can authenticate):
40804081
40814082
psql -h localhost intelmq-events intelmq </tmp/initdb.sql
40824083
4083-
**SQLite**
4084+
**PostgreSQL and null characters**
4085+
4086+
While null characters (`\0`, not SQL "NULL") in TEXT and JSON/JSONB fields are valid, data containing null characters can cause trouble with some combinations of clients, servers, and their settings.
4087+
To prevent unhandled errors and data which can't be inserted into the database, all null characters are escaped (`\\u0000`) before insertion.
4088+
4089+
SQLite
4090+
~~~~~~
40844091
40854092
Similarly to PostgreSQL, you can use `intelmq_psql_initdb` to create initial SQL statements
40864093
from `harmonization.conf`. The script will create the required table layout

intelmq/bots/outputs/sql/output.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# SPDX-FileCopyrightText: 2019 Edvard Rejthar
1+
# SPDX-FileCopyrightText: 2019 Edvard Rejthar, 2022 Intevation GmbH
22
#
33
# SPDX-License-Identifier: AGPL-3.0-or-later
44

@@ -37,7 +37,7 @@ def process(self):
3737
event = self.receive_message().to_dict(jsondict_as_string=self.jsondict_as_string)
3838

3939
keys = '", "'.join(event.keys())
40-
values = list(event.values())
40+
values = self.prepare_values(event.values())
4141
fvalues = len(values) * f'{self.format_char}, '
4242
query = ('INSERT INTO {table} ("{keys}") VALUES ({values})'
4343
''.format(table=self.table, keys=keys, values=fvalues[:-2]))
@@ -46,5 +46,14 @@ def process(self):
4646
self.con.commit()
4747
self.acknowledge_message()
4848

49+
def prepare_values(self, values):
    r"""
    Return the event values as a list, ready to be passed to the SQL driver.

    For PostgreSQL, JSON-encoded null characters (the escape sequence
    ``\u0000``) inside string values are escaped a second time
    (``\\u0000``), so that PostgreSQL does not encounter a null character
    while decoding the value.
    See https://github.com/certtools/intelmq/issues/2203

    Only a ``\u0000`` whose leading backslash is not itself escaped is
    rewritten. A sequence such as ``\\u0000`` (an escaped backslash followed
    by the plain text ``u0000``) is already harmless; blindly prefixing
    another backslash would turn it into ``\\`` + ``\u0000`` and thereby
    create the very null escape this method is meant to remove.

    Parameters:
        values: iterable of the event's values (any types)

    Returns:
        list: the values in their original order; non-string values and,
        for engines other than PostgreSQL, all values are passed through
        unchanged.
    """
    if self._engine_name != self.POSTGRESQL:
        return list(values)

    # Local import keeps this method self-contained.
    import re

    # A run of backslashes that is not preceded by another backslash,
    # consisting of zero or more escaped-backslash pairs (group 1) followed
    # by the single backslash that starts the \u0000 escape.
    null_escape = re.compile(r'(?<!\\)((?:\\\\)*)\\u0000')

    # Keep the escaped-backslash pairs and double the escaping backslash.
    return [null_escape.sub(r'\1\\\\u0000', value) if isinstance(value, str) else value
            for value in values]
57+
4958

5059
BOT = SQLOutputBot

intelmq/lib/test.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ def log(name, *args, **kwargs):
188188
return logger
189189
return log
190190

191-
def prepare_bot(self, parameters={}, destination_queues=None):
191+
def prepare_bot(self, parameters={}, destination_queues=None, prepare_source_queue: bool = True):
192192
"""
193193
Reconfigures the bot with the changed attributes.
194194
@@ -238,7 +238,8 @@ def prepare_bot(self, parameters={}, destination_queues=None):
238238
self.pipe.set_queues(parameters.source_queue, "source")
239239
self.pipe.set_queues(parameters.destination_queues, "destination")
240240

241-
self.prepare_source_queue()
241+
if prepare_source_queue:
242+
self.prepare_source_queue()
242243

243244
def prepare_source_queue(self):
244245
if self.input_message is not None:

intelmq/tests/bots/outputs/sql/test_output_postgresql.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1-
# SPDX-FileCopyrightText: 2019 Sebastian Wagner
1+
# SPDX-FileCopyrightText: 2019 Sebastian Wagner, 2022 Intevation GmbH
22
#
33
# SPDX-License-Identifier: AGPL-3.0-or-later
44

55
# -*- coding: utf-8 -*-
6+
import json
67
import os
78
import unittest
89

@@ -28,6 +29,10 @@
2829
"extra.asn": 64496,
2930
"extra.ip": "192.0.2.1",
3031
}
32+
INPUT_NULL = {"__type": "Event",
33+
"classification.type": "undetermined",
34+
"extra.payload": '{"text": "M41\u0012)3U>\bxӾ6\u0000\u0013M6M6M4M4]4y]4ӭ4"}',
35+
}
3136

3237

3338
@test.skip_database()
@@ -77,6 +82,18 @@ def test_extra(self):
7782
from_db = {k: v for k, v in self.cur.fetchone().items() if v is not None}
7883
self.assertEqual(from_db['extra'], {"asn": 64496, "ip": "192.0.2.1"})
7984

85+
def test_extra_nullbyte(self):
86+
"""
87+
Test a Nullbyte in an extra-field
88+
https://github.com/certtools/intelmq/issues/2203
89+
"""
90+
self.input_message = INPUT_NULL
91+
self.run_bot()
92+
self.cur.execute('SELECT "extra" FROM tests WHERE "classification.type" = \'undetermined\'')
93+
self.assertEqual(self.cur.rowcount, 1)
94+
from_db = {k: v for k, v in self.cur.fetchone().items() if v is not None}
95+
self.assertEqual(from_db['extra'], {"payload": '{"text": "M41\u0012)3U>\bxӾ6\\u0000\u0013M6M6M4M4]4y]4ӭ4"}'})
96+
8097
@classmethod
8198
def tearDownClass(cls):
8299
if not os.environ.get('INTELMQ_TEST_DATABASES'):
@@ -122,6 +139,13 @@ def test_event(self):
122139
from_db = {k: v for k, v in self.cur.fetchone().items() if v is not None}
123140
self.assertDictEqual(from_db, OUTPUT1)
124141

142+
def test_prepare_null(self):
143+
""" Test if a null character in extra is correctly removed. https://github.com/certtools/intelmq/issues/2203 """
144+
values = [json.dumps({"special": "foo\x00bar"})]
145+
self.prepare_bot(prepare_source_queue=False)
146+
output = self.bot.prepare_values(values)
147+
self.assertEqual(output, ['{"special": "foo\\\\u0000bar"}'])
148+
125149
@classmethod
126150
def tearDownClass(cls):
127151
if not os.environ.get('INTELMQ_TEST_DATABASES'):

0 commit comments

Comments
 (0)