Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Fix-up event_fields in filters #15607

Merged
merged 15 commits into from
May 22, 2023
1 change: 1 addition & 0 deletions changelog.d/15607.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix a long-standing bug where filters with multiple backslashes were rejected.
15 changes: 1 addition & 14 deletions synapse/api/filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,20 +128,7 @@
"account_data": {"$ref": "#/definitions/filter"},
"room": {"$ref": "#/definitions/room_filter"},
"event_format": {"type": "string", "enum": ["client", "federation"]},
"event_fields": {
"type": "array",
"items": {
"type": "string",
# Don't allow '\\' in event field filters. This makes matching
# events a lot easier as we can then use a negative lookbehind
# assertion to split '\.' If we allowed \\ then it would
# incorrectly split '\\.' See synapse.events.utils.serialize_event
#
# Note that because this is a regular expression, we have to escape
# each backslash in the pattern.
"pattern": r"^((?!\\\\).)*$",
},
},
"event_fields": {"type": "array", "items": {"type": "string"}},
},
"additionalProperties": True, # Allow new fields for forward compatibility
}
Expand Down
80 changes: 64 additions & 16 deletions synapse/events/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import collections.abc
import re
from typing import (
TYPE_CHECKING,
Any,
Expand Down Expand Up @@ -46,13 +45,6 @@
from synapse.handlers.relations import BundledAggregations


# Split strings on "." but not "\." This uses a negative lookbehind assertion for '\'
# (?<!stuff) matches if the current position in the string is not preceded
# by a match for 'stuff'.
# TODO: This is fast, but fails to handle "foo\\.bar" which should be treated as
# the literal fields "foo\" and "bar" but will instead be treated as "foo\\.bar"
SPLIT_FIELD_REGEX = re.compile(r"(?<!\\)\.")

CANONICALJSON_MAX_INT = (2**53) - 1
CANONICALJSON_MIN_INT = -CANONICALJSON_MAX_INT

Expand Down Expand Up @@ -253,14 +245,76 @@ def _copy_field(src: JsonDict, dst: JsonDict, field: List[str]) -> None:
sub_out_dict[key_to_move] = sub_dict[key_to_move]


def _split_field(field: str) -> List[str]:
"""
Split strings unescaped dots and removes escaping.
clokep marked this conversation as resolved.
Show resolved Hide resolved

Args:
field: A string representing a path to a field.

Returns:
A list of nested fields to traverse.
"""

# Convert the field and remove escaping:
#
# 1. "content.body.thing\.with\.dots"
# 2. ["content", "body", "thing\.with\.dots"]
# 3. ["content", "body", "thing.with.dots"]

result = []

# The current field and whether the previous character was the escape
# character (a backslash).
part = ""
escaped = False

# Iterate over each character, and decide whether to append to the current
# part (following the escape rules) or to start a new part (based on the
# field separator).
for c in field:
# If the previous character was the escape character (a backslash)
# then decide what to append to the current part.
if escaped:
if c in ("\\", "."):
# An escaped backslash or dot just gets added.
part += c
else:
# A character that shouldn't be escaped gets the backslash prepended.
part += "\\" + c
# This always resets being escaped.
escaped = False

# Otherwise, the previous character was not the escape character.
else:
if c == ".":
# The field separator creates a new part.
result.append(part)
part = ""
elif c == "\\":
# A backslash adds no characters, but starts an escape sequence.
escaped = True
else:
# Otherwise, just add the current character.
part += c

# Ensure the final part is included. If there's an open escape sequence
# it should be included.
if escaped:
part += "\\"
result.append(part)

return result
clokep marked this conversation as resolved.
Show resolved Hide resolved


def only_fields(dictionary: JsonDict, fields: List[str]) -> JsonDict:
"""Return a new dict with only the fields in 'dictionary' which are present
in 'fields'.

If there are no event fields specified then all fields are included.
The entries may include '.' characters to indicate sub-fields.
So ['content.body'] will include the 'body' field of the 'content' object.
A literal '.' character in a field name may be escaped using a '\'.
A literal '.' or '\' character in a field name may be escaped using a '\'.

Args:
dictionary: The dictionary to read from.
Expand All @@ -275,13 +329,7 @@ def only_fields(dictionary: JsonDict, fields: List[str]) -> JsonDict:

# for each field, convert it:
# ["content.body.thing\.with\.dots"] => [["content", "body", "thing\.with\.dots"]]
split_fields = [SPLIT_FIELD_REGEX.split(f) for f in fields]

# for each element of the output array of arrays:
# remove escaping so we can use the right key names.
split_fields[:] = [
[f.replace(r"\.", r".") for f in field_array] for field_array in split_fields
]
split_fields = [_split_field(f) for f in fields]

output: JsonDict = {}
for field_array in split_fields:
Expand Down
6 changes: 0 additions & 6 deletions tests/api/test_filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,6 @@ def test_errors_on_invalid_filters(self) -> None:
invalid_filters: List[JsonDict] = [
# `account_data` must be a dictionary
{"account_data": "Hello World"},
# `event_fields` entries must not contain backslashes
{"event_fields": [r"\\foo"]},
# `event_format` must be "client" or "federation"
{"event_format": "other"},
# `not_rooms` must contain valid room IDs
Expand Down Expand Up @@ -114,10 +112,6 @@ def test_valid_filters(self) -> None:
"event_format": "client",
"event_fields": ["type", "content", "sender"],
},
# a single backslash should be permitted (though it is debatable whether
# it should be permitted before anything other than `.`, and what that
# actually means)
#
# (note that event_fields is implemented in
# synapse.events.utils.serialize_event, and so whether this actually works
# is tested elsewhere. We just want to check that it is allowed through the
Expand Down
39 changes: 39 additions & 0 deletions tests/events/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,15 @@
from typing import Any, List, Mapping, Optional

import attr
from parameterized import parameterized

from synapse.api.constants import EventContentFields
from synapse.api.room_versions import RoomVersions
from synapse.events import EventBase, make_event_from_dict
from synapse.events.utils import (
PowerLevelsContent,
SerializeEventConfig,
_split_field,
copy_and_fixup_power_levels_contents,
maybe_upsert_event_field,
prune_event,
Expand Down Expand Up @@ -794,3 +796,40 @@ def test_invalid_types_raise_type_error(self) -> None:
def test_invalid_nesting_raises_type_error(self) -> None:
with self.assertRaises(TypeError):
copy_and_fixup_power_levels_contents({"a": {"b": {"c": 1}}}) # type: ignore[dict-item]


class SplitFieldTestCase(stdlib_unittest.TestCase):
@parameterized.expand(
[
# A field with no dots.
["m", ["m"]],
# Simple dotted fields.
["m.foo", ["m", "foo"]],
["m.foo.bar", ["m", "foo", "bar"]],
# Backslash is used as an escape character.
["m\\.foo", ["m.foo"]],
["m\\\\.foo", ["m\\", "foo"]],
["m\\\\\\.foo", ["m\\.foo"]],
["m\\\\\\\\.foo", ["m\\\\", "foo"]],
["m\\foo", ["m\\foo"]],
["m\\\\foo", ["m\\foo"]],
["m\\\\\\foo", ["m\\\\foo"]],
["m\\\\\\\\foo", ["m\\\\foo"]],
# Ensure that escapes at the end don't cause issues.
["m.foo\\", ["m", "foo\\"]],
["m.foo\\\\", ["m", "foo\\"]],
["m.foo\\.", ["m", "foo."]],
["m.foo\\\\.", ["m", "foo\\", ""]],
["m.foo\\\\\\.", ["m", "foo\\."]],
clokep marked this conversation as resolved.
Show resolved Hide resolved
# Empty parts (corresponding to properties which are an empty string) are allowed.
[".m", ["", "m"]],
["..m", ["", "", "m"]],
["m.", ["m", ""]],
["m..", ["m", "", ""]],
["m..foo", ["m", "", "foo"]],
# Invalid escape sequences.
["\\m", ["\\m"]],
]
)
def test_split_field(self, input: str, expected: str) -> None:
self.assertEqual(_split_field(input), expected)