Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Support filtering the /messages API by relation type (MSC3874) #14148

Merged
merged 2 commits into from
Oct 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/14148.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Experimental support for [MSC3874](https://github.com/matrix-org/matrix-spec-proposals/pull/3874).
27 changes: 24 additions & 3 deletions synapse/api/filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
from synapse.api.constants import EduTypes, EventContentFields
from synapse.api.errors import SynapseError
from synapse.api.presence import UserPresenceState
from synapse.events import EventBase
from synapse.events import EventBase, relation_from_event
from synapse.types import JsonDict, RoomID, UserID

if TYPE_CHECKING:
Expand All @@ -53,6 +53,12 @@
# check types are valid event types
"types": {"type": "array", "items": {"type": "string"}},
"not_types": {"type": "array", "items": {"type": "string"}},
# MSC3874, filtering /messages.
"org.matrix.msc3874.rel_types": {"type": "array", "items": {"type": "string"}},
"org.matrix.msc3874.not_rel_types": {
"type": "array",
"items": {"type": "string"},
},
},
}

Expand Down Expand Up @@ -334,8 +340,15 @@ def __init__(self, hs: "HomeServer", filter_json: JsonDict):
self.labels = filter_json.get("org.matrix.labels", None)
self.not_labels = filter_json.get("org.matrix.not_labels", [])

self.related_by_senders = self.filter_json.get("related_by_senders", None)
self.related_by_rel_types = self.filter_json.get("related_by_rel_types", None)
self.related_by_senders = filter_json.get("related_by_senders", None)
self.related_by_rel_types = filter_json.get("related_by_rel_types", None)

# For compatibility with _check_fields.
self.rel_types = None
self.not_rel_types = []
if hs.config.experimental.msc3874_enabled:
self.rel_types = filter_json.get("org.matrix.msc3874.rel_types", None)
self.not_rel_types = filter_json.get("org.matrix.msc3874.not_rel_types", [])

def filters_all_types(self) -> bool:
return "*" in self.not_types
Expand Down Expand Up @@ -386,11 +399,19 @@ def _check(self, event: FilterEvent) -> bool:
# check if there is a string url field in the content for filtering purposes
labels = content.get(EventContentFields.LABELS, [])

# Check if the event has a relation.
rel_type = None
if isinstance(event, EventBase):
relation = relation_from_event(event)
if relation:
rel_type = relation.rel_type

field_matchers = {
"rooms": lambda v: room_id == v,
"senders": lambda v: sender == v,
"types": lambda v: _matches_wildcard(ev_type, v),
"labels": lambda v: v in labels,
"rel_types": lambda v: rel_type == v,
}

result = self._check_fields(field_matchers)
Expand Down
3 changes: 3 additions & 0 deletions synapse/config/experimental.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,6 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None:
self.msc3882_token_timeout = self.parse_duration(
experimental.get("msc3882_token_timeout", "5m")
)

# MSC3874: Filtering /messages with rel_types / not_rel_types.
self.msc3874_enabled: bool = experimental.get("msc3874_enabled", False)
2 changes: 2 additions & 0 deletions synapse/rest/client/versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ def on_GET(self, request: Request) -> Tuple[int, JsonDict]:
"org.matrix.msc3882": self.config.experimental.msc3882_enabled,
# Adds support for remotely enabling/disabling pushers, as per MSC3881
"org.matrix.msc3881": self.config.experimental.msc3881_enabled,
# Adds support for filtering /messages by event relation.
"org.matrix.msc3874": self.config.experimental.msc3874_enabled,
},
},
)
Expand Down
29 changes: 27 additions & 2 deletions synapse/storage/databases/main/stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,24 @@ def filter_to_clause(event_filter: Optional[Filter]) -> Tuple[str, List[str]]:
)
args.extend(event_filter.related_by_rel_types)

if event_filter.rel_types:
clauses.append(
"(%s)"
% " OR ".join(
"event_relation.relation_type = ?" for _ in event_filter.rel_types
)
)
args.extend(event_filter.rel_types)

if event_filter.not_rel_types:
clauses.append(
"((%s) OR event_relation.relation_type IS NULL)"
% " AND ".join(
"event_relation.relation_type != ?" for _ in event_filter.not_rel_types
)
)
args.extend(event_filter.not_rel_types)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is fine for now, but I am concerned about the query performance of these fields. In particular, I think for uncommon relation types this could cause us to scan the full events table looking for matches?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you concerned specifically for the not_rel_types case or for both cases?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I ran a couple of queries against Matrix HQ, one for not m.thread and one for not invalid-rel-type. Both have similar (identical?) query plans:

`m.thread` case
=>EXPLAIN ANALYZE SELECT
->     event.event_id, event.instance_name,
->     event.topological_ordering, event.stream_ordering
-> FROM events AS event
-> LEFT JOIN event_relations AS event_relation USING (event_id)
-> WHERE
->     event.outlier = false
->     AND event.room_id = '!OGEhHVWSdvArJzumhm:matrix.org'
->     AND ((505473,3359746309) < (event.topological_ordering,event.stream_ordering))
->     AND ((event_relation.relation_type != 'm.thread') OR event_relation.relation_type IS NULL)
-> ORDER BY event.topological_ordering DESC,
-> event.stream_ordering DESC LIMIT 20;
                                                                             QUERY PLAN                                                                              
---------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Limit  (cost=1.27..134.20 rows=20 width=73) (actual time=0.775..6.941 rows=20 loops=1)
   ->  Nested Loop Left Join  (cost=1.27..154360.09 rows=23224 width=73) (actual time=0.774..6.937 rows=20 loops=1)
         Filter: ((event_relation.relation_type <> 'm.thread'::text) OR (event_relation.relation_type IS NULL))
         ->  Index Scan Backward using events_order_room on events event  (cost=0.70..48115.51 rows=23288 width=73) (actual time=0.304..0.500 rows=20 loops=1)
               Index Cond: ((room_id = '!OGEhHVWSdvArJzumhm:matrix.org'::text) AND (ROW(505473, '3359746309'::bigint) < ROW(topological_ordering, stream_ordering)))
               Filter: (NOT outlier)
         ->  Index Scan using event_relations_id on event_relations event_relation  (cost=0.57..4.55 rows=1 width=55) (actual time=0.320..0.320 rows=0 loops=20)
               Index Cond: (event.event_id = event_id)
 Planning time: 0.281 ms
 Execution time: 6.974 ms
(10 rows)
`invalid-rel-case` case
=> EXPLAIN ANALYZE SELECT
    event.event_id, event.instance_name,
    event.topological_ordering, event.stream_ordering
FROM events AS event
LEFT JOIN event_relations AS event_relation USING (event_id)
WHERE
    event.outlier = false
    AND event.room_id = '!OGEhHVWSdvArJzumhm:matrix.org'
    AND ((505473,3359746309) < (event.topological_ordering,event.stream_ordering))
    AND ((event_relation.relation_type != 'invalid-rel-type') OR event_relation.relation_type IS NULL)
ORDER BY event.topological_ordering DESC,
event.stream_ordering DESC LIMIT 20;
                                                                             QUERY PLAN                                                                              
---------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Limit  (cost=1.27..134.20 rows=20 width=73) (actual time=0.069..0.362 rows=20 loops=1)
   ->  Nested Loop Left Join  (cost=1.27..154360.09 rows=23224 width=73) (actual time=0.068..0.356 rows=20 loops=1)
         Filter: ((event_relation.relation_type <> 'invalid-rel-type'::text) OR (event_relation.relation_type IS NULL))
         ->  Index Scan Backward using events_order_room on events event  (cost=0.70..48115.51 rows=23288 width=73) (actual time=0.050..0.105 rows=20 loops=1)
               Index Cond: ((room_id = '!OGEhHVWSdvArJzumhm:matrix.org'::text) AND (ROW(505473, '3359746309'::bigint) < ROW(topological_ordering, stream_ordering)))
               Filter: (NOT outlier)
         ->  Index Scan using event_relations_id on event_relations event_relation  (cost=0.57..4.55 rows=1 width=55) (actual time=0.011..0.011 rows=0 loops=20)
               Index Cond: (event.event_id = event_id)
 Planning time: 0.652 ms
 Execution time: 0.425 ms
(10 rows)


return " AND ".join(clauses), args


Expand Down Expand Up @@ -1278,8 +1296,8 @@ def _paginate_room_events_txn(
# Multiple labels could cause the same event to appear multiple times.
needs_distinct = True

# If there is a filter on relation_senders and relation_types join to the
# relations table.
# If there is a relation_senders and relation_types filter join to the
# relations table to get events related to the current event.
if event_filter and (
event_filter.related_by_senders or event_filter.related_by_rel_types
):
Expand All @@ -1294,6 +1312,13 @@ def _paginate_room_events_txn(
LEFT JOIN events AS related_event ON (relation.event_id = related_event.event_id)
"""

# If there is a not_rel_types filter join to the relations table to get
# the event's relation information.
if event_filter and (event_filter.rel_types or event_filter.not_rel_types):
join_clause += """
LEFT JOIN event_relations AS event_relation USING (event_id)
"""

if needs_distinct:
select_keywords += " DISTINCT"

Expand Down
63 changes: 62 additions & 1 deletion tests/api/test_filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ def MockEvent(**kwargs):
kwargs["event_id"] = "fake_event_id"
if "type" not in kwargs:
kwargs["type"] = "fake_type"
if "content" not in kwargs:
kwargs["content"] = {}
return make_event_from_dict(kwargs)


Expand Down Expand Up @@ -357,6 +359,66 @@ def test_filter_not_labels(self):

self.assertTrue(Filter(self.hs, definition)._check(event))

@unittest.override_config({"experimental_features": {"msc3874_enabled": True}})
def test_filter_rel_type(self):
definition = {"org.matrix.msc3874.rel_types": ["m.thread"]}
event = MockEvent(
sender="@foo:bar",
type="m.room.message",
room_id="!secretbase:unknown",
content={},
)

self.assertFalse(Filter(self.hs, definition)._check(event))

event = MockEvent(
sender="@foo:bar",
type="m.room.message",
room_id="!secretbase:unknown",
content={"m.relates_to": {"event_id": "$abc", "rel_type": "m.reference"}},
)

self.assertFalse(Filter(self.hs, definition)._check(event))

event = MockEvent(
sender="@foo:bar",
type="m.room.message",
room_id="!secretbase:unknown",
content={"m.relates_to": {"event_id": "$abc", "rel_type": "m.thread"}},
)

self.assertTrue(Filter(self.hs, definition)._check(event))

@unittest.override_config({"experimental_features": {"msc3874_enabled": True}})
def test_filter_not_rel_type(self):
definition = {"org.matrix.msc3874.not_rel_types": ["m.thread"]}
event = MockEvent(
sender="@foo:bar",
type="m.room.message",
room_id="!secretbase:unknown",
content={"m.relates_to": {"event_id": "$abc", "rel_type": "m.thread"}},
)

self.assertFalse(Filter(self.hs, definition)._check(event))

event = MockEvent(
sender="@foo:bar",
type="m.room.message",
room_id="!secretbase:unknown",
content={},
)

self.assertTrue(Filter(self.hs, definition)._check(event))

event = MockEvent(
sender="@foo:bar",
type="m.room.message",
room_id="!secretbase:unknown",
content={"m.relates_to": {"event_id": "$abc", "rel_type": "m.reference"}},
)

self.assertTrue(Filter(self.hs, definition)._check(event))

def test_filter_presence_match(self):
user_filter_json = {"presence": {"types": ["m.*"]}}
filter_id = self.get_success(
Expand Down Expand Up @@ -456,7 +518,6 @@ def test_filter_rooms(self):

self.assertEqual(filtered_room_ids, ["!allowed:example.com"])

@unittest.override_config({"experimental_features": {"msc3440_enabled": True}})
def test_filter_relations(self):
events = [
# An event without a relation.
Expand Down
1 change: 0 additions & 1 deletion tests/rest/client/test_relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -1677,7 +1677,6 @@ def test_redact_parent_annotation(self) -> None:
{"chunk": [{"type": "m.reaction", "key": "👍", "count": 1}]},
)

@unittest.override_config({"experimental_features": {"msc3440_enabled": True}})
def test_redact_parent_thread(self) -> None:
"""
Test that thread replies are still available when the root event is redacted.
Expand Down
145 changes: 8 additions & 137 deletions tests/rest/client/test_rooms.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
EventTypes,
Membership,
PublicRoomsFilterFields,
RelationTypes,
RoomTypes,
)
from synapse.api.errors import Codes, HttpResponseException
Expand All @@ -50,6 +49,7 @@

from tests import unittest
from tests.http.server._base import make_request_with_cancellation_test
from tests.storage.test_stream import PaginationTestCase
from tests.test_utils import make_awaitable

PATH_PREFIX = b"/_matrix/client/api/v1"
Expand Down Expand Up @@ -2915,149 +2915,20 @@ def _send_labelled_messages_in_room(self) -> str:
return event_id


class RelationsTestCase(unittest.HomeserverTestCase):
servlets = [
synapse.rest.admin.register_servlets_for_client_rest_resource,
room.register_servlets,
login.register_servlets,
]

def default_config(self) -> Dict[str, Any]:
config = super().default_config()
config["experimental_features"] = {"msc3440_enabled": True}
return config

def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
self.user_id = self.register_user("test", "test")
self.tok = self.login("test", "test")
self.room_id = self.helper.create_room_as(self.user_id, tok=self.tok)

self.second_user_id = self.register_user("second", "test")
self.second_tok = self.login("second", "test")
self.helper.join(
room=self.room_id, user=self.second_user_id, tok=self.second_tok
)

self.third_user_id = self.register_user("third", "test")
self.third_tok = self.login("third", "test")
self.helper.join(room=self.room_id, user=self.third_user_id, tok=self.third_tok)

# An initial event with a relation from second user.
res = self.helper.send_event(
room_id=self.room_id,
type=EventTypes.Message,
content={"msgtype": "m.text", "body": "Message 1"},
tok=self.tok,
)
self.event_id_1 = res["event_id"]
self.helper.send_event(
room_id=self.room_id,
type="m.reaction",
content={
"m.relates_to": {
"rel_type": RelationTypes.ANNOTATION,
"event_id": self.event_id_1,
"key": "👍",
}
},
tok=self.second_tok,
)

# Another event with a relation from third user.
res = self.helper.send_event(
room_id=self.room_id,
type=EventTypes.Message,
content={"msgtype": "m.text", "body": "Message 2"},
tok=self.tok,
)
self.event_id_2 = res["event_id"]
self.helper.send_event(
room_id=self.room_id,
type="m.reaction",
content={
"m.relates_to": {
"rel_type": RelationTypes.REFERENCE,
"event_id": self.event_id_2,
}
},
tok=self.third_tok,
)

# An event with no relations.
self.helper.send_event(
room_id=self.room_id,
type=EventTypes.Message,
content={"msgtype": "m.text", "body": "No relations"},
tok=self.tok,
)

def _filter_messages(self, filter: JsonDict) -> List[JsonDict]:
class RelationsTestCase(PaginationTestCase):
def _filter_messages(self, filter: JsonDict) -> List[str]:
"""Make a request to /messages with a filter, returns the chunk of events."""
from_token = self.get_success(
self.from_token.to_string(self.hs.get_datastores().main)
)
channel = self.make_request(
"GET",
"/rooms/%s/messages?filter=%s&dir=b" % (self.room_id, json.dumps(filter)),
f"/rooms/{self.room_id}/messages?filter={json.dumps(filter)}&dir=f&from={from_token}",
access_token=self.tok,
)
self.assertEqual(channel.code, HTTPStatus.OK, channel.result)

return channel.json_body["chunk"]

def test_filter_relation_senders(self) -> None:
# Messages which second user reacted to.
filter = {"related_by_senders": [self.second_user_id]}
chunk = self._filter_messages(filter)
self.assertEqual(len(chunk), 1, chunk)
self.assertEqual(chunk[0]["event_id"], self.event_id_1)

# Messages which third user reacted to.
filter = {"related_by_senders": [self.third_user_id]}
chunk = self._filter_messages(filter)
self.assertEqual(len(chunk), 1, chunk)
self.assertEqual(chunk[0]["event_id"], self.event_id_2)

# Messages which either user reacted to.
filter = {"related_by_senders": [self.second_user_id, self.third_user_id]}
chunk = self._filter_messages(filter)
self.assertEqual(len(chunk), 2, chunk)
self.assertCountEqual(
[c["event_id"] for c in chunk], [self.event_id_1, self.event_id_2]
)

def test_filter_relation_type(self) -> None:
# Messages which have annotations.
filter = {"related_by_rel_types": [RelationTypes.ANNOTATION]}
chunk = self._filter_messages(filter)
self.assertEqual(len(chunk), 1, chunk)
self.assertEqual(chunk[0]["event_id"], self.event_id_1)

# Messages which have references.
filter = {"related_by_rel_types": [RelationTypes.REFERENCE]}
chunk = self._filter_messages(filter)
self.assertEqual(len(chunk), 1, chunk)
self.assertEqual(chunk[0]["event_id"], self.event_id_2)

# Messages which have either annotations or references.
filter = {
"related_by_rel_types": [
RelationTypes.ANNOTATION,
RelationTypes.REFERENCE,
]
}
chunk = self._filter_messages(filter)
self.assertEqual(len(chunk), 2, chunk)
self.assertCountEqual(
[c["event_id"] for c in chunk], [self.event_id_1, self.event_id_2]
)

def test_filter_relation_senders_and_type(self) -> None:
# Messages which second user reacted to.
filter = {
"related_by_senders": [self.second_user_id],
"related_by_rel_types": [RelationTypes.ANNOTATION],
}
chunk = self._filter_messages(filter)
self.assertEqual(len(chunk), 1, chunk)
self.assertEqual(chunk[0]["event_id"], self.event_id_1)
return [ev["event_id"] for ev in channel.json_body["chunk"]]


class ContextTestCase(unittest.HomeserverTestCase):
Expand Down
Loading