Skip to content

Policy server part 1: Actually call the policy server #18387

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 27 commits into from
May 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
1cca9f2
Copy over core of module to Synapse
turt2live May 2, 2025
a34c7af
Check policy server at same spot as spam checker
turt2live May 2, 2025
3b4e7f9
Merge branch 'develop' into travis/policyserv/00-check
turt2live May 13, 2025
753ce5c
First-pass linter appeasement
turt2live May 13, 2025
d8141d8
Merge branch 'develop' into travis/policyserv/00-check
turt2live May 14, 2025
0cd6562
Attempt to fix linting
turt2live May 14, 2025
f71c846
Empty commit to kick CI
turt2live May 14, 2025
bc09886
add changelog
turt2live May 14, 2025
fd78613
Add some rationale commentary
turt2live May 14, 2025
5984463
Improve docs
turt2live May 14, 2025
a27a539
Hopefully avoid cyclic dependency
turt2live May 15, 2025
08341e8
Attempt to fix linting
turt2live May 15, 2025
dfaf62a
Empty commit to kick CI
turt2live May 15, 2025
5af6638
Optional?
turt2live May 15, 2025
1a30ff4
Improve docs
turt2live May 15, 2025
aad740a
Merge remote-tracking branch 'origin/develop' into travis/policyserv/…
turt2live May 15, 2025
7f2d834
Use parse_and_validate_server_name
turt2live May 15, 2025
70987fa
Add logging for live debugging
turt2live May 15, 2025
f35584e
it helps to await
turt2live May 15, 2025
959feb9
Revert "Add logging for live debugging"
turt2live May 15, 2025
7118d4a
Add untested tests
turt2live May 15, 2025
eedefc7
Attempt to fix linting
turt2live May 15, 2025
75826a6
Empty commit to kick CI
turt2live May 15, 2025
402fc80
The CI is more intelligent than the IDE
turt2live May 15, 2025
ef119d6
Apply suggestions from code review
turt2live May 21, 2025
71bec51
Fix docs
turt2live May 21, 2025
b676fb2
Merge branch 'develop' into travis/policyserv/00-check
turt2live May 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/18387.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add support for calling Policy Servers ([MSC4284](https://github.com/matrix-org/matrix-spec-proposals/pull/4284)) to mark events as spam.
34 changes: 34 additions & 0 deletions synapse/federation/federation_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from synapse.events import EventBase, make_event_from_dict
from synapse.events.utils import prune_event, validate_canonicaljson
from synapse.federation.units import filter_pdus_for_valid_depth
from synapse.handlers.room_policy import RoomPolicyHandler
from synapse.http.servlet import assert_params_in_dict
from synapse.logging.opentracing import log_kv, trace
from synapse.types import JsonDict, get_domain_from_id
Expand Down Expand Up @@ -64,6 +65,24 @@ def __init__(self, hs: "HomeServer"):
self._clock = hs.get_clock()
self._storage_controllers = hs.get_storage_controllers()

# We need to define this lazily otherwise we get a cyclic dependency.
# self._policy_handler = hs.get_room_policy_handler()
self._policy_handler: Optional[RoomPolicyHandler] = None

def _lazily_get_policy_handler(self) -> RoomPolicyHandler:
"""Lazily get the room policy handler.

This is required to avoid an import cycle: RoomPolicyHandler requires a
FederationClient, which requires a FederationBase, which requires a
RoomPolicyHandler.

Returns:
RoomPolicyHandler: The room policy handler.
"""
if self._policy_handler is None:
self._policy_handler = self.hs.get_room_policy_handler()
return self._policy_handler

@trace
async def _check_sigs_and_hash(
self,
Expand All @@ -80,6 +99,10 @@ async def _check_sigs_and_hash(
Also runs the event through the spam checker; if it fails, redacts the event
and flags it as soft-failed.

Also checks that the event is allowed by the policy server, if the room uses
a policy server. If the event is not allowed, the event is flagged as
soft-failed but not redacted.

Args:
room_version: The room version of the PDU
pdu: the event to be checked
Expand Down Expand Up @@ -145,6 +168,17 @@ async def _check_sigs_and_hash(
)
return redacted_event

policy_allowed = await self._lazily_get_policy_handler().is_event_allowed(pdu)
if not policy_allowed:
logger.warning(
"Event not allowed by policy server, soft-failing %s", pdu.event_id
)
pdu.internal_metadata.soft_failed = True
# Note: we don't redact the event so admins can inspect the event after the
# fact. Other processes may redact the event, but that won't be applied to
# the database copy of the event until the server's config requires it.
return pdu

spam_check = await self._spam_checker_module_callbacks.check_event_for_spam(pdu)

if spam_check != self._spam_checker_module_callbacks.NOT_SPAM:
Expand Down
57 changes: 57 additions & 0 deletions synapse/federation/federation_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
from synapse.http.types import QueryParams
from synapse.logging.opentracing import SynapseTags, log_kv, set_tag, tag_args, trace
from synapse.types import JsonDict, StrCollection, UserID, get_domain_from_id
from synapse.types.handlers.policy_server import RECOMMENDATION_OK, RECOMMENDATION_SPAM
from synapse.util.async_helpers import concurrently_execute
from synapse.util.caches.expiringcache import ExpiringCache
from synapse.util.retryutils import NotRetryingDestination
Expand Down Expand Up @@ -421,6 +422,62 @@ async def _record_failure_callback(

return None

@trace
@tag_args
async def get_pdu_policy_recommendation(
self, destination: str, pdu: EventBase, timeout: Optional[int] = None
) -> str:
"""Requests that the destination server (typically a policy server)
check the event and return its recommendation on how to handle the
event.

If the policy server could not be contacted or the policy server
returned an unknown recommendation, this returns an OK recommendation.
This type fixing behaviour is done because the typical caller will be
in a critical call path and would generally interpret a `None` or similar
response as "weird value; don't care; move on without taking action". We
just frontload that logic here.


Args:
destination: The remote homeserver to ask (a policy server)
pdu: The event to check
timeout: How long to try (in ms) the destination for before
giving up. None indicates no timeout.

Returns:
The policy recommendation, or RECOMMENDATION_OK if the policy server was
uncontactable or returned an unknown recommendation.
"""

logger.debug(
"get_pdu_policy_recommendation for event_id=%s from %s",
pdu.event_id,
destination,
)

try:
res = await self.transport_layer.get_policy_recommendation_for_pdu(
destination, pdu, timeout=timeout
)
recommendation = res.get("recommendation")
if not isinstance(recommendation, str):
raise InvalidResponseError("recommendation is not a string")
if recommendation not in (RECOMMENDATION_OK, RECOMMENDATION_SPAM):
logger.warning(
"get_pdu_policy_recommendation: unknown recommendation: %s",
recommendation,
)
return RECOMMENDATION_OK
return recommendation
except Exception as e:
logger.warning(
"get_pdu_policy_recommendation: server %s responded with error, assuming OK recommendation: %s",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: I'd rather use f-strings personally, they are waaaay more ergonomic. E.g

logger.warning(f"get_pdu_policy_recommendation: server {destination} responded with error, assuming OK recommendation: {e}")

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't use f-strings in loggers, while they are more ergonomic it means that you are passing in the formatted string, rather than allowing the loggers to do so. The main consequence is that if the logger is disabled you don't necessarily want to pay the cost of string interpolation.

Otherwise, yeah f-strings are awesome.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

with my limited python exposure, +1 to fstrings, but definitely don't want to make things harder for the logger

destination,
e,
)
return RECOMMENDATION_OK

@trace
@tag_args
async def get_pdu(
Expand Down
27 changes: 27 additions & 0 deletions synapse/federation/transport/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,33 @@ async def get_event(
destination, path=path, timeout=timeout, try_trailing_slash_on_400=True
)

async def get_policy_recommendation_for_pdu(
self, destination: str, event: EventBase, timeout: Optional[int] = None
) -> JsonDict:
"""Requests the policy recommendation for the given pdu from the given policy server.

Args:
destination: The host name of the remote homeserver checking the event.
event: The event to check.
timeout: How long to try (in ms) the destination for before giving up.
None indicates no timeout.

Returns:
The full recommendation object from the remote server.
"""
logger.debug(
"get_policy_recommendation_for_pdu dest=%s, event_id=%s",
destination,
event.event_id,
)
return await self.client.post_json(
destination=destination,
path=f"/_matrix/policy/unstable/org.matrix.msc4284/event/{event.event_id}/check",
data=event.get_pdu_json(),
ignore_backoff=True,
timeout=timeout,
)

async def backfill(
self, destination: str, room_id: str, event_tuples: Collection[str], limit: int
) -> Optional[Union[JsonDict, list]]:
Expand Down
15 changes: 14 additions & 1 deletion synapse/handlers/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,7 @@ def __init__(self, hs: "HomeServer"):
self._instance_name = hs.get_instance_name()
self._notifier = hs.get_notifier()
self._worker_lock_handler = hs.get_worker_locks_handler()
self._policy_handler = hs.get_room_policy_handler()

self.room_prejoin_state_types = self.hs.config.api.room_prejoin_state

Expand Down Expand Up @@ -1108,6 +1109,18 @@ async def _create_and_send_nonmember_event_locked(
event.sender,
)

policy_allowed = await self._policy_handler.is_event_allowed(event)
if not policy_allowed:
logger.warning(
"Event not allowed by policy server, rejecting %s",
event.event_id,
)
raise SynapseError(
403,
"This message has been rejected as probable spam",
Codes.FORBIDDEN,
)

spam_check_result = (
await self._spam_checker_module_callbacks.check_event_for_spam(
event
Expand All @@ -1119,7 +1132,7 @@ async def _create_and_send_nonmember_event_locked(
[code, dict] = spam_check_result
raise SynapseError(
403,
"This message had been rejected as probable spam",
"This message has been rejected as probable spam",
code,
dict,
)
Expand Down
89 changes: 89 additions & 0 deletions synapse/handlers/room_policy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright 2016-2021 The Matrix.org Foundation C.I.C.
# Copyright (C) 2023 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
#

import logging
from typing import TYPE_CHECKING

from synapse.events import EventBase
from synapse.types.handlers.policy_server import RECOMMENDATION_OK
from synapse.util.stringutils import parse_and_validate_server_name

if TYPE_CHECKING:
from synapse.server import HomeServer

logger = logging.getLogger(__name__)


class RoomPolicyHandler:
def __init__(self, hs: "HomeServer"):
self._hs = hs
self._store = hs.get_datastores().main
self._storage_controllers = hs.get_storage_controllers()
self._event_auth_handler = hs.get_event_auth_handler()
self._federation_client = hs.get_federation_client()

async def is_event_allowed(self, event: EventBase) -> bool:
"""Check if the given event is allowed in the room by the policy server.

Note: This will *always* return True if the room's policy server is Synapse
itself. This is because Synapse can't be a policy server (currently).

If no policy server is configured in the room, this returns True. Similarly, if
the policy server is invalid in any way (not joined, not a server, etc), this
returns True.

If a valid and contactable policy server is configured in the room, this returns
True if that server suggests the event is not spammy, and False otherwise.

Args:
event: The event to check. This should be a fully-formed PDU.

Returns:
bool: True if the event is allowed in the room, False otherwise.
"""
policy_event = await self._storage_controllers.state.get_current_state_event(
event.room_id, "org.matrix.msc4284.policy", ""
)
if not policy_event:
return True # no policy server == default allow

policy_server = policy_event.content.get("via", "")
if policy_server is None or not isinstance(policy_server, str):
return True # no policy server == default allow

if policy_server == self._hs.hostname:
return True # Synapse itself can't be a policy server (currently)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And it can't be run on the same domain with a .well-known lookup or something? I'd rather we didn't hard code this given if/when Synapse can be a PS we need to remember to remove this specific conditional instead of just intuitively pointing the via at the HS.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not entirely sure that Synapse will be happy with trying to contact itself over federation because that's a pretty weird thing to do in Matrix. Any Synapse support for being a policy server (either natively or via modules) would involve a performant hook anyway, and would be inserted here to short-circuit the federation call.

Removing the check would also obviously slow down message sending in rooms we know are misconfigured (not pointing to a real policy server). With the current highly experimental architecture we also do not expect that pre-existing servers will be policy servers, and operators will need to pick a dedicated domain name without human users on that server. This will probably change down the line.

I'll add some text to the docstring to reduce the chances of forgetting in the meantime. Hopefully when someone writes a test for "can Synapse be a policy server", they'll worst case find the if statement by stepping through the call path.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also ended up adding a test for this, so the failure should be even more clear if we do add such functionality


try:
parse_and_validate_server_name(policy_server)
except ValueError:
return True # invalid policy server == default allow

is_in_room = await self._event_auth_handler.is_host_in_room(
event.room_id, policy_server
)
if not is_in_room:
return True # policy server not in room == default allow

# At this point, the server appears valid and is in the room, so ask it to check
# the event.
recommendation = await self._federation_client.get_pdu_policy_recommendation(
policy_server, event
)
if recommendation != RECOMMENDATION_OK:
return False

return True # default allow
5 changes: 5 additions & 0 deletions synapse/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@
RoomMemberMasterHandler,
)
from synapse.handlers.room_member_worker import RoomMemberWorkerHandler
from synapse.handlers.room_policy import RoomPolicyHandler
from synapse.handlers.room_summary import RoomSummaryHandler
from synapse.handlers.search import SearchHandler
from synapse.handlers.send_email import SendEmailHandler
Expand Down Expand Up @@ -807,6 +808,10 @@ def get_oidc_handler(self) -> "OidcHandler":

return OidcHandler(self)

@cache_in_self
def get_room_policy_handler(self) -> RoomPolicyHandler:
return RoomPolicyHandler(self)

@cache_in_self
def get_event_client_serializer(self) -> EventClientSerializer:
return EventClientSerializer(self)
Expand Down
16 changes: 16 additions & 0 deletions synapse/types/handlers/policy_server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright (C) 2025 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#

RECOMMENDATION_OK = "ok"
RECOMMENDATION_SPAM = "spam"
Loading