Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit a4bf72c

Browse files
committed
Censor redactions in DB after a month
1 parent 92c1550 commit a4bf72c

File tree

3 files changed

+175
-1
lines changed

3 files changed

+175
-1
lines changed

synapse/storage/events.py

Lines changed: 87 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
from six import iteritems, text_type
2424
from six.moves import range
2525

26-
from canonicaljson import json
26+
from canonicaljson import encode_canonical_json, json
2727
from prometheus_client import Counter, Histogram
2828

2929
from twisted.internet import defer
@@ -33,6 +33,7 @@
3333
from synapse.api.errors import SynapseError
3434
from synapse.events import EventBase # noqa: F401
3535
from synapse.events.snapshot import EventContext # noqa: F401
36+
from synapse.events.utils import prune_event_dict
3637
from synapse.logging.context import PreserveLoggingContext, make_deferred_yieldable
3738
from synapse.logging.utils import log_function
3839
from synapse.metrics import BucketCollector
@@ -262,6 +263,13 @@ def read_forward_extremities():
262263

263264
hs.get_clock().looping_call(read_forward_extremities, 60 * 60 * 1000)
264265

266+
def _censor_redactions():
267+
return run_as_background_process(
268+
"_censor_redactions", self._censor_redactions
269+
)
270+
271+
hs.get_clock().looping_call(_censor_redactions, 10 * 60 * 1000)
272+
265273
@defer.inlineCallbacks
266274
def _read_forward_extremities(self):
267275
def fetch(txn):
@@ -1548,6 +1556,84 @@ def _store_redaction(self, txn, event):
15481556
(event.event_id, event.redacts),
15491557
)
15501558

1559+
@defer.inlineCallbacks
def _censor_redactions(self):
    """Censors all redactions older than a month that haven't been censored.

    By censor we mean update the event_json table with the redacted
    (pruned) form of the original event, so that the unredacted content is
    no longer stored in the database.

    Each call processes at most 100 redactions, oldest first; every
    redaction that was examined is flagged `have_censored` so that it is
    not looked at again, whether or not the redaction was allowed.

    Returns:
        Deferred
    """

    # Nothing to do until we know which stream ordering was current a month
    # ago.
    if self.stream_ordering_month_ago is None:
        return

    max_pos = self.stream_ordering_month_ago

    # We fetch all redactions that point to an event that we have that has
    # a stream ordering from over a month ago, that we haven't yet censored
    # in the DB.
    #
    # NOTE(review): the lower bound of -max_pos presumably exists to include
    # events stored with negative stream orderings -- confirm.
    sql = """
        SELECT er.event_id, redacts FROM redactions
        INNER JOIN events AS er USING (event_id)
        INNER JOIN events AS eb ON (er.room_id = eb.room_id AND redacts = eb.event_id)
        WHERE NOT have_censored
        AND ? <= er.stream_ordering AND er.stream_ordering <= ?
        ORDER BY er.stream_ordering ASC
        LIMIT ?
    """

    rows = yield self._execute(
        "_censor_redactions_fetch", None, sql, -max_pos, max_pos, 100
    )

    if not rows:
        # Nothing is pending, so don't bother opening a write transaction.
        return

    updates = []

    for redaction_id, event_id in rows:
        redaction_event = yield self.get_event(redaction_id, allow_none=True)
        original_event = yield self.get_event(
            event_id, allow_rejected=True, allow_none=True
        )

        # The SQL above ensures that we have both the redaction and
        # original event, so if the `get_event` calls return None it
        # means that the redaction wasn't allowed. Either way we know that
        # the result won't change so we mark the fact that we've checked.
        if (
            redaction_event
            and original_event
            and original_event.internal_metadata.is_redacted()
        ):
            # Redaction was allowed.
            #
            # `encode_canonical_json` returns UTF-8 encoded *bytes*, whereas
            # `event_json.json` is read back as JSON text. Decode before
            # storing so that the database driver doesn't persist a
            # binary/hex-escaped value in place of the JSON document.
            pruned_json = encode_canonical_json(
                prune_event_dict(original_event.get_dict())
            ).decode("utf-8")
        else:
            # Redaction wasn't allowed
            pruned_json = None

        updates.append((redaction_id, event_id, pruned_json))

    def _update_censor_txn(txn):
        for redaction_id, event_id, pruned_json in updates:
            if pruned_json is not None:
                # Overwrite the stored JSON with the pruned event.
                self._simple_update_one_txn(
                    txn,
                    table="event_json",
                    keyvalues={"event_id": event_id},
                    updatevalues={"json": pruned_json},
                )

            # Always record that this redaction has been handled, even if
            # there was nothing to censor.
            self._simple_update_one_txn(
                txn,
                table="redactions",
                keyvalues={"event_id": redaction_id},
                updatevalues={"have_censored": True},
            )

    yield self.runInteraction("_update_censor_txn", _update_censor_txn)
1636+
15511637
@defer.inlineCallbacks
15521638
def count_daily_messages(self):
15531639
"""
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
/* Copyright 2019 The Matrix.org Foundation C.I.C.
2+
*
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
16+
-- Flag recording whether a redaction has already been processed by the
-- censoring job; existing rows start out as not-yet-censored (false).
ALTER TABLE redactions ADD COLUMN have_censored BOOL NOT NULL DEFAULT false;
17+
-- Partial index covering only the redactions that have not been censored yet.
CREATE INDEX redactions_have_censored ON redactions(event_id) WHERE not have_censored;

tests/storage/test_redaction.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717

1818
from mock import Mock
1919

20+
from canonicaljson import json
21+
2022
from twisted.internet import defer
2123

2224
from synapse.api.constants import EventTypes, Membership
@@ -286,3 +288,72 @@ def room_id(self):
286288
self.assertEqual(
287289
fetched.unsigned["redacted_because"].event_id, redaction_event_id2
288290
)
291+
292+
def test_redact_censor(self):
    """Check that the stored JSON of a redacted event is censored in the
    database once more than a month has gone by.
    """

    def load_stored_json(event_id):
        # Read the raw row from `event_json`, bypassing `get_event` (which
        # applies redactions on the way out), and parse it.
        raw = self.get_success(
            self.store._simple_select_one_onecol(
                table="event_json",
                keyvalues={"event_id": event_id},
                retcol="json",
            )
        )
        return json.loads(raw)

    self.get_success(
        self.inject_room_member(self.room1, self.u_alice, Membership.JOIN)
    )

    msg_event = self.get_success(self.inject_message(self.room1, self.u_alice, "t"))
    target_id = msg_event.event_id

    # Before redaction the event comes back with its full content.
    unredacted = self.get_success(self.store.get_event(target_id))

    expected_before = {
        "type": EventTypes.Message,
        "user_id": self.u_alice.to_string(),
        "content": {"body": "t", "msgtype": "message"},
    }
    self.assertObjectHasAttributes(expected_before, unredacted)

    self.assertFalse("redacted_because" in unredacted.unsigned)

    # Now redact the message.
    redaction_reason = "Because I said so"
    self.get_success(
        self.inject_redaction(
            self.room1, target_id, self.u_alice, redaction_reason
        )
    )

    redacted = self.get_success(self.store.get_event(target_id))

    self.assertTrue("redacted_because" in redacted.unsigned)

    expected_after = {
        "type": EventTypes.Message,
        "user_id": self.u_alice.to_string(),
        "content": {},
    }
    self.assertObjectHasAttributes(expected_after, redacted)

    # The event is served redacted, but the row in the database still
    # holds the original content at this point.
    self.assert_dict(
        {"content": {"body": "t", "msgtype": "message"}},
        load_stored_json(target_id),
    )

    # Move the clock on by 31 days, then by a further two hours.
    self.reactor.advance(60 * 60 * 24 * 31)
    self.reactor.advance(60 * 60 * 2)

    # By now the stored JSON itself should have had its content stripped.
    self.assert_dict({"content": {}}, load_stored_json(target_id))

0 commit comments

Comments
 (0)