Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit bd380d9

Browse files
authored
Add checks for postgres sequence consistency (#8402)
1 parent 5e3ca12 commit bd380d9

File tree

8 files changed

+160
-6
lines changed

8 files changed

+160
-6
lines changed

changelog.d/8402.misc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add checks on startup that PostgreSQL sequences are consistent with their associated tables.

docs/postgres.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,17 @@ Note that the above may fail with an error about duplicate rows if corruption
106106
has already occurred, and such duplicate rows will need to be manually removed.
107107

108108

109+
## Fixing inconsistent sequences error
110+
111+
Synapse uses Postgres sequences to generate IDs for various tables. A sequence
112+
and associated table can get out of sync if, for example, Synapse has been
113+
downgraded and then upgraded again.
114+
115+
To fix the issue, shut down Synapse (including any and all workers) and run the
116+
SQL command included in the error message. Once done, Synapse should start
117+
successfully.
118+
119+
109120
## Tuning Postgres
110121

111122
The default settings should be fine for most deployments. For larger

synapse/storage/databases/main/registration.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ def __init__(self, database: DatabasePool, db_conn, hs):
4141
self.config = hs.config
4242
self.clock = hs.get_clock()
4343

44+
# Note: we don't check this sequence for consistency as we'd have to
45+
# call `find_max_generated_user_id_localpart` each time, which is
46+
# expensive if there are many entries.
4447
self._user_id_seq = build_sequence_generator(
4548
database.engine, find_max_generated_user_id_localpart, "user_id_seq",
4649
)

synapse/storage/databases/state/store.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,9 @@ def get_max_state_group_txn(txn: Cursor):
9999
self._state_group_seq_gen = build_sequence_generator(
100100
self.database_engine, get_max_state_group_txn, "state_group_id_seq"
101101
)
102+
self._state_group_seq_gen.check_consistency(
103+
db_conn, table="state_groups", id_column="id"
104+
)
102105

103106
@cached(max_entries=10000, iterable=True)
104107
async def get_state_group_delta(self, state_group):

synapse/storage/util/id_generators.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,11 @@ def __init__(
258258

259259
self._sequence_gen = PostgresSequenceGenerator(sequence_name)
260260

261+
# We check that the table and sequence haven't diverged.
262+
self._sequence_gen.check_consistency(
263+
db_conn, table=table, id_column=id_column, positive=positive
264+
)
265+
261266
# This goes and fills out the above state from the database.
262267
self._load_current_ids(db_conn, table, instance_column, id_column)
263268

synapse/storage/util/sequence.py

Lines changed: 88 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,34 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515
import abc
16+
import logging
1617
import threading
1718
from typing import Callable, List, Optional
1819

19-
from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine
20-
from synapse.storage.types import Cursor
20+
from synapse.storage.engines import (
21+
BaseDatabaseEngine,
22+
IncorrectDatabaseSetup,
23+
PostgresEngine,
24+
)
25+
from synapse.storage.types import Connection, Cursor
26+
27+
logger = logging.getLogger(__name__)
28+
29+
30+
# Template for the error raised when a Postgres sequence is found to be
# inconsistent with its table. It is %-formatted with a mapping that supplies
# the `seq`, `table` and `max_id_sql` keys; `max_id_sql` is embedded inside
# the `setval(...)` call so that the operator can run the statement as-is.
# NOTE(review): the bare `NN+` lines inside the literal look like diff line
# markers picked up when this text was captured -- confirm before relying on
# the exact contents of the string.
_INCONSISTENT_SEQUENCE_ERROR = """
31+
Postgres sequence '%(seq)s' is inconsistent with associated
32+
table '%(table)s'. This can happen if Synapse has been downgraded and
33+
then upgraded again, or due to a bad migration.
34+
35+
To fix this error, shut down Synapse (including any and all workers)
36+
and run the following SQL:
37+
38+
SELECT setval('%(seq)s', (
39+
%(max_id_sql)s
40+
));
41+
42+
See docs/postgres.md for more information.
43+
"""
2144

2245

2346
class SequenceGenerator(metaclass=abc.ABCMeta):
@@ -28,6 +51,19 @@ def get_next_id_txn(self, txn: Cursor) -> int:
2851
"""Gets the next ID in the sequence"""
2952
...
3053

54+
@abc.abstractmethod
def check_consistency(
    self, db_conn: Connection, table: str, id_column: str, positive: bool = True
):
    """Should be called during start up to test that the current value of
    the sequence is greater than or equal to the maximum ID in the table.

    This is to handle various cases where the sequence value can get out
    of sync with the table, e.g. if Synapse gets rolled back to a previous
    version and then rolled forwards again.

    Args:
        db_conn: Database connection to run the check on.
        table: Name of the table whose IDs are drawn from this sequence.
        id_column: Name of the column in `table` that holds those IDs.
        positive: Whether the IDs count upwards (True, the default) or
            are negative and count downwards (False).
    """
    ...
66+
3167

3268
class PostgresSequenceGenerator(SequenceGenerator):
3369
"""An implementation of SequenceGenerator which uses a postgres sequence"""
@@ -45,6 +81,50 @@ def get_next_mult_txn(self, txn: Cursor, n: int) -> List[int]:
4581
)
4682
return [i for (i,) in txn]
4783

84+
def check_consistency(
    self, db_conn: Connection, table: str, id_column: str, positive: bool = True
):
    """Check that this Postgres sequence has not fallen behind `table`.

    Compares the largest ID stored in `table` with the last value handed
    out by the sequence, and refuses to continue if the table is ahead.

    Args:
        db_conn: Database connection used to open a cursor for the check.
        table: Name of the table whose IDs are drawn from this sequence.
        id_column: Name of the column in `table` that holds those IDs.
        positive: If True the largest ID is `MAX(id_column)`; if False the
            IDs are treated as negative and `-MIN(id_column)` is used.

    Raises:
        IncorrectDatabaseSetup: If the largest ID in the table is greater
            than the last value generated by the sequence.
    """
    # The query is built up-front because it is also embedded in the error
    # message, so that the operator can use it to repair the sequence.
    table_sql = "SELECT GREATEST(%(agg)s(%(id)s), 0) FROM %(table)s" % {
        "id": id_column,
        "table": table,
        "agg": "MAX" if positive else "-MIN",
    }

    txn = db_conn.cursor()
    try:
        # First we get the current max ID from the table.
        txn.execute(table_sql)
        row = txn.fetchone()
        if not row:
            # No row came back, so there is nothing to compare against.
            # NOTE(review): an aggregate wrapped in GREATEST(..., 0) would
            # normally yield a single row (0) even for an empty table, so
            # this is presumably just a defensive guard -- confirm.
            return

        # Now we fetch the current value from the sequence and compare with
        # the above.
        max_stream_id = row[0]
        txn.execute(
            "SELECT last_value, is_called FROM %(seq)s" % {"seq": self._sequence_name}
        )
        last_value, is_called = txn.fetchone()
    finally:
        # Always release the cursor, including when a query fails.
        txn.close()

    # If `is_called` is False then `last_value` is actually the value that
    # will be generated next, so we decrement to get the true "last value".
    if not is_called:
        last_value -= 1

    if max_stream_id > last_value:
        # All four placeholders need an argument, otherwise the logging
        # module fails to format the record and the warning is lost.
        logger.warning(
            "Postgres sequence %s is behind table %s: %d < %d",
            self._sequence_name,
            table,
            last_value,
            max_stream_id,
        )
        raise IncorrectDatabaseSetup(
            _INCONSISTENT_SEQUENCE_ERROR
            % {"seq": self._sequence_name, "table": table, "max_id_sql": table_sql}
        )
127+
48128

49129
GetFirstCallbackType = Callable[[Cursor], int]
50130

@@ -81,6 +161,12 @@ def get_next_id_txn(self, txn: Cursor) -> int:
81161
self._current_max_id += 1
82162
return self._current_max_id
83163

164+
def check_consistency(
    self, db_conn: Connection, table: str, id_column: str, positive: bool = True
):
    """Accept the same arguments as the other generators but check nothing.

    All arguments are ignored and nothing is returned or raised.
    """
    # There is nothing to do for in memory sequences
    pass
169+
84170

85171
def build_sequence_generator(
86172
database_engine: BaseDatabaseEngine,

tests/storage/test_id_generators.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,8 @@
1212
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
15-
16-
1715
from synapse.storage.database import DatabasePool
16+
from synapse.storage.engines import IncorrectDatabaseSetup
1817
from synapse.storage.util.id_generators import MultiWriterIdGenerator
1918

2019
from tests.unittest import HomeserverTestCase
@@ -59,7 +58,7 @@ def _create(conn):
5958
writers=writers,
6059
)
6160

62-
return self.get_success(self.db_pool.runWithConnection(_create))
61+
return self.get_success_or_raise(self.db_pool.runWithConnection(_create))
6362

6463
def _insert_rows(self, instance_name: str, number: int):
6564
"""Insert N rows as the given instance, inserting with stream IDs pulled
@@ -411,6 +410,23 @@ async def _get_next_async():
411410
self.get_success(_get_next_async())
412411
self.assertEqual(id_gen_3.get_persisted_upto_position(), 6)
413412

413+
def test_sequence_consistency(self):
    """Creating an ID generator must fail once the table has moved ahead
    of the sequence.
    """

    # Seed the table through the normal helper first.
    self._insert_row_with_id("master", 3)

    def _add_row_bypassing_sequence(txn):
        # Write the row directly, so the stream ID is *not* updated.
        txn.execute("INSERT INTO foobar VALUES (26, 'master')")

    insertion = self.db_pool.runInteraction("_insert", _add_row_bypassing_sequence)
    self.get_success(insertion)

    # Building the generator is expected to raise.
    self.assertRaises(IncorrectDatabaseSetup, self._create_id_generator, "first")
429+
414430

415431
class BackwardsMultiWriterIdGeneratorTestCase(HomeserverTestCase):
416432
"""Tests MultiWriterIdGenerator that produce *negative* stream IDs.

tests/unittest.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1515
# See the License for the specific language governing permissions and
1616
# limitations under the License.
17-
1817
import gc
1918
import hashlib
2019
import hmac
@@ -28,6 +27,7 @@
2827
from canonicaljson import json
2928

3029
from twisted.internet.defer import Deferred, ensureDeferred, succeed
30+
from twisted.python.failure import Failure
3131
from twisted.python.threadpool import ThreadPool
3232
from twisted.trial import unittest
3333

@@ -476,6 +476,35 @@ def get_failure(self, d, exc):
476476
self.pump()
477477
return self.failureResultOf(d, exc)
478478

479+
def get_success_or_raise(self, d, by=0.0):
    """Drive deferred to completion and return result or raise exception
    on failure.

    Args:
        d: The awaitable to drive. A value that is not awaitable is
            returned unchanged.
        by: Forwarded to `self.pump` as its `by` argument.

    Returns:
        The result `d` resolved with, or `d` itself if it was not
        awaitable.

    Raises:
        Whatever exception `d` failed with. The test is failed via
        `self.fail` if `d` has produced no result after pumping.
    """

    # A plain value has nothing to wait for. Return it before touching
    # `deferred`, which is only bound for awaitables.
    if not inspect.isawaitable(d):
        return d

    deferred = ensureDeferred(d)
    if not isinstance(deferred, Deferred):
        return d

    # Capture either the success value or the Failure.
    results = []  # type: list
    deferred.addBoth(results.append)

    self.pump(by=by)

    if not results:
        self.fail(
            "Success result expected on {!r}, found no result instead".format(
                deferred
            )
        )

    result = results[0]

    # Surface a failure as the exception it wraps.
    if isinstance(result, Failure):
        result.raiseException()

    return result
507+
479508
def register_user(self, username, password, admin=False):
480509
"""
481510
Register a user. Requires the Admin API be registered.

0 commit comments

Comments
 (0)