Synapse stops responding to incoming requests if PostgreSQL stops responding #8574
Description
For a while I've been having an issue where, some time after a fresh boot, Synapse stops receiving (or at least handling) federation events and often even stops responding to client connections. CPU usage drops to basically 0%, and only a machine reboot (not just restarting the service) fixes the issue. Strangely enough, the federation tester at that point reports that federation works fine, but clearly it doesn't.
Note that I have the federation reader and synchrotron workers enabled and they replicate via Redis.
I get some errors in the federation reader log which might be relevant:
2020-10-16 08:15:27,206 - twisted - 250 - CRITICAL - - Rollback failed
Traceback (most recent call last):
File "/usr/lib/python3.8/site-packages/twisted/python/threadpool.py", line 250, in inContext
result = inContext.theWork()
File "/usr/lib/python3.8/site-packages/twisted/python/threadpool.py", line 266, in <lambda>
inContext.theWork = lambda: context.call(ctx, func, *args, **kw)
File "/usr/lib/python3.8/site-packages/twisted/python/context.py", line 122, in callWithContext
return self.currentContext().callWithContext(ctx, func, *args, **kw)
File "/usr/lib/python3.8/site-packages/twisted/python/context.py", line 85, in callWithContext
return func(*args,**kw)
--- <exception caught here> ---
File "/usr/lib/python3.8/site-packages/twisted/enterprise/adbapi.py", line 303, in _runWithConnection
conn.rollback()
File "/usr/lib/python3.8/site-packages/twisted/enterprise/adbapi.py", line 52, in rollback
self._connection.rollback()
psycopg2.InterfaceError: connection already closed
2020-10-16 08:15:27,217 - synapse.util.ratelimitutils - 182 - DEBUG - PUT-52551- Ratelimit [281473023937824]: Processed req
2020-10-16 08:15:27,219 - synapse.util.async_helpers - 293 - DEBUG - PUT-52551- Releasing linearizer lock 'fed_txn_handler' for key 'en-root.org'
2020-10-16 08:15:27,222 - synapse.federation.transport.server - 428 - ERROR - PUT-52551- on_incoming_transaction failed
Traceback (most recent call last):
File "/usr/lib/python3.8/site-packages/synapse/federation/transport/server.py", line 424, in on_PUT
code, response = await self.handler.on_incoming_transaction(
File "/usr/lib/python3.8/site-packages/synapse/federation/federation_server.py", line 164, in on_incoming_transaction
return await self._transaction_resp_cache.wrap(
File "/usr/lib/python3.8/site-packages/twisted/internet/defer.py", line 1416, in _inlineCallbacks
result = result.throwExceptionIntoGenerator(g)
File "/usr/lib/python3.8/site-packages/twisted/python/failure.py", line 512, in throwExceptionIntoGenerator
return g.throw(self.type, self.value, self.tb)
File "/usr/lib/python3.8/site-packages/synapse/federation/federation_server.py", line 190, in _on_incoming_transaction_inner
result = await self._handle_incoming_transaction(
File "/usr/lib/python3.8/site-packages/synapse/federation/federation_server.py", line 209, in _handle_incoming_transaction
response = await self.transaction_actions.have_responded(origin, transaction)
File "/usr/lib/python3.8/site-packages/synapse/federation/persistence.py", line 54, in have_responded
return await self.store.get_received_txn_response(transaction_id, origin)
File "/usr/lib/python3.8/site-packages/synapse/storage/databases/main/transactions.py", line 77, in get_received_txn_response
return await self.db_pool.runInteraction(
File "/usr/lib/python3.8/site-packages/synapse/storage/database.py", line 569, in runInteraction
result = await self.runWithConnection(
File "/usr/lib/python3.8/site-packages/synapse/storage/database.py", line 646, in runWithConnection
return await make_deferred_yieldable(
File "/usr/lib/python3.8/site-packages/twisted/python/threadpool.py", line 250, in inContext
result = inContext.theWork()
File "/usr/lib/python3.8/site-packages/twisted/python/threadpool.py", line 266, in <lambda>
inContext.theWork = lambda: context.call(ctx, func, *args, **kw)
File "/usr/lib/python3.8/site-packages/twisted/python/context.py", line 122, in callWithContext
return self.currentContext().callWithContext(ctx, func, *args, **kw)
File "/usr/lib/python3.8/site-packages/twisted/python/context.py", line 85, in callWithContext
return func(*args,**kw)
File "/usr/lib/python3.8/site-packages/twisted/enterprise/adbapi.py", line 306, in _runWithConnection
compat.reraise(excValue, excTraceback)
File "/usr/lib/python3.8/site-packages/twisted/python/compat.py", line 464, in reraise
raise exception.with_traceback(traceback)
File "/usr/lib/python3.8/site-packages/twisted/enterprise/adbapi.py", line 297, in _runWithConnection
result = func(conn, *args, **kw)
File "/usr/lib/python3.8/site-packages/synapse/storage/database.py", line 626, in inner_func
assert not self.engine.in_transaction(conn)
AssertionError
2020-10-16 08:15:27,230 - synapse.http.server - 81 - ERROR - PUT-52551- Failed handle request via 'FederationSendServlet': <SynapseRequest at 0xffffa21b07f0 method='PUT' uri='/_matrix/federation/v1/send/1594808804394' clientproto='HTTP/1.0' site=8084>
Traceback (most recent call last):
File "/usr/lib/python3.8/site-packages/twisted/internet/defer.py", line 1416, in _inlineCallbacks
result = result.throwExceptionIntoGenerator(g)
File "/usr/lib/python3.8/site-packages/twisted/python/failure.py", line 512, in throwExceptionIntoGenerator
return g.throw(self.type, self.value, self.tb)
File "/usr/lib/python3.8/site-packages/synapse/federation/federation_server.py", line 190, in _on_incoming_transaction_inner
result = await self._handle_incoming_transaction(
File "/usr/lib/python3.8/site-packages/synapse/federation/federation_server.py", line 209, in _handle_incoming_transaction
response = await self.transaction_actions.have_responded(origin, transaction)
File "/usr/lib/python3.8/site-packages/synapse/federation/persistence.py", line 54, in have_responded
return await self.store.get_received_txn_response(transaction_id, origin)
File "/usr/lib/python3.8/site-packages/synapse/storage/databases/main/transactions.py", line 77, in get_received_txn_response
return await self.db_pool.runInteraction(
File "/usr/lib/python3.8/site-packages/synapse/storage/database.py", line 569, in runInteraction
result = await self.runWithConnection(
File "/usr/lib/python3.8/site-packages/synapse/storage/database.py", line 646, in runWithConnection
return await make_deferred_yieldable(
File "/usr/lib/python3.8/site-packages/twisted/python/threadpool.py", line 250, in inContext
result = inContext.theWork()
File "/usr/lib/python3.8/site-packages/twisted/python/threadpool.py", line 266, in <lambda>
inContext.theWork = lambda: context.call(ctx, func, *args, **kw)
File "/usr/lib/python3.8/site-packages/twisted/python/context.py", line 122, in callWithContext
return self.currentContext().callWithContext(ctx, func, *args, **kw)
File "/usr/lib/python3.8/site-packages/twisted/python/context.py", line 85, in callWithContext
return func(*args,**kw)
File "/usr/lib/python3.8/site-packages/twisted/enterprise/adbapi.py", line 306, in _runWithConnection
compat.reraise(excValue, excTraceback)
File "/usr/lib/python3.8/site-packages/twisted/python/compat.py", line 464, in reraise
raise exception.with_traceback(traceback)
File "/usr/lib/python3.8/site-packages/twisted/enterprise/adbapi.py", line 297, in _runWithConnection
result = func(conn, *args, **kw)
File "/usr/lib/python3.8/site-packages/synapse/storage/database.py", line 626, in inner_func
assert not self.engine.in_transaction(conn)
AssertionError
Steps to reproduce
Honestly, I don't know. It seems to be specific to something in my setup, but I can't pinpoint what it is.
Version information
- Homeserver: fam-ribbers.com
If not matrix.org:
- Version: 1.21.2
- Install method: The Alpine Linux package manager
- Platform: Alpine Linux, bare metal. Running on a RockPro64 so ARM64, with the PostgreSQL database running on a different (also RockPro64) machine.