
Commit e4f72dd: Move additional tasks to the background worker (#8458)

1 parent: 8dbf62f

File tree: 8 files changed, +195 / -197 lines

changelog.d/8458.feature (1 addition, 0 deletions)

@@ -0,0 +1 @@
+Allow running background tasks in a separate worker process.

synapse/app/generic_worker.py (4 additions, 0 deletions)

@@ -127,6 +127,7 @@
 from synapse.rest.key.v2 import KeyApiV2Resource
 from synapse.server import HomeServer, cache_in_self
 from synapse.storage.databases.main.censor_events import CensorEventsStore
+from synapse.storage.databases.main.client_ips import ClientIpWorkerStore
 from synapse.storage.databases.main.media_repository import MediaRepositoryStore
 from synapse.storage.databases.main.metrics import ServerMetricsStore
 from synapse.storage.databases.main.monthly_active_users import (
@@ -135,6 +136,7 @@
 from synapse.storage.databases.main.presence import UserPresenceState
 from synapse.storage.databases.main.search import SearchWorkerStore
 from synapse.storage.databases.main.stats import StatsStore
+from synapse.storage.databases.main.transactions import TransactionWorkerStore
 from synapse.storage.databases.main.ui_auth import UIAuthWorkerStore
 from synapse.storage.databases.main.user_directory import UserDirectoryStore
 from synapse.types import ReadReceipt
@@ -466,6 +468,7 @@ class GenericWorkerSlavedStore(
     SlavedAccountDataStore,
     SlavedPusherStore,
     CensorEventsStore,
+    ClientIpWorkerStore,
     SlavedEventStore,
     SlavedKeyStore,
     RoomStore,
@@ -481,6 +484,7 @@ class GenericWorkerSlavedStore(
     MediaRepositoryStore,
     ServerMetricsStore,
     SearchWorkerStore,
+    TransactionWorkerStore,
     BaseSlavedStore,
 ):
     pass
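A note on how these mixins compose: GenericWorkerSlavedStore inherits from a long list of store classes, and Python's method resolution order runs each class's __init__ exactly once, left to right, as long as every class chains through super().__init__(). Adding ClientIpWorkerStore and TransactionWorkerStore to the base list is therefore enough for the worker store to pick up their schedulers. A minimal, self-contained sketch of that pattern (the class names and config flag below are illustrative, not Synapse's real ones):

    class BaseStore:
        def __init__(self, config):
            self.config = config


    class ClientIpMixin(BaseStore):
        def __init__(self, config):
            super().__init__(config)  # keep the cooperative __init__ chain going
            if config["run_background_tasks"]:
                print("would schedule client_ips pruning on this process")


    class TransactionMixin(BaseStore):
        def __init__(self, config):
            super().__init__(config)
            if config["run_background_tasks"]:
                print("would schedule transaction cleanup on this process")


    class WorkerStore(ClientIpMixin, TransactionMixin, BaseStore):
        """Each base class initialises exactly once, in MRO order."""


    store = WorkerStore({"run_background_tasks": True})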

synapse/app/phone_stats_home.py (11 additions, 22 deletions)

@@ -11,15 +11,17 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import logging
 import math
 import resource
 import sys

 from prometheus_client import Gauge

-from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.metrics.background_process_metrics import (
+    run_as_background_process,
+    wrap_as_background_process,
+)

 logger = logging.getLogger("synapse.app.homeserver")

@@ -41,6 +43,7 @@
 )


+@wrap_as_background_process("phone_stats_home")
 async def phone_stats_home(hs, stats, stats_process=_stats_process):
     logger.info("Gathering stats for reporting")
     now = int(hs.get_clock().time())
@@ -143,20 +146,10 @@ def performance_stats_init():
             (int(hs.get_clock().time()), resource.getrusage(resource.RUSAGE_SELF))
         )

-    def start_phone_stats_home():
-        return run_as_background_process(
-            "phone_stats_home", phone_stats_home, hs, stats
-        )
-
-    def generate_user_daily_visit_stats():
-        return run_as_background_process(
-            "generate_user_daily_visits", hs.get_datastore().generate_user_daily_visits
-        )
-
     # Rather than update on per session basis, batch up the requests.
     # If you increase the loop period, the accuracy of user_daily_visits
     # table will decrease
-    clock.looping_call(generate_user_daily_visit_stats, 5 * 60 * 1000)
+    clock.looping_call(hs.get_datastore().generate_user_daily_visits, 5 * 60 * 1000)

     # monthly active user limiting functionality
     def reap_monthly_active_users():
@@ -167,6 +160,7 @@ def reap_monthly_active_users():
     clock.looping_call(reap_monthly_active_users, 1000 * 60 * 60)
     reap_monthly_active_users()

+    @wrap_as_background_process("generate_monthly_active_users")
     async def generate_monthly_active_users():
         current_mau_count = 0
         current_mau_count_by_service = {}
@@ -186,24 +180,19 @@ async def generate_monthly_active_users():
         registered_reserved_users_mau_gauge.set(float(len(reserved_users)))
         max_mau_gauge.set(float(hs.config.max_mau_value))

-    def start_generate_monthly_active_users():
-        return run_as_background_process(
-            "generate_monthly_active_users", generate_monthly_active_users
-        )
-
     if hs.config.limit_usage_by_mau or hs.config.mau_stats_only:
-        start_generate_monthly_active_users()
-        clock.looping_call(start_generate_monthly_active_users, 5 * 60 * 1000)
+        generate_monthly_active_users()
+        clock.looping_call(generate_monthly_active_users, 5 * 60 * 1000)
     # End of monthly active user settings

     if hs.config.report_stats:
         logger.info("Scheduling stats reporting for 3 hour intervals")
-        clock.looping_call(start_phone_stats_home, 3 * 60 * 60 * 1000)
+        clock.looping_call(phone_stats_home, 3 * 60 * 60 * 1000, hs, stats)

     # We need to defer this init for the cases that we daemonize
     # otherwise the process ID we get is that of the non-daemon process
     clock.call_later(0, performance_stats_init)

     # We wait 5 minutes to send the first set of stats as the server can
     # be quite busy the first few minutes
-    clock.call_later(5 * 60, start_phone_stats_home)
+    clock.call_later(5 * 60, phone_stats_home, hs, stats)
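The deleted start_* shims existed only to hand each coroutine to run_as_background_process, which gives the work its own logcontext and metrics. The new @wrap_as_background_process decorator bakes that hand-off into the function itself, so the decorated coroutine can be passed straight to looping_call and call_later (note the extra hs, stats arguments now travel through the scheduler). As a rough sketch, such a decorator can be built on the existing entry point like this (an illustration of the idea, not Synapse's actual implementation):

    import functools

    from synapse.metrics.background_process_metrics import run_as_background_process


    def wrap_as_background_process(desc):
        """Make every call of the wrapped coroutine run as a named
        background process rather than in the caller's context."""

        def decorator(func):
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                # Returns a Deferred; schedulers such as looping_call can
                # fire-and-forget it.
                return run_as_background_process(desc, func, *args, **kwargs)

            return wrapper

        return decorator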

synapse/storage/databases/main/client_ips.py (57 additions, 52 deletions)

@@ -351,7 +351,63 @@ def _devices_last_seen_update_txn(txn):
         return updated


-class ClientIpStore(ClientIpBackgroundUpdateStore):
+class ClientIpWorkerStore(ClientIpBackgroundUpdateStore):
+    def __init__(self, database: DatabasePool, db_conn, hs):
+        super().__init__(database, db_conn, hs)
+
+        self.user_ips_max_age = hs.config.user_ips_max_age
+
+        if hs.config.run_background_tasks and self.user_ips_max_age:
+            self._clock.looping_call(self._prune_old_user_ips, 5 * 1000)
+
+    @wrap_as_background_process("prune_old_user_ips")
+    async def _prune_old_user_ips(self):
+        """Removes entries in user IPs older than the configured period.
+        """
+
+        if self.user_ips_max_age is None:
+            # Nothing to do
+            return
+
+        if not await self.db_pool.updates.has_completed_background_update(
+            "devices_last_seen"
+        ):
+            # Only start pruning if we have finished populating the devices
+            # last seen info.
+            return
+
+        # We do a slightly funky SQL delete to ensure we don't try and delete
+        # too much at once (as the table may be very large from before we
+        # started pruning).
+        #
+        # This works by finding the max last_seen that is less than the given
+        # time, but has no more than N rows before it, deleting all rows with
+        # a lesser last_seen time. (We COALESCE so that the sub-SELECT always
+        # returns exactly one row).
+        sql = """
+            DELETE FROM user_ips
+            WHERE last_seen <= (
+                SELECT COALESCE(MAX(last_seen), -1)
+                FROM (
+                    SELECT last_seen FROM user_ips
+                    WHERE last_seen <= ?
+                    ORDER BY last_seen ASC
+                    LIMIT 5000
+                ) AS u
+            )
+        """
+
+        timestamp = self.clock.time_msec() - self.user_ips_max_age
+
+        def _prune_old_user_ips_txn(txn):
+            txn.execute(sql, (timestamp,))
+
+        await self.db_pool.runInteraction(
+            "_prune_old_user_ips", _prune_old_user_ips_txn
+        )
+
+
+class ClientIpStore(ClientIpWorkerStore):
     def __init__(self, database: DatabasePool, db_conn, hs):

         self.client_ip_last_seen = Cache(
@@ -360,8 +416,6 @@ def __init__(self, database: DatabasePool, db_conn, hs):

         super().__init__(database, db_conn, hs)

-        self.user_ips_max_age = hs.config.user_ips_max_age
-
         # (user_id, access_token, ip,) -> (user_agent, device_id, last_seen)
         self._batch_row_update = {}

@@ -372,9 +426,6 @@ def __init__(self, database: DatabasePool, db_conn, hs):
             "before", "shutdown", self._update_client_ips_batch
         )

-        if self.user_ips_max_age:
-            self._clock.looping_call(self._prune_old_user_ips, 5 * 1000)
-
     async def insert_client_ip(
         self, user_id, access_token, ip, user_agent, device_id, now=None
     ):
@@ -525,49 +576,3 @@ async def get_user_ip_and_agents(self, user):
             }
             for (access_token, ip), (user_agent, last_seen) in results.items()
         ]
-
-    @wrap_as_background_process("prune_old_user_ips")
-    async def _prune_old_user_ips(self):
-        """Removes entries in user IPs older than the configured period.
-        """
-
-        if self.user_ips_max_age is None:
-            # Nothing to do
-            return
-
-        if not await self.db_pool.updates.has_completed_background_update(
-            "devices_last_seen"
-        ):
-            # Only start pruning if we have finished populating the devices
-            # last seen info.
-            return
-
-        # We do a slightly funky SQL delete to ensure we don't try and delete
-        # too much at once (as the table may be very large from before we
-        # started pruning).
-        #
-        # This works by finding the max last_seen that is less than the given
-        # time, but has no more than N rows before it, deleting all rows with
-        # a lesser last_seen time. (We COALESCE so that the sub-SELECT always
-        # returns exactly one row).
-        sql = """
-            DELETE FROM user_ips
-            WHERE last_seen <= (
-                SELECT COALESCE(MAX(last_seen), -1)
-                FROM (
-                    SELECT last_seen FROM user_ips
-                    WHERE last_seen <= ?
-                    ORDER BY last_seen ASC
-                    LIMIT 5000
-                ) AS u
-            )
-        """
-
-        timestamp = self.clock.time_msec() - self.user_ips_max_age
-
-        def _prune_old_user_ips_txn(txn):
-            txn.execute(sql, (timestamp,))
-
-        await self.db_pool.runInteraction(
-            "_prune_old_user_ips", _prune_old_user_ips_txn
-        )
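The pruning SQL is worth unpacking: the inner SELECT picks the oldest 5000 eligible rows and takes the largest last_seen among them, so the outer DELETE never removes more than 5000 rows per pass; COALESCE(..., -1) guarantees the subquery yields a value even when no rows qualify, making the DELETE a no-op in that case. A small standalone demonstration of the same bounded-delete pattern against SQLite (the data and cutoff are invented for the demo):

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE user_ips (user_id TEXT, last_seen INTEGER)")
    conn.executemany(
        "INSERT INTO user_ips VALUES (?, ?)",
        [("@u:example.org", ts) for ts in range(10000)],
    )

    CUTOFF = 9000  # prune rows at or below this timestamp
    BATCH = 5000   # upper bound on rows deleted per pass

    conn.execute(
        """
        DELETE FROM user_ips
        WHERE last_seen <= (
            SELECT COALESCE(MAX(last_seen), -1)
            FROM (
                SELECT last_seen FROM user_ips
                WHERE last_seen <= ?
                ORDER BY last_seen ASC
                LIMIT ?
            ) AS u
        )
        """,
        (CUTOFF, BATCH),
    )

    # Only the oldest BATCH rows are gone after one pass; the looping call
    # drains the rest over subsequent passes.
    print(conn.execute("SELECT COUNT(*) FROM user_ips").fetchone()[0])  # 5000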

synapse/storage/databases/main/metrics.py (5 additions, 9 deletions)

@@ -18,7 +18,7 @@
 from typing import Dict

 from synapse.metrics import GaugeBucketCollector
-from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.metrics.background_process_metrics import wrap_as_background_process
 from synapse.storage._base import SQLBaseStore
 from synapse.storage.database import DatabasePool
 from synapse.storage.databases.main.event_push_actions import (
@@ -57,18 +57,13 @@ def __init__(self, database: DatabasePool, db_conn, hs):
         super().__init__(database, db_conn, hs)

         # Read the extrems every 60 minutes
-        def read_forward_extremities():
-            # run as a background process to make sure that the database transactions
-            # have a logcontext to report to
-            return run_as_background_process(
-                "read_forward_extremities", self._read_forward_extremities
-            )
-
-        hs.get_clock().looping_call(read_forward_extremities, 60 * 60 * 1000)
+        if hs.config.run_background_tasks:
+            self._clock.looping_call(self._read_forward_extremities, 60 * 60 * 1000)

         # Used in _generate_user_daily_visits to keep track of progress
         self._last_user_visit_update = self._get_start_of_day()

+    @wrap_as_background_process("read_forward_extremities")
     async def _read_forward_extremities(self):
         def fetch(txn):
             txn.execute(
@@ -274,6 +269,7 @@ def _get_start_of_day(self):
         today_start = calendar.timegm((now.tm_year, now.tm_mon, now.tm_mday, 0, 0, 0))
         return today_start * 1000

+    @wrap_as_background_process("generate_user_daily_visits")
     async def generate_user_daily_visits(self) -> None:
         """
         Generates daily visit data for use in cohort/ retention analysis
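Both hunks apply the same recipe as the other files: the inline run_as_background_process wrapper becomes a @wrap_as_background_process decorator on the method itself, and the looping_call registration is gated on hs.config.run_background_tasks so the loop is scheduled on exactly one process. As far as scheduling goes, Synapse's Clock.looping_call is a thin layer over Twisted's LoopingCall; a minimal sketch of the gating idea using Twisted directly (the config object and task body are illustrative):

    from twisted.internet import reactor, task


    def read_forward_extremities():
        print("reading forward extremities")


    class Config:
        # True only on the worker designated to run background tasks.
        run_background_tasks = True


    if Config.run_background_tasks:
        loop = task.LoopingCall(read_forward_extremities)
        # Twisted intervals are in seconds; Synapse's Clock takes msec.
        loop.start(60 * 60, now=True)  # fire once now, then hourly

    reactor.callLater(2, reactor.stop)  # stop the demo after two seconds
    reactor.run()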
