diff --git a/.circleci/integration-test.py b/.circleci/integration-test.py index 1572e8e1d..7d214c237 100755 --- a/.circleci/integration-test.py +++ b/.circleci/integration-test.py @@ -32,7 +32,7 @@ def run_systemd_image(image_name, container_name, bootstrap_pip_spec): # This is the minimum VM size we support. JupyterLab extensions seem # to need at least this much RAM to build. Boo? # If we change this, need to change all other references to this number. - '--memory', '1G', + '--memory', '1.0G', ] if bootstrap_pip_spec: diff --git a/tests/test_configurer.py b/tests/test_configurer.py index 0481585bf..fa3090ecd 100644 --- a/tests/test_configurer.py +++ b/tests/test_configurer.py @@ -213,7 +213,7 @@ def test_cull_service_default(): c = apply_mock_config({}) cull_cmd = [ - sys.executable, '-m', 'tljh.cull_idle_servers', + sys.executable, '-m', 'jupyterhub_idle_culler', '--timeout=600', '--cull-every=60', '--concurrency=5', '--max-age=0' ] @@ -238,7 +238,7 @@ def test_set_cull_service(): } }) cull_cmd = [ - sys.executable, '-m', 'tljh.cull_idle_servers', + sys.executable, '-m', 'jupyterhub_idle_culler', '--timeout=600', '--cull-every=10', '--concurrency=5', '--max-age=60', '--cull-users' ] @@ -261,7 +261,7 @@ def test_load_secrets(tljh_dir): c = apply_mock_config(tljh_config) assert c.TraefikTomlProxy.traefik_api_password == "traefik-password" - + def test_auth_native(): """ Test setting Native Authenticator diff --git a/tljh/configurer.py b/tljh/configurer.py index 3a9ec05d3..73f602bfc 100644 --- a/tljh/configurer.py +++ b/tljh/configurer.py @@ -218,7 +218,7 @@ def set_cull_idle_service(config): Set Idle Culler service """ cull_cmd = [ - sys.executable, '-m', 'tljh.cull_idle_servers' + sys.executable, '-m', 'jupyterhub_idle_culler' ] cull_config = config['services']['cull'] print() diff --git a/tljh/cull_idle_servers.py b/tljh/cull_idle_servers.py deleted file mode 100644 index 32524dda8..000000000 --- a/tljh/cull_idle_servers.py +++ /dev/null @@ -1,342 +0,0 @@ -#!/usr/bin/env python3 -"""script to monitor and cull idle single-user servers - -Imported from https://github.com/jupyterhub/jupyterhub/blob/6b1046697/examples/cull-idle/cull_idle_servers.py - -Caveats: - -last_activity is not updated with high frequency, -so cull timeout should be greater than the sum of: - -- single-user websocket ping interval (default: 30s) -- JupyterHub.last_activity_interval (default: 5 minutes) - -You can run this as a service managed by JupyterHub with this in your config:: - - - c.JupyterHub.services = [ - { - 'name': 'cull-idle', - 'admin': True, - 'command': 'python cull_idle_servers.py --timeout=3600'.split(), - } - ] - -Or run it manually by generating an API token and storing it in `JUPYTERHUB_API_TOKEN`: - - export JUPYTERHUB_API_TOKEN=`jupyterhub token` - python cull_idle_servers.py [--timeout=900] [--url=http://127.0.0.1:8081/hub/api] -""" - -from datetime import datetime, timezone -from functools import partial -import json -import os - -try: - from urllib.parse import quote -except ImportError: - from urllib import quote - -import dateutil.parser - -from tornado.gen import coroutine, multi -from tornado.locks import Semaphore -from tornado.log import app_log -from tornado.httpclient import AsyncHTTPClient, HTTPRequest -from tornado.ioloop import IOLoop, PeriodicCallback -from tornado.options import define, options, parse_command_line - - -def parse_date(date_string): - """Parse a timestamp - - If it doesn't have a timezone, assume utc - - Returned datetime object will always be timezone-aware - """ - dt = dateutil.parser.parse(date_string) - if not dt.tzinfo: - # assume naïve timestamps are UTC - dt = dt.replace(tzinfo=timezone.utc) - return dt - - -def format_td(td): - """ - Nicely format a timedelta object - - as HH:MM:SS - """ - if td is None: - return "unknown" - if isinstance(td, str): - return td - seconds = int(td.total_seconds()) - h = seconds // 3600 - seconds = seconds % 3600 - m = seconds // 60 - seconds = seconds % 60 - return f"{h:02}:{m:02}:{seconds:02}" - - -@coroutine -def cull_idle(url, api_token, inactive_limit, cull_users=False, max_age=0, concurrency=10): - """Shutdown idle single-user servers - - If cull_users, inactive *users* will be deleted as well. - """ - auth_header = { - 'Authorization': 'token %s' % api_token, - } - req = HTTPRequest( - url=url + '/users', - headers=auth_header, - ) - now = datetime.now(timezone.utc) - client = AsyncHTTPClient() - - if concurrency: - semaphore = Semaphore(concurrency) - @coroutine - def fetch(req): - """client.fetch wrapped in a semaphore to limit concurrency""" - yield semaphore.acquire() - try: - return (yield client.fetch(req)) - finally: - yield semaphore.release() - else: - fetch = client.fetch - - resp = yield fetch(req) - users = json.loads(resp.body.decode('utf8', 'replace')) - futures = [] - - @coroutine - def handle_server(user, server_name, server): - """Handle (maybe) culling a single server - - Returns True if server is now stopped (user removable), - False otherwise. - """ - log_name = user['name'] - if server_name: - log_name = '%s/%s' % (user['name'], server_name) - if server.get('pending'): - app_log.warning( - "Not culling server %s with pending %s", - log_name, server['pending']) - return False - - if server.get('started'): - age = now - parse_date(server['started']) - else: - # started may be undefined on jupyterhub < 0.9 - age = None - - # check last activity - # last_activity can be None in 0.9 - if server['last_activity']: - inactive = now - parse_date(server['last_activity']) - else: - # no activity yet, use start date - # last_activity may be None with jupyterhub 0.9, - # which introduces the 'started' field which is never None - # for running servers - inactive = age - - should_cull = (inactive is not None and - inactive.total_seconds() >= inactive_limit) - if should_cull: - app_log.info( - "Culling server %s (inactive for %s)", - log_name, format_td(inactive)) - - if max_age and not should_cull: - # only check started if max_age is specified - # so that we can still be compatible with jupyterhub 0.8 - # which doesn't define the 'started' field - if age is not None and age.total_seconds() >= max_age: - app_log.info( - "Culling server %s (age: %s, inactive for %s)", - log_name, format_td(age), format_td(inactive)) - should_cull = True - - if not should_cull: - app_log.debug( - "Not culling server %s (age: %s, inactive for %s)", - log_name, format_td(age), format_td(inactive)) - return False - - req = HTTPRequest( - url=url + '/users/%s/server' % quote(user['name']), - method='DELETE', - headers=auth_header, - ) - resp = yield fetch(req) - if resp.code == 202: - app_log.warning( - "Server %s is slow to stop", - log_name, - ) - # return False to prevent culling user with pending shutdowns - return False - return True - - @coroutine - def handle_user(user): - """Handle one user. - - Create a list of their servers, and async exec them. Wait for - that to be done, and if all servers are stopped, possibly cull - the user. - """ - # shutdown servers first. - # Hub doesn't allow deleting users with running servers. - # named servers contain the 'servers' dict - if 'servers' in user: - servers = user['servers'] - # Otherwise, server data is intermingled in with the user - # model - else: - servers = {} - if user['server']: - servers[''] = { - 'started': user.get('started'), - 'last_activity': user['last_activity'], - 'pending': user['pending'], - 'url': user['server'], - } - server_futures = [ - handle_server(user, server_name, server) - for server_name, server in servers.items() - ] - results = yield multi(server_futures) - if not cull_users: - return - # some servers are still running, cannot cull users - still_alive = len(results) - sum(results) - if still_alive: - app_log.debug( - "Not culling user %s with %i servers still alive", - user['name'], still_alive) - return False - - should_cull = False - if user.get('created'): - age = now - parse_date(user['created']) - else: - # created may be undefined on jupyterhub < 0.9 - age = None - - # check last activity - # last_activity can be None in 0.9 - if user['last_activity']: - inactive = now - parse_date(user['last_activity']) - else: - # no activity yet, use start date - # last_activity may be None with jupyterhub 0.9, - # which introduces the 'created' field which is never None - inactive = age - - should_cull = (inactive is not None and - inactive.total_seconds() >= inactive_limit) - if should_cull: - app_log.info( - "Culling user %s (inactive for %s)", - user['name'], inactive) - - if max_age and not should_cull: - # only check created if max_age is specified - # so that we can still be compatible with jupyterhub 0.8 - # which doesn't define the 'started' field - if age is not None and age.total_seconds() >= max_age: - app_log.info( - "Culling user %s (age: %s, inactive for %s)", - user['name'], format_td(age), format_td(inactive)) - should_cull = True - - if not should_cull: - app_log.debug( - "Not culling user %s (created: %s, last active: %s)", - user['name'], format_td(age), format_td(inactive)) - return False - - req = HTTPRequest( - url=url + '/users/%s' % user['name'], - method='DELETE', - headers=auth_header, - ) - yield fetch(req) - return True - - for user in users: - futures.append((user['name'], handle_user(user))) - - for (name, f) in futures: - try: - result = yield f - except Exception: - app_log.exception("Error processing %s", name) - else: - if result: - app_log.debug("Finished culling %s", name) - - -if __name__ == '__main__': - define( - 'url', - default=os.environ.get('JUPYTERHUB_API_URL'), - help="The JupyterHub API URL", - ) - define('timeout', type=int, default=600, help="The idle timeout (in seconds)") - define('cull_every', type=int, default=0, - help="The interval (in seconds) for checking for idle servers to cull") - define('max_age', type=int, default=0, - help="The maximum age (in seconds) of servers that should be culled even if they are active") - define('cull_users', type=bool, default=False, - help="""Cull users in addition to servers. - This is for use in temporary-user cases such as tmpnb.""", - ) - define('concurrency', type=int, default=10, - help="""Limit the number of concurrent requests made to the Hub. - - Deleting a lot of users at the same time can slow down the Hub, - so limit the number of API requests we have outstanding at any given time. - """ - ) - - parse_command_line() - if not options.cull_every: - options.cull_every = options.timeout // 2 - api_token = os.environ['JUPYTERHUB_API_TOKEN'] - - try: - AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient") - except ImportError as e: - app_log.warning( - "Could not load pycurl: %s\n" - "pycurl is recommended if you have a large number of users.", - e) - - loop = IOLoop.current() - cull = partial( - cull_idle, - url=options.url, - api_token=api_token, - inactive_limit=options.timeout, - cull_users=options.cull_users, - max_age=options.max_age, - concurrency=options.concurrency, - ) - # schedule first cull immediately - # because PeriodicCallback doesn't start until the end of the first interval - loop.add_callback(cull) - # schedule periodic cull - pc = PeriodicCallback(cull, 1e3 * options.cull_every) - pc.start() - try: - loop.start() - except KeyboardInterrupt: - pass diff --git a/tljh/installer.py b/tljh/installer.py index c13a6e9c5..0a32173dd 100644 --- a/tljh/installer.py +++ b/tljh/installer.py @@ -225,6 +225,7 @@ def ensure_jupyterhub_package(prefix): 'jupyterhub-ldapauthenticator==1.3.0', 'jupyterhub-tmpauthenticator==0.6', 'oauthenticator==0.10.0', + 'jupyterhub-idle-culler==1.0' ]) traefik.ensure_traefik_binary(prefix)