Skip to content

Commit ad6b392

Browse files
committed
update configuration of JupyterHub to rootless and userless container with tight SSH connection
1 parent 5816861 commit ad6b392

File tree

12 files changed

+501
-209
lines changed

12 files changed

+501
-209
lines changed
Lines changed: 317 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,317 @@
1+
# Copyright 2023 Vrije Universiteit Brussel
2+
#
3+
# This file is part of notebook-platform,
4+
# originally created by the HPC team of Vrije Universiteit Brussel (http://hpc.vub.be),
5+
# with support of Vrije Universiteit Brussel (http://www.vub.be),
6+
# the Flemish Supercomputer Centre (VSC) (https://www.vscentrum.be),
7+
# the Flemish Research Foundation (FWO) (http://www.fwo.be/en)
8+
# and the Department of Economy, Science and Innovation (EWI) (http://www.ewi-vlaanderen.be/en).
9+
#
10+
# https://github.com/vub-hpc/notebook-platform
11+
#
12+
# notebook-platform is free software: you can redistribute it and/or modify
13+
# it under the terms of the GNU General Public License v3 as published by
14+
# the Free Software Foundation.
15+
#
16+
# notebook-platform is distributed in the hope that it will be useful,
17+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
18+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19+
# GNU General Public License for more details.
20+
#
21+
#------------------------------------------------------------------------------
22+
# Network configuration
23+
#------------------------------------------------------------------------------
24+
# Listen on all interfaces
25+
# proxy is in localhost, users are external and spawners are internal
26+
c.JupyterHub.bind_url = 'https://0.0.0.0:8000'
27+
c.JupyterHub.hub_ip = '0.0.0.0'
28+
# IP address or hostname that spawners should use to connect to the Hub API
29+
c.JupyterHub.hub_connect_ip = 'jupyterhub.internal.domain'
30+
31+
#------------------------------------------------------------------------------
32+
# OAuthenticator configuration
33+
# - use GenericOAuthenticator with the VSC account page
34+
# - work without local VSC users in the JupyterHub container
35+
# - enable SSL
36+
#------------------------------------------------------------------------------
37+
from oauthenticator.generic import GenericOAuthenticator
38+
c.JupyterHub.authenticator_class = GenericOAuthenticator
39+
40+
# Oauth application secrets in the VSC account page
41+
c.GenericOAuthenticator.login_service = 'VSC Account'
42+
c.GenericOAuthenticator.client_id = 'SECRET'
43+
c.GenericOAuthenticator.client_secret = 'SECRET'
44+
c.GenericOAuthenticator.oauth_callback_url = 'https://notebooks.hpc.vub.be/hub/oauth_callback'
45+
c.GenericOAuthenticator.scope = ['read']
46+
47+
# SSL certificates
48+
c.JupyterHub.ssl_cert = '/home/jupyterhub/.ssl/jupyterhub.crt'
49+
c.JupyterHub.ssl_key = '/home/jupyterhub/.ssl/jupyterhub.key'
50+
51+
#------------------------------------------------------------------------------
52+
# Custom notebook spawner for VSC users
53+
# - determine UID and home directory from VSC config
54+
# - works without local VSC users
55+
#------------------------------------------------------------------------------
56+
from jupyterhub_moss import MOSlurmSpawner, set_config
57+
from traitlets import default
58+
from vsc.config.base import DATA_KEY, HOME_KEY, VSC
59+
60+
class VSCSlurmSpawner(MOSlurmSpawner):
    """
    Slurm spawner that takes the user environment from vsc-config.

    Home and data directories are looked up through the VSC configuration,
    so the spawner does not rely on local users.
    """

    # VSC configuration object, instantiated once at class level
    vsc = VSC()

    def vsc_user_institute(self):
        """Return the institute of the VSC user owning this spawner."""
        username = self.user.name
        # The first three characters of the username select the entry of the
        # UID map; its first element is added to the numeric remainder of the
        # username to obtain the user ID.
        uid_base = self.vsc.user_uid_institute_map[username[:3]][0]
        uid_offset = int(username[3:])
        return self.vsc.user_id_to_institute(uid_base + uid_offset)

    @default("req_homedir")
    def vsc_homedir(self):
        """Default of ``req_homedir``: the HOME_KEY entry of the user's VSC paths."""
        owner = self.user.name
        institute = self.vsc_user_institute()
        return self.vsc.user_pathnames(owner, institute)[HOME_KEY]

    @default("notebook_dir")
    def vsc_datadir(self):
        """Default of ``notebook_dir``: the DATA_KEY entry of the user's VSC paths."""
        owner = self.user.name
        institute = self.vsc_user_institute()
        return self.vsc.user_pathnames(owner, institute)[DATA_KEY]

    def user_env(self, env):
        """Set the VSC user variables in ``env`` and return it."""
        # identity and shell of the user
        env["USER"] = self.user.name
        env["SHELL"] = "/bin/bash"
        # directories resolved by the trait defaults defined in this class
        env["HOME"] = self.req_homedir
        env["JUPYTERHUB_ROOT_DIR"] = self.notebook_dir
        return env
90+
91+
#------------------------------------------------------------------------------
92+
# BatchSpawner configuration
93+
# - use VSCSlurmSpawner
94+
# - submit notebook job to Slurm by connecting with SSH to a login node
95+
# - SSH connection established as JupyterHub operator
96+
# - define job script parameters and commands launching the notebook
97+
#------------------------------------------------------------------------------
98+
set_config(c)
99+
c.JupyterHub.spawner_class = VSCSlurmSpawner
100+
c.Spawner.start_timeout = 600 # seconds from job submit to job start
101+
c.Spawner.http_timeout = 120 # seconds from job start to reachable single-user server
102+
103+
# JupyterLab Environments in VUB
104+
vub_lab_environments = {
105+
"2022_default": {
106+
# Text displayed for this environment select option
107+
"description": "2022a: Python v3.10.4 + kernels (default)",
108+
# Space separated list of modules to be loaded
109+
"modules": "JupyterHub/2.3.1-GCCcore-11.3.0",
110+
# Path to Python environment bin/ used to start jupyter on the Slurm nodes
111+
"path": "",
112+
# Toggle adding the environment to shell PATH (default: True)
113+
"add_to_path": False,
114+
},
115+
"2022_rstudio": {
116+
"description": "2022a: Python v3.10.4 + RStudio",
117+
"modules": (
118+
"JupyterHub/2.3.1-GCCcore-11.3.0 "
119+
"jupyter-rsession-proxy/2.1.0-GCCcore-11.3.0 "
120+
"RStudio-Server/2022.07.2+576-foss-2022a-Java-11-R-4.2.1 "
121+
"IRkernel/1.3.2-foss-2022a-R-4.2.1 "
122+
),
123+
"path": "",
124+
"add_to_path": False,
125+
},
126+
"2022_matlab": {
127+
"description": "2022a: Python v3.10.4 + MATLAB",
128+
"modules": (
129+
"MATLAB/2022a-r5 "
130+
"JupyterHub/2.3.1-GCCcore-11.3.0 "
131+
"jupyter-matlab-proxy/0.5.0-GCCcore-11.3.0 "
132+
),
133+
"path": "",
134+
"add_to_path": False,
135+
},
136+
"2022_dask": {
137+
"description": "2022a: Python v3.10.4 + dask",
138+
"modules": (
139+
"JupyterHub/2.3.1-GCCcore-11.3.0 "
140+
"dask-labextension/6.0.0-foss-2022a "
141+
),
142+
"path": "",
143+
"add_to_path": False,
144+
},
145+
"2022_nglview": {
146+
"description": "2022a: Python v3.10.4 + nglview",
147+
"modules": (
148+
"JupyterHub/2.3.1-GCCcore-11.3.0 "
149+
"nglview/3.0.3-foss-2022a "
150+
),
151+
"path": "",
152+
"add_to_path": False,
153+
},
154+
"2021_default": {
155+
"description": "2021a: Python v3.9.5 + kernels (default)",
156+
"modules": "JupyterHub/2.3.1-GCCcore-10.3.0",
157+
"path": "",
158+
"add_to_path": False,
159+
},
160+
"2021_rstudio": {
161+
"description": "2021a: Python v3.9.5 + RStudio",
162+
"modules": (
163+
"JupyterHub/2.3.1-GCCcore-10.3.0 "
164+
"jupyter-rsession-proxy/2.1.0-GCCcore-10.3.0 "
165+
"RStudio-Server/1.4.1717-foss-2021a-Java-11-R-4.1.0 "
166+
"IRkernel/1.2-foss-2021a-R-4.1.0 "
167+
),
168+
"path": "",
169+
"add_to_path": False,
170+
},
171+
"2021_matlab": {
172+
"description": "2021a: Python v3.9.5 + MATLAB",
173+
"modules": (
174+
"MATLAB/2021a "
175+
"JupyterHub/2.3.1-GCCcore-10.3.0 "
176+
"jupyter-matlab-proxy/0.3.4-GCCcore-10.3.0 "
177+
"MATLAB-Kernel/0.17.1-GCCcore-10.3.0 "
178+
),
179+
"path": "",
180+
"add_to_path": False,
181+
},
182+
"2021_dask": {
183+
"description": "2021a: Python v3.9.5 + dask",
184+
"modules": (
185+
"JupyterHub/2.3.1-GCCcore-10.3.0 "
186+
"dask-labextension/5.3.1-foss-2021a "
187+
),
188+
"path": "",
189+
"add_to_path": False,
190+
},
191+
"2021_nglview": {
192+
"description": "2021a: Python v3.9.5 + nglview",
193+
"modules": (
194+
"JupyterHub/2.3.1-GCCcore-10.3.0 "
195+
"nglview/3.0.3-foss-2021a "
196+
),
197+
"path": "",
198+
"add_to_path": False,
199+
},
200+
}
201+
202+
# Partition descriptions
203+
# Partition settings for Hydra, keyed by partition name.
# Every partition offers the same set of JupyterLab environments.
vub_partitions_hydra = {
    "broadwell": {  # Partition name
        "architecture": "x86_64",  # Nodes architecture
        "description": "Intel Broadwell",  # Displayed description
        "max_runtime": 12*3600,  # Maximum time limit in seconds (must be at least 1 hour)
        "simple": True,  # True to show in Simple tab
        "jupyter_environments": vub_lab_environments,
    },
    "skylake": {
        "architecture": "x86_64",
        "description": "Intel Skylake",
        "max_runtime": 12*3600,
        "simple": True,
        "jupyter_environments": vub_lab_environments,
    },
    "pascal_gpu": {
        "architecture": "CUDA",
        "description": "Nvidia Pascal P100",
        "max_runtime": 6*3600,
        "simple": True,
        "jupyter_environments": vub_lab_environments,
    },
    "skylake_mpi": {
        "architecture": "x86_64",
        "description": "Intel Skylake with InfiniBand",
        "max_runtime": 6*3600,
        "simple": False,
        "jupyter_environments": vub_lab_environments,
    },
}
233+
234+
# Partition settings for Manticore, same layout as the Hydra partitions:
# keyed by partition name, with the shared set of JupyterLab environments.
# NOTE(review): this dict is not assigned to the spawner in the visible
# configuration — confirm whether it is used elsewhere.
vub_partitions_manticore = {
    "ivybridge": {
        "architecture": "x86_64",  # Nodes architecture
        "description": "Intel Ivybridge",  # Displayed description
        "max_runtime": 8*3600,  # Maximum time limit in seconds
        "simple": True,  # True to show in Simple tab
        "jupyter_environments": vub_lab_environments,
    },
    "ampere_gpu": {
        "architecture": "CUDA",
        "description": "Nvidia Ampere",
        "max_runtime": 8*3600,
        "simple": True,
        "jupyter_environments": vub_lab_environments,
    },
    "skylake_mpi": {
        "architecture": "x86_64",
        "description": "Intel Skylake with InfiniBand",
        "max_runtime": 4*3600,
        "simple": False,
        "jupyter_environments": vub_lab_environments,
    },
}
257+
258+
c.MOSlurmSpawner.partitions = vub_partitions_hydra
259+
260+
# Single-user server job loads its own JupyterHub with batchspawner (for comms)
261+
# plus either JupyterLab or JupyterNotebook
262+
# Job environment is reset to an aseptic state avoiding user's customizations
263+
c.BatchSpawnerBase.req_prologue = """
264+
function serialize_env(){
265+
# Pick all environment variables matching each given pattern
266+
# output their definitions ready to be exported to the environment
267+
for var_pattern in $@; do
268+
var_pattern="^${var_pattern}="
269+
while read envar; do
270+
# Protect contents of variables with printf %q because this job
271+
# script is sent as standard input to sbatch through ssh and sudo
272+
envar_name=${envar/=*}
273+
printf "export %q=%q\n" "${envar_name}" "${!envar_name}"
274+
done < <(env | grep "$var_pattern" )
275+
done
276+
}
277+
278+
# Launch notebook in aseptic environment
279+
# note: the initial shell of the job script will evaluate the whole `exec env -i bash`
280+
# command before its execution. This means that any variable ${} or command substitution $()
281+
# in the input will be carried out before entering the minimal environment of `env -i`.
282+
exec env -i bash --norc --noprofile <<EOF
283+
$(serialize_env HOME SHELL TMPDIR SLURM.* UCX_TLS JUPYTER.* JPY_API_TOKEN)
284+
source /etc/profile
285+
source /etc/bashrc
286+
"""
287+
c.BatchSpawnerBase.req_epilogue = """
288+
EOF
289+
"""
290+
291+
# Execute all Slurm commands on the login node
292+
# jump to login node as JupyterHub user with SSH and the following settings (~/.ssh/config):
293+
# - disable StrictHostKeyChecking to auto-accept keys from login nodes
294+
# - pass full JupyterHub environment with SendEnv
295+
c.SlurmSpawner.exec_prefix = "ssh login.internal.domain "
296+
# slurm cluster settings
297+
c.SlurmSpawner.exec_prefix += "SLURM_CLUSTERS=hydra SLURM_CONF=/etc/slurm/slurm.conf "
298+
# switch to end-user in the login node
299+
c.SlurmSpawner.exec_prefix += "sudo -u {username} "
300+
301+
# fix templating for scancel
302+
c.SlurmSpawner.batch_cancel_cmd = "scancel {{job_id}} "
303+
# protect argument quoting in squeue and sinfo sent through SSH
304+
c.SlurmSpawner.batch_query_cmd = r"squeue -h -j {{job_id}} -o \'%T %B\' "
305+
c.MOSlurmSpawner.slurm_info_cmd = r"sinfo -a --noheader -o \'%R %D %C %G %m\'"
306+
307+
# single-user server is launched with srun, it needs the full environment of the job script
308+
c.SlurmSpawner.req_srun = 'srun --export=ALL'
309+
310+
# expand the execution hostname returned by squeue to a FQDN
311+
c.SlurmSpawner.state_exechost_exp = r'\1.hydra.internal.domain'
312+
313+
#------------------------------------------------------------------------------
314+
# Web UI template configuration
315+
#------------------------------------------------------------------------------
316+
# Paths to search for jinja templates, before using the default templates.
317+
c.JupyterHub.template_paths = ["/home/jupyterhub/.config/templates"]
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
{% extends "templates/login.html" %} {% block login %}
2+
<style type="text/css">
3+
.container #login-main {
4+
width: 100%;
5+
height: auto;
6+
margin-top: 8ex;
7+
margin-bottom: 8ex;
8+
}
9+
</style>
10+
<div class="container">
11+
<div class="row">
12+
<div class="col-md-6">
13+
<h1>Notebook Platform of VUB-HPC</h1>
14+
<p>
15+
Welcome to the notebook platform of <a href="https://hpc.vub.be">VUB-HPC</a>.
16+
This portal can be used by any <a href="https://www.vscentrum.be/">VSC</a> user
17+
to manage and launch <a href="https://jupyter.org/">Jupyter</a> notebooks
18+
directly on the Tier-2 HPC cluster of <a href="https://www.vub.be">VUB</a> (Hydra).
19+
Once you log in with your VSC account, you will be able to
20+
<a href="https://hpc.vub.be/docs/notebooks/#computational-resources">select the computational
21+
resources</a> of your notebook session and start a
22+
<a href="https://jupyterlab.readthedocs.io">JupyterLab</a> environment.
23+
</p>
24+
<p>
25+
<a href="https://hpc.vub.be/docs/notebooks/#jupyter-environment">Multiple Jupyter environments</a>
26+
are available using different versions of Python, different software
27+
module generations or different lab extensions.
28+
All JupyterLab environments in this platform are integrated with the
29+
<a href="https://hpc.vub.be/docs/notebooks/#software-modules">software module system</a>
30+
in our HPC cluster. This means that you can load and use in your notebooks the
31+
same software packages used in your computational jobs.
32+
</p>
33+
<p>
34+
You will also find multiple kernels available for your notebooks, such as
35+
<a href="https://www.python.org/">Python</a>,
36+
<a href="https://www.r-project.org/">R</a>,
37+
<a href="https://julialang.org/">Julia</a> or
38+
<a href="https://www.mathworks.com/products/matlab.html">MATLAB</a>.
39+
You can also start other environments directly from your web
40+
browser, such as <a href="https://www.rstudio.com/">RStudio</a> or
41+
<a href="https://www.mathworks.com/products/matlab.html">MATLAB Desktop</a>.
42+
</p>
43+
{{ super() }}
44+
</div>
45+
<div class="col-md-6">
46+
<p style="margin-top: 10ex; margin-bottom: 10ex;">
47+
<a href="https://hpc.vub.be">
48+
<img src="{{static_url("images/vub-hpc-logo-horiz-color.png") }}" class="img-responsive center-block" style="max-height: 100px" alt="VUB-HPC Logo">
49+
</a>
50+
</p>
51+
<p style="margin-top: 10ex; margin-bottom: 10ex;">
52+
<a href="https://www.vscentrum.be/">
53+
<img src="{{static_url("images/vsc-logo.png") }}" class="img-responsive center-block" style="max-height: 100px" alt="VSC Logo">
54+
</a>
55+
</p>
56+
</div>
57+
</div>
</div>
58+
{% endblock %}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{% extends "templates/page.html" %} {% block nav_bar_right_items %}
2+
<li>
3+
<a href="https://hpc.vub.be/docs/notebooks/">Documentation</a>
4+
</li>
5+
{{ super() }}
6+
{% endblock %}

0 commit comments

Comments
 (0)