Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add 'safe' slug scheme #744

Merged
merged 22 commits into from
Aug 1, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
7bac57c
add 'safe' slug scheme
minrk Jun 8, 2023
d6be9f5
Merge branch 'main' into wip-slug
minrk Jun 12, 2024
bdf0f37
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 12, 2024
a1b524d
toward compatible transition for safe slugs
minrk Jun 12, 2024
c0b79af
Merge from main
minrk Jun 13, 2024
058afd5
revert use_legacy_labels config
minrk Jun 17, 2024
6968b68
let safe slug scheme live side-by-side with escape scheme
minrk Jun 17, 2024
45299ce
add multi_slug mechanism for multi-word slugs (username--servername--…
minrk Jun 17, 2024
e24b1f6
sub '-' for any sequence of unsafe characters
minrk Jun 26, 2024
cbd983b
restore trailing hyphen logic
minrk Jun 26, 2024
15ead3c
track kubespawner version in state, annotations
minrk Jun 26, 2024
fc4f628
allow opting out of persisted pvc name with remember_pvc_name = False
minrk Jun 26, 2024
fd4135f
document new template scheme and upgrade notes
minrk Jun 26, 2024
3ece569
update some test expectations
minrk Jun 26, 2024
f96ab38
exercise pvc_name upgrade cases
minrk Jun 26, 2024
09ade1c
Fix markdown table formatting
consideRatio Jul 20, 2024
10ba1fb
Document escaped_username and escaped_servername to be added in v7
consideRatio Jul 20, 2024
d4c2308
clearer comment about values being loaded by get_state
minrk Jul 30, 2024
f44b178
Merge branch 'main' into wip-slug
minrk Jul 30, 2024
97399f0
remove hardcoded safe slug scheme from namespace
minrk Jul 30, 2024
6232c4b
only handle legacy pvc name when remember_pvc_name is true
minrk Jul 31, 2024
845f3d8
Sync with main
minrk Aug 1, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
let safe slug scheme live side-by-side with escape scheme
- add user_server to template namespace
- add `safe_` prefixed fields to template namespace using new 'safe' slug scheme
- add pod_name, namespace, pvc_name to template namespace
  • Loading branch information
minrk committed Jun 17, 2024
commit 6968b68ca9aa0997d231d26f3e4bd0e4f2030ec4
63 changes: 44 additions & 19 deletions kubespawner/slugs.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,23 @@
_object_pattern = re.compile(r'^[a-z0-9\.-]+$')
_label_pattern = re.compile(r'^[a-z0-9\.-_]+$', flags=re.IGNORECASE)

# match two or more hyphens
_hyphen_plus_pattern = re.compile('--+')

# length of hash suffix
_hash_length = 8


def _is_valid_general(
s, starts_with=None, ends_with=None, pattern=None, max_length=None
s, starts_with=None, ends_with=None, pattern=None, min_length=None, max_length=None
):
"""General is_valid check

Checks rules:
"""
if not 1 <= len(s) <= max_length:
if min_length and len(s) < min_length:
return False
if max_length and len(s) > max_length:
return False
if starts_with and not s.startswith(starts_with):
return False
Expand All @@ -47,12 +55,16 @@ def is_valid_object_name(s):
ends_with=_alphanum_lower,
pattern=_object_pattern,
max_length=255,
min_length=1,
)


def is_valid_label(s):
"""is_valid check for label values"""
# label rules: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set
if not s:
# empty strings are valid labels
return True
return _is_valid_general(
s,
starts_with=_alphanum,
Expand All @@ -77,28 +89,40 @@ def is_valid_default(s):
starts_with=_alphanum_lower,
ends_with=_alphanum_lower,
pattern=_object_pattern,
min_length=1,
max_length=63,
)


def strip_and_hash(name):
"""Generate an always-safe, unique string for any input"""
# make sure we start with a prefix
# quick, short hash to avoid name collsions
name_hash = hashlib.sha256(name.encode("utf8")).hexdigest()[:8]
safe_chars = [c for c in name.lower() if c in _lower_plus_hyphen]
safe_name = ''.join(safe_chars[:24])
def strip_and_hash(name, max_length=32):
"""Generate an always-safe, unique string for any input

truncates name to max_length - len(hash_suffix) to fit in max_length
after adding hash suffix
"""
name_length = max_length - (_hash_length + 3)
if name_length < 1:
raise ValueError(f"Cannot make safe names shorter than {_hash_length + 4}")
# quick, short hash to avoid name collisions
name_hash = hashlib.sha256(name.encode("utf8")).hexdigest()[:_hash_length]
# compute safe slug from name (don't worry about collisions, hash handles that)
# cast to lowercase, exclude all but lower & hyphen
safe_name = ''.join([c for c in name.lower() if c in _lower_plus_hyphen])
# strip leading '-'
# squash repeated '--' to one
safe_name = _hyphen_plus_pattern.sub("-", safe_name.lstrip("-"))
# truncate to 24 chars, strip trailing '-'
safe_name = safe_name[:name_length].rstrip("-")
if not safe_name:
# make sure it's non-empty
safe_name = 'x'
if safe_name.startswith('-'):
# starting with '-' is generally not allowed,
# start with 'j-' instead
# Question: always do this so it's consistent, instead of as-needed?
safe_name = f"j{safe_name}"
return f"{safe_name}--{name_hash}"
# due to stripping of '-' above,
# the result will always have _exactly_ '---', never '--' nor '----'
# use '---' to avoid colliding with `{username}--{servername}` template join
return f"{safe_name}---{name_hash}"


def safe_slug(name, is_valid=is_valid_default):
def safe_slug(name, is_valid=is_valid_default, max_length=None):
"""Always generate a safe slug

is_valid should be a callable that returns True if a given string follows appropriate rules,
Expand All @@ -112,8 +136,9 @@ def safe_slug(name, is_valid=is_valid_default):
"""
if '--' in name:
# don't accept any names that could collide with the safe slug
return strip_and_hash(name)
if is_valid(name):
return strip_and_hash(name, max_length=max_length or 32)
# allow max_length override for truncated sub-strings
if is_valid(name) and (max_length is None or len(name) <= max_length):
return name
else:
return strip_and_hash(name)
return strip_and_hash(name, max_length=max_length or 32)
134 changes: 83 additions & 51 deletions kubespawner/spawner.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,9 +186,11 @@ def __init__(self, *args, **kwargs):
# compute other attributes

# namespace, pod_name, dns_name are persisted in state
# thse same assignments should match clear_state
# these same assignments should match clear_state
if self.enable_user_namespaces:
self.namespace = self._expand_user_properties(self.user_namespace_template)
self.namespace = self._expand_user_properties(
self.user_namespace_template, scheme="slug"
)
self.log.info(f"Using user namespace: {self.namespace}")

self.pod_name = self._expand_user_properties(self.pod_name_template)
Expand Down Expand Up @@ -542,7 +544,7 @@ def _namespace_default(self):
)

pod_name_template = Unicode(
'jupyter-{username}--{servername}',
'jupyter-{user_server}',
config=True,
help="""
Template to use to form the name of user's pods.
Expand Down Expand Up @@ -573,7 +575,7 @@ def _namespace_default(self):
The IP address (or hostname) of user's pods which KubeSpawner connects to.
If you do not specify the value, KubeSpawner will use the pod IP.

e.g. `jupyter-{username}--{servername}.notebooks.jupyterhub.svc.cluster.local`,
e.g. `{pod_name}.notebooks.jupyterhub.svc.cluster.local`,

`{username}`, `{userid}`, `{servername}`, `{hubnamespace}`,
`{unescaped_username}`, and `{unescaped_servername}` will be expanded if
Expand Down Expand Up @@ -617,7 +619,7 @@ def _namespace_default(self):
""",
)
pvc_name_template = Unicode(
'claim-{username}--{servername}',
'claim-{user_server}',
config=True,
help="""
Template to use to form the name of user's pvc.
Expand Down Expand Up @@ -1884,71 +1886,100 @@ def _expand_user_properties(self, template, slug_scheme=None):
safe_chars = set(string.ascii_lowercase + string.digits)

raw_servername = self.name or ''
safe_servername = escapism.escape(
escaped_servername = escapism.escape(
raw_servername, safe=safe_chars, escape_char='-'
).lower()

# TODO: measure string template?
# for object names, max is 255, so very unlikely to exceed
# but labels are only 64, so adding a few fields together could easily exceed length limit
# if the per-field limit is more than half the whole budget
# for now, recommend {user_server} anywhere both username and servername are desired
_slug_max_length = 48

if raw_servername:
safe_servername = safe_slug(raw_servername, max_length=_slug_max_length)
else:
safe_servername = ""

username = raw_username = self.user.name
safe_username = safe_slug(raw_username, max_length=_slug_max_length)

# compute safe_user_server = {username}--{servername}
if (
# escaping after joining means
# possible collision with `--` delimiter
'--' in username
or '--' in raw_servername
or username.endswith("-")
or raw_servername.startswith("-")
# length exceeded
or len(safe_username) + len(safe_username) + 2 > _slug_max_length
):
# need double-escape if there's a chance of collision
safe_user_server = safe_slug(
f"{safe_username}--{safe_servername}", max_length=_slug_max_length
)
else:
if raw_servername:
safe_user_server = safe_slug(
f"{username}--{raw_servername}", max_length=_slug_max_length
)
else:
safe_user_server = safe_username

hub_namespace = self._namespace_default()
if hub_namespace == "default":
hub_namespace = "user"

legacy_escaped_username = ''.join(
[s if s in safe_chars else '-' for s in self.user.name.lower()]
)
safe_username = escapism.escape(
escaped_username = escapism.escape(
self.user.name, safe=safe_chars, escape_char='-'
).lower()

if slug_scheme == "safe":
# safe slug scheme escapes _after_ rendering the template
username = self.user.name
servername = raw_servername
# but not escaping before joining {username}--{servername}
# reintroduces the possibility of collision
# e.g. {user--name}--{server} == {user}--{name--server}
# and {user-}--{server} == {user}--{-server}
# in that case, double-escape username and server name.
# double-escape will not collide,
# because it will match always match this condition and be escaped again
if (
'--' in username
or '--' in servername
or username.endswith("-")
or servername.startswith("-")
):
username = safe_slug(username)
if servername:
servername = safe_slug(servername)
elif slug_scheme == "escape":
username = safe_username
servername = safe_servername
user_server = safe_user_server
elif slug_scheme == "escape":
# backward-compatible 'escape' scheme is not safe
username = escaped_username
servername = escaped_servername
if servername:
user_server = f"{escaped_username}--{escaped_servername}"
else:
user_server = escaped_username
else:
raise ValueError(
f"slug scheme must be 'safe' or 'escape', not '{slug_scheme}'"
)

rendered = template.format(
ns = dict(
# raw values, always consistent
userid=self.user.id,
username=username,
escaped_username=safe_username,
unescaped_username=self.user.name,
legacy_escape_username=legacy_escaped_username,
servername=servername,
unescaped_servername=raw_servername,
escaped_servername=safe_servername,
hubnamespace=hub_namespace,
# scheme-dependent
username=username,
servername=servername,
user_server=user_server,
# safe values (new 'safe' scheme)
safe_username=safe_username,
safe_servername=safe_servername,
safe_user_server=safe_user_server,
# legacy values (old 'escape' scheme)
escaped_username=escaped_username,
escaped_servername=escaped_servername,
escaped_user_server=f"{escaped_username}--{escaped_servername}",
)
# strip trailing - delimiter in case of empty servername.
# k8s object names cannot have trailing '-'
if slug_scheme == "safe":
if not (username.endswith("-") or self.name.endswith("-")):
# strip trailing - delimiter _unless_ it's actually the end of the user or server name
rendered = rendered.rstrip("-")
# 'safe' slug scheme does processing after
rendered = safe_slug(rendered)
else:
# safe to strip trailing - in escape scheme because escaped username/servername will never end in '-'
rendered = rendered.rstrip("-")
# add some resolved values so they can be referred to.
# these may not be defined yet (i.e. when computing the values themselves).
for attr_name in ("pod_name", "pvc_name", "namespace"):
ns[attr_name] = getattr(self, attr_name, f"{attr_name}_unavailable!")

rendered = template.format(**ns)
# strip trailing '-' in case of old '{username}--{servername}' template
rendered = rendered.rstrip("-")
return rendered

def _expand_env(self, env):
Expand Down Expand Up @@ -2054,12 +2085,11 @@ def _get_pod_url(self, pod):
hostname = f"[{hostname}]"

if self.pod_connect_ip:
# pod_connect_ip is not a slug
hostname = ".".join(
[
s.rstrip("-")
for s in self._expand_user_properties(self.pod_connect_ip).split(
"."
)
self._expand_user_properties(s) if '{' in s else s
for s in self.pod_connect_ip.split(".")
]
)

Expand Down Expand Up @@ -2277,6 +2307,8 @@ def get_state(self):

We also save the namespace and DNS name for use cases where the namespace is
calculated dynamically, or it changes between restarts.

`pvc_name` is also saved, to prevent data loss if template changes across restarts.
"""
state = super().get_state()
# these should only be persisted if our pod is running
Expand Down
54 changes: 47 additions & 7 deletions tests/test_slugs.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,61 @@
import pytest

from kubespawner.slugs import safe_slug
from kubespawner.slugs import is_valid_label, safe_slug


@pytest.mark.parametrize(
"name, expected",
[
("jupyter-alex", "jupyter-alex"),
("jupyter-Alex", "jupyter-alex--3a1c285c"),
("jupyter-üni", "jupyter-ni--a5aaf5dd"),
("jupyter-Alex", "jupyter-alex---3a1c285c"),
("jupyter-üni", "jupyter-ni---a5aaf5dd"),
("endswith-", "endswith---165f1166"),
("-start", "j-start--f587e2dc"),
("j-start--f587e2dc", "j-start--f587e2dc--f007ef7c"),
("x" * 65, "xxxxxxxxxxxxxxxxxxxxxxxx--9537c5fd"),
("x" * 66, "xxxxxxxxxxxxxxxxxxxxxxxx--6eb879f1"),
("-start", "start---f587e2dc"),
("username--servername", "username-servername---d957f1de"),
("start---f587e2dc", "start-f587e2dc---cc5bb9c9"),
pytest.param("x" * 63, "x" * 63, id="x63"),
pytest.param("x" * 64, "xxxxxxxxxxxxxxxxxxxxx---7ce10097", id="x64"),
pytest.param("x" * 65, "xxxxxxxxxxxxxxxxxxxxx---9537c5fd", id="x65"),
("", "x---e3b0c442"),
],
)
def test_safe_slug(name, expected):
slug = safe_slug(name)
assert slug == expected


@pytest.mark.parametrize(
"max_length, length, expected",
[
(16, 16, "x" * 16),
(16, 17, "xxxxx---d04fd59f"),
(11, 16, "error"),
(12, 16, "x---9c572959"),
],
)
def test_safe_slug_max_length(max_length, length, expected):
name = "x" * length
if expected == "error":
with pytest.raises(ValueError):
safe_slug(name, max_length=max_length)
return

slug = safe_slug(name, max_length=max_length)
assert slug == expected


@pytest.mark.parametrize(
"name, expected",
[
("", ""),
("x", "x"),
("-x", "x---a4209624"),
("x-", "x---c8b60efc"),
pytest.param("x" * 63, "x" * 63, id="x64"),
pytest.param("x" * 64, "xxxxxxxxxxxxxxxxxxxxx---7ce10097", id="x64"),
pytest.param("x" * 65, "xxxxxxxxxxxxxxxxxxxxx---9537c5fd", id="x65"),
],
)
def test_safe_slug_label(name, expected):
slug = safe_slug(name, is_valid=is_valid_label)
assert slug == expected
Loading