Skip to content

Allow specifying extra params to scrub from logs #1538

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions docs/source/operators/configuring-logging.rst
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,28 @@ A minimal example which logs Jupyter Server output to a file:
redirecting to log files ensure they have appropriate permissions.


.. _configurable_logging.log_scrub:

Configuring Log Scrubbing
^^^^^^^^^^^^^^^^^^^^^^^^^^

By default, Jupyter Server scrubs sensitive URL parameters from log output to prevent
security tokens and other sensitive information from being leaked in log files. Additional
parameters to be scrubbed can be configured using the ``extra_log_scrub_param_keys`` trait.

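By default, any URL parameter whose key *contains* one of the following substrings is scrubbed: ``token``, ``auth``, ``key``, ``code``, ``state``, and ``xsrf``. Because matching is by substring, a parameter such as ``api_key`` is already scrubbed by the default ``key`` entry.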

Example configuration to add additional parameters to scrub:

.. code-block:: python

# jupyter_server_config.py

# Add additional parameter keys to scrub (these will be added to the defaults)
c.ServerApp.extra_log_scrub_param_keys = [
"password", "secret", "api_key", "jwt-token"
]

.. _configurable_logging.extension_applications:

Jupyter Server Extension Applications (e.g. Jupyter Lab)
Expand Down
17 changes: 12 additions & 5 deletions jupyter_server/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,14 @@
# url params to be scrubbed if seen
# any url param that *contains* one of these
# will be scrubbed from logs
_SCRUB_PARAM_KEYS = {"token", "auth", "key", "code", "state", "xsrf"}
_DEFAULT_SCRUB_PARAM_KEYS = {"token", "auth", "key", "code", "state", "xsrf"}


def _scrub_uri(uri: str) -> str:
def _scrub_uri(uri: str, extra_param_keys=None) -> str:
"""scrub auth info from uri"""

scrub_param_keys = _DEFAULT_SCRUB_PARAM_KEYS.union(set(extra_param_keys or []))

parsed = urlparse(uri)
if parsed.query:
# check for potentially sensitive url params
Expand All @@ -31,7 +34,7 @@ def _scrub_uri(uri: str) -> str:
changed = False
for i, s in enumerate(parts):
key, sep, value = s.partition("=")
for substring in _SCRUB_PARAM_KEYS:
for substring in scrub_param_keys:
if substring in key:
parts[i] = f"{key}{sep}[secret]"
changed = True
Expand Down Expand Up @@ -59,6 +62,10 @@ def log_request(handler, record_prometheus_metrics=True):
except AttributeError:
logger = access_log

extra_param_keys = []
if hasattr(handler, "serverapp") and hasattr(handler.serverapp, "extra_log_scrub_param_keys"):
extra_param_keys = handler.serverapp.extra_log_scrub_param_keys

if status < 300 or status == 304:
# Successes (or 304 Not Modified) are debug-level
log_method = logger.debug
Expand All @@ -74,7 +81,7 @@ def log_request(handler, record_prometheus_metrics=True):
"status": status,
"method": request.method,
"ip": request.remote_ip,
"uri": _scrub_uri(request.uri),
"uri": _scrub_uri(request.uri, extra_param_keys),
"request_time": request_time,
}
# log username
Expand All @@ -90,7 +97,7 @@ def log_request(handler, record_prometheus_metrics=True):
msg = "{status} {method} {uri} ({username}@{ip}) {request_time:.2f}ms"
if status >= 400:
# log bad referrers
ns["referer"] = _scrub_uri(request.headers.get("Referer", "None"))
ns["referer"] = _scrub_uri(request.headers.get("Referer", "None"), extra_param_keys)
msg = msg + " referer={referer}"
if status >= 500 and status != 502:
# Log a subset of the headers if it caused an error.
Expand Down
18 changes: 18 additions & 0 deletions jupyter_server/serverapp.py
Original file line number Diff line number Diff line change
Expand Up @@ -2006,6 +2006,24 @@ def _default_terminals_enabled(self) -> bool:

Set to False to disable recording the http_request_duration_seconds metric.
""",
config=True,
)

# Configurable list of extra key substrings to scrub from logged URIs.
# jupyter_server.log.log_request reads this trait from handler.serverapp and
# passes it to _scrub_uri, which unions it with the built-in default keys.
extra_log_scrub_param_keys = List(
Unicode(),
default_value=[],
config=True,
help="""
Additional URL parameter keys to scrub from logs.

These will be added to the default list of scrubbed parameter keys.
Any URL parameter whose key contains one of these substrings will have
its value replaced with '[secret]' in the logs. This is to prevent
sensitive information like authentication tokens from being leaked
in log files.

Default scrubbed keys: ["token", "auth", "key", "code", "state", "xsrf"]
""",
)

static_immutable_cache = List(
Expand Down
70 changes: 70 additions & 0 deletions tests/test_log.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
"""Tests for log utilities."""

from unittest.mock import Mock

import pytest

from jupyter_server.log import log_request
from jupyter_server.serverapp import ServerApp


@pytest.fixture
def server_app_with_extra_scrub_keys():
    """Provide a ServerApp whose extra scrub keys are ``password`` and ``secret``."""
    configured_app = ServerApp()
    # Assigned after construction, exactly as a config file would override the trait.
    configured_app.extra_log_scrub_param_keys = ["password", "secret"]
    return configured_app


@pytest.fixture
def server_app_with_default_scrub_keys():
    """Provide a ServerApp that leaves ``extra_log_scrub_param_keys`` at its default."""
    return ServerApp()


def test_log_request_scrubs_sensitive_params_default(server_app_with_default_scrub_keys, caplog):
    """With no extra keys configured, the built-in ``token`` key is still scrubbed."""
    # Mocked request handler wired to the default-configured server app.
    handler = Mock()
    handler.serverapp = server_app_with_default_scrub_keys
    handler.current_user = None
    handler.log = Mock()
    handler.get_status.return_value = 200

    # Alias the auto-created child mock so the request attributes read as a group.
    request = handler.request
    request.method = "GET"
    request.remote_ip = "127.0.0.1"
    request.uri = "http://example.com/path?token=secret123&normal=value"
    request.request_time.return_value = 0.1

    log_request(handler, record_prometheus_metrics=False)

    # A 200 response is logged through the handler logger's debug method.
    debug_log = handler.log.debug
    debug_log.assert_called_once()
    logged_message = debug_log.call_args.args[0]

    assert "secret123" not in logged_message
    assert "[secret]" in logged_message
    assert "normal=value" in logged_message


def test_log_request_scrubs_sensitive_params_extra(server_app_with_extra_scrub_keys, caplog):
    """Extra configured keys are scrubbed in addition to the built-in defaults."""
    # Mocked request handler wired to the app that adds "password" and "secret".
    handler = Mock()
    handler.serverapp = server_app_with_extra_scrub_keys
    handler.current_user = None
    handler.log = Mock()
    handler.get_status.return_value = 200

    # Alias the auto-created child mock so the request attributes read as a group.
    request = handler.request
    request.method = "GET"
    request.remote_ip = "127.0.0.1"
    # One parameter matched by an extra key, one by a default key, one harmless.
    request.uri = "http://example.com/path?password=secret123&token=default_token&normal=value"
    request.request_time.return_value = 0.1

    log_request(handler, record_prometheus_metrics=False)

    # A 200 response is logged through the handler logger's debug method.
    debug_log = handler.log.debug
    debug_log.assert_called_once()
    logged_message = debug_log.call_args.args[0]

    assert "secret123" not in logged_message
    assert "default_token" not in logged_message
    assert "[secret]" in logged_message
    assert "normal=value" in logged_message