Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions chart/templates/s3proxy/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,7 @@ data:
S3PROXY_DASHBOARD_UI: "true"
S3PROXY_DASHBOARD_PATH: {{ .Values.dashboard.path | quote }}
{{- end }}
{{- /* Arbitrary extra env (e.g. memory debug mode: S3PROXY_MEMORY_DEBUG=1). */ -}}
{{- range $k, $v := .Values.extraConfig }}
{{ $k }}: {{ $v | quote }}
{{- end }}
8 changes: 7 additions & 1 deletion chart/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -264,4 +264,10 @@ topologySpreadConstraints: []
podDisruptionBudget:
enabled: true
minAvailable: 1
# maxUnavailable: 1 # Alternative to minAvailable
# maxUnavailable: 1 # Alternative to minAvailable
# Arbitrary extra S3PROXY_* env, injected via the config ConfigMap (envFrom).
# Used for time-boxed diagnostics, e.g. memory debug mode:
# extraConfig: { S3PROXY_MEMORY_DEBUG: "1" }
# which logs RSS vs. tracked heap plus the top allocations every
# S3PROXY_MEMORY_DEBUG_INTERVAL seconds (default 15) and on SIGUSR1. Raise the pod
# memory limit first so the pod survives long enough to dump.
extraConfig: {}
81 changes: 81 additions & 0 deletions s3proxy/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,14 @@

from __future__ import annotations

import asyncio
import contextlib
import logging
import os
import signal
import sys
import time
import tracemalloc
import uuid
from collections.abc import AsyncIterator
from contextlib import asynccontextmanager
Expand All @@ -17,6 +21,7 @@
from prometheus_client import CONTENT_TYPE_LATEST, generate_latest
from structlog.stdlib import BoundLogger

from . import concurrency
from .client import SigV4Verifier
from .config import Settings
from .errors import S3Error, get_s3_error_code
Expand Down Expand Up @@ -65,6 +70,78 @@ def _silence_health_probe_access_logs() -> None:
access_logger.addFilter(_health_probe_filter)


def _rss_mb() -> float | None:
"""Process resident set size in MB from /proc (Linux). None elsewhere."""
try:
with open("/proc/self/status") as f:
for line in f:
if line.startswith("VmRSS:"):
return int(line.split()[1]) / 1024 # kB -> MB
except OSError:
return None
return None


def _dump_tracemalloc(limit: int = 20) -> None:
    """Log one memory-debug report: RSS, tracked Python heap, and top call sites.

    Does nothing unless tracemalloc is currently tracing. Otherwise it takes a
    snapshot grouped by source line and emits:

    * one ``MEMORY_DEBUG`` record with the process RSS (None when it cannot be
      read), the total size tracemalloc has tracked, the difference between
      the two (``untracked_mb``), and the byte count reported by
      ``concurrency.get_active_memory()`` converted to MB;
    * one ``MEMORY_DEBUG_TOP`` record for each of the first ``limit`` snapshot
      statistics, giving its size, allocation count and ``file:line``.

    The RSS-vs-tracked comparison is the purpose of the report: tracemalloc
    only accounts for allocations made through Python's allocators, so a large
    ``untracked_mb`` points away from the listed call sites, while a small one
    means the listed lines explain the usage.

    Args:
        limit: Number of per-line statistics to log after the summary record.
    """
    if not tracemalloc.is_tracing():
        return

    by_line = tracemalloc.take_snapshot().statistics("lineno")
    heap_mb = sum(entry.size for entry in by_line) / 1024 / 1024
    resident_mb = _rss_mb()
    governed_bytes = concurrency.get_active_memory()

    # RSS-derived fields are only meaningful where RSS could be read.
    if resident_mb is None:
        rss_field = None
        gap_field = None
    else:
        rss_field = round(resident_mb, 1)
        gap_field = round(resident_mb - heap_mb, 1)

    logger.warning(
        "MEMORY_DEBUG",
        rss_mb=rss_field,
        tracked_mb=round(heap_mb, 1),
        untracked_mb=gap_field,
        governed_active_mb=round(governed_bytes / 1024 / 1024, 1),
        shown=limit,
    )

    for position, entry in enumerate(by_line[:limit], start=1):
        frame = entry.traceback[0]
        logger.warning(
            "MEMORY_DEBUG_TOP",
            rank=position,
            size_mb=round(entry.size / 1024 / 1024, 2),
            count=entry.count,
            loc=f"{frame.filename}:{frame.lineno}",
        )


async def _periodic_tracemalloc(interval: int) -> None:
    """Call ``_dump_tracemalloc`` every ``interval`` seconds until cancelled.

    A dump that raises is logged and the loop carries on, so a single failed
    report does not silently end all later ones. Task cancellation is not
    caught here (``asyncio.CancelledError`` is not an ``Exception`` subclass),
    so cancelling the task still stops the loop.

    Args:
        interval: Seconds to sleep before each dump.
    """
    while True:
        await asyncio.sleep(interval)
        try:
            _dump_tracemalloc()
        except Exception:
            # Diagnostic loop boundary: record the failure and keep reporting.
            logger.exception("MEMORY_DEBUG_DUMP_FAILED")


def _maybe_start_tracemalloc() -> asyncio.Task | None:
"""Enable memory debug mode (RSS + tracemalloc heap dumps) when requested.

Gated by S3PROXY_MEMORY_DEBUG (alias: S3PROXY_TRACEMALLOC). No-op with zero
overhead when unset. Used for one-pod, time-boxed profiling: dumps every
S3PROXY_MEMORY_DEBUG_INTERVAL secs and on SIGUSR1.
"""
if not (os.environ.get("S3PROXY_MEMORY_DEBUG") or os.environ.get("S3PROXY_TRACEMALLOC")):
return None
frames = int(os.environ.get("S3PROXY_MEMORY_DEBUG_FRAMES", "4"))
interval = int(os.environ.get("S3PROXY_MEMORY_DEBUG_INTERVAL", "15"))
tracemalloc.start(frames)
logger.warning("MEMORY_DEBUG_ENABLED", frames=frames, interval_sec=interval, rss_mb=_rss_mb())
with contextlib.suppress(NotImplementedError, RuntimeError):
asyncio.get_running_loop().add_signal_handler(signal.SIGUSR1, _dump_tracemalloc)
return asyncio.create_task(_periodic_tracemalloc(interval))


def create_lifespan(settings: Settings, credentials_store: dict[str, str]) -> AsyncIterator[None]:
"""Create lifespan context manager for FastAPI app.

Expand Down Expand Up @@ -114,8 +191,12 @@ async def lifespan(app: FastAPI) -> AsyncIterator[None]:
app.state.stats_store = stats_store
app.state.start_time = time.monotonic()

tracemalloc_task = _maybe_start_tracemalloc()

yield

if tracemalloc_task is not None:
tracemalloc_task.cancel()
await stats_store.aclose() # flush buffered samples before Redis closes
await close_redis()
await close_http_client()
Expand Down
45 changes: 45 additions & 0 deletions tests/unit/test_tracemalloc_profiling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""Self-check for the gated tracemalloc heap-dump diagnostic.

Off by default (no env) => zero overhead, no tracing started. When enabled it
must take a snapshot and not raise. Used for one-pod, time-boxed prod profiling
to identify the live allocations driving the OOM.
"""

import tracemalloc

from s3proxy import app


def test_disabled_by_default(monkeypatch):
    """With neither enabling variable set, memory debug mode must not start."""
    for env_name in ("S3PROXY_MEMORY_DEBUG", "S3PROXY_TRACEMALLOC"):
        monkeypatch.delenv(env_name, raising=False)
    started = app._maybe_start_tracemalloc()
    assert started is None


def test_dump_is_noop_when_not_tracing():
    """Calling the dump while tracemalloc is off must return without raising."""
    already_tracing = tracemalloc.is_tracing()
    if already_tracing:
        tracemalloc.stop()
    # Reaching the end of the test without an exception is the assertion.
    app._dump_tracemalloc()


def test_dump_reports_allocations_when_tracing(monkeypatch):
    """While tracing, a dump emits a MEMORY_DEBUG summary and MEMORY_DEBUG_TOP lines.

    The module logger is swapped for a capturing wrapper through ``monkeypatch``
    so it is restored after the test and cannot leak into other tests.
    """
    import structlog

    events = []

    def _capture(_logger, _method, event_dict):
        # Record every event, then pass it through unchanged.
        events.append(event_dict)
        return event_dict

    monkeypatch.setattr(
        app, "logger", structlog.wrap_logger(app.logger, processors=[_capture])
    )

    tracemalloc.start(2)
    try:
        blob = bytearray(4 * 1024 * 1024)  # 4MB, should show up
        app._dump_tracemalloc(limit=5)
        assert blob is not None  # keep the allocation alive across the dump
        snap = next((e for e in events if e.get("event") == "MEMORY_DEBUG"), None)
        assert snap is not None, "no MEMORY_DEBUG event was logged"
        # The debug line must carry the tracked total; rss/untracked are present
        # on Linux (None elsewhere) -- the gap between them is the whole point.
        assert "tracked_mb" in snap and "untracked_mb" in snap
        assert any(e.get("event") == "MEMORY_DEBUG_TOP" for e in events)
    finally:
        tracemalloc.stop()