Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
[build-system]
requires = ["setuptools>=61"]
# ``cython`` is required to build optional extension modules that speed up
# certain numerical routines such as the Rosenbrock Hessian.
requires = ["setuptools>=61", "cython>=0.29.36", "numpy>=1.24"]
build-backend = "setuptools.build_meta"

[project]
Expand Down Expand Up @@ -49,6 +51,12 @@ include = ["psd*", "psd_optimizer*"]
psd = ["py.typed"]
psd_optimizer = ["py.typed"]

# Extension modules
[[tool.setuptools.ext-modules]]
name = "psd._rosenbrock"
sources = ["src/psd/_rosenbrock.pyx"]
optional = true

[tool.mypy]
python_version = "3.11"
strict = false
Expand Down
39 changes: 39 additions & 0 deletions src/psd/_rosenbrock.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# cython: language_level=3
"""Cython implementation of the Rosenbrock Hessian.

The pure NumPy version in :mod:`psd.functions` is adequate for small
problems but becomes a bottleneck for large ``d``. This Cython routine
uses explicit loops and typed memoryviews to eliminate Python overhead
and serves as a reference for how performance‑critical sections could be
ported to a compiled extension.
"""

import numpy as np
cimport numpy as np


def rosenbrock_hess_fast(np.ndarray[np.float64_t, ndim=1] x):
"""Compute the Rosenbrock Hessian using Cython loops.

Parameters
----------
x:
Input vector of length ``d``.

Returns
-------
np.ndarray
Hessian matrix of shape ``(d, d)``.
"""
cdef Py_ssize_t d = x.shape[0]
cdef np.ndarray[np.float64_t, ndim=2] hess = np.zeros((d, d), dtype=np.float64)
cdef Py_ssize_t i
if d > 1:
for i in range(d - 1):
hess[i, i] = 1200.0 * x[i] * x[i] - 400.0 * x[i + 1] + 2.0
hess[i + 1, i + 1] += 200.0
hess[i, i + 1] = -400.0 * x[i]
hess[i + 1, i] = -400.0 * x[i]
else:
hess[0, 0] = 200.0
return hess
30 changes: 26 additions & 4 deletions src/psd/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,21 +122,43 @@ def psd(
rng = random_state
x = x0.copy()
d = x.size
# ------------------------------------------------------------------
# Derived parameters
# ------------------------------------------------------------------
# ``gamma`` sets the curvature threshold that distinguishes a true
# saddle point from numerical noise. It arises from the
# Hessian‑Lipschitz constant ``rho`` and the gradient tolerance
# ``epsilon`` via ``gamma = sqrt(rho * epsilon)``.
gamma = np.sqrt(rho * epsilon)
# Perturbation radius r = gamma/(8*rho)

# The perturbation radius ``r`` is *curvature‑calibrated* – the scale
# ensures we only move far enough to escape regions of significant
# negative curvature while remaining inside the neighbourhood where the
# Taylor expansion controlled by ``rho`` is valid.
# r = gamma / (8 * rho) = sqrt(epsilon / rho) / 8
if rho > 0:
r = (1.0 / 8.0) * np.sqrt(epsilon / rho)
else:
# When ``rho`` vanishes the Hessian is constant and no perturbation
# is required.
r = 0.0
# Maximum number of escape episodes

# ``M`` bounds the number of allowed escape episodes. The constant is
# obtained by applying a union bound over episodes so that the overall
# failure probability stays below ``delta``.
M = int(1 + np.ceil(128.0 * ell * delta_f / (epsilon**2)))
# Episode length

# Episode length ``_T`` is the number of gradient steps performed
# during an escape attempt. The logarithmic term again results from a
# union bound over dimensions and episodes.
if rho > 0 and epsilon > 0:
_T = int(np.ceil(8.0 * ell / gamma * np.log((16.0 * d * M) / max(delta, 1e-12))))
else:
_T = 0
# Step size

# ``eta`` is the gradient descent step size. A feature flag allows a
# slightly more aggressive choice that is still theoretically
# justified.
eta = 1.0 / (ell if FLAGS.new_escape_condition else 2.0 * ell)
grad_evals = 0
episodes_used = 0
Expand Down
23 changes: 22 additions & 1 deletion src/psd/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,14 @@
import numpy as np
from numpy.typing import NDArray

# Optional Cython acceleration for the Rosenbrock Hessian. Importing is
# wrapped in a ``try`` so that the pure Python fall-back is used when the
# extension has not been built.
try: # pragma: no cover - depends on build environment
from ._rosenbrock import rosenbrock_hess_fast
except Exception: # pragma: no cover - extension may be missing
rosenbrock_hess_fast = None # type: ignore

Array: TypeAlias = NDArray[np.float64]


Expand Down Expand Up @@ -227,7 +235,16 @@ def rosenbrock_grad(x: Array) -> Array:


def rosenbrock_hess(x: Array) -> Array:
"""Hessian of the Rosenbrock function.
r"""Hessian of the Rosenbrock function.

The implementation dispatches to a Cython routine when available for
efficiency. The mathematical form of the Hessian is

.. math::

H_{i,i} = 1200 x_i^2 - 400 x_{i+1} + 2, \quad
H_{i,i+1} = H_{i+1,i} = -400 x_i, \quad
H_{d,d} = 200.

Parameters
----------
Expand All @@ -240,6 +257,10 @@ def rosenbrock_hess(x: Array) -> Array:
Hessian matrix.
"""
x = np.asarray(x)
if rosenbrock_hess_fast is not None:
# Use the compiled version for large ``d`` to avoid Python overhead.
return rosenbrock_hess_fast(x)

d = len(x)
hess = np.zeros((d, d))
if d > 1:
Expand Down
15 changes: 15 additions & 0 deletions tests/test_algorithms_property.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,3 +95,18 @@ def hess(x: np.ndarray) -> np.ndarray:
cfg = PSDConfig(epsilon=1e-6, ell=1.0, rho=1.0, max_iter=10)
with pytest.warns(DeprecationWarning):
algorithms.deprecated_psd(x0, grad, hess, 1e-6, 1.0, 1.0, config=cfg)


def test_psd_handles_zero_rho() -> None:
"""Ensure the algorithm remains stable when the Hessian is constant."""

def grad(x: np.ndarray) -> np.ndarray:
return x

def hess(x: np.ndarray) -> np.ndarray:
return np.eye(len(x))

x0 = np.array([1.0, -1.0])
cfg = PSDConfig(epsilon=1e-3, ell=1.0, rho=0.0, max_iter=10)
x, _ = algorithms.psd(x0, grad, hess, 1e-3, 1.0, 0.0, config=cfg)
assert np.all(np.isfinite(x))
45 changes: 45 additions & 0 deletions tests/test_functions_numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,51 @@ def test_rosenbrock_grad_hess_match(x: np.ndarray) -> None:
np.testing.assert_allclose(h(x), num_hess, rtol=1e-4, atol=1e-4)


def test_rosenbrock_hess_edge_case() -> None:
"""The Rosenbrock Hessian should handle one‑dimensional inputs."""
x = np.array([1.23])
h = functions.rosenbrock_hess(x)
assert h.shape == (1, 1)
assert np.isclose(h[0, 0], 200.0)


def test_rosenbrock_hess_large_dim_is_finite() -> None:
"""Evaluate numerical stability on a large random vector."""
rng = np.random.default_rng(0)
x = rng.standard_normal(200)
h = functions.rosenbrock_hess(x)
assert np.all(np.isfinite(h))


def test_rosenbrock_hess_cython_matches_python() -> None:
"""Cython and NumPy implementations should agree."""
try:
from psd._rosenbrock import rosenbrock_hess_fast
except Exception: # pragma: no cover - extension not built
pytest.skip("Cython extension not available")

rng = np.random.default_rng(1)
x = rng.standard_normal(10)
h_fast = rosenbrock_hess_fast(x)

# Re‑implement the Python version locally for comparison. This keeps
# the test independent of whether ``functions.rosenbrock_hess``
# dispatches to the Cython version.
d = len(x)
h_py = np.zeros((d, d))
if d > 1:
idx = np.arange(d - 1)
diag = 1200.0 * x[idx] ** 2 - 400.0 * x[idx + 1] + 2.0
h_py[idx, idx] = diag
h_py[idx + 1, idx + 1] += 200.0
off = -400.0 * x[idx]
h_py[idx, idx + 1] = off
h_py[idx + 1, idx] = off
else:
h_py[0, 0] = 200.0
np.testing.assert_allclose(h_fast, h_py, rtol=1e-12, atol=1e-12)


@settings(max_examples=20, deadline=None, derandomize=True)
@given(_vector_1d, st.integers(min_value=0, max_value=2**32 - 1))
@pytest.mark.fast
Expand Down