feat: gate optimization by dof multiplier (#116)

LouisLetcher · web-flow · commit c3c57cf84649 · 2026-02-24T22:32:39.000+01:00
## Summary Add a data‑sufficiency gate for parameter optimization that scales with degrees of freedom, to avoid overfitting on small samples. ## Changes Added `param_dof_multiplier` (default 100) and `param_min_bars` (default 2000) config options and updated example config. Optimization is skipped when bars < max(param_min_bars, param_dof_multiplier * n_params), with a log message. Breaking changes: None (defaults applied for older configs). ## How to Test `docker-compose run --rm app bash -lc "poetry run pytest -q tests/test_backtest_runner.py -k min_bars_and_dof_guard_behavior"` ### Optional Additional Tests (not implemented) - **DoF scaling with `n_params > 1`** Verify correct Degrees-of-Freedom scaling when optimizing across multiple grid dimensions. - **No search space (fixed parameters only)** Ensure the guard condition does **not** trigger when no parameter search space is defined. - **Boundary case: `len(df) == param_min_bars`** Confirm that no skip occurs when the dataset length is exactly equal to `param_min_bars`. - **`param_search="optuna"` behavior** Validate that skip / no-skip logic behaves identically to grid search. - **`stats["optimization"]` assignment** Ensure `stats["optimization"]` is set **only** in the skip case. ## Checklist (KISS) - [x] Pre-commit passes locally (`pre-commit run --all-files`) - [x] Tests added/updated where it makes sense (80% cov gate) - [x] Docs/README updated if needed - [x] No secrets committed; `.env` values are excluded - [x] Backward compatibility considered (configs, CLI flags) ## Related Issues/Links - Closes # - References #  --- > [!NOTE] > **Medium Risk** > Changes the optimization execution path and result metadata, which can alter which parameter sets are evaluated and persisted; mitigated by defaults and targeted tests covering skip/boundary behavior. > > **Overview** > Adds a *data-sufficiency guardrail* that **skips parameter optimization** when there aren’t enough bars for the number of parameters being searched. 
> > `BacktestRunner.run_all` now computes a minimum required history as `max(param_min_bars, param_dof_multiplier * n_params)` and, when unmet, logs a structured `optimization_skipped` event, runs only the fixed/default parameters (no grid/Optuna loop), and annotates result `stats` with an `optimization` block describing the skip. > > Configuration is extended with `param_dof_multiplier` and `param_min_bars` (defaults applied for older configs), README documents the new reliability behavior, and tests add coverage for skip/no-skip and boundary cases by asserting reduced eval counts and the presence/absence of `stats["optimization"]`. > > <sup>Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit aabcab7. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot).</sup>
diff --git a/README.md b/README.md
@@ -17,6 +17,7 @@ This repository provides a Docker-based, cache-aware backtesting system to syste
 - Dockerized runtime for reproducibility
 - Results cache (SQLite) to resume and skip already-computed grids
 - Structured logging and timing metrics per data fetch and grid search
+- Reliability guardrails: parameter optimization is skipped automatically (only the fixed/default parameters are run) when bar history is below `max(param_min_bars, param_dof_multiplier * n_params)`
 
 ## Requirements
 
diff --git a/src/backtest/runner.py b/src/backtest/runner.py
@@ -25,7 +25,7 @@
 from ..data.yfinance_source import YFinanceSource
 from ..strategies.base import BaseStrategy
 from ..strategies.registry import discover_external_strategies
-from ..utils.telemetry import get_logger, time_block
+from ..utils.telemetry import get_logger, log_json, time_block
 from .metrics import (
     omega_ratio,
     pain_index,
@@ -514,6 +514,29 @@ def run_all(self, only_cached: bool = False) -> list[BestResult]:
                 else:
                     search_space[name] = options
 
+            n_params = len(search_space)
+            dof_multiplier = self.cfg.param_dof_multiplier
+            min_bars_floor = self.cfg.param_min_bars
+            min_bars_for_optimization = max(min_bars_floor, dof_multiplier * n_params)
+            optimization_skip_reason = None
+            if search_space and len(df) < min_bars_for_optimization:
+                optimization_skip_reason = "insufficient_bars_for_optimization"
+                log_json(
+                    self.logger,
+                    "optimization_skipped",
+                    reason=optimization_skip_reason,
+                    collection=col.name,
+                    symbol=symbol,
+                    timeframe=timeframe,
+                    bars=len(df),
+                    min_bars=min_bars_for_optimization,
+                    n_params=n_params,
+                    dof_multiplier=dof_multiplier,
+                    min_bars_floor=min_bars_floor,
+                    strategy=strat.name,
+                    search_method=search_method,
+                )
+
             best_val = -np.inf
             best_params: dict[str, Any] | None = None
             best_stats: dict[str, Any] | None = None
@@ -623,6 +646,14 @@ def evaluate(
                 if sim_result is None:
                     return float("-inf")
                 returns, equity_curve, stats = sim_result
+                if optimization_skip_reason:
+                    stats = dict(stats)
+                    stats["optimization"] = {
+                        "skipped": True,
+                        "reason": optimization_skip_reason,
+                        "min_bars_required": min_bars_for_optimization,
+                        "bars_available": len(df_local),
+                    }
                 self.metrics["param_evals"] += 1
                 metric_val = self._evaluate_metric(
                     self.cfg.metric, returns, equity_curve, bars_per_year_local
@@ -651,7 +682,7 @@ def evaluate(
 
             space_items = list(search_space.items())
 
-            if search_space:
+            if search_space and not optimization_skip_reason:
                 if search_method == "optuna":
                     try:
                         import optuna
diff --git a/src/config.py b/src/config.py
@@ -50,6 +50,8 @@ class Config:
     engine: str = "pybroker"  # pybroker engine
     param_search: str = "grid"  # grid | optuna
     param_trials: int = 25
+    param_dof_multiplier: int = 100
+    param_min_bars: int = 2000
     max_workers: int = 1
     asset_workers: int = 1
     param_workers: int = 1
@@ -127,6 +129,8 @@ def load_config(path: str | Path) -> Config:
         engine=str(raw.get("engine", "pybroker")).lower(),
         param_search=str(raw.get("param_search", raw.get("param_optimizer", "grid"))).lower(),
         param_trials=int(raw.get("param_trials", raw.get("opt_trials", 25))),
+        param_dof_multiplier=int(raw.get("param_dof_multiplier", 100)),
+        param_min_bars=int(raw.get("param_min_bars", 2000)),
         max_workers=int(raw.get("max_workers", raw.get("asset_workers", 1))),
         asset_workers=int(raw.get("asset_workers", raw.get("max_workers", 1))),
         param_workers=int(raw.get("param_workers", 1)),
diff --git a/tests/test_backtest_runner.py b/tests/test_backtest_runner.py
@@ -597,3 +597,100 @@ def fetch(self, symbol, timeframe, only_cached=False):
     results = runner.run_all()
     assert results == []
     assert not runner.results_cache.saved
+
+
+def _make_ohlcv(periods: int) -> pd.DataFrame:
+    dates = pd.date_range("2024-01-01", periods=periods, freq="D")
+    return pd.DataFrame(
+        {
+            "Open": [10] * len(dates),
+            "High": [11] * len(dates),
+            "Low": [9] * len(dates),
+            "Close": [10.5] * len(dates),
+            "Volume": [100] * len(dates),
+        },
+        index=dates,
+    )
+
+
+def _patch_source_with_bars(monkeypatch, bars: int) -> None:
+    class _Source:
+        def fetch(self, symbol, timeframe, only_cached=False):
+            return _make_ohlcv(bars)
+
+    monkeypatch.setattr(BacktestRunner, "_make_source", lambda self, col: _Source())
+
+
+def _patch_pybroker_simulation(monkeypatch) -> dict[str, int]:
+    eval_calls = {"count": 0}
+
+    def _fake_sim(self, *args, **kwargs):
+        eval_calls["count"] += 1
+        returns = pd.Series(
+            [0.01, -0.005, 0.02],
+            index=pd.date_range("2024-01-01", periods=3, freq="D"),
+        )
+        equity = (1 + returns.fillna(0.0)).cumprod()
+        stats = {
+            "sharpe": 1.0,
+            "sortino": 0.8,
+            "omega": 1.2,
+            "tail_ratio": 1.1,
+            "profit": 0.1,
+            "pain_index": 0.02,
+            "trades": 2,
+            "max_drawdown": -0.05,
+            "cagr": 0.12,
+            "calmar": -2.4,
+            "equity_curve": [],
+            "drawdown_curve": [],
+            "trades_log": [],
+        }
+        return returns, equity, stats
+
+    monkeypatch.setattr(BacktestRunner, "_run_pybroker_simulation", _fake_sim)
+    return eval_calls
+
+
+@pytest.mark.parametrize(
+    ("param_dof_multiplier", "param_min_bars", "bars", "expected_eval_calls", "expect_skip"),
+    [
+        (100, 2000, 50, 1, True),  # skip via min-bars floor
+        (60, 1, 50, 1, True),  # skip via DoF threshold
+        (1, 1, 50, 2, False),  # no guard, full grid evals
+        (50, 1, 50, 2, False),  # boundary: len(df) == required => no skip
+    ],
+    ids=["min_bars_floor_skip", "dof_skip", "no_guard", "dof_boundary_no_skip"],
+)
+def test_run_all_min_bars_and_dof_guard_behavior(
+    tmp_path,
+    monkeypatch,
+    param_dof_multiplier,
+    param_min_bars,
+    bars,
+    expected_eval_calls,
+    expect_skip,
+):
+    runner = _make_runner(tmp_path, monkeypatch)
+    runner.cfg.param_dof_multiplier = param_dof_multiplier
+    runner.cfg.param_min_bars = param_min_bars
+    runner.cfg.param_search = "grid"
+
+    _patch_source_with_bars(monkeypatch, bars)
+    eval_calls = _patch_pybroker_simulation(monkeypatch)
+
+    results = runner.run_all()
+    assert results
+    assert eval_calls["count"] == expected_eval_calls
+
+    optimization = results[0].stats.get("optimization")
+    if expect_skip:
+        assert optimization is not None
+        assert optimization["skipped"] is True
+        assert optimization["reason"] == "insufficient_bars_for_optimization"
+        # search_space has one dimension (`window`) in _make_runner, so n_params=1.
+        expected_min_bars = max(runner.cfg.param_min_bars, runner.cfg.param_dof_multiplier * 1)
+        assert optimization["min_bars_required"] == expected_min_bars
+        assert optimization["bars_available"] == bars
+    else:
+        assert optimization is None