Skip to content
25 changes: 25 additions & 0 deletions config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Maximum wall-clock time allowed per solver run, in seconds.
timeout: 300

objective:
- L1-regularized Quantile Regression[fit_intercept=True,reg=0.05,quantile=0.2]

dataset: # datasets to run the benchmark on
# - simulated
- simulated[n_samples=100, n_features=1000, rho=0.6]
- simulated[n_samples=1000, n_features=100, rho=0.6]
- simulated[n_samples=1000, n_features=1000, rho=0.6]
# - MEG
- finance # sparse dataset

solver: # list of solvers to benchmark
# - scipy-linprog # does not take sparse datasets
- sklearn-QuantileRegressor # works in all combinations (delete later)
- skglm-SmoothQuantileRegressor # works in all combinations (delete later)
- asgl-QuantileRegressor # does not support sparse datasets
- statsmodels-qr # does not support sparse datasets, would likely work with a lot of RAM



# note: use more repetitions to have quantiles on the plot
n-repetitions: 1
max-runs: 20
26 changes: 26 additions & 0 deletions datasets/finance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from benchopt import BaseDataset
from benchopt import safe_import_context

with safe_import_context() as import_ctx:
from libsvmdata import fetch_libsvm


class Dataset(BaseDataset):
    """Finance sparse dataset fetched through libsvmdata."""

    name = "finance"
    install_cmd = "pip"
    requirements = ["libsvmdata"]

    @staticmethod
    def _load_finance_data():
        # The libsvmdata "finance" entry corresponds to E2006-log1p.
        return fetch_libsvm("finance")

    def get_data(self):
        """Return the design matrix and target, caching them on first use."""
        # Download/parse only once per instance; reuse cached arrays after.
        if not (hasattr(self, "X") and hasattr(self, "y")):
            self.X, self.y = self._load_finance_data()
        return dict(X=self.X, y=self.y)
16 changes: 9 additions & 7 deletions objective.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,17 @@ def _get_lambda_max(self, X, y):
# optimality condition for w = 0.
# for all g in subdiff pinball(y), g must be in subdiff ||.||_1(0)
# hint: consider max(x, 0) = (x + |x|) / 2 to compute subdiff pinball
subdiff_zero = np.sign(y)/2 + (self.quantile - 1/2)
lmbd_max = norm(X.T @ subdiff_zero, ord=np.inf) / len(y)

# intercept is equivalent to adding a column of ones in X
if self.fit_intercept:
lmbd_max = max(
lmbd_max,
np.mean(subdiff_zero)
)
# Optimal intercept when beta=0 is the tau-quantile of y
beta_0_star = np.quantile(y, self.quantile)
residuals = y - beta_0_star
# Compute subdifferential at these residuals
subdiff_zero = np.sign(residuals)/2 + (self.quantile - 1/2)
else:
# No intercept case
subdiff_zero = np.sign(y)/2 + (self.quantile - 1/2)
lmbd_max = norm(X.T @ subdiff_zero, ord=np.inf) / len(y)

return lmbd_max

Expand Down
60 changes: 60 additions & 0 deletions solvers/asgl_qr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from benchopt import BaseSolver, safe_import_context
import numpy as np
import warnings

with safe_import_context() as import_ctx:
from asgl import Regressor


class Solver(BaseSolver):
    """Quantile regression solver using asgl."""

    name = 'asgl-QuantileRegressor'

    install_cmd = 'pip'
    requirements = ["numpy", "scikit-learn", "asgl"]
    parameters = {
        "penalization": ["lasso"],
    }
    sampling_strategy = 'tolerance'

    def set_objective(self, X, y, lmbd, quantile, fit_intercept):
        """Store the problem data handed over by the benchmark objective."""
        self.X, self.y = X, y
        self.lmbd = lmbd
        self.quantile = quantile
        self.fit_intercept = fit_intercept
        # Ensure result attributes exist even if run() never executes
        # in this session (e.g. benchopt cache hit).
        self.coef_ = None
        self.intercept_ = 0.0

    def run(self, tol):
        """Fit the asgl quantile regressor down to tolerance ``tol``."""
        # asgl names the L1 penalty strength `lambda1`.
        reg = Regressor(
            model='qr',
            penalization=self.penalization,
            lambda1=self.lmbd,
            quantile=self.quantile,
            fit_intercept=self.fit_intercept,
            # floor the tolerance so the solver does not stall
            tol=max(tol, 1e-4),
        )
        # Silence solver warnings locally instead of permanently mutating
        # the process-wide warning filters (the previous
        # warnings.filterwarnings('ignore') leaked to all later code).
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            reg.fit(self.X, self.y)
        self.coef_ = reg.coef_
        if self.fit_intercept:
            self.intercept_ = reg.intercept_

    def get_result(self):
        """Return fitted parameters, appending the intercept when fitted."""
        if getattr(self, "coef_", None) is None:
            # run() never executed: fall back to the null model; with an
            # intercept its optimum is the tau-quantile of y.
            self.coef_ = np.zeros(self.X.shape[1])
            self.intercept_ = (
                np.quantile(self.y, self.quantile)
                if self.fit_intercept else 0.0
            )
        if self.fit_intercept:
            params = np.concatenate((self.coef_, [self.intercept_]))
        else:
            params = self.coef_
        return dict(params=params)

    def skip(self, X, y, lmbd, quantile, fit_intercept):
        """Skip only when the design matrix is scipy sparse."""
        if hasattr(X, "tocoo"):
            return True, "asgl does not accept sparse design matrices."
        return False, None
5 changes: 5 additions & 0 deletions solvers/scipy-linprog.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,11 @@ class Solver(BaseSolver):
}
stop_strategy = 'tolerance'

def skip(self, X, y, lmbd, quantile, fit_intercept):
    """Decline sparse design matrices, which scipy linprog cannot handle."""
    is_sparse = hasattr(X, "tocoo")
    if is_sparse:
        return True, "scipy linprog does not support sparse input."
    return False, None

def set_objective(self, X, y, lmbd, quantile, fit_intercept):
self.X, self.y, self.lmbd, self.quantile = X, y, lmbd, quantile
self.fit_intercept = fit_intercept
Expand Down
65 changes: 65 additions & 0 deletions solvers/skglm_sqr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# solvers/solver_skglm_quantile_huber.py

from benchopt import BaseSolver, safe_import_context
import numpy as np
import warnings

with safe_import_context() as import_ctx:
from skglm.experimental.quantile_huber import SmoothQuantileRegressor


class Solver(BaseSolver):
    """Smooth quantile regression solver using skglm.

    Uses skglm's experimental ``SmoothQuantileRegressor``, which
    presumably replaces the pinball loss by a smoothed surrogate
    controlled by the ``delta_*`` parameters — confirm against skglm docs.
    """

    name = 'skglm-SmoothQuantileRegressor'

    install_cmd = 'conda'
    requirements = ["numpy", "scikit-learn", "numba", "skglm"]
    parameters = {}
    sampling_strategy = 'tolerance'

    def set_objective(self, X, y, lmbd, quantile, fit_intercept):
        """Store the problem data handed over by the benchmark objective."""
        self.X, self.y = X, y
        self.lmbd = lmbd
        self.quantile = quantile
        self.fit_intercept = fit_intercept
        # Ensure result attributes exist even if run() is skipped
        # (e.g. benchopt cache hit).
        self.coef_ = None
        self.intercept_ = 0.0

    def warm_up(self):
        # Trigger numba JIT compilation and other one-time setup so they
        # are not included in the benchmark timing.
        self.run(1)  # for sampling_strategy == 'tolerance'

    def run(self, tol):
        """Fit the smooth quantile regressor down to tolerance ``tol``."""
        est = SmoothQuantileRegressor(
            quantile=self.quantile,
            alpha=self.lmbd,
            delta_init=1,
            delta_final=0.0001,
            n_deltas=5,
            max_iter=500,
            # floor the tolerance so the solver does not stall
            tol=max(tol, 1e-4),
            verbose=False,
            fit_intercept=self.fit_intercept,
        )
        # Silence warnings locally instead of permanently mutating the
        # process-wide warning filters (the previous
        # warnings.filterwarnings('ignore') leaked to all later code).
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            est.fit(self.X, self.y)

        self.coef_ = est.coef_
        if self.fit_intercept:
            self.intercept_ = est.intercept_

    def get_result(self):
        """Return fitted parameters, appending the intercept when fitted."""
        # Fallback when run() never executed in this Python session.
        if getattr(self, "coef_", None) is None:
            # Null model: zero coefficients; with an intercept, its
            # optimum is the tau-quantile of y.
            self.coef_ = np.zeros(self.X.shape[1])
            self.intercept_ = (
                np.quantile(self.y, self.quantile)
                if self.fit_intercept else 0.0
            )

        if self.fit_intercept:
            params = np.concatenate((self.coef_, [self.intercept_]))
        else:
            params = self.coef_
        return dict(params=params)
61 changes: 61 additions & 0 deletions solvers/sklearn_quantile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from benchopt import BaseSolver, safe_import_context
import numpy as np

with safe_import_context() as import_ctx:
from sklearn.linear_model import QuantileRegressor


class Solver(BaseSolver):
    """Quantile regression solver using scikit-learn's QuantileRegressor."""

    name = 'sklearn-QuantileRegressor'

    install_cmd = 'conda'
    requirements = ['numpy', 'scikit-learn']
    parameters = {
        # To benchmark different solvers, uncomment the following line:
        # 'solver': ['highs-ds', 'highs-ipm', 'highs',
        #            'interior-point', 'revised simplex'],
    }
    sampling_strategy = 'tolerance'

    def set_objective(self, X, y, lmbd, quantile, fit_intercept):
        """Store the problem data handed over by the benchmark objective."""
        self.X, self.y = X, y
        self.lmbd = lmbd
        self.quantile = quantile
        self.fit_intercept = fit_intercept
        # Make sure result attributes exist even when run() is skipped
        # (e.g. benchopt cache hit).
        self.coef_ = None
        self.intercept_ = 0.0

    def run(self, tol):
        """Fit sklearn's QuantileRegressor at tolerance ``tol``."""
        effective_tol = max(tol, 1e-4)
        highs_options = {
            "time_limit": 300,  # 5 minutes
            "dual_feasibility_tolerance": effective_tol,
        }
        estimator = QuantileRegressor(
            quantile=self.quantile,
            alpha=self.lmbd,
            fit_intercept=self.fit_intercept,
            solver="highs",
            solver_options=highs_options,
        )
        estimator.fit(self.X, self.y)
        if estimator.coef_ is None:  # HiGHS aborted
            raise RuntimeError(
                "HiGHS time-limit reached before a feasible solution was found.")
        self.coef_ = estimator.coef_
        self.intercept_ = estimator.intercept_ if self.fit_intercept else 0.0

    def get_result(self):
        """Return fitted parameters, appending the intercept when fitted."""
        # Fallback when run() never executed in this Python session.
        if getattr(self, "coef_", None) is None:
            self.coef_ = np.zeros(self.X.shape[1])
            self.intercept_ = (
                np.quantile(self.y, self.quantile)
                if self.fit_intercept else 0.0
            )

        params = self.coef_
        if self.fit_intercept:
            params = np.concatenate((params, [self.intercept_]))
        return dict(params=params)
44 changes: 44 additions & 0 deletions solvers/statsmodels_qr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from benchopt import BaseSolver, safe_import_context

with safe_import_context() as import_ctx:
import numpy as np
import statsmodels.api as sm


class Solver(BaseSolver):
    """Quantile regression via statsmodels' QuantReg.

    NOTE(review): the L1 penalty ``lmbd`` is accepted but never used —
    QuantReg solves the unpenalized problem; confirm this is intended.
    NOTE(review): ``sm.add_constant`` prepends the intercept column, so
    ``params`` is ordered [intercept, coefs...], unlike solvers that
    append the intercept last — verify against the objective's convention.
    """

    name = "statsmodels-qr"
    install_cmd = 'conda'
    requirements = ['statsmodels', 'pandas']
    sampling_strategy = 'iteration'

    def skip(self, X, y, lmbd, quantile, fit_intercept):
        """Skip only when the design matrix is scipy sparse."""
        if hasattr(X, "tocoo"):
            # Fixed copy-paste: this message previously named asgl.
            return True, "statsmodels does not accept sparse design matrices."
        return False, None

    def set_objective(self, X, y, lmbd, quantile, fit_intercept):
        """Build the QuantReg model once; run() only re-fits it."""
        self.X, self.y = X, y
        self.quantile = quantile
        self.fit_intercept = fit_intercept

        # An intercept is equivalent to an extra column of ones in X.
        if self.fit_intercept:
            self.X_fit = sm.add_constant(self.X)
        else:
            self.X_fit = self.X

        self.model = sm.QuantReg(self.y, self.X_fit)
        self.beta = np.zeros(self.X_fit.shape[1])

    def run(self, n_iter):
        """Fit with at most ``n_iter`` iterations (0 -> null model)."""
        if n_iter == 0:
            self.beta = np.zeros(self.X_fit.shape[1])
            return

        res = self.model.fit(q=self.quantile, max_iter=n_iter)
        self.beta = res.params

    def get_result(self):
        """Return the current parameter vector."""
        return dict(params=self.beta)