Skip to content
25 changes: 25 additions & 0 deletions config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Maximum wall-clock time allowed per solver run, in seconds.
timeout: 300

objective:
- L1-regularized Quantile Regression[fit_intercept=True,reg=0.05,quantile=0.2]

dataset: # datasets to run the benchmark on
# - simulated
- simulated[n_samples=100, n_features=1000, rho=0.6]
- simulated[n_samples=1000, n_features=100, rho=0.6]
- simulated[n_samples=1000, n_features=1000, rho=0.6]
# - MEG
- finance # sparse dataset

solver: # list of solvers to benchmark
# - scipy-linprog # does not take sparse datasets
- sklearn-QuantileRegressor # works in all combinations (delete later)
- skglm-SmoothQuantileRegressor # works in all combinations (delete later)
- asgl-QuantileRegressor # does not support sparse datasets
- statsmodels-qr # does not support sparse datasets, would likely work with a lot of RAM



# note: use more repetitions to have quantiles on the plot
n-repetitions: 1
max-runs: 20
26 changes: 26 additions & 0 deletions datasets/finance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from benchopt import BaseDataset
from benchopt import safe_import_context

with safe_import_context() as import_ctx:
from libsvmdata import fetch_libsvm


class Dataset(BaseDataset):
    """Finance sparse dataset fetched through libsvmdata."""

    name = "finance"
    install_cmd = "pip"
    requirements = ["libsvmdata"]

    @staticmethod
    def _load_finance_data():
        # The libsvmdata "finance" entry corresponds to E2006-log1p.
        return fetch_libsvm("finance")

    def get_data(self):
        """Return the design matrix and target, caching them on first use."""
        # Download/parse only once per instance; reuse cached arrays after.
        if not (hasattr(self, "X") and hasattr(self, "y")):
            self.X, self.y = self._load_finance_data()
        return dict(X=self.X, y=self.y)
16 changes: 9 additions & 7 deletions objective.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,17 @@ def _get_lambda_max(self, X, y):
# optimality condition for w = 0.
# for all g in subdiff pinball(y), g must be in subdiff ||.||_1(0)
# hint: consider max(x, 0) = (x + |x|) / 2 to compute subdiff pinball
subdiff_zero = np.sign(y)/2 + (self.quantile - 1/2)
lmbd_max = norm(X.T @ subdiff_zero, ord=np.inf) / len(y)

# intercept is equivalent to adding a column of ones in X
if self.fit_intercept:
lmbd_max = max(
lmbd_max,
np.mean(subdiff_zero)
)
# Optimal intercept when beta=0 is the tau-quantile of y
beta_0_star = np.quantile(y, self.quantile)
residuals = y - beta_0_star
# Compute subdifferential at these residuals
subdiff_zero = np.sign(residuals)/2 + (self.quantile - 1/2)
else:
# No intercept case
subdiff_zero = np.sign(y)/2 + (self.quantile - 1/2)
lmbd_max = norm(X.T @ subdiff_zero, ord=np.inf) / len(y)

return lmbd_max

Expand Down
60 changes: 60 additions & 0 deletions solvers/asgl_qr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from benchopt import BaseSolver, safe_import_context
import numpy as np
import warnings

with safe_import_context() as import_ctx:
from asgl import Regressor


class Solver(BaseSolver):
    """Quantile regression solver using asgl."""

    name = 'asgl-QuantileRegressor'

    install_cmd = 'pip'
    requirements = ["numpy", "scikit-learn", "asgl"]
    parameters = {
        "penalization": ["lasso"],
    }
    sampling_strategy = 'tolerance'

    def set_objective(self, X, y, lmbd, quantile, fit_intercept):
        """Store the problem data handed over by the benchmark objective."""
        self.X, self.y = X, y
        self.lmbd = lmbd
        self.quantile = quantile
        self.fit_intercept = fit_intercept
        # Ensure result attributes exist even if run() never executes
        # in this session (e.g. benchopt cache hit).
        self.coef_ = None
        self.intercept_ = 0.0

    def run(self, tol):
        """Fit the asgl quantile regressor down to tolerance ``tol``."""
        # asgl names the L1 penalty strength `lambda1`.
        reg = Regressor(
            model='qr',
            penalization=self.penalization,
            lambda1=self.lmbd,
            quantile=self.quantile,
            fit_intercept=self.fit_intercept,
            # floor the tolerance so the solver does not stall
            tol=max(tol, 1e-4),
        )
        # Silence solver warnings locally instead of permanently mutating
        # the process-wide warning filters (the previous
        # warnings.filterwarnings('ignore') leaked to all later code).
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            reg.fit(self.X, self.y)
        self.coef_ = reg.coef_
        if self.fit_intercept:
            self.intercept_ = reg.intercept_

    def get_result(self):
        """Return fitted parameters, appending the intercept when fitted."""
        if getattr(self, "coef_", None) is None:
            # run() never executed: fall back to the null model; with an
            # intercept its optimum is the tau-quantile of y.
            self.coef_ = np.zeros(self.X.shape[1])
            self.intercept_ = (
                np.quantile(self.y, self.quantile)
                if self.fit_intercept else 0.0
            )
        if self.fit_intercept:
            params = np.concatenate((self.coef_, [self.intercept_]))
        else:
            params = self.coef_
        return dict(params=params)

    def skip(self, X, y, lmbd, quantile, fit_intercept):
        """Skip only when the design matrix is scipy sparse."""
        if hasattr(X, "tocoo"):
            return True, "asgl does not accept sparse design matrices."
        return False, None
5 changes: 5 additions & 0 deletions solvers/scipy-linprog.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,11 @@ class Solver(BaseSolver):
}
stop_strategy = 'tolerance'

def skip(self, X, y, lmbd, quantile, fit_intercept):
    """Decline sparse design matrices, which scipy linprog cannot handle."""
    is_sparse = hasattr(X, "tocoo")
    if is_sparse:
        return True, "scipy linprog does not support sparse input."
    return False, None

def set_objective(self, X, y, lmbd, quantile, fit_intercept):
self.X, self.y, self.lmbd, self.quantile = X, y, lmbd, quantile
self.fit_intercept = fit_intercept
Expand Down
65 changes: 65 additions & 0 deletions solvers/skglm_sqr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# solvers/solver_skglm_quantile_huber.py

from benchopt import BaseSolver, safe_import_context
import numpy as np
import warnings

with safe_import_context() as import_ctx:
from skglm.experimental.quantile_huber import SmoothQuantileRegressor


class Solver(BaseSolver):
    """Smooth quantile regression solver using skglm.

    Uses skglm's experimental ``SmoothQuantileRegressor``, which
    presumably replaces the pinball loss by a smoothed surrogate
    controlled by the ``delta_*`` parameters — confirm against skglm docs.
    """

    name = 'skglm-SmoothQuantileRegressor'

    install_cmd = 'conda'
    requirements = ["numpy", "scikit-learn", "numba", "skglm"]
    parameters = {}
    sampling_strategy = 'tolerance'

    def set_objective(self, X, y, lmbd, quantile, fit_intercept):
        """Store the problem data handed over by the benchmark objective."""
        self.X, self.y = X, y
        self.lmbd = lmbd
        self.quantile = quantile
        self.fit_intercept = fit_intercept
        # Ensure result attributes exist even if run() is skipped
        # (e.g. benchopt cache hit).
        self.coef_ = None
        self.intercept_ = 0.0

    def warm_up(self):
        # Trigger numba JIT compilation and other one-time setup so they
        # are not included in the benchmark timing.
        self.run(1)  # for sampling_strategy == 'tolerance'

    def run(self, tol):
        """Fit the smooth quantile regressor down to tolerance ``tol``."""
        est = SmoothQuantileRegressor(
            quantile=self.quantile,
            alpha=self.lmbd,
            delta_init=1,
            delta_final=0.0001,
            n_deltas=5,
            max_iter=500,
            # floor the tolerance so the solver does not stall
            tol=max(tol, 1e-4),
            verbose=False,
            fit_intercept=self.fit_intercept,
        )
        # Silence warnings locally instead of permanently mutating the
        # process-wide warning filters (the previous
        # warnings.filterwarnings('ignore') leaked to all later code).
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            est.fit(self.X, self.y)

        self.coef_ = est.coef_
        if self.fit_intercept:
            self.intercept_ = est.intercept_

    def get_result(self):
        """Return fitted parameters, appending the intercept when fitted."""
        # Fallback when run() never executed in this Python session.
        if getattr(self, "coef_", None) is None:
            # Null model: zero coefficients; with an intercept, its
            # optimum is the tau-quantile of y.
            self.coef_ = np.zeros(self.X.shape[1])
            self.intercept_ = (
                np.quantile(self.y, self.quantile)
                if self.fit_intercept else 0.0
            )

        if self.fit_intercept:
            params = np.concatenate((self.coef_, [self.intercept_]))
        else:
            params = self.coef_
        return dict(params=params)
61 changes: 61 additions & 0 deletions solvers/sklearn_quantile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from benchopt import BaseSolver, safe_import_context
import numpy as np

with safe_import_context() as import_ctx:
from sklearn.linear_model import QuantileRegressor


class Solver(BaseSolver):
    """Quantile regression solver using scikit-learn's QuantileRegressor."""

    name = 'sklearn-QuantileRegressor'

    install_cmd = 'conda'
    requirements = ['numpy', 'scikit-learn']
    parameters = {
        # To benchmark different solvers, uncomment the following line:
        # 'solver': ['highs-ds', 'highs-ipm', 'highs',
        #            'interior-point', 'revised simplex'],
    }
    sampling_strategy = 'tolerance'

    def set_objective(self, X, y, lmbd, quantile, fit_intercept):
        """Store the problem data handed over by the benchmark objective."""
        self.X, self.y = X, y
        self.lmbd = lmbd
        self.quantile = quantile
        self.fit_intercept = fit_intercept
        # Make sure result attributes exist even when run() is skipped
        # (e.g. benchopt cache hit).
        self.coef_ = None
        self.intercept_ = 0.0

    def run(self, tol):
        """Fit sklearn's QuantileRegressor at tolerance ``tol``."""
        effective_tol = max(tol, 1e-4)
        highs_options = {
            "time_limit": 300,  # 5 minutes
            "dual_feasibility_tolerance": effective_tol,
        }
        estimator = QuantileRegressor(
            quantile=self.quantile,
            alpha=self.lmbd,
            fit_intercept=self.fit_intercept,
            solver="highs",
            solver_options=highs_options,
        )
        estimator.fit(self.X, self.y)
        if estimator.coef_ is None:  # HiGHS aborted
            raise RuntimeError(
                "HiGHS time-limit reached before a feasible solution was found.")
        self.coef_ = estimator.coef_
        self.intercept_ = estimator.intercept_ if self.fit_intercept else 0.0

    def get_result(self):
        """Return fitted parameters, appending the intercept when fitted."""
        # Fallback when run() never executed in this Python session.
        if getattr(self, "coef_", None) is None:
            self.coef_ = np.zeros(self.X.shape[1])
            self.intercept_ = (
                np.quantile(self.y, self.quantile)
                if self.fit_intercept else 0.0
            )

        params = self.coef_
        if self.fit_intercept:
            params = np.concatenate((params, [self.intercept_]))
        return dict(params=params)
44 changes: 44 additions & 0 deletions solvers/statsmodels_qr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from benchopt import BaseSolver, safe_import_context

with safe_import_context() as import_ctx:
import numpy as np
import statsmodels.api as sm


class Solver(BaseSolver):
    """Quantile regression via statsmodels' QuantReg.

    NOTE(review): the L1 penalty ``lmbd`` is accepted but never used —
    QuantReg solves the unpenalized problem; confirm this is intended.
    NOTE(review): ``sm.add_constant`` prepends the intercept column, so
    ``params`` is ordered [intercept, coefs...], unlike solvers that
    append the intercept last — verify against the objective's convention.
    """

    name = "statsmodels-qr"
    install_cmd = 'conda'
    requirements = ['statsmodels', 'pandas']
    sampling_strategy = 'iteration'

    def skip(self, X, y, lmbd, quantile, fit_intercept):
        """Skip only when the design matrix is scipy sparse."""
        if hasattr(X, "tocoo"):
            # Fixed copy-paste: this message previously named asgl.
            return True, "statsmodels does not accept sparse design matrices."
        return False, None

    def set_objective(self, X, y, lmbd, quantile, fit_intercept):
        """Build the QuantReg model once; run() only re-fits it."""
        self.X, self.y = X, y
        self.quantile = quantile
        self.fit_intercept = fit_intercept

        # An intercept is equivalent to an extra column of ones in X.
        if self.fit_intercept:
            self.X_fit = sm.add_constant(self.X)
        else:
            self.X_fit = self.X

        self.model = sm.QuantReg(self.y, self.X_fit)
        self.beta = np.zeros(self.X_fit.shape[1])

    def run(self, n_iter):
        """Fit with at most ``n_iter`` iterations (0 -> null model)."""
        if n_iter == 0:
            self.beta = np.zeros(self.X_fit.shape[1])
            return

        res = self.model.fit(q=self.quantile, max_iter=n_iter)
        self.beta = res.params

    def get_result(self):
        """Return the current parameter vector."""
        return dict(params=self.beta)