Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
64a7df4
fit_intercept for cd solver (using sklearn _preprocess_data)
cassiofragadantas May 12, 2022
de13e7d
fit_intercept support for python_pgd solver
cassiofragadantas May 12, 2022
a003c8a
cosmit
cassiofragadantas May 12, 2022
169faa3
Explicitly excluding sparse case on fit intercept
cassiofragadantas May 13, 2022
4a6feda
fit_intercept for blitz solver (supports sparse data!)
cassiofragadantas May 13, 2022
eb98950
fit_intercept for skglm solver (only dense data)
cassiofragadantas May 13, 2022
eeb0143
fit_intercept for lightning solver (only for dense data)
cassiofragadantas May 17, 2022
d284962
fit_intercept for noncvx_pro solver (only for dense data for now)
cassiofragadantas May 17, 2022
ce00f0a
Merge branch 'main' into fit_intercept
cassiofragadantas May 17, 2022
2b5bf12
Fix linting
cassiofragadantas May 17, 2022
07f9cb1
Merge branch 'benchopt:main' into fit_intercept
cassiofragadantas May 18, 2022
3a36595
fit_intercept support on solver L-BFGS-B (only dense data for now)
cassiofragadantas May 18, 2022
a8cb44e
fit_intercept support for solver Julia-PGD (only dense for now)
cassiofragadantas May 18, 2022
2394eb2
fit_intercept support for r-pgd solver (only dense data for now)
cassiofragadantas May 19, 2022
75177bf
Adding sklearn requirements for fit_intercept support
cassiofragadantas May 31, 2022
6dca53c
Merge branch 'main' into fit_intercept
cassiofragadantas May 31, 2022
2b634fc
Removing sklearn dependency wherever possible. Handling fit_intercept…
cassiofragadantas Jul 20, 2022
fbf7f95
Linter
cassiofragadantas Jul 20, 2022
d81d5b7
Linter
cassiofragadantas Jul 20, 2022
fb38f68
removing scikit-learn dependency in python-pgd solver
cassiofragadantas Jul 20, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 8 additions & 9 deletions solvers/blitz.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

with safe_import_context() as import_ctx:
import blitzl1
import numpy as np


class Solver(BaseSolver):
Expand All @@ -20,16 +21,11 @@ class Solver(BaseSolver):
'vol. 37, pp. 1171-1179 (2015)'
]

def skip(self, X, y, lmbd, fit_intercept):
    """Report whether this solver must be skipped for the given problem.

    The solver forwards ``fit_intercept`` to ``blitzl1.set_use_intercept``
    and appends the fitted intercept in ``get_result``, so no configuration
    is rejected any more.

    Returns
    -------
    tuple
        ``(False, None)``: never skip, no reason to report.
    """
    return False, None

def set_objective(self, X, y, lmbd, fit_intercept):
    """Store the problem data and configure the blitzl1 problem.

    Parameters
    ----------
    X, y : design matrix and target passed on to ``blitzl1.LassoProblem``.
    lmbd : regularization strength, used later by ``run``.
    fit_intercept : whether blitzl1 should fit an intercept.
    """
    self.X, self.y, self.lmbd = X, y, lmbd
    self.fit_intercept = fit_intercept

    # Configure the intercept exactly once, from the requested setting.
    # A preceding hard-coded `set_use_intercept(False)` would be overridden
    # right away and only obscures which value is in effect.
    blitzl1.set_use_intercept(self.fit_intercept)
    blitzl1.set_tolerance(0)
    self.problem = blitzl1.LassoProblem(self.X, self.y)

Expand All @@ -39,7 +35,10 @@ def get_next(previous):
return previous + 1

def run(self, n_iter):
    """Solve the problem for at most ``n_iter`` iterations.

    The problem is solved a single time; the full solution object is kept
    in ``self.sol_`` so that both the coefficients (``.x``) and the
    intercept stay available to ``get_result``.
    """
    self.sol_ = self.problem.solve(self.lmbd, max_iter=n_iter)
    # Keep the historical attribute for any code still reading `coef_`.
    self.coef_ = self.sol_.x

def get_result(self):
    """Return the solution as a flat array.

    Returns
    -------
    numpy.ndarray
        The flattened coefficients ``self.sol_.x``; when ``fit_intercept``
        is set, the fitted intercept is appended as the last entry.
    """
    coef = self.sol_.x.flatten()
    if self.fit_intercept:
        # The intercept goes last, after all the coefficients.
        return np.r_[coef, self.sol_.intercept]
    return coef
22 changes: 18 additions & 4 deletions solvers/cd.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,24 @@ class Solver(BaseSolver):
]

def skip(self, X, y, lmbd, fit_intercept):
    """Report whether this solver must be skipped for the given problem.

    Only the combination of ``fit_intercept`` with a sparse ``X`` is
    rejected: the intercept is obtained by centering the data, which is
    implemented for dense arrays only.

    Returns
    -------
    tuple
        ``(True, reason)`` when the problem is skipped, ``(False, None)``
        otherwise.
    """
    # XXX - intercept not implemented for sparse X but it shouldn't be hard
    if fit_intercept and sparse.issparse(X):
        return (
            True,
            f"{self.name} doesn't handle fit_intercept with sparse data",
        )

    return False, None

def set_objective(self, X, y, lmbd, fit_intercept):
self.y, self.lmbd = y, lmbd
# Handling intercept: center y and X (dense data only)
if fit_intercept and not sparse.issparse(self.X):
self.X_offset = np.average(X, axis=0)
X -= self.X_offset
self.y_offset = np.average(y, axis=0)
y -= self.y_offset

self.y, self.lmbd, self.fit_intercept = y, lmbd, fit_intercept

if sparse.issparse(X):
self.X = X
Expand All @@ -66,6 +76,10 @@ def run(self, n_iter):
L = (self.X ** 2).sum(axis=0)
self.w = self.cd(self.X, self.y, self.lmbd, L, n_iter)

if self.fit_intercept and not sparse.issparse(self.X):
intercept = self.y_offset - self.X_offset @ self.w
self.w = np.r_[self.w, intercept]

@staticmethod
@njit
def cd(X, y, lmbd, L, n_iter):
Expand Down
23 changes: 20 additions & 3 deletions solvers/julia_pgd.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from benchopt.helpers.julia import assert_julia_installed

with safe_import_context() as import_ctx:
import numpy as np
from scipy.sparse import issparse
assert_julia_installed()


Expand All @@ -27,15 +29,26 @@ class Solver(JuliaSolver):
'algorithm for linear inverse problems", SIAM J. Imaging Sci., '
'vol. 2, no. 1, pp. 183-202 (2009)'
]
support_sparse = False

def skip(self, X, y, lmbd, fit_intercept):
    """Report whether this solver must be skipped for the given problem.

    Only the combination of ``fit_intercept`` with a sparse ``X`` is
    rejected: the intercept is obtained by centering the data, which is
    implemented for dense arrays only.

    Returns
    -------
    tuple
        ``(True, reason)`` when the problem is skipped, ``(False, None)``
        otherwise.
    """
    # XXX - fit intercept is not yet implemented in julia.jl for sparse X
    if fit_intercept and issparse(X):
        return (
            True,
            f"{self.name} doesn't handle fit_intercept with sparse data"
        )

    return False, None

def set_objective(self, X, y, lmbd, fit_intercept):
    """Store the problem data, centering it when an intercept is fitted.

    With ``fit_intercept`` and a dense ``X``, the column means of ``X`` and
    the mean of ``y`` are saved in ``self.X_offset`` / ``self.y_offset`` and
    removed from the data, so the intercept can be recovered afterwards.
    """
    # Handling intercept: center y and X (dense data only).
    # The check uses the argument X: self.X is not assigned yet at this
    # point, so reading it would fail on the first call.
    if fit_intercept and not issparse(X):
        # Center out-of-place: the arrays belong to the caller, and an
        # in-place update would make a later call compute zero offsets.
        self.X_offset = np.average(X, axis=0)
        X = X - self.X_offset
        self.y_offset = np.average(y, axis=0)
        y = y - self.y_offset

    self.X, self.y, self.lmbd = X, y, lmbd
    self.fit_intercept = fit_intercept

Expand All @@ -45,5 +58,9 @@ def set_objective(self, X, y, lmbd, fit_intercept):
def run(self, n_iter):
    """Run the Julia solver for ``n_iter`` iterations and store ``self.beta``.

    When an intercept was requested on dense data, it is rebuilt from the
    stored centering offsets and appended after the coefficients.
    """
    self.beta = self.solve_lasso(self.X, self.y, self.lmbd, n_iter)

    dense_intercept = self.fit_intercept and not issparse(self.X)
    if not dense_intercept:
        return

    # intercept = mean(y) - mean(X) @ beta, from the centering offsets
    offset = self.y_offset - self.X_offset @ self.beta
    self.beta = np.r_[self.beta.ravel(), offset]

def get_result(self):
    """Return the stored solution ``self.beta`` as a 1-D array."""
    flat_beta = self.beta.ravel()
    return flat_beta
21 changes: 18 additions & 3 deletions solvers/l_bfgs_b.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import numpy as np
from numpy.linalg import norm
from scipy.optimize import fmin_l_bfgs_b
from scipy.sparse import issparse


class Solver(BaseSolver):
Expand All @@ -28,13 +29,23 @@ class Solver(BaseSolver):
]

def skip(self, X, y, lmbd, fit_intercept):
    """Report whether this solver must be skipped for the given problem.

    Only the combination of ``fit_intercept`` with a sparse ``X`` is
    rejected: the intercept is obtained by centering the data, which is
    implemented for dense arrays only.

    Returns
    -------
    tuple
        ``(True, reason)`` when the problem is skipped, ``(False, None)``
        otherwise.
    """
    # XXX - intercept not implemented for sparse X for now
    if fit_intercept and issparse(X):
        return (
            True,
            f"{self.name} doesn't handle fit_intercept with sparse data"
        )

    return False, None

def set_objective(self, X, y, lmbd, fit_intercept):
    """Store the problem data, centering it when an intercept is fitted.

    With ``fit_intercept`` and a dense ``X``, the column means of ``X`` and
    the mean of ``y`` are saved in ``self.X_offset`` / ``self.y_offset`` and
    removed from the data, so the intercept can be recovered afterwards.
    """
    # Handling intercept: center y and X (dense data only).
    # The check uses the argument X: self.X is not assigned yet at this
    # point, so reading it would fail on the first call.
    if fit_intercept and not issparse(X):
        # Center out-of-place: the arrays belong to the caller, and an
        # in-place update would make a later call compute zero offsets.
        self.X_offset = np.average(X, axis=0)
        X = X - self.X_offset
        self.y_offset = np.average(y, axis=0)
        y = y - self.y_offset

    self.X, self.y, self.lmbd = X, y, lmbd
    self.fit_intercept = fit_intercept

Expand Down Expand Up @@ -65,5 +76,9 @@ def gradf(w):

self.w = w_hat

if self.fit_intercept and not issparse(self.X):
intercept = self.y_offset - self.X_offset @ self.w
self.w = np.r_[self.w, intercept]

def get_result(self):
    """Return the stored solution ``self.w`` flattened to one dimension."""
    flat_w = self.w.flatten()
    return flat_w
23 changes: 18 additions & 5 deletions solvers/lightning.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,10 @@

with safe_import_context() as import_ctx:
import numpy as np
from scipy import sparse
from lightning.regression import CDRegressor


# TODO: lightning always fit an intercept
# it is thus not optimizing the same cost function
class Solver(BaseSolver):
name = 'Lightning'

Expand All @@ -25,12 +24,24 @@ class Solver(BaseSolver):
]

def skip(self, X, y, lmbd, fit_intercept):
    """Report whether this solver must be skipped for the given problem.

    Only the combination of ``fit_intercept`` with a sparse ``X`` is
    rejected: the intercept is obtained by centering the data, which is
    implemented for dense arrays only.

    Returns
    -------
    tuple
        ``(True, reason)`` when the problem is skipped, ``(False, None)``
        otherwise.
    """
    if fit_intercept and sparse.issparse(X):
        return (
            True,
            f"{self.name} doesn't handle fit_intercept with sparse data",
        )

    return False, None

def set_objective(self, X, y, lmbd, fit_intercept):
    """Store the problem data, centering it when an intercept is fitted.

    With ``fit_intercept`` and a dense ``X``, the column means of ``X`` and
    the mean of ``y`` are saved in ``self.X_offset`` / ``self.y_offset`` and
    removed from the data, so the intercept can be recovered afterwards.
    """
    # lightning has an attribute intercept_ but it is not handled properly
    # (as it is simply set to zero). For this reason, we handle intercept
    # manually: center y and X beforehand (for dense data only).
    # The check uses the argument X: self.X is not assigned yet at this
    # point, so reading it would fail on the first call.
    if fit_intercept and not sparse.issparse(X):
        # Center out-of-place: the arrays belong to the caller, and an
        # in-place update would make a later call compute zero offsets.
        self.X_offset = np.average(X, axis=0)
        X = X - self.X_offset
        self.y_offset = np.average(y, axis=0)
        y = y - self.y_offset

    self.X, self.y, self.lmbd = X, y, lmbd
    self.fit_intercept = fit_intercept

Expand All @@ -45,6 +56,8 @@ def run(self, n_iter):

def get_result(self):
    """Return the fitted coefficients, plus the intercept when requested.

    Returns
    -------
    numpy.ndarray
        The flattened ``self.clf.coef_``; when ``fit_intercept`` is set, the
        intercept rebuilt from the centering offsets is appended last.
    """
    beta = self.clf.coef_.flatten()

    if self.fit_intercept:
        # Use the offsets stored when the data was centered rather than
        # `self.clf.intercept_`: appending the latter first would both add
        # a bogus entry and break the `X_offset @ beta` product below.
        intercept = self.y_offset - self.X_offset @ beta
        beta = np.r_[beta, intercept]
    return beta
26 changes: 24 additions & 2 deletions solvers/noncvx_pro.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,33 @@ class Solver(BaseSolver):
]

def set_objective(self, X, y, lmbd, fit_intercept):
    """Store the problem data, centering it when an intercept is fitted.

    With ``fit_intercept`` and a dense ``X``, the column means of ``X`` and
    the mean of ``y`` are saved in ``self.X_offset`` / ``self.y_offset`` and
    removed from the data, so the intercept can be recovered afterwards.
    When ``X`` has at least as many rows as columns, the matrix ``X.T @ X``
    is precomputed into ``self.C`` as a dense array.
    """
    # Handling intercept: center y and X (dense data only).
    # The check uses the argument X: self.X is not assigned yet at this
    # point, so reading it would fail on the first call.
    if fit_intercept and not issparse(X):
        # Center out-of-place: the arrays belong to the caller, and an
        # in-place update would make a later call compute zero offsets.
        self.X_offset = np.average(X, axis=0)
        X = X - self.X_offset
        self.y_offset = np.average(y, axis=0)
        y = y - self.y_offset

    self.X, self.y, self.lmbd = X, y, lmbd
    self.fit_intercept = fit_intercept

    if X.shape[0] >= X.shape[1]:
        self.C = X.T @ X
        if issparse(self.C):
            self.C = self.C.toarray()

def skip(self, X, y, lmbd, fit_intercept):
    """Report whether this solver must be skipped for the given problem.

    Sparse design matrices are always rejected. Dense problems are
    accepted with or without ``fit_intercept``.

    Returns
    -------
    tuple
        ``(True, reason)`` when the problem is skipped, ``(False, None)``
        otherwise.
    """
    # XXX: make this solver work with sparse matrices.
    if issparse(X):
        return True, f"{self.name} does not support sparse design matrices"
    # XXX: even if sparse support is added, the test below should be kept
    # unless fit_intercept is properly handled for sparse matrices
    # (by manually considering X_offset in calculations)
    if fit_intercept and issparse(X):
        return (
            True,
            f"{self.name} doesn't handle fit_intercept with sparse data"
        )
    return False, None

def run(self, n_iter):
Expand Down Expand Up @@ -80,5 +98,9 @@ def nabla_f(v):
v = lbfgs_res.x
self.w = v * u_opt(v)

if self.fit_intercept and not issparse(self.X):
intercept = self.y_offset - self.X_offset @ self.w
self.w = np.r_[self.w, intercept]

def get_result(self):
    """Return the stored solution ``self.w`` flattened to one dimension."""
    flat_w = self.w.flatten()
    return flat_w
25 changes: 20 additions & 5 deletions solvers/python_pgd.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,23 @@ class Solver(BaseSolver):
]

def skip(self, X, y, lmbd, fit_intercept):
    """Report whether this solver must be skipped for the given problem.

    Only the combination of ``fit_intercept`` with a sparse ``X`` is
    rejected: the intercept is obtained by centering the data, which is
    implemented for dense arrays only.

    Returns
    -------
    tuple
        ``(True, reason)`` when the problem is skipped, ``(False, None)``
        otherwise.
    """
    # XXX - intercept not implemented for sparse X but it shouldn't be hard
    if fit_intercept and sparse.issparse(X):
        return (
            True,
            f"{self.name} doesn't handle fit_intercept with sparse data",
        )

    return False, None

def set_objective(self, X, y, lmbd, fit_intercept):
    """Store the problem data, centering it when an intercept is fitted.

    With ``fit_intercept`` and a dense ``X``, the column means of ``X`` and
    the mean of ``y`` are saved in ``self.X_offset`` / ``self.y_offset`` and
    removed from the data, so the intercept can be recovered afterwards.
    """
    # Handling intercept: center y and X (dense data only).
    # The check uses the argument X: self.X is not assigned yet at this
    # point, so reading it would fail on the first call.
    if fit_intercept and not sparse.issparse(X):
        # Center out-of-place: the arrays belong to the caller, and an
        # in-place update would make a later call compute zero offsets.
        self.X_offset = np.average(X, axis=0)
        X = X - self.X_offset
        self.y_offset = np.average(y, axis=0)
        y = y - self.y_offset

    self.X, self.y, self.lmbd = X, y, lmbd
    self.fit_intercept = fit_intercept

Expand All @@ -40,8 +50,10 @@ def run(self, callback):
if self.use_acceleration:
z = np.zeros(n_features)

intercept = self.y_offset if self.fit_intercept else []

t_new = 1
while callback(w):
while callback(np.r_[w, intercept]):
if self.use_acceleration:
t_old = t_new
t_new = (1 + np.sqrt(1 + 4 * t_old ** 2)) / 2
Expand All @@ -53,7 +65,10 @@ def run(self, callback):
w -= self.X.T @ (self.X @ w - self.y) / L
w = self.st(w, self.lmbd / L)

self.w = w
if self.fit_intercept:
intercept = self.y_offset - self.X_offset @ w

self.w = np.r_[w, intercept]

def st(self, w, mu):
w -= np.clip(w, -mu, mu)
Expand Down
22 changes: 19 additions & 3 deletions solvers/r_pgd.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

with safe_import_context() as import_ctx:
import numpy as np
from scipy.sparse import issparse

from rpy2 import robjects
from rpy2.robjects import numpy2ri
Expand Down Expand Up @@ -34,12 +35,23 @@ class Solver(BaseSolver):
]

def skip(self, X, y, lmbd, fit_intercept):
    """Report whether this solver must be skipped for the given problem.

    Only the combination of ``fit_intercept`` with a sparse ``X`` is
    rejected: the intercept is obtained by centering the data, which is
    implemented for dense arrays only.

    Returns
    -------
    tuple
        ``(True, reason)`` when the problem is skipped, ``(False, None)``
        otherwise.
    """
    # rpy2 does not directly support sparse matrices (workaround exists)
    if fit_intercept and issparse(X):
        return (
            True,
            f"{self.name} doesn't handle fit_intercept with sparse data"
        )

    return False, None

def set_objective(self, X, y, lmbd, fit_intercept):
    """Store the problem data, centering it when an intercept is fitted.

    With ``fit_intercept`` and a dense ``X``, the column means of ``X`` and
    the mean of ``y`` are saved in ``self.X_offset`` / ``self.y_offset`` and
    removed from the data, so the intercept can be recovered afterwards.
    The R function ``proximal_gradient_descent`` is looked up and kept in
    ``self.r_pgd``.
    """
    # Handling intercept: center y and X (dense data only).
    # The check uses the argument X: self.X is not assigned yet at this
    # point, so reading it would fail on the first call.
    if fit_intercept and not issparse(X):
        # Center out-of-place: the arrays belong to the caller, and an
        # in-place update would make a later call compute zero offsets.
        self.X_offset = np.average(X, axis=0)
        X = X - self.X_offset
        self.y_offset = np.average(y, axis=0)
        y = y - self.y_offset

    self.X, self.y, self.lmbd = X, y, lmbd
    self.fit_intercept = fit_intercept
    self.r_pgd = robjects.r['proximal_gradient_descent']
Expand All @@ -52,4 +64,8 @@ def run(self, n_iter):
self.w = np.array(as_r(coefs, "vector"))

def get_result(self):
    """Return the fitted coefficients, plus the intercept when requested.

    Returns
    -------
    numpy.ndarray
        The flattened ``self.w``; when ``fit_intercept`` is set, the
        intercept rebuilt from the centering offsets is appended last.
    """
    w = self.w.flatten()
    if self.fit_intercept:
        # intercept = mean(y) - mean(X) @ w, from the centering offsets
        intercept = self.y_offset - self.X_offset @ w
        return np.r_[w, intercept]
    return w
Loading