
Commit 56cb0e0

Merge pull request #162 from jump-dev/jg/ridgeex
Cleanup ridge auto tuning
2 parents dec0fca + 9e12b41 commit 56cb0e0

File tree

1 file changed (+64 −70 lines changed)

docs/src/examples/autotuning-ridge.jl

Lines changed: 64 additions & 70 deletions
@@ -22,105 +22,96 @@
 # computed with DiffOpt, we can perform a gradient descent on top of the inner model
 # to minimize the test loss.

-using DiffOpt
-using Statistics
-using OSQP
-using JuMP
-using Plots
-import Random
-using LinearAlgebra
-
+# This tutorial uses the following packages

-"""
-    R2(y_true, y_pred)
-
-Return the coefficient of determination R2 of the prediction in `[0,1]`.
-"""
-function R2(y_true, y_pred)
-    u = norm(y_pred - y_true)^2  # Regression sum of squares
-    v = norm(y_true .- mean(y_true))^2  # Total sum of squares
-    return 1 - u/v
-end
+using JuMP # The mathematical programming modelling language
+import DiffOpt # JuMP extension for differentiable optimization
+import OSQP # Optimization solver that handles quadratic programs
+import Plots # Graphing tool
+import LinearAlgebra: norm, dot
+import Random

 # ## Generating a noisy regression dataset

-function create_problem(N, D, noise)
-    w = 10 * randn(D)
-    X = 10 * randn(N, D)
-    y = X * w + noise * randn(N)
-    l = N ÷ 2  # test train split
-    return (X[1:l, :], X[l+1:N, :], y[1:l], y[l+1:N])
-end
-
 Random.seed!(42)

-X_train, X_test, y_train, y_test = create_problem(1000, 200, 50);
+N = 100
+D = 20
+noise = 5
+
+w_real = 10 * randn(D)
+X = 10 * randn(N, D)
+y = X * w_real + noise * randn(N)
+l = N ÷ 2  # test train split
+
+X_train = X[1:l, :]
+X_test = X[l+1:N, :]
+y_train = y[1:l]
+y_test = y[l+1:N];

 # ## Defining the regression problem

 # We implement the regularized regression problem as a function taking the problem data,
 # building a JuMP model and solving it.

-function fit_ridge(X, y, α, model = Model(() -> diff_optimizer(OSQP.Optimizer)))
+function fit_ridge(model, X, y, α)
     JuMP.empty!(model)
-    set_optimizer_attribute(model, MOI.Silent(), true)
+    set_silent(model)
     N, D = size(X)
     @variable(model, w[1:D])
     err_term = X * w - y
     @objective(
         model,
         Min,
-        1/(2 * N * D) * dot(err_term, err_term) + 1/(2 * D) * α * dot(w, w),
+        dot(err_term, err_term) / (2 * N * D) + α * dot(w, w) / (2 * D),
     )
     optimize!(model)
-    if termination_status(model) != MOI.OPTIMAL
-        error("Unexpected status: $(termination_status(model))")
-    end
-    regularized_loss_value = objective_value(model)
-    training_loss_value = 1/(2 * N * D) * JuMP.value(dot(err_term, err_term))
-    return model, w, value.(w), training_loss_value, regularized_loss_value
+    @assert termination_status(model) == MOI.OPTIMAL
+    return w
 end

 # We can solve the problem for several values of α
 # to visualize the effect of regularization on the testing and training loss.

-αs = 0.0:0.01:0.5
-Rs = Float64[]
+αs = 0.05:0.01:0.35
 mse_test = Float64[]
 mse_train = Float64[]
-model = JuMP.Model(() -> diff_optimizer(OSQP.Optimizer))
+model = Model(() -> DiffOpt.diff_optimizer(OSQP.Optimizer))
 (Ntest, D) = size(X_test)
+(Ntrain, D) = size(X_train)
 for α in αs
-    _, _, w_train, training_loss_value, _ = fit_ridge(X_train, y_train, α, model)
-    y_pred = X_test * w_train
-    push!(Rs, R2(y_test, y_pred))
-    push!(mse_test, dot(y_pred - y_test, y_pred - y_test) / (2 * Ntest * D))
-    push!(mse_train, training_loss_value)
+    w = fit_ridge(model, X_train, y_train, α)
+    ŵ = value.(w)
+    ŷ_test = X_test * ŵ
+    ŷ_train = X_train * ŵ
+    push!(mse_test, norm(ŷ_test - y_test)^2 / (2 * Ntest * D))
+    push!(mse_train, norm(ŷ_train - y_train)^2 / (2 * Ntrain * D))
 end

-# Visualize the R2 correlation metric
-
-plot(αs, Rs, label=nothing, xaxis="α", yaxis="R2")
-title!("Test coefficient of determination R2")
-
 # Visualize the Mean Score Error metric

-plot(αs, mse_test ./ sum(mse_test), label="MSE test", xaxis = "α", yaxis="MSE", legend=(0.8,0.2))
-plot!(αs, mse_train ./ sum(mse_train), label="MSE train")
-title!("Normalized MSE on training and testing sets")
+Plots.plot(
+    αs, mse_test ./ sum(mse_test),
+    label="MSE test", xaxis = "α", yaxis="MSE", legend=(0.8, 0.2)
+)
+Plots.plot!(
+    αs, mse_train ./ sum(mse_train),
+    label="MSE train"
+)
+Plots.title!("Normalized MSE on training and testing sets")

 # ## Leveraging differentiable optimization: computing the derivative of the solution

 # Using DiffOpt, we can compute `∂w_i/∂α`, the derivative of the learned solution `ŵ`
 # w.r.t. the regularization parameter.

-function compute_dw_dparam(model, w)
+function compute_dw_dα(model, w)
     D = length(w)
     dw_dα = zeros(D)
     MOI.set(
         model,
         DiffOpt.ForwardInObjective(),
-        1/2 * dot(w, w) / D,
+        dot(w, w) / (2 * D),
     )
     DiffOpt.forward(model)
     for i in 1:D
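For reference, the `@objective` introduced in the hunk above corresponds to the scaled ridge-regression problem, restated here in math form with N samples, D features, and regularization weight α:

    \min_{w} \; \frac{1}{2 N D} \lVert X w - y \rVert^2 + \frac{\alpha}{2 D} \lVert w \rVert^2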
@@ -133,13 +124,13 @@ function compute_dw_dparam(model, w)
     return dw_dα
 end

-# Using `∂w_i/∂α` computed with `compute_dw_dparam`,
+# Using `∂w_i/∂α` computed with `compute_dw_dα`,
 # we can compute the derivative of the test loss w.r.t. the parameter α
 # by composing derivatives.

 function d_testloss_dα(model, X_test, y_test, w, ŵ)
     N, D = size(X_test)
-    dw_dα = compute_dw_dparam(model, w)
+    dw_dα = compute_dw_dα(model, w)
     err_term = X_test * ŵ - y_test
     return sum(eachindex(err_term)) do i
         dot(X_test[i,:], dw_dα) * err_term[i]
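As the comment in this hunk says, `d_testloss_dα` composes derivatives. Writing the test loss as L(α) = (1/(2 N D)) ‖X_test ŵ(α) − y_test‖², the chain rule gives

    \frac{\partial L}{\partial \alpha} = \frac{1}{N D} \sum_{i=1}^{N} \left( x_i^\top \hat{w} - y_i \right) x_i^\top \frac{\partial \hat{w}}{\partial \alpha}

which matches the sum accumulated in the `do` block; the 1/(N D) scaling is applied in lines of the function that fall outside this hunk, so that factor is assumed here.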
@@ -151,31 +142,34 @@ end

 function descent(α0, max_iters=100; fixed_step = 0.01, grad_tol=1e-3)
     α_s = Float64[]
-    test_loss_values = Float64[]
+    test_loss = Float64[]
     α = α0
-    model = JuMP.Model(() -> DiffOpt.diff_optimizer(OSQP.Optimizer))
+    N, D = size(X_test)
+    model = Model(() -> DiffOpt.diff_optimizer(OSQP.Optimizer))
     for iter in 1:max_iters
-        push!(α_s, α)
-        _, w, ŵ, _, = fit_ridge(X_train, y_train, α, model)
+        w = fit_ridge(model, X_train, y_train, α)
+        ŵ = value.(w)
         err_term = X_test * ŵ - y_test
-        test_loss = norm(err_term)^2 / (2 * length(X_test))
-        push!(
-            test_loss_values,
-            test_loss,
-        )
+        push!(α_s, α)
+        push!(test_loss, norm(err_term)^2 / (2 * N * D))
         ∂α = d_testloss_dα(model, X_test, y_test, w, ŵ)
         α -= fixed_step * ∂α
         if abs(∂α) ≤ grad_tol
             break
         end
     end
-    return α_s, test_loss_values
+    return α_s, test_loss
 end

-ᾱ, msē = descent(0.1, 500);
+ᾱ_l, msē_l = descent(0.10, 500);
+ᾱ_r, msē_r = descent(0.33, 500);

 # Visualize gradient descent and convergence

-plot(αs, mse_test, label="MSE test", xaxis = ("α"), legend=:topleft)
-plot!(ᾱ, msē, label="learned α", lw = 2)
-title!("Regularizer learning")
+Plots.plot(
+    αs, mse_test,
+    label="MSE test", xaxis = ("α"), legend=:topleft
+)
+Plots.plot!(ᾱ_l, msē_l, label="learned α, start = 0.10", lw = 2)
+Plots.plot!(ᾱ_r, msē_r, label="learned α, start = 0.33", lw = 2)
+Plots.title!("Regularizer learning")
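The `descent` routine above amounts to repeated fixed-step updates of α driven by the DiffOpt sensitivity. A minimal sketch of a single step, assuming the definitions from this file (`fit_ridge`, `d_testloss_dα`) and its imports are in scope:

model = Model(() -> DiffOpt.diff_optimizer(OSQP.Optimizer))
α = 0.10                                         # starting value, as in descent(0.10, 500)
w = fit_ridge(model, X_train, y_train, α)        # solve the inner ridge problem at this α
ŵ = value.(w)                                    # extract the fitted coefficients
∂α = d_testloss_dα(model, X_test, y_test, w, ŵ)  # test-loss sensitivity computed via DiffOpt
α -= 0.01 * ∂α                                   # one update with fixed_step = 0.01, as in descent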

0 commit comments
