From be66f411305db03d20479270e2d9c5b3646a9adc Mon Sep 17 00:00:00 2001 From: Jakob Asslaender Date: Fri, 28 Jun 2024 11:50:34 -0400 Subject: [PATCH] increase accuracy of power iterations and remove flag `normalize_rho` --- src/FISTA.jl | 18 ++++++------------ src/OptISTA.jl | 17 ++++++----------- src/POGM.jl | 18 ++++++------------ src/RegularizedLeastSquares.jl | 2 +- src/Utils.jl | 10 +++++----- 5 files changed, 24 insertions(+), 41 deletions(-) diff --git a/src/FISTA.jl b/src/FISTA.jl index cb7586dc..ce14fff1 100644 --- a/src/FISTA.jl +++ b/src/FISTA.jl @@ -22,8 +22,8 @@ mutable struct FISTA{rT <: Real, vecT <: Union{AbstractVector{rT}, AbstractVecto end """ - FISTA(A; AHA=A'*A, reg=L1Regularization(zero(real(eltype(AHA)))), normalizeReg=NoNormalization(), rho=0.95, normalize_rho=true, theta=1, relTol=eps(real(eltype(AHA))), iterations=50, restart = :none, verbose = false) - FISTA( ; AHA=, reg=L1Regularization(zero(real(eltype(AHA)))), normalizeReg=NoNormalization(), rho=0.95, normalize_rho=true, theta=1, relTol=eps(real(eltype(AHA))), iterations=50, restart = :none, verbose = false) + FISTA(A; AHA=A'*A, reg=L1Regularization(zero(real(eltype(AHA)))), normalizeReg=NoNormalization(), iterations=50, verbose = false, rho = 0.95 / power_iterations(AHA), theta=1, relTol=eps(real(eltype(AHA))), restart = :none) + FISTA( ; AHA=, reg=L1Regularization(zero(real(eltype(AHA)))), normalizeReg=NoNormalization(), iterations=50, verbose = false, rho = 0.95 / power_iterations(AHA), theta=1, relTol=eps(real(eltype(AHA))), restart = :none) creates a `FISTA` object for the forward operator `A` or normal operator `AHA`. @@ -37,8 +37,7 @@ creates a `FISTA` object for the forward operator `A` or normal operator `AHA`. * `precon` - preconditionner for the internal CG algorithm * `reg::AbstractParameterizedRegularization` - regularization term; can also be a vector of regularization terms * `normalizeReg::AbstractRegularizationNormalization` - regularization normalization scheme; options are `NoNormalization()`, `MeasurementBasedNormalization()`, `SystemMatrixBasedNormalization()` -* `rho::Real` - step size for gradient step -* `normalize_rho::Bool` - normalize step size by the largest eigenvalue of `AHA` +* `rho::Real` - step size for gradient step; the default is `0.95 / max_eigenvalue` as determined with power iterations. * `theta::Real` - parameter for predictor-corrector step * `relTol::Real` - tolerance for stopping criterion * `iterations::Int` - maximum number of iterations @@ -53,13 +52,12 @@ function FISTA(A ; AHA = A'*A , reg = L1Regularization(zero(real(eltype(AHA)))) , normalizeReg = NoNormalization() - , rho = 0.95 - , normalize_rho = true + , iterations = 50 + , verbose = false + , rho = 0.95 / power_iterations(AHA; verbose) , theta = 1 , relTol = eps(real(eltype(AHA))) - , iterations = 50 , restart = :none - , verbose = false ) T = eltype(AHA) @@ -71,10 +69,6 @@ function FISTA(A res = similar(x) res[1] = Inf # avoid spurious convergence in first iterations - if normalize_rho - rho /= abs(power_iterations(AHA)) - end - # Prepare regularization terms reg = isa(reg, AbstractVector) ? reg : [reg] indices = findsinks(AbstractProjectionRegularization, reg) diff --git a/src/OptISTA.jl b/src/OptISTA.jl index ea0dbd65..eaf41e7c 100644 --- a/src/OptISTA.jl +++ b/src/OptISTA.jl @@ -27,8 +27,8 @@ mutable struct OptISTA{rT <: Real, vecT <: Union{AbstractVector{rT}, AbstractVec end """ - OptISTA(A; AHA=A'*A, reg=L1Regularization(zero(real(eltype(AHA)))), normalizeReg=NoNormalization(), rho=0.95, normalize_rho=true, theta=1, relTol=eps(real(eltype(AHA))), iterations=50, verbose = false) - OptISTA( ; AHA=, reg=L1Regularization(zero(real(eltype(AHA)))), normalizeReg=NoNormalization(), rho=0.95, normalize_rho=true, theta=1, relTol=eps(real(eltype(AHA))), iterations=50, verbose = false) + OptISTA(A; AHA=A'*A, reg=L1Regularization(zero(real(eltype(AHA)))), normalizeReg=NoNormalization(), iterations=50, verbose = false, rho=0.95 / power_iterations(AHA), theta=1, relTol=eps(real(eltype(AHA)))) + OptISTA( ; AHA=, reg=L1Regularization(zero(real(eltype(AHA)))), normalizeReg=NoNormalization(), iterations=50, verbose = false, rho=0.95 / power_iterations(AHA), theta=1, relTol=eps(real(eltype(AHA)))) creates a `OptISTA` object for the forward operator `A` or normal operator `AHA`. OptISTA has a 2x better worst-case bound than FISTA, but actual performance varies by application. It stores 2 extra intermediate variables the size of the image compared to FISTA. @@ -44,8 +44,7 @@ OR * `AHA` - normal operator is optional if `A` is supplied * `reg::AbstractParameterizedRegularization` - regularization term * `normalizeReg::AbstractRegularizationNormalization` - regularization normalization scheme; options are `NoNormalization()`, `MeasurementBasedNormalization()`, `SystemMatrixBasedNormalization()` -* `rho::Real` - step size for gradient step -* `normalize_rho::Bool` - normalize step size by the largest eigenvalue of `AHA` +* `rho::Real` - step size for gradient step; the default is `0.95 / max_eigenvalue` as determined with power iterations. * `theta::Real` - parameter for predictor-corrector step * `relTol::Real` - tolerance for stopping criterion * `iterations::Int` - maximum number of iterations @@ -59,12 +58,11 @@ function OptISTA(A ; AHA = A'*A , reg = L1Regularization(zero(real(eltype(AHA)))) , normalizeReg = NoNormalization() - , rho = 0.95 - , normalize_rho = true - , theta = 1 - , relTol = eps(real(eltype(AHA))) , iterations = 50 , verbose = false + , rho = 0.95 / power_iterations(AHA; verbose) + , theta = 1 + , relTol = eps(real(eltype(AHA))) ) T = eltype(AHA) @@ -78,9 +76,6 @@ function OptISTA(A res = similar(x) res[1] = Inf # avoid spurious convergence in first iterations - if normalize_rho - rho /= abs(power_iterations(AHA)) - end θn = 1 for _ = 1:(iterations-1) θn = (1 + sqrt(1 + 4 * θn^2)) / 2 diff --git a/src/POGM.jl b/src/POGM.jl index 5b86054e..e06f4e15 100644 --- a/src/POGM.jl +++ b/src/POGM.jl @@ -31,8 +31,8 @@ mutable struct POGM{rT<:Real,vecT<:Union{AbstractVector{rT},AbstractVector{Compl end """ - POGM(A; AHA = A'*A, reg = L1Regularization(zero(real(eltype(AHA)))), normalizeReg = NoNormalization(), rho = 0.95, normalize_rho = true, theta = 1, sigma_fac = 1, relTol = eps(real(eltype(AHA))), iterations = 50, restart = :none, verbose = false) - POGM( ; AHA = , reg = L1Regularization(zero(real(eltype(AHA)))), normalizeReg = NoNormalization(), rho = 0.95, normalize_rho = true, theta = 1, sigma_fac = 1, relTol = eps(real(eltype(AHA))), iterations = 50, restart = :none, verbose = false) + POGM(A; AHA = A'*A, reg = L1Regularization(zero(real(eltype(AHA)))), normalizeReg = NoNormalization(), iterations = 50, verbose = false, rho = 0.95 / power_iterations(AHA), theta = 1, sigma_fac = 1, relTol = eps(real(eltype(AHA))), restart = :none) + POGM( ; AHA = , reg = L1Regularization(zero(real(eltype(AHA)))), normalizeReg = NoNormalization(), iterations = 50, verbose = false, rho = 0.95 / power_iterations(AHA), theta = 1, sigma_fac = 1, relTol = eps(real(eltype(AHA))), restart = :none) Creates a `POGM` object for the forward operator `A` or normal operator `AHA`. POGM has a 2x better worst-case bound than FISTA, but actual performance varies by application. It stores 3 extra intermediate variables the size of the image compared to FISTA. Only gradient restart scheme is implemented for now. @@ -56,8 +56,7 @@ Creates a `POGM` object for the forward operator `A` or normal operator `AHA`. P * `AHA` - normal operator is optional if `A` is supplied * `reg::AbstractParameterizedRegularization` - regularization term * `normalizeReg::AbstractRegularizationNormalization` - regularization normalization scheme; options are `NoNormalization()`, `MeasurementBasedNormalization()`, `SystemMatrixBasedNormalization()` - * `rho::Real` - step size for gradient step - * `normalize_rho::Bool` - normalize step size by the largest eigenvalue of `AHA` + * `rho::Real` - step size for gradient step; the default is `0.95 / max_eigenvalue` as determined with power iterations. * `theta::Real` - parameter for predictor-corrector step * `sigma_fac::Real` - parameter for decreasing γ-momentum ∈ [0,1] * `relTol::Real` - tolerance for stopping criterion @@ -73,14 +72,13 @@ function POGM(A ; AHA = A'*A , reg = L1Regularization(zero(real(eltype(AHA)))) , normalizeReg = NoNormalization() - , rho = 0.95 - , normalize_rho = true + , iterations = 50 + , verbose = false + , rho = 0.95 / power_iterations(AHA; verbose) , theta = 1 , sigma_fac = 1 , relTol = eps(real(eltype(AHA))) - , iterations = 50 , restart = :none - , verbose = false ) T = eltype(AHA) @@ -95,10 +93,6 @@ function POGM(A res = similar(x) res[1] = Inf # avoid spurious convergence in first iterations - if normalize_rho - rho /= abs(power_iterations(AHA)) - end - reg = isa(reg, AbstractVector) ? reg : [reg] indices = findsinks(AbstractProjectionRegularization, reg) other = [reg[i] for i in indices] diff --git a/src/RegularizedLeastSquares.jl b/src/RegularizedLeastSquares.jl index 18402f4b..709df2c3 100644 --- a/src/RegularizedLeastSquares.jl +++ b/src/RegularizedLeastSquares.jl @@ -15,7 +15,7 @@ using StatsBase using LinearOperatorCollection using InteractiveUtils -export AbstractLinearSolver, createLinearSolver, init, deinit, solve!, linearSolverList, linearSolverListReal, applicableSolverList +export AbstractLinearSolver, createLinearSolver, init, deinit, solve!, linearSolverList, linearSolverListReal, applicableSolverList, power_iterations abstract type AbstractLinearSolver end diff --git a/src/Utils.jl b/src/Utils.jl index 8b4d29f7..98f317a6 100644 --- a/src/Utils.jl +++ b/src/Utils.jl @@ -242,7 +242,7 @@ function nrmsd(I,Ireco) end """ - power_iterations(AᴴA; rtol=1e-2, maxiter=30, verbose=false) + power_iterations(AᴴA; rtol=1e-3, maxiter=30, verbose=false) Power iterations to determine the maximum eigenvalue of a normal operator or square matrix. @@ -250,14 +250,14 @@ Power iterations to determine the maximum eigenvalue of a normal operator or squ * `AᴴA` - operator or matrix; has to be square # Keyword Arguments -* `rtol=1e-2` - relative tolerance; function terminates if the change of the max. eigenvalue is smaller than this values +* `rtol=1e-3` - relative tolerance; function terminates if the change of the max. eigenvalue is smaller than this values * `maxiter=30` - maximum number of power iterations * `verbose=false` - print maximum eigenvalue if `true` # Output maximum eigenvalue of the operator """ -function power_iterations(AᴴA; rtol=1e-2, maxiter=30, verbose=false) +function power_iterations(AᴴA; rtol=1e-3, maxiter=30, verbose=false) b = randn(eltype(AᴴA), size(AᴴA,2)) bᵒˡᵈ = similar(b) λ = Inf @@ -273,8 +273,8 @@ function power_iterations(AᴴA; rtol=1e-2, maxiter=30, verbose=false) mul!(b, AᴴA, bᵒˡᵈ) λᵒˡᵈ = λ - λ = (bᵒˡᵈ' * b) / (bᵒˡᵈ' * bᵒˡᵈ) - verbose && println("iter = $i; λ = $λ") + λ = abs(bᵒˡᵈ' * b) # λ is real-valued for Hermitian matrices + verbose && println("iter = $i; λ = $λ; abs(λ/λᵒˡᵈ - 1) = $(abs(λ/λᵒˡᵈ - 1))