From be66f411305db03d20479270e2d9c5b3646a9adc Mon Sep 17 00:00:00 2001
From: Jakob Asslaender <jakob.asslaender@nyumc.org>
Date: Fri, 28 Jun 2024 11:50:34 -0400
Subject: [PATCH] increase accuracy of power iterations and remove flag
 `normalize_rho`

---
 src/FISTA.jl                   | 18 ++++++------------
 src/OptISTA.jl                 | 17 ++++++-----------
 src/POGM.jl                    | 18 ++++++------------
 src/RegularizedLeastSquares.jl |  2 +-
 src/Utils.jl                   | 10 +++++-----
 5 files changed, 24 insertions(+), 41 deletions(-)

diff --git a/src/FISTA.jl b/src/FISTA.jl
index cb7586dc..ce14fff1 100644
--- a/src/FISTA.jl
+++ b/src/FISTA.jl
@@ -22,8 +22,8 @@ mutable struct FISTA{rT <: Real, vecT <: Union{AbstractVector{rT}, AbstractVecto
 end
 
 """
-    FISTA(A; AHA=A'*A, reg=L1Regularization(zero(real(eltype(AHA)))), normalizeReg=NoNormalization(), rho=0.95, normalize_rho=true, theta=1, relTol=eps(real(eltype(AHA))), iterations=50, restart = :none, verbose = false)
-    FISTA( ; AHA=,     reg=L1Regularization(zero(real(eltype(AHA)))), normalizeReg=NoNormalization(), rho=0.95, normalize_rho=true, theta=1, relTol=eps(real(eltype(AHA))), iterations=50, restart = :none, verbose = false)
+    FISTA(A; AHA=A'*A, reg=L1Regularization(zero(real(eltype(AHA)))), normalizeReg=NoNormalization(), iterations=50, verbose = false, rho = 0.95 / power_iterations(AHA), theta=1, relTol=eps(real(eltype(AHA))), restart = :none)
+    FISTA( ; AHA=,     reg=L1Regularization(zero(real(eltype(AHA)))), normalizeReg=NoNormalization(), iterations=50, verbose = false, rho = 0.95 / power_iterations(AHA), theta=1, relTol=eps(real(eltype(AHA))), restart = :none)
 
 creates a `FISTA` object for the forward operator `A` or normal operator `AHA`.
 
@@ -37,8 +37,7 @@ creates a `FISTA` object for the forward operator `A` or normal operator `AHA`.
 * `precon`                                            - preconditionner for the internal CG algorithm
 * `reg::AbstractParameterizedRegularization`          - regularization term; can also be a vector of regularization terms
 * `normalizeReg::AbstractRegularizationNormalization` - regularization normalization scheme; options are `NoNormalization()`, `MeasurementBasedNormalization()`, `SystemMatrixBasedNormalization()`
-* `rho::Real`                                         - step size for gradient step
-* `normalize_rho::Bool`                               - normalize step size by the largest eigenvalue of `AHA`
+* `rho::Real`                                         - step size for gradient step; the default is `0.95 / max_eigenvalue` as determined with power iterations.
 * `theta::Real`                                       - parameter for predictor-corrector step
 * `relTol::Real`                                      - tolerance for stopping criterion
 * `iterations::Int`                                   - maximum number of iterations
@@ -53,13 +52,12 @@ function FISTA(A
              ; AHA = A'*A
              , reg = L1Regularization(zero(real(eltype(AHA))))
              , normalizeReg = NoNormalization()
-             , rho = 0.95
-             , normalize_rho = true
+             , iterations = 50
+             , verbose = false
+             , rho = 0.95 / power_iterations(AHA; verbose)
              , theta = 1
              , relTol = eps(real(eltype(AHA)))
-             , iterations = 50
              , restart = :none
-             , verbose = false
              )
 
   T  = eltype(AHA)
@@ -71,10 +69,6 @@ function FISTA(A
   res  = similar(x)
   res[1] = Inf # avoid spurious convergence in first iterations
 
-  if normalize_rho
-    rho /= abs(power_iterations(AHA))
-  end
-
   # Prepare regularization terms
   reg = isa(reg, AbstractVector) ? reg : [reg]
   indices = findsinks(AbstractProjectionRegularization, reg)
diff --git a/src/OptISTA.jl b/src/OptISTA.jl
index ea0dbd65..eaf41e7c 100644
--- a/src/OptISTA.jl
+++ b/src/OptISTA.jl
@@ -27,8 +27,8 @@ mutable struct OptISTA{rT <: Real, vecT <: Union{AbstractVector{rT}, AbstractVec
 end
 
 """
-    OptISTA(A; AHA=A'*A, reg=L1Regularization(zero(real(eltype(AHA)))), normalizeReg=NoNormalization(), rho=0.95, normalize_rho=true, theta=1, relTol=eps(real(eltype(AHA))), iterations=50, verbose = false)
-    OptISTA( ; AHA=,     reg=L1Regularization(zero(real(eltype(AHA)))), normalizeReg=NoNormalization(), rho=0.95, normalize_rho=true, theta=1, relTol=eps(real(eltype(AHA))), iterations=50, verbose = false)
+    OptISTA(A; AHA=A'*A, reg=L1Regularization(zero(real(eltype(AHA)))), normalizeReg=NoNormalization(), iterations=50, verbose = false, rho=0.95 / power_iterations(AHA), theta=1, relTol=eps(real(eltype(AHA))))
+    OptISTA( ; AHA=,     reg=L1Regularization(zero(real(eltype(AHA)))), normalizeReg=NoNormalization(), iterations=50, verbose = false, rho=0.95 / power_iterations(AHA), theta=1, relTol=eps(real(eltype(AHA))))
 
 creates a `OptISTA` object for the forward operator `A` or normal operator `AHA`. OptISTA has a 2x better worst-case bound than FISTA, but actual performance varies by application. It stores 2 extra intermediate variables the size of the image compared to FISTA.
 
@@ -44,8 +44,7 @@ OR
 * `AHA`                                               - normal operator is optional if `A` is supplied
 * `reg::AbstractParameterizedRegularization`          - regularization term
 * `normalizeReg::AbstractRegularizationNormalization` - regularization normalization scheme; options are `NoNormalization()`, `MeasurementBasedNormalization()`, `SystemMatrixBasedNormalization()`
-* `rho::Real`                                         - step size for gradient step
-* `normalize_rho::Bool`                               - normalize step size by the largest eigenvalue of `AHA`
+* `rho::Real`                                         - step size for gradient step; the default is `0.95 / max_eigenvalue` as determined with power iterations.
 * `theta::Real`                                       - parameter for predictor-corrector step
 * `relTol::Real`                                      - tolerance for stopping criterion
 * `iterations::Int`                                   - maximum number of iterations
@@ -59,12 +58,11 @@ function OptISTA(A
                ; AHA = A'*A
                , reg = L1Regularization(zero(real(eltype(AHA))))
                , normalizeReg = NoNormalization()
-               , rho = 0.95
-               , normalize_rho = true
-               , theta = 1
-               , relTol = eps(real(eltype(AHA)))
                , iterations = 50
                , verbose = false
+               , rho = 0.95 / power_iterations(AHA; verbose)
+               , theta = 1
+               , relTol = eps(real(eltype(AHA)))
                )
 
   T  = eltype(AHA)
@@ -78,9 +76,6 @@ function OptISTA(A
   res  = similar(x)
   res[1] = Inf # avoid spurious convergence in first iterations
 
-  if normalize_rho
-    rho /= abs(power_iterations(AHA))
-  end
   θn = 1
   for _ = 1:(iterations-1)
     θn = (1 + sqrt(1 + 4 * θn^2)) / 2
diff --git a/src/POGM.jl b/src/POGM.jl
index 5b86054e..e06f4e15 100644
--- a/src/POGM.jl
+++ b/src/POGM.jl
@@ -31,8 +31,8 @@ mutable struct POGM{rT<:Real,vecT<:Union{AbstractVector{rT},AbstractVector{Compl
 end
 
 """
-    POGM(A; AHA = A'*A, reg = L1Regularization(zero(real(eltype(AHA)))), normalizeReg = NoNormalization(), rho = 0.95, normalize_rho = true, theta = 1, sigma_fac = 1, relTol = eps(real(eltype(AHA))), iterations = 50, restart = :none, verbose = false)
-    POGM( ; AHA = ,     reg = L1Regularization(zero(real(eltype(AHA)))), normalizeReg = NoNormalization(), rho = 0.95, normalize_rho = true, theta = 1, sigma_fac = 1, relTol = eps(real(eltype(AHA))), iterations = 50, restart = :none, verbose = false)
+    POGM(A; AHA = A'*A, reg = L1Regularization(zero(real(eltype(AHA)))), normalizeReg = NoNormalization(), iterations = 50, verbose = false, rho = 0.95 / power_iterations(AHA), theta = 1, sigma_fac = 1, relTol = eps(real(eltype(AHA))), restart = :none)
+    POGM( ; AHA = ,     reg = L1Regularization(zero(real(eltype(AHA)))), normalizeReg = NoNormalization(), iterations = 50, verbose = false, rho = 0.95 / power_iterations(AHA), theta = 1, sigma_fac = 1, relTol = eps(real(eltype(AHA))), restart = :none)
 
 Creates a `POGM` object for the forward operator `A` or normal operator `AHA`. POGM has a 2x better worst-case bound than FISTA, but actual performance varies by application. It stores 3 extra intermediate variables the size of the image compared to FISTA. Only gradient restart scheme is implemented for now.
 
@@ -56,8 +56,7 @@ Creates a `POGM` object for the forward operator `A` or normal operator `AHA`. P
   * `AHA`                                               - normal operator is optional if `A` is supplied
   * `reg::AbstractParameterizedRegularization`          - regularization term
   * `normalizeReg::AbstractRegularizationNormalization` - regularization normalization scheme; options are `NoNormalization()`, `MeasurementBasedNormalization()`, `SystemMatrixBasedNormalization()`
-  * `rho::Real`                                         - step size for gradient step
-  * `normalize_rho::Bool`                               - normalize step size by the largest eigenvalue of `AHA`
+  * `rho::Real`                                         - step size for gradient step; the default is `0.95 / max_eigenvalue` as determined with power iterations.
   * `theta::Real`                                       - parameter for predictor-corrector step
   * `sigma_fac::Real`                                   - parameter for decreasing γ-momentum ∈ [0,1]
   * `relTol::Real`                                      - tolerance for stopping criterion
@@ -73,14 +72,13 @@ function POGM(A
             ; AHA = A'*A
             , reg = L1Regularization(zero(real(eltype(AHA))))
             , normalizeReg = NoNormalization()
-            , rho = 0.95
-            , normalize_rho = true
+            , iterations = 50
+            , verbose = false
+            , rho = 0.95 / power_iterations(AHA; verbose)
             , theta = 1
             , sigma_fac = 1
             , relTol = eps(real(eltype(AHA)))
-            , iterations = 50
             , restart = :none
-            , verbose = false
 )
 
   T = eltype(AHA)
@@ -95,10 +93,6 @@ function POGM(A
   res = similar(x)
   res[1] = Inf # avoid spurious convergence in first iterations
 
-  if normalize_rho
-    rho /= abs(power_iterations(AHA))
-  end
-
   reg = isa(reg, AbstractVector) ? reg : [reg]
   indices = findsinks(AbstractProjectionRegularization, reg)
   other = [reg[i] for i in indices]
diff --git a/src/RegularizedLeastSquares.jl b/src/RegularizedLeastSquares.jl
index 18402f4b..709df2c3 100644
--- a/src/RegularizedLeastSquares.jl
+++ b/src/RegularizedLeastSquares.jl
@@ -15,7 +15,7 @@ using StatsBase
 using LinearOperatorCollection
 using InteractiveUtils
 
-export AbstractLinearSolver, createLinearSolver, init, deinit, solve!, linearSolverList, linearSolverListReal, applicableSolverList
+export AbstractLinearSolver, createLinearSolver, init, deinit, solve!, linearSolverList, linearSolverListReal, applicableSolverList, power_iterations
 
 abstract type AbstractLinearSolver end
 
diff --git a/src/Utils.jl b/src/Utils.jl
index 8b4d29f7..98f317a6 100644
--- a/src/Utils.jl
+++ b/src/Utils.jl
@@ -242,7 +242,7 @@ function nrmsd(I,Ireco)
 end
 
 """
-    power_iterations(AᴴA; rtol=1e-2, maxiter=30, verbose=false)
+    power_iterations(AᴴA; rtol=1e-3, maxiter=30, verbose=false)
 
 Power iterations to determine the maximum eigenvalue of a normal operator or square matrix.
 
@@ -250,14 +250,14 @@ Power iterations to determine the maximum eigenvalue of a normal operator or squ
 * `AᴴA`                 - operator or matrix; has to be square
 
 # Keyword Arguments
-* `rtol=1e-2`           - relative tolerance; function terminates if the change of the max. eigenvalue is smaller than this values
+* `rtol=1e-3`           - relative tolerance; function terminates if the change of the max. eigenvalue is smaller than this values
 * `maxiter=30`          - maximum number of power iterations
 * `verbose=false`       - print maximum eigenvalue if `true`
 
 # Output
 maximum eigenvalue of the operator
 """
-function power_iterations(AᴴA; rtol=1e-2, maxiter=30, verbose=false)
+function power_iterations(AᴴA; rtol=1e-3, maxiter=30, verbose=false)
   b = randn(eltype(AᴴA), size(AᴴA,2))
   bᵒˡᵈ = similar(b)
   λ = Inf
@@ -273,8 +273,8 @@ function power_iterations(AᴴA; rtol=1e-2, maxiter=30, verbose=false)
     mul!(b, AᴴA, bᵒˡᵈ)
 
     λᵒˡᵈ = λ
-    λ = (bᵒˡᵈ' * b) / (bᵒˡᵈ' * bᵒˡᵈ)
-    verbose && println("iter = $i; λ = $λ")
+    λ = abs(bᵒˡᵈ' * b) # λ is real-valued for Hermitian matrices
+    verbose && println("iter = $i; λ = $λ; abs(λ/λᵒˡᵈ - 1) = $(abs(λ/λᵒˡᵈ - 1)) <? $rtol")
     abs(λ/λᵒˡᵈ - 1) < rtol && return λ
   end