-
Notifications
You must be signed in to change notification settings - Fork 19
/
Optimization.jl
147 lines (130 loc) · 5.23 KB
/
Optimization.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# Returns the stereographic projection u(α) = (2v + (1-v²)n)/(1+v²), which
# involves the orthographic projection v = (1-nn̄)α. The input `n` must be
# normalized. When `α=0`, the output is `u=n`, and when `|α|→ ∞` the output is
# `u=-n`. In all cases, `|u|=1`.
function stereographic_projection(α, n)
@assert n'*n ≈ 1
v = α - n*(n'*α) # project out component parallel to `n`
v² = real(v'*v)
u = (2v + (1-v²)*n) / (1+v²) # stereographic projection
return u
end
# Calculate the vector-Jacobian-product x̄ du(α)/dα, where
# u(v) = (2v + (1-v²)n)/(1+v²), v(α,ᾱ)=Pα, and P=1-nn̄.
#
# From the chain rule for Wirtinger derivatives,
# du/dα = (du/dv) (dv/dα) + (du/dv̄) (dv̄/dα) = du/dv P.
#
# In the second step, we used
# dv/dα = P
# dv̄/dα = conj(dv/dᾱ) = 0.
#
# The remaining Jacobian matrix is
# du/dv = (2-2nv̄)/(1+v²) - 2(2v+(1-v²)n)/(1+v²)² v̄
# = c - c[(1+cb)n + cv]v̄,
# where b = (1-v²)/2 and c = 2/(1+v²).
#
# Using the above definitions, return:
# x̄ du/dα = x̄ du/dv P
#
@inline function vjp_stereographic_projection(x, α, n)
@assert n'*n ≈ 1
v = α - n*(n'*α)
v² = real(v'*v)
b = (1-v²)/2
c = 2/(1+v²)
# Perform dot products first to avoid constructing outer-product
x̄_dudv = c*x' - c * (x' * ((1+c*b)*n + c*v)) * v'
# Apply projection P=1-nn̄ on right
return x̄_dudv - (x̄_dudv * n) * n'
end
# Returns v such that u = (2v + (1-v²)n)/(1+v²) and v⋅n = 0
function inverse_stereographic_projection(u, n)
@assert u'*u ≈ 1
uperp = u - n*(n'*u)
uperp² = real(uperp' * uperp)
s = sign(u⋅n)
if isone(s) && uperp² < 1e-5
c = 1/2 + uperp²/8 + uperp²*uperp²/16
else
c = (1 - s * sqrt(max(1 - uperp², 0))) / uperp²
end
return c * uperp
end
function optim_set_spins!(sys::System{0}, αs, ns)
αs = reinterpret(reshape, Vec3, αs)
for site in eachsite(sys)
s = stereographic_projection(αs[site], ns[site])
set_dipole!(sys, s, site)
end
end
function optim_set_spins!(sys::System{N}, αs, ns) where N
αs = reinterpret(reshape, CVec{N}, αs)
for site in eachsite(sys)
Z = stereographic_projection(αs[site], ns[site])
set_coherent!(sys, Z, site)
end
end
function optim_set_gradient!(G, sys::System{0}, αs, ns)
(αs, G) = reinterpret.(reshape, Vec3, (αs, G))
set_energy_grad_dipoles!(G, sys.dipoles, sys) # G = dE/dS
@. G *= norm(sys.dipoles) # G = dE/dS * ds/du = dE/du
@. G = adjoint(vjp_stereographic_projection(G, αs, ns)) # G = dE/du du/dα = dE/dα
end
function optim_set_gradient!(G, sys::System{N}, αs, ns) where N
(αs, G) = reinterpret.(reshape, CVec{N}, (αs, G))
set_energy_grad_coherents!(G, sys.coherents, sys) # G = dE/dZ
@. G *= norm(sys.coherents) # G = dE/dZ * dZ/du = dE/du
@. G = adjoint(vjp_stereographic_projection(G, αs, ns)) # G = dE/du du/dα = dE/dα
end
"""
minimize_energy!(sys::System{N}; maxiters=1000, method=Optim.ConjugateGradient(),
g_tol=1e-10, kwargs...) where N
Optimizes the spin configuration in `sys` to minimize energy. A total of
`maxiters` iterations will be attempted. Convergence is reached when the root
mean squared energy gradient goes below `g_tol`. The remaining `kwargs` will be
forwarded to the `optimize` method of the Optim.jl package.
"""
function minimize_energy!(sys::System{N}; maxiters=1000, subiters=10, method=Optim.ConjugateGradient(),
g_tol=1e-10, kwargs...) where N
# Allocate buffers for optimization:
# - Each `ns[site]` defines a direction for stereographic projection.
# - Each `αs[:,site]` will be optimized in the space orthogonal to `ns[site]`.
if iszero(N)
ns = normalize.(sys.dipoles)
αs = zeros(Float64, 3, size(sys.dipoles)...)
else
ns = normalize.(sys.coherents)
αs = zeros(ComplexF64, N, size(sys.coherents)...)
end
# Functions to calculate energy and gradient for the state `αs`
function f(αs)
optim_set_spins!(sys, αs, ns)
return energy(sys)
end
function g!(G, αs)
optim_set_spins!(sys, αs, ns)
optim_set_gradient!(G, sys, αs, ns)
end
# Monitor energy gradient magnitude relative to absolute tolerance `g_tol`.
g_res = Inf
# Repeatedly optimize using a small number (`subiters`) of steps.
options = Optim.Options(; iterations=subiters, g_tol, kwargs...)
for iter in 1 : div(maxiters, subiters, RoundUp)
output = Optim.optimize(f, g!, αs, method, options)
g_res = Optim.g_residual(output)
@assert g_tol == Optim.g_tol(output)
# Exit if converged
if g_res ≤ g_tol
cnt = (iter-1)*subiters + output.iterations
return cnt
end
# Reset stereographic projection based on current state
ns .= normalize.(iszero(N) ? sys.dipoles : sys.coherents)
αs .*= 0
end
# res_str = number_to_simple_string(g_res; digits=2)
# tol_str = number_to_simple_string(g_tol; digits=2)
# @warn "Optimization failed to converge within $maxiters iterations ($res_str ≰ $tol_str)"
return -1
end