Skip to content
This repository was archived by the owner on May 15, 2025. It is now read-only.

Commit 10ce0fa

Browse files
Merge pull request #25 from CCsimon123/main
Implementation of a Trust-Region solver
2 parents d6e4633 + 95b2f40 commit 10ce0fa

File tree

4 files changed

+281
-4
lines changed

4 files changed

+281
-4
lines changed

src/SimpleNonlinearSolve.jl

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ include("raphson.jl")
2121
include("ad.jl")
2222
include("broyden.jl")
2323
include("klement.jl")
24+
include("trustRegion.jl")
2425

2526
import SnoopPrecompile
2627

@@ -30,6 +31,10 @@ SnoopPrecompile.@precompile_all_calls begin for T in (Float32, Float64)
3031
solve(prob_no_brack, alg(), tol = T(1e-2))
3132
end
3233

34+
for alg in (TrustRegion(10.0),)
35+
solve(prob_no_brack, alg, tol = T(1e-2))
36+
end
37+
3338
#=
3439
for alg in (SimpleNewtonRaphson,)
3540
for u0 in ([1., 1.], StaticArraysCore.SA[1.0, 1.0])
@@ -47,6 +52,6 @@ SnoopPrecompile.@precompile_all_calls begin for T in (Float32, Float64)
4752
end end
4853

4954
# DiffEq styled algorithms
50-
export Bisection, Broyden, Falsi, Klement, SimpleNewtonRaphson
55+
export Bisection, Broyden, Falsi, Klement, SimpleNewtonRaphson, TrustRegion
5156

5257
end # module

src/trustRegion.jl

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
"""
2+
```julia
3+
TrustRegion(max_trust_radius::Number; chunk_size = Val{0}(),
4+
autodiff = Val{true}(), diff_type = Val{:forward})
5+
```
6+
7+
A low-overhead implementation of a
8+
[trust-region](https://optimization.mccormick.northwestern.edu/index.php/Trust-region_methods)
9+
solver
10+
11+
### Arguments
12+
- `max_trust_radius`: the maximum radius of the trust region. The step size in the algorithm
13+
will change dynamically. However, it will never be greater than the `max_trust_radius`.
14+
15+
### Keyword Arguments
16+
17+
- `chunk_size`: the chunk size used by the internal ForwardDiff.jl automatic differentiation
18+
system. This allows for multiple derivative columns to be computed simultaneously,
19+
improving performance. Defaults to `0`, which is equivalent to using ForwardDiff.jl's
20+
default chunk size mechanism. For more details, see the documentation for
21+
[ForwardDiff.jl](https://juliadiff.org/ForwardDiff.jl/stable/).
22+
- `autodiff`: whether to use forward-mode automatic differentiation for the Jacobian.
23+
Note that this argument is ignored if an analytical Jacobian is passed; as that will be
24+
used instead. Defaults to `Val{true}`, which means ForwardDiff.jl is used by default.
25+
If `Val{false}`, then FiniteDiff.jl is used for finite differencing.
26+
- `diff_type`: the type of finite differencing used if `autodiff = false`. Defaults to
27+
`Val{:forward}` for forward finite differences. For more details on the choices, see the
28+
[FiniteDiff.jl](https://github.com/JuliaDiff/FiniteDiff.jl) documentation.
29+
- `initial_trust_radius`: the initial trust region radius. Defaults to
30+
`max_trust_radius / 11`.
31+
- `step_threshold`: the threshold for taking a step. In every iteration, the threshold is
32+
compared with a value `r`, which is the actual reduction in the objective function divided
33+
by the predicted reduction. If `step_threshold > r` the model is not a good approximation,
34+
and the step is rejected. Defaults to `0.1`. For more details, see
35+
[Trust-region methods](https://optimization.mccormick.northwestern.edu/index.php/Trust-region_methods)
36+
- `shrink_threshold`: the threshold for shrinking the trust region radius. In every
37+
iteration, the threshold is compared with a value `r` which is the actual reduction in the
38+
objective function divided by the predicted reduction. If `shrink_threshold > r` the trust
39+
region radius is shrunk by `shrink_factor`. Defaults to `0.25`. For more details, see
40+
[Trust-region methods](https://optimization.mccormick.northwestern.edu/index.php/Trust-region_methods)
41+
- `expand_threshold`: the threshold for expanding the trust region radius. If a step is
42+
taken, i.e `step_threshold < r` (with `r` defined in `shrink_threshold`), a check is also
43+
made to see if `expand_threshold < r`. If that is true, the trust region radius is
44+
expanded by `expand_factor`. Defaults to `0.75`.
45+
- `shrink_factor`: the factor to shrink the trust region radius with if
46+
`shrink_threshold > r` (with `r` defined in `shrink_threshold`). Defaults to `0.25`.
47+
- `expand_factor`: the factor to expand the trust region radius with if
48+
`expand_threshold < r` (with `r` defined in `shrink_threshold`). Defaults to `2.0`.
49+
- `max_shrink_times`: the maximum number of times to shrink the trust region radius in a
50+
row, `max_shrink_times` is exceeded, the algorithm returns. Defaults to `32`.
51+
"""
52+
struct TrustRegion{CS, AD, FDT} <: AbstractNewtonAlgorithm{CS, AD, FDT}
53+
max_trust_radius::Number
54+
initial_trust_radius::Number
55+
step_threshold::Number
56+
shrink_threshold::Number
57+
expand_threshold::Number
58+
shrink_factor::Number
59+
expand_factor::Number
60+
max_shrink_times::Int
61+
function TrustRegion(max_trust_radius::Number; chunk_size = Val{0}(),
62+
autodiff = Val{true}(),
63+
diff_type = Val{:forward},
64+
initial_trust_radius::Number = max_trust_radius / 11,
65+
step_threshold::Number = 0.1,
66+
shrink_threshold::Number = 0.25,
67+
expand_threshold::Number = 0.75,
68+
shrink_factor::Number = 0.25,
69+
expand_factor::Number = 2.0,
70+
max_shrink_times::Int = 32)
71+
new{SciMLBase._unwrap_val(chunk_size), SciMLBase._unwrap_val(autodiff),
72+
SciMLBase._unwrap_val(diff_type)}(max_trust_radius, initial_trust_radius,
73+
step_threshold,
74+
shrink_threshold, expand_threshold,
75+
shrink_factor,
76+
expand_factor, max_shrink_times)
77+
end
78+
end
79+
80+
function SciMLBase.solve(prob::NonlinearProblem,
81+
alg::TrustRegion, args...; abstol = nothing,
82+
reltol = nothing,
83+
maxiters = 1000, kwargs...)
84+
f = Base.Fix2(prob.f, prob.p)
85+
x = float(prob.u0)
86+
T = typeof(x)
87+
Δₘₐₓ = float(alg.max_trust_radius)
88+
Δ = float(alg.initial_trust_radius)
89+
η₁ = float(alg.step_threshold)
90+
η₂ = float(alg.shrink_threshold)
91+
η₃ = float(alg.expand_threshold)
92+
t₁ = float(alg.shrink_factor)
93+
t₂ = float(alg.expand_factor)
94+
max_shrink_times = alg.max_shrink_times
95+
96+
if SciMLBase.isinplace(prob)
97+
error("TrustRegion currently only supports out-of-place nonlinear problems")
98+
end
99+
100+
atol = abstol !== nothing ? abstol :
101+
real(oneunit(eltype(T))) * (eps(real(one(eltype(T)))))^(4 // 5)
102+
rtol = reltol !== nothing ? reltol : eps(real(one(eltype(T))))^(4 // 5)
103+
104+
if alg_autodiff(alg)
105+
F, ∇f = value_derivative(f, x)
106+
elseif x isa AbstractArray
107+
F = f(x)
108+
∇f = FiniteDiff.finite_difference_jacobian(f, x, diff_type(alg), eltype(x), F)
109+
else
110+
F = f(x)
111+
∇f = FiniteDiff.finite_difference_derivative(f, x, diff_type(alg), eltype(x), F)
112+
end
113+
114+
fₖ = 0.5 * norm(F)^2
115+
H = ∇f * ∇f
116+
g = ∇f * F
117+
shrink_counter = 0
118+
119+
for k in 1:maxiters
120+
# Solve the trust region subproblem.
121+
δ = dogleg_method(H, g, Δ)
122+
xₖ₊₁ = x + δ
123+
Fₖ₊₁ = f(xₖ₊₁)
124+
fₖ₊₁ = 0.5 * norm(Fₖ₊₁)^2
125+
126+
# Compute the ratio of the actual to predicted reduction.
127+
model = -' * g + 0.5 * δ' * H * δ)
128+
r = model \ (fₖ - fₖ₊₁)
129+
130+
# Update the trust region radius.
131+
if r < η₂
132+
Δ = t₁ * Δ
133+
shrink_counter += 1
134+
if shrink_counter > max_shrink_times
135+
return SciMLBase.build_solution(prob, alg, x, F;
136+
retcode = ReturnCode.Success)
137+
end
138+
else
139+
shrink_counter = 0
140+
end
141+
if r > η₁
142+
if isapprox(xₖ₊₁, x, atol = atol, rtol = rtol)
143+
return SciMLBase.build_solution(prob, alg, xₖ₊₁, Fₖ₊₁;
144+
retcode = ReturnCode.Success)
145+
end
146+
# Take the step.
147+
x = xₖ₊₁
148+
F = Fₖ₊₁
149+
if alg_autodiff(alg)
150+
F, ∇f = value_derivative(f, x)
151+
elseif x isa AbstractArray
152+
∇f = FiniteDiff.finite_difference_jacobian(f, x, diff_type(alg), eltype(x),
153+
F)
154+
else
155+
∇f = FiniteDiff.finite_difference_derivative(f, x, diff_type(alg),
156+
eltype(x),
157+
F)
158+
end
159+
160+
iszero(F) &&
161+
return SciMLBase.build_solution(prob, alg, x, F;
162+
retcode = ReturnCode.Success)
163+
164+
# Update the trust region radius.
165+
if r > η₃ && norm(δ) Δ
166+
Δ = min(t₂ * Δ, Δₘₐₓ)
167+
end
168+
fₖ = fₖ₊₁
169+
H = ∇f * ∇f
170+
g = ∇f * F
171+
end
172+
end
173+
return SciMLBase.build_solution(prob, alg, x, F; retcode = ReturnCode.MaxIters)
174+
end

src/utils.jl

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,28 @@ function init_J(x)
4343
end
4444
return J
4545
end
46+
47+
function dogleg_method(H, g, Δ)
48+
# Compute the Newton step.
49+
δN = -H \ g
50+
# Test if the full step is within the trust region.
51+
if norm(δN) Δ
52+
return δN
53+
end
54+
55+
# Calcualte Cauchy point, optimum along the steepest descent direction.
56+
δsd = -g
57+
norm_δsd = norm(δsd)
58+
if norm_δsd Δ
59+
return δsd .* Δ / norm_δsd
60+
end
61+
62+
# Find the intersection point on the boundary.
63+
δN_δsd = δN - δsd
64+
dot_δN_δsd = dot(δN_δsd, δN_δsd)
65+
dot_δsd_δN_δsd = dot(δsd, δN_δsd)
66+
dot_δsd = dot(δsd, δsd)
67+
fact = dot_δsd_δN_δsd^2 - dot_δN_δsd * (dot_δsd - Δ^2)
68+
tau = (-dot_δsd_δN_δsd + sqrt(fact)) / dot_δN_δsd
69+
return δsd + tau * δN_δsd
70+
end

test/basictests.jl

Lines changed: 76 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,13 +46,24 @@ sol = benchmark_scalar(sf, csu0)
4646
@test sol.u * sol.u - 2 < 1e-9
4747
@test (@ballocated benchmark_scalar(sf, csu0)) == 0
4848

49+
# TrustRegion
50+
function benchmark_scalar(f, u0)
51+
probN = NonlinearProblem{false}(f, u0)
52+
sol = (solve(probN, TrustRegion(10.0)))
53+
end
54+
55+
sol = benchmark_scalar(sf, csu0)
56+
@test sol.retcode === ReturnCode.Success
57+
@test sol.u * sol.u - 2 < 1e-9
58+
4959
# AD Tests
5060
using ForwardDiff
5161

5262
# Immutable
5363
f, u0 = (u, p) -> u .* u .- p, @SVector[1.0, 1.0]
5464

55-
for alg in [SimpleNewtonRaphson(), Broyden(), Klement()]
65+
for alg in [SimpleNewtonRaphson(), Broyden(), Klement(),
66+
TrustRegion(10.0)]
5667
g = function (p)
5768
probN = NonlinearProblem{false}(f, csu0, p)
5869
sol = solve(probN, alg, tol = 1e-9)
@@ -67,7 +78,8 @@ end
6778

6879
# Scalar
6980
f, u0 = (u, p) -> u * u - p, 1.0
70-
for alg in [SimpleNewtonRaphson(), Broyden(), Klement()]
81+
for alg in [SimpleNewtonRaphson(), Broyden(), Klement(),
82+
TrustRegion(10.0)]
7183
g = function (p)
7284
probN = NonlinearProblem{false}(f, oftype(p, u0), p)
7385
sol = solve(probN, alg)
@@ -108,7 +120,8 @@ for alg in [Bisection(), Falsi()]
108120
@test ForwardDiff.jacobian(g, p) ForwardDiff.jacobian(t, p)
109121
end
110122

111-
for alg in [SimpleNewtonRaphson(), Broyden(), Klement()]
123+
for alg in [SimpleNewtonRaphson(), Broyden(), Klement(),
124+
TrustRegion(10.0)]
112125
global g, p
113126
g = function (p)
114127
probN = NonlinearProblem{false}(f, 0.5, p)
@@ -128,6 +141,11 @@ probN = NonlinearProblem(f, u0)
128141
@test solve(probN, SimpleNewtonRaphson(; autodiff = false)).u[end] sqrt(2.0)
129142
@test solve(probN, SimpleNewtonRaphson(; autodiff = false)).u[end] sqrt(2.0)
130143

144+
@test solve(probN, TrustRegion(10.0)).u[end] sqrt(2.0)
145+
@test solve(probN, TrustRegion(10.0); immutable = false).u[end] sqrt(2.0)
146+
@test solve(probN, TrustRegion(10.0; autodiff = false)).u[end] sqrt(2.0)
147+
@test solve(probN, TrustRegion(10.0; autodiff = false)).u[end] sqrt(2.0)
148+
131149
@test solve(probN, Broyden()).u[end] sqrt(2.0)
132150
@test solve(probN, Broyden(); immutable = false).u[end] sqrt(2.0)
133151

@@ -144,6 +162,10 @@ for u0 in [1.0, [1, 1.0]]
144162
@test solve(probN, SimpleNewtonRaphson()).u sol
145163
@test solve(probN, SimpleNewtonRaphson(; autodiff = false)).u sol
146164

165+
@test solve(probN, TrustRegion(10.0)).u sol
166+
@test solve(probN, TrustRegion(10.0)).u sol
167+
@test solve(probN, TrustRegion(10.0; autodiff = false)).u sol
168+
147169
@test solve(probN, Broyden()).u sol
148170

149171
@test solve(probN, Klement()).u sol
@@ -185,3 +207,54 @@ sol = solve(probB, Bisection(; exact_left = true, exact_right = true); immutable
185207
@test f(nextfloat(sol.left), nothing) >= 0.0
186208
@test f(sol.right, nothing) >= 0.0
187209
@test f(prevfloat(sol.right), nothing) <= 0.0
210+
211+
# Test that `TrustRegion` passes a test that `SimpleNewtonRaphson` fails on.
212+
u0 = [-10.0, -1.0, 1.0, 2.0, 3.0, 4.0, 10.0]
213+
global g, f
214+
f = (u, p) -> 0.010000000000000002 .+
215+
10.000000000000002 ./ (1 .+
216+
(0.21640425613334457 .+
217+
216.40425613334457 ./ (1 .+
218+
(0.21640425613334457 .+
219+
216.40425613334457 ./
220+
(1 .+ 0.0006250000000000001(u .^ 2.0))) .^ 2.0)) .^ 2.0) .-
221+
0.0011552453009332421u
222+
.-p
223+
g = function (p)
224+
probN = NonlinearProblem{false}(f, u0, p)
225+
sol = solve(probN, TrustRegion(100.0))
226+
return sol.u
227+
end
228+
p = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
229+
u = g(p)
230+
f(u, p)
231+
@test all(f(u, p) .< 1e-10)
232+
233+
# Test kwars in `TrustRegion`
234+
max_trust_radius = [10.0, 100.0, 1000.0]
235+
initial_trust_radius = [10.0, 1.0, 0.1]
236+
step_threshold = [0.0, 0.01, 0.25]
237+
shrink_threshold = [0.25, 0.3, 0.5]
238+
expand_threshold = [0.5, 0.8, 0.9]
239+
shrink_factor = [0.1, 0.3, 0.5]
240+
expand_factor = [1.5, 2.0, 3.0]
241+
max_shrink_times = [10, 20, 30]
242+
243+
list_of_options = zip(max_trust_radius, initial_trust_radius, step_threshold,
244+
shrink_threshold, expand_threshold, shrink_factor,
245+
expand_factor, max_shrink_times)
246+
for options in list_of_options
247+
local probN, sol, alg
248+
alg = TrustRegion(options[1];
249+
initial_trust_radius = options[2],
250+
step_threshold = options[3],
251+
shrink_threshold = options[4],
252+
expand_threshold = options[5],
253+
shrink_factor = options[6],
254+
expand_factor = options[7],
255+
max_shrink_times = options[8])
256+
257+
probN = NonlinearProblem(f, u0, p)
258+
sol = solve(probN, alg)
259+
@test all(f(u, p) .< 1e-10)
260+
end

0 commit comments

Comments
 (0)