|
"""
```julia
TrustRegion(max_trust_radius::Number; chunk_size = Val{0}(),
            autodiff = Val{true}(), diff_type = Val{:forward},
            initial_trust_radius::Number = max_trust_radius / 11,
            step_threshold::Number = 0.1, shrink_threshold::Number = 0.25,
            expand_threshold::Number = 0.75, shrink_factor::Number = 0.25,
            expand_factor::Number = 2.0, max_shrink_times::Int = 32)
```

A low-overhead implementation of a
[trust-region](https://optimization.mccormick.northwestern.edu/index.php/Trust-region_methods)
solver.

### Arguments

- `max_trust_radius`: upper bound on the trust-region radius. The radius is adapted
  dynamically while the algorithm runs, but it never grows beyond `max_trust_radius`.

### Keyword Arguments

- `chunk_size`: the chunk size handed to the internal ForwardDiff.jl automatic
  differentiation system. Larger chunks let several derivative columns be computed at
  once, which improves performance. Defaults to `0`, which is equivalent to using
  ForwardDiff.jl's default chunk size mechanism. For more details, see the documentation
  for [ForwardDiff.jl](https://juliadiff.org/ForwardDiff.jl/stable/).
- `autodiff`: whether to use forward-mode automatic differentiation for the Jacobian.
  This argument is ignored if an analytical Jacobian is passed, as that will be used
  instead. Defaults to `Val{true}`, which means ForwardDiff.jl is used by default. If
  `Val{false}`, then FiniteDiff.jl is used for finite differencing.
- `diff_type`: the type of finite differencing used if `autodiff` is `Val{false}`.
  Defaults to `Val{:forward}` for forward finite differences. For more details on the
  choices, see the [FiniteDiff.jl](https://github.com/JuliaDiff/FiniteDiff.jl)
  documentation.
- `initial_trust_radius`: the trust-region radius used in the first iteration. Defaults
  to `max_trust_radius / 11`.
- `step_threshold`: the threshold for taking a step. In every iteration the threshold is
  compared with a value `r`, the actual reduction in the objective function divided by
  the predicted reduction. If `step_threshold > r`, the model is not a good
  approximation and the step is rejected. Defaults to `0.1`. For more details, see
  [Trust-region methods](https://optimization.mccormick.northwestern.edu/index.php/Trust-region_methods).
- `shrink_threshold`: the threshold for shrinking the trust-region radius. In every
  iteration the threshold is compared with the ratio `r` of actual to predicted
  reduction in the objective function. If `shrink_threshold > r`, the trust-region
  radius is shrunk by `shrink_factor`. Defaults to `0.25`. For more details, see
  [Trust-region methods](https://optimization.mccormick.northwestern.edu/index.php/Trust-region_methods).
- `expand_threshold`: the threshold for expanding the trust-region radius. If a step is
  taken, i.e. `step_threshold < r` (with `r` defined in `shrink_threshold`), a check is
  also made to see if `expand_threshold < r`. If that is true, the trust-region radius
  is expanded by `expand_factor`. Defaults to `0.75`.
- `shrink_factor`: the factor the trust-region radius is multiplied by when
  `shrink_threshold > r` (with `r` defined in `shrink_threshold`). Defaults to `0.25`.
- `expand_factor`: the factor the trust-region radius is multiplied by when
  `expand_threshold < r` (with `r` defined in `shrink_threshold`). Defaults to `2.0`.
- `max_shrink_times`: the maximum number of times the trust-region radius may be shrunk
  in a row. If `max_shrink_times` is exceeded, the algorithm returns. Defaults to `32`.
"""
struct TrustRegion{CS, AD, FDT} <: AbstractNewtonAlgorithm{CS, AD, FDT}
    max_trust_radius::Number
    initial_trust_radius::Number
    step_threshold::Number
    shrink_threshold::Number
    expand_threshold::Number
    shrink_factor::Number
    expand_factor::Number
    max_shrink_times::Int

    function TrustRegion(max_trust_radius::Number;
                         chunk_size = Val{0}(),
                         autodiff = Val{true}(),
                         diff_type = Val{:forward},
                         initial_trust_radius::Number = max_trust_radius / 11,
                         step_threshold::Number = 0.1,
                         shrink_threshold::Number = 0.25,
                         expand_threshold::Number = 0.75,
                         shrink_factor::Number = 0.25,
                         expand_factor::Number = 2.0,
                         max_shrink_times::Int = 32)
        # The differentiation options live in the type parameters; unwrap them once
        # up front so the `new` call below stays readable.
        cs = SciMLBase._unwrap_val(chunk_size)
        ad = SciMLBase._unwrap_val(autodiff)
        fdt = SciMLBase._unwrap_val(diff_type)
        return new{cs, ad, fdt}(max_trust_radius,
                                initial_trust_radius,
                                step_threshold,
                                shrink_threshold,
                                expand_threshold,
                                shrink_factor,
                                expand_factor,
                                max_shrink_times)
    end
end
| 79 | + |
# Solve an out-of-place `NonlinearProblem` with the `TrustRegion` algorithm.
#
# The iteration minimises the merit function `0.5 * norm(F(x))^2` using a Gauss-Newton
# quadratic model (`H = J' * J`, `g = J' * F`) whose subproblem is solved by
# `dogleg_method`. The trust-region radius is shrunk, kept or expanded depending on the
# ratio `r` of actual to predicted reduction.
#
# Keyword arguments:
# - `abstol`, `reltol`: tolerances for the `isapprox` test between successive iterates;
#   when `nothing`, both default to the element type's machine epsilon raised to `4 // 5`.
# - `maxiters`: maximum number of trust-region iterations (default `1000`).
#
# Returns the result of `SciMLBase.build_solution`. The return code is
# `ReturnCode.Success` when an accepted step no longer moves the iterate, when the
# residual is exactly zero, or when the radius was shrunk more than `max_shrink_times`
# times in a row; it is `ReturnCode.MaxIters` when the iteration budget runs out.
#
# Throws an error for in-place problems.
function SciMLBase.solve(prob::NonlinearProblem,
                         alg::TrustRegion, args...; abstol = nothing,
                         reltol = nothing,
                         maxiters = 1000, kwargs...)
    # Fail fast: nothing below supports in-place residual functions.
    if SciMLBase.isinplace(prob)
        error("TrustRegion currently only supports out-of-place nonlinear problems")
    end

    f = Base.Fix2(prob.f, prob.p)
    x = float(prob.u0)
    T = typeof(x)
    Δₘₐₓ = float(alg.max_trust_radius)
    Δ = float(alg.initial_trust_radius)
    η₁ = float(alg.step_threshold)
    η₂ = float(alg.shrink_threshold)
    η₃ = float(alg.expand_threshold)
    t₁ = float(alg.shrink_factor)
    t₂ = float(alg.expand_factor)
    max_shrink_times = alg.max_shrink_times

    atol = abstol !== nothing ? abstol :
           real(oneunit(eltype(T))) * (eps(real(one(eltype(T)))))^(4 // 5)
    rtol = reltol !== nothing ? reltol : eps(real(one(eltype(T))))^(4 // 5)

    # Residual and Jacobian (or scalar derivative) at the starting point.
    if alg_autodiff(alg)
        F, ∇f = value_derivative(f, x)
    elseif x isa AbstractArray
        F = f(x)
        ∇f = FiniteDiff.finite_difference_jacobian(f, x, diff_type(alg), eltype(x), F)
    else
        F = f(x)
        ∇f = FiniteDiff.finite_difference_derivative(f, x, diff_type(alg), eltype(x), F)
    end

    # Gauss-Newton model of the merit function 0.5 * ‖F‖²: the gradient is J' * F and
    # the Hessian approximation is J' * J. The adjoint matters for vector problems;
    # for real scalars it is a no-op.
    fₖ = 0.5 * norm(F)^2
    H = ∇f' * ∇f
    g = ∇f' * F
    shrink_counter = 0

    for _ in 1:maxiters
        # Solve the trust region subproblem.
        δ = dogleg_method(H, g, Δ)
        xₖ₊₁ = x + δ
        Fₖ₊₁ = f(xₖ₊₁)
        fₖ₊₁ = 0.5 * norm(Fₖ₊₁)^2

        # Compute the ratio of the actual to predicted reduction.
        # NOTE(review): a zero predicted reduction makes `r` NaN/Inf; with NaN neither
        # branch below fires and the loop simply continues — confirm this is intended.
        model = -(δ' * g + 0.5 * δ' * H * δ)
        r = (fₖ - fₖ₊₁) / model

        # Update the trust region radius.
        if r < η₂
            Δ = t₁ * Δ
            shrink_counter += 1
            if shrink_counter > max_shrink_times
                # NOTE(review): reported as Success although no acceptable model was
                # found; kept as-is because callers may rely on this return code.
                return SciMLBase.build_solution(prob, alg, x, F;
                                                retcode = ReturnCode.Success)
            end
        else
            shrink_counter = 0
        end

        if r > η₁
            # The accepted step no longer moves the iterate: treat as converged.
            if isapprox(xₖ₊₁, x, atol = atol, rtol = rtol)
                return SciMLBase.build_solution(prob, alg, xₖ₊₁, Fₖ₊₁;
                                                retcode = ReturnCode.Success)
            end

            # Take the step.
            x = xₖ₊₁
            F = Fₖ₊₁
            if alg_autodiff(alg)
                F, ∇f = value_derivative(f, x)
            elseif x isa AbstractArray
                ∇f = FiniteDiff.finite_difference_jacobian(f, x, diff_type(alg),
                                                           eltype(x), F)
            else
                ∇f = FiniteDiff.finite_difference_derivative(f, x, diff_type(alg),
                                                             eltype(x), F)
            end

            iszero(F) &&
                return SciMLBase.build_solution(prob, alg, x, F;
                                                retcode = ReturnCode.Success)

            # Expand the radius only when the model was very good and the step
            # actually reached the trust-region boundary.
            if r > η₃ && norm(δ) ≈ Δ
                Δ = min(t₂ * Δ, Δₘₐₓ)
            end

            # Rebuild the quadratic model at the new iterate.
            fₖ = fₖ₊₁
            H = ∇f' * ∇f
            g = ∇f' * F
        end
    end
    return SciMLBase.build_solution(prob, alg, x, F; retcode = ReturnCode.MaxIters)
end
0 commit comments