|
| 1 | +module SimpleBatchedNonlinearSolveExt |
| 2 | + |
| 3 | +using ArrayInterfaceCore, LinearAlgebra, SimpleNonlinearSolve, SciMLBase |
| 4 | +isdefined(Base, :get_extension) ? (using NNlib) : (using ..NNlib) |
| 5 | + |
# Prepend a singleton leading dimension to `x`. For a matrix of size `(n, b)` the result
# has size `(1, n, b)`, i.e. every column of `x` becomes a `1 × n` slice along the last
# dimension. `reshape` shares memory with `x`; no data is copied.
function _batch_transpose(x)
    return reshape(x, (1, size(x)...))
end
| 7 | + |
# Generic fallback used when no batched method applies: plain `*`.
function _batched_mul(x, y)
    return x * y
end
| 9 | + |
# Batched (3-D array) × (matrix of column vectors).
# `y` of size `(n, b)` is viewed as `b` column vectors of size `(n, 1)` so that it can be
# passed to `batched_mul`; the singleton second dimension of the product is then dropped,
# giving a matrix again.
function _batched_mul(x::AbstractArray{T, 3}, y::AbstractMatrix) where {T}
    nrows, nbatch = size(y)
    ycols = reshape(y, nrows, 1, nbatch)
    return dropdims(batched_mul(x, ycols); dims = 2)
end
| 13 | + |
# (Matrix of column vectors) × batched (3-D array).
# `x` of size `(n, b)` is viewed as `b` column vectors of size `(n, 1)` and handed to
# `batched_mul` together with `y`; the 3-D result is returned unchanged.
function _batched_mul(x::AbstractMatrix, y::AbstractArray{T, 3}) where {T}
    nrows, nbatch = size(x)
    xcols = reshape(x, nrows, 1, nbatch)
    return batched_mul(xcols, y)
end
| 17 | + |
# Both operands are already 3-D: forward directly to `batched_mul`.
function _batched_mul(x::AbstractArray{<:Any, 3}, y::AbstractArray{<:Any, 3})
    return batched_mul(x, y)
end
| 21 | + |
# Build the initial batched matrix used by the solver: an identity matrix sized from the
# first column of `x`, repeated `size(x, 2)` times along a third dimension.
function _init_J_batched(x::AbstractMatrix{T}) where {T}
    nbatch = size(x, 2)
    eye = ArrayInterfaceCore.zeromatrix(x[:, 1])
    if !ismutable(x)
        # Immutable input: build the identity out-of-place.
        eye += I
    else
        # Mutable input: write ones onto the diagonal in place.
        eye[diagind(eye)] .= one(T)
    end
    return repeat(eye, 1, 1, nbatch)
end
| 31 | + |
# Batched Broyden solve for out-of-place `NonlinearProblem`s.
#
# `prob.u0` must be a matrix; the residual is evaluated as `prob.f(u, prob.p)` on the whole
# matrix at once, and one inverse-Jacobian approximation per column is kept in a 3-D array.
#
# Keywords:
#   - `abstol`, `reltol`: tolerances for the `isapprox(xₙ, xₙ₋₁)` step-size test. When
#     `nothing`, both default to `eps^(4/5)` of the (real) element type of `u0`.
#   - `maxiters`: maximum number of Broyden iterations.
#
# Returns a solution built with `SciMLBase.build_solution`, with `ReturnCode.Success` when
# the residual is exactly zero or the step-size test passes, and `ReturnCode.MaxIters`
# otherwise.
#
# Throws an `ErrorException` if `ndims(prob.u0) != 2` or if the problem is in-place.
function SciMLBase.__solve(prob::NonlinearProblem, alg::Broyden{true}, args...;
                           abstol = nothing, reltol = nothing, maxiters = 1000, kwargs...)
    f = Base.Fix2(prob.f, prob.p)
    x = float(prob.u0)

    if ndims(x) != 2
        error("`batch` mode works only if `ndims(prob.u0) == 2`")
    end

    # Reject in-place problems *before* the first residual evaluation: calling an in-place
    # `prob.f` with the out-of-place `(u, p)` signature would fail (or misbehave) before
    # this explanatory error could be raised.
    if SciMLBase.isinplace(prob)
        error("Broyden currently only supports out-of-place nonlinear problems")
    end

    fₙ = f(x)
    T = eltype(x)
    J⁻¹ = _init_J_batched(x)

    atol = abstol !== nothing ? abstol :
           real(oneunit(eltype(T))) * (eps(real(one(eltype(T)))))^(4 // 5)
    rtol = reltol !== nothing ? reltol : eps(real(one(eltype(T))))^(4 // 5)

    xₙ = x
    xₙ₋₁ = x
    fₙ₋₁ = fₙ
    for _ in 1:maxiters
        # Quasi-Newton step using the current inverse-Jacobian approximation.
        xₙ = xₙ₋₁ .- _batched_mul(J⁻¹, fₙ₋₁)
        fₙ = f(xₙ)

        # Test for convergence before updating J⁻¹: the update is not needed once we
        # return, so skipping it saves the batched products on the final iteration.
        if iszero(fₙ) || isapprox(xₙ, xₙ₋₁; atol = atol, rtol = rtol)
            return SciMLBase.build_solution(prob, alg, xₙ, fₙ;
                                            retcode = ReturnCode.Success)
        end

        # Rank-one update of J⁻¹, applied to every batch slice:
        #   J⁻¹ += (Δx - J⁻¹Δf) / (Δxᵀ J⁻¹Δf + 1e-5) * (Δxᵀ J⁻¹)
        # The `T(1e-5)` term keeps the denominator away from zero.
        Δxₙ = xₙ .- xₙ₋₁
        Δfₙ = fₙ .- fₙ₋₁
        J⁻¹Δfₙ = _batched_mul(J⁻¹, Δfₙ)
        Δxₙᵀ = _batch_transpose(Δxₙ)
        scale = (Δxₙ .- J⁻¹Δfₙ) ./ (_batched_mul(Δxₙᵀ, J⁻¹Δfₙ) .+ T(1e-5))
        J⁻¹ += _batched_mul(scale, _batched_mul(Δxₙᵀ, J⁻¹))

        xₙ₋₁ = xₙ
        fₙ₋₁ = fₙ
    end

    return SciMLBase.build_solution(prob, alg, xₙ, fₙ; retcode = ReturnCode.MaxIters)
end
| 80 | + |
| 81 | +end |
0 commit comments