Fix erroring out when using `solve!` as is.

AstitvaAggarwal · AstitvaAggarwal · commit 80b0cb643381 · 2025-10-20T21:20:16.000+05:30
diff --git a/ext/LinearSolveMooncakeExt.jl b/ext/LinearSolveMooncakeExt.jl
@@ -30,9 +30,9 @@ function Mooncake.to_cr_tangent(x::Mooncake.PossiblyUninitTangent{T}) where {T}
 end
 
 function Mooncake.increment_and_get_rdata!(f, r::NoRData, t::LinearCache)
-    println("inside increment and get rdata 2")
     f.fields.A .+= t.A
     f.fields.b .+= t.b
+    f.fields.u .+= t.u
 
     return NoRData()
 end
diff --git a/src/adjoint.jl b/src/adjoint.jl
@@ -124,8 +124,7 @@ end
 
 function CRC.rrule(::typeof(SciMLBase.solve!), cache::LinearSolve.LinearCache, alg::LinearSolve.SciMLLinearSolveAlgorithm, args...; alias_A=default_alias_A(
         alg, cache.A, cache.b), kwargs...)
-    _cache = deepcopy(cache)
-    (; A, sensealg) = _cache
+    (; A, sensealg) = cache
     @assert sensealg isa LinearSolveAdjoint "Currently only `LinearSolveAdjoint` is supported for adjoint sensitivity analysis."
 
     # logic behind caching `A` and `b` for the reverse pass based on rrule above for SciMLBase.solve
@@ -138,22 +137,21 @@ function CRC.rrule(::typeof(SciMLBase.solve!), cache::LinearSolve.LinearCache, a
         A_ = deepcopy(A)
     end
 
-    sol = solve!(_cache)
-
+    sol = solve!(cache)
     function solve!_adjoint(∂sol)
         ∂∅ = NoTangent()
         ∂u = ∂sol.u
 
         if sensealg.linsolve === missing
-            λ = if _cache.cacheval isa Factorization
-                _cache.cacheval' \ ∂u
-            elseif _cache.cacheval isa Tuple && _cache.cacheval[1] isa Factorization
-                first(_cache.cacheval)' \ ∂u
+            λ = if cache.cacheval isa Factorization
+                cache.cacheval' \ ∂u
+            elseif cache.cacheval isa Tuple && cache.cacheval[1] isa Factorization
+                first(cache.cacheval)' \ ∂u
             elseif alg isa AbstractKrylovSubspaceMethod
-                invprob = LinearProblem(adjoint(_cache.A), ∂u)
+                invprob = LinearProblem(adjoint(cache.A), ∂u)
                 solve(invprob, alg; cache.abstol, cache.reltol, cache.verbose).u
             elseif alg isa DefaultLinearSolver
-                LinearSolve.defaultalg_adjoint_eval(_cache, ∂u)
+                LinearSolve.defaultalg_adjoint_eval(cache, ∂u)
             else
                 invprob = LinearProblem(adjoint(A_), ∂u) # We cached `A`
                 solve(invprob, alg; cache.abstol, cache.reltol, cache.verbose).u
@@ -167,8 +165,13 @@ function CRC.rrule(::typeof(SciMLBase.solve!), cache::LinearSolve.LinearCache, a
         tu = adjoint(sol.u)
         ∂A = BroadcastArray(@~ .-(λ .* tu))
         ∂b = λ
+
+        if (iszero(∂b) || iszero(∂A)) && !iszero(tu)
+            error("Adjoint case currently not handled. Instead of using `solve!(cache); s1 = copy(cache.u) ...`, use `sol = solve!(cache); s1 = copy(sol.u)`.")
+        end
+
         ∂prob = LinearProblem(∂A, ∂b, ∂∅)
-        ∂cache = LinearSolve.init(∂prob)
+        ∂cache = LinearSolve.init(∂prob, u=∂u)
         return (∂∅, ∂cache, ∂∅, ntuple(_ -> ∂∅, length(args))...)
     end
 
diff --git a/test/nopre/mooncake.jl b/test/nopre/mooncake.jl
@@ -257,10 +257,6 @@ end
     @test A_grad ≈ fd_jac_A rtol = 1e-5
 end
 
-# The below test function cases fails !
-# AVOID Adjoint case in code as : `solve!(cache); s1 = copy(cache.u)`.
-# Instead stick to code like : `sol = solve!(cache); s1 = copy(sol.u)`.
-
 function f4(A, b1, b2; alg=LUFactorization())
     prob = LinearProblem(A, b1)
     cache = init(prob, alg)
@@ -272,11 +268,16 @@ function f4(A, b1, b2; alg=LUFactorization())
     norm(s1 + s2)
 end
 
-# value, grad = Mooncake.value_and_gradient!!(
-# prepare_gradient_cache(f4, copy(A), copy(b1), copy(b2)),
-# f4, copy(A), copy(b1), copy(b2)
-# )
-# (0.0, (Mooncake.NoTangent(), [0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]))
+A = rand(n, n);
+b1 = rand(n);
+b2 = rand(n);
+# f_primal = f4(copy(A), copy(b1), copy(b2))
+
+rule = Mooncake.build_rrule(f4, copy(A), copy(b1), copy(b2))
+@test_throws "Adjoint case currently not handled" Mooncake.value_and_pullback!!(
+    rule, 1.0,
+    f4, copy(A), copy(b1), copy(b2)
+)
 
 # dA2 = ForwardDiff.gradient(x -> f4(x, eltype(x).(b1), eltype(x).(b2)), copy(A))
 # db12 = ForwardDiff.gradient(x -> f4(eltype(x).(A), x, eltype(x).(b2)), copy(b1))
@@ -285,47 +286,4 @@ end
 # @test value == f_primal
 # @test grad[2] ≈ dA2
 # @test grad[3] ≈ db12
-# @test grad[4] ≈ db22
-
-function testls(A, b, u)
-    oa = OperatorAssumptions(
-        true, condition=LinearSolve.OperatorCondition.WellConditioned)
-    prob = LinearProblem(A, b)
-    linsolve = init(prob, LUFactorization(), assumptions=oa)
-    cache = solve!(linsolve)
-    sum(cache.u)
-end
-
-# A = [1.0 2.0; 3.0 4.0]
-# b = [1.0, 2.0]
-# u = zero(b)
-# value, gradient = Mooncake.value_and_gradient!!(
-#     prepare_gradient_cache(testls, copy(A), copy(b), copy(u)),
-#     testls, copy(A), copy(b), copy(u)
-# )
-
-# dA = gradient[2]
-# db = gradient[3]
-# du = gradient[4]
-
-function testls(A, b, u)
-    oa = OperatorAssumptions(
-        true, condition=LinearSolve.OperatorCondition.WellConditioned)
-    prob = LinearProblem(A, b)
-    linsolve = init(prob, LUFactorization(), assumptions=oa)
-    solve!(linsolve)
-    sum(linsolve.u)
-end
-
-# value, gradient = Mooncake.value_and_gradient!!(
-#     prepare_gradient_cache(testls, copy(A), copy(b), copy(u)),
-#     testls, copy(A), copy(b), copy(u)
-# )
-
-# dA2 = gradient[2]
-# db2 = gradient[3]
-# du2 = gradient[4]
-
-# @test dA == dA2
-# @test db == db2
-# @test du == du2
+# @test grad[4] ≈ db22