Merge pull request #1151 from willtebbutt/wct/mooncake

ChrisRackauckas · web-flow · commit b5b73d303ffe · 2025-05-24T09:46:46.000Z
Add Mooncake to Alternative AD Frontends
diff --git a/Project.toml b/Project.toml
@@ -24,6 +24,7 @@ GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae"
 Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
+Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6"
 OrdinaryDiffEqCore = "bbf590c4-e513-4bbe-9b18-05decba2e5d8"
 PreallocationTools = "d236fae5-4411-538c-8e31-a6e3d9e00b46"
 QuadGK = "1fd47b50-473d-5c70-9696-f719f8f3bcdc"
@@ -59,7 +60,7 @@ Calculus = "0.5.1"
 ChainRulesCore = "0.10.7, 1"
 ComponentArrays = "0.15.5"
 DelayDiffEq = "5.43.2"
-DiffEqBase = "6.166.1"
+DiffEqBase = "6.175"
 DiffEqCallbacks = "4"
 DiffEqNoiseProcess = "5.19"
 Distributed = "1"
@@ -94,7 +95,7 @@ RecursiveArrayTools = "3.27.2"
 Reexport = "1.0"
 ReverseDiff = "1.15.1"
 SafeTestsets = "0.1.0"
-SciMLBase = "2.79"
+SciMLBase = "2.94"
 SciMLJacobianOperators = "0.1"
 SciMLStructures = "1.3"
 SparseArrays = "1.10"
diff --git a/docs/src/manual/differential_equation_sensitivities.md b/docs/src/manual/differential_equation_sensitivities.md
@@ -12,9 +12,10 @@ Current AD libraries whose calls are captured by the sensitivity
 system are:
 
   - [Enzyme.jl](https://github.com/EnzymeAD/Enzyme.jl)
-  - [Zygote.jl](https://fluxml.ai/Zygote.jl/stable/)
-  - [Diffractor.jl](https://github.com/JuliaDiff/Diffractor.jl)
+  - [Mooncake.jl](https://github.com/chalk-lab/Mooncake.jl)
   - [ReverseDiff.jl](https://github.com/JuliaDiff/ReverseDiff.jl)
+  - [Tracker.jl](https://github.com/FluxML/Tracker.jl)
+  - [Zygote.jl](https://fluxml.ai/Zygote.jl/stable/)
 
 ## Using and Controlling Sensitivity Algorithms within AD
 
diff --git a/ext/SciMLSensitivityMooncakeExt.jl b/ext/SciMLSensitivityMooncakeExt.jl
@@ -1,7 +1,7 @@
 module SciMLSensitivityMooncakeExt
 
 using SciMLSensitivity, Mooncake
-import SciMLSensitivity: get_paramjac_config, mooncake_run_ad, MooncakeVJP, MooncakeLoaded
+import SciMLSensitivity: get_paramjac_config, mooncake_run_ad, MooncakeVJP, MooncakeLoaded, DiffEqBase
 
 function get_paramjac_config(::MooncakeLoaded, ::MooncakeVJP, pf, p, f, y, _t)
     dy_mem = zero(y)
diff --git a/src/concrete_solve.jl b/src/concrete_solve.jl
@@ -460,6 +460,7 @@ function DiffEqBase._concrete_solve_adjoint(
     end
 
     _prob = remake(_prob, u0 = new_u0, p = new_p)
+    
 
     if sensealg isa BacksolveAdjoint
         sol = solve(_prob, alg, args...; initializealg = new_initializealg, save_noise = true,
@@ -870,8 +871,9 @@ function DiffEqBase._concrete_solve_adjoint(
     end
 
     # use the callback in kwargs, not prob
-    sol = solve(remake(prob, p = p, u0 = u0, callback = nothing),
-        alg, args...; saveat = _saveat, kwargs...)
+    kwargs_prob = NamedTuple(filter(x -> x[1] != :callback, prob.kwargs))
+    _prob = remake(prob, p = p, u0 = u0, kwargs = kwargs_prob)
+    sol = solve(_prob, alg, args...; saveat = _saveat, kwargs...)
 
     if originator isa SciMLBase.EnzymeOriginator
         @reset sol.prob = prob
@@ -1273,6 +1275,21 @@ function Base.showerror(io::IO, e::EnzymeTrackedRealError)
     println(io, ENZYME_TRACKED_REAL_ERROR_MESSAGE)
 end
 
+const MOONCAKE_TRACKED_REAL_ERROR_MESSAGE = """
+                                             `Mooncake` is not compatible with `ReverseDiffAdjoint` nor with `TrackerAdjoint`.
+                                             Either choose a different adjoint method like `GaussAdjoint`,
+                                             or use a different AD system like `ReverseDiff`.
+                                             For more details, on these methods see
+                                             https://docs.sciml.ai/SciMLSensitivity/stable/.
+                                             """
+# Thrown by `_concrete_solve_adjoint` when `originator isa SciMLBase.MooncakeOriginator`.
+struct MooncakeTrackedRealError <: Exception
+end
+# Print the fixed `MOONCAKE_TRACKED_REAL_ERROR_MESSAGE` to `io`; `e` itself is not inspected.
+function Base.showerror(io::IO, e::MooncakeTrackedRealError)
+    println(io, MOONCAKE_TRACKED_REAL_ERROR_MESSAGE)
+end
+
 function DiffEqBase._concrete_solve_adjoint(
         prob::Union{SciMLBase.AbstractDiscreteProblem,
             SciMLBase.AbstractODEProblem,
@@ -1290,6 +1307,10 @@ function DiffEqBase._concrete_solve_adjoint(
         throw(EnzymeTrackedRealError())
     end
 
+    if originator isa SciMLBase.MooncakeOriginator
+        throw(MooncakeTrackedRealError())
+    end
+
     if !(p === nothing || p isa SciMLBase.NullParameters)
         if !isscimlstructure(p)
             throw(SciMLStructuresCompatibilityError())
@@ -1514,6 +1535,10 @@ function DiffEqBase._concrete_solve_adjoint(
         throw(EnzymeTrackedRealError())
     end
 
+    if originator isa SciMLBase.MooncakeOriginator
+        throw(MooncakeTrackedRealError())
+    end
+
     t = eltype(prob.tspan)[]
     u = typeof(u0)[]
 
diff --git a/test/alternative_ad_frontend.jl b/test/alternative_ad_frontend.jl
@@ -1,8 +1,10 @@
 using OrdinaryDiffEq, SciMLSensitivity, ForwardDiff, Zygote, ReverseDiff, Tracker, Enzyme,
-      FiniteDiff
+      FiniteDiff, Mooncake
 using Test
 Enzyme.API.typeWarning!(false)
 
+mooncake_gradient(f, x) = Mooncake.value_and_gradient!!(Mooncake.build_rrule(f, x), f, x)[2][2] # [2][2]: gradient w.r.t. `x` (assumes a `(value, (df, dx))` return; TODO confirm)
+
 odef(du, u, p, t) = du .= u .* p
 const prob = ODEProblem(odef, [2.0], (0.0, 1.0), [3.0])
 
@@ -17,7 +19,9 @@ u0p = [2.0, 3.0]
 du0p = zeros(2)
 dup = Zygote.gradient(senseloss0(InterpolatingAdjoint()), u0p)[1]
 Enzyme.autodiff(Reverse, senseloss0(InterpolatingAdjoint()), Active, Duplicated(u0p, du0p))
+dup_mc = mooncake_gradient(senseloss0(InterpolatingAdjoint()), u0p)
 @test du0p ≈ dup
+@test dup_mc ≈ dup
 
 struct senseloss{T}
     sense::T
@@ -56,6 +60,14 @@ dup = Zygote.gradient(senseloss(InterpolatingAdjoint()), u0p)[1]
 @test only(Enzyme.gradient(Reverse, senseloss(ForwardDiffSensitivity()), u0p)) ≈ dup
 @test_broken only(Enzyme.gradient(Reverse, senseloss(ForwardSensitivity()), u0p)) ≈ dup # broken because ForwardSensitivity not compatible with perturbing u0
 
+@test mooncake_gradient(senseloss(InterpolatingAdjoint()), u0p) ≈ dup
+@test_throws TypeError mooncake_gradient(senseloss(ReverseDiffAdjoint()), u0p) ≈ dup
+@test_throws TypeError mooncake_gradient(senseloss(TrackerAdjoint()), u0p) ≈ dup
+#@test_throws SciMLSensitivity.MooncakeTrackedRealError mooncake_gradient(senseloss(ReverseDiffAdjoint()), u0p) ≈ dup
+#@test_throws SciMLSensitivity.MooncakeTrackedRealError mooncake_gradient(senseloss(TrackerAdjoint()), u0p) ≈ dup
+@test mooncake_gradient(senseloss(ForwardDiffSensitivity()), u0p) ≈ dup
+@test_broken mooncake_gradient(senseloss(ForwardSensitivity()), u0p) ≈ dup # broken because ForwardSensitivity not compatible with perturbing u0
+
 struct senseloss2{T}
     sense::T
 end
@@ -90,6 +102,14 @@ dup = Zygote.gradient(senseloss2(InterpolatingAdjoint()), u0p)[1]
 @test_broken only(Enzyme.gradient(Reverse, senseloss2(ForwardDiffSensitivity()), u0p)) ≈ dup
 @test_broken only(Enzyme.gradient(Reverse, senseloss2(ForwardSensitivity()), u0p)) ≈ dup # broken because ForwardSensitivity not compatible with perturbing u0
 
+@test mooncake_gradient(senseloss2(InterpolatingAdjoint()), u0p) ≈ dup
+@test_throws TypeError mooncake_gradient(senseloss2(ReverseDiffAdjoint()), u0p) ≈ dup
+@test_throws TypeError mooncake_gradient(senseloss2(TrackerAdjoint()), u0p) ≈ dup
+#@test_throws SciMLSensitivity.MooncakeTrackedRealError mooncake_gradient(senseloss2(ReverseDiffAdjoint()), u0p) ≈ dup
+#@test_throws SciMLSensitivity.MooncakeTrackedRealError mooncake_gradient(senseloss2(TrackerAdjoint()), u0p) ≈ dup
+@test mooncake_gradient(senseloss2(ForwardDiffSensitivity()), u0p) ≈ dup
+@test_broken mooncake_gradient(senseloss2(ForwardSensitivity()), u0p) ≈ dup # broken because ForwardSensitivity not compatible with perturbing u0
+
 struct senseloss3{T}
     sense::T
 end
@@ -122,6 +142,14 @@ dup = Zygote.gradient(senseloss3(InterpolatingAdjoint()), u0p)[1]
 @test_broken only(Enzyme.gradient(Reverse, senseloss3(ForwardDiffSensitivity()), u0p)) ≈ dup
 @test_broken only(Enzyme.gradient(Reverse, senseloss3(ForwardSensitivity()), u0p)) ≈ dup
 
+@test mooncake_gradient(senseloss3(InterpolatingAdjoint()), u0p) ≈ dup
+@test_throws TypeError mooncake_gradient(senseloss3(ReverseDiffAdjoint()), u0p) ≈ dup
+@test_throws TypeError mooncake_gradient(senseloss3(TrackerAdjoint()), u0p) ≈ dup
+#@test_throws SciMLSensitivity.MooncakeTrackedRealError mooncake_gradient(senseloss3(ReverseDiffAdjoint()), u0p) ≈ dup
+#@test_throws SciMLSensitivity.MooncakeTrackedRealError mooncake_gradient(senseloss3(TrackerAdjoint()), u0p) ≈ dup
+@test mooncake_gradient(senseloss3(ForwardDiffSensitivity()), u0p) ≈ dup
+@test_broken mooncake_gradient(senseloss3(ForwardSensitivity()), u0p) ≈ dup
+
 struct senseloss4{T}
     sense::T
 end
@@ -156,6 +184,14 @@ dup = Zygote.gradient(senseloss4(InterpolatingAdjoint()), u0p)[1]
 @test only(Enzyme.gradient(Reverse, senseloss4(ForwardDiffSensitivity()), u0p)) ≈ dup
 @test_broken only(Enzyme.gradient(Reverse, senseloss4(ForwardSensitivity()), u0p)) ≈ dup # broken because ForwardSensitivity not compatible with perturbing u0
 
+@test mooncake_gradient(senseloss4(InterpolatingAdjoint()), u0p) ≈ dup
+@test_throws TypeError mooncake_gradient(senseloss4(ReverseDiffAdjoint()), u0p) ≈ dup
+@test_throws TypeError mooncake_gradient(senseloss4(TrackerAdjoint()), u0p) ≈ dup
+#@test_throws SciMLSensitivity.MooncakeTrackedRealError mooncake_gradient(senseloss4(ReverseDiffAdjoint()), u0p) ≈ dup
+#@test_throws SciMLSensitivity.MooncakeTrackedRealError mooncake_gradient(senseloss4(TrackerAdjoint()), u0p) ≈ dup
+@test mooncake_gradient(senseloss4(ForwardDiffSensitivity()), u0p) ≈ dup
+@test_broken mooncake_gradient(senseloss4(ForwardSensitivity()), u0p) ≈ dup
+
 solvealg_test = Tsit5()
 sensealg_test = InterpolatingAdjoint()
 tspan = (0.0, 1.0)
@@ -186,6 +222,9 @@ res4 = ReverseDiff.gradient(loss2, p0)
 @test_broken res2≈Enzyme.gradient(Reverse, loss, p0) atol=1e-14
 @test_broken res4≈Enzyme.gradient(Reverse, loss2, p0) atol=1e-14
 
+@test res2 ≈ mooncake_gradient(loss, p0)
+@test res4 ≈ mooncake_gradient(loss2, p0)
+
 # Test for recursion https://discourse.julialang.org/t/diffeqsensitivity-jl-issues-with-reversediffadjoint-sensealg/88774
 function ode!(derivative, state, parameters, t)
     derivative .= parameters
@@ -205,6 +244,7 @@ const initial_state = ones(2)
 const solution_times = [1.0, 2.0]
 ReverseDiff.gradient(p -> sum(sum(solve_euler(initial_state, solution_times, p))), zeros(2))
 # Enzyme.gradient(Reverse, p -> sum(sum(solve_euler(initial_state, solution_times, p))), zeros(2))
+# mooncake_gradient(p -> sum(sum(solve_euler(initial_state, solution_times, p))), zeros(2))
 
 # https://github.com/SciML/SciMLSensitivity.jl/issues/943
 
@@ -249,3 +289,4 @@ grad_rd = ReverseDiff.gradient(loss2, p)
 @test grad_fd≈grad_fi atol=1e-2
 @test grad_fd≈grad_zg atol=1e-4
 @test grad_fd≈grad_rd atol=1e-4
+@test_broken mooncake_gradient(loss2, p) ≈ grad_rd atol=1e-4