Bugfix: use inverse softplus when converting alpha_t in extended lengyel

jcitrin · Torax team · commit 35eb3da73dfa · 2026-01-29T16:02:49.000-08:00
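Previously the conversion was unidirectional: softplus was applied when unpacking alpha_t from the solver state vector, but the initial guess was not passed through the inverse softplus. The solver was still able to eventually find the solutions, but it should now be more robust.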

PiperOrigin-RevId: 860647236
diff --git a/torax/_src/edge/divertor_sol_1d.py b/torax/_src/edge/divertor_sol_1d.py
@@ -421,9 +421,10 @@ def calc_alpha_t(
   nu_ee = jnp.exp(log_nu_ee)
 
   # Z_eff correction to transform electron-electron collisions to ion-electron
-  # collisions. Equation B2 in Eich 2020
+  # collisions. Equation B2 in Eich 2020. A small eps is added to Z_eff - 1 to
+  # avoid numerical issues with the gradient at Z_eff=1 (no impurities).
   Z_eff_correction = (1.0 - 0.569) * jnp.exp(
-      -(((Z_eff_separatrix - 1.0) / 3.25) ** 0.85)
+      -(((Z_eff_separatrix - 1.0 + constants.CONSTANTS.eps) / 3.25) ** 0.85)
   ) + 0.569
 
   nu_ei = nu_ee * Z_eff_correction * Z_eff_separatrix
diff --git a/torax/_src/edge/extended_lengyel_solvers.py b/torax/_src/edge/extended_lengyel_solvers.py
@@ -19,6 +19,7 @@
 import jax
 from jax import numpy as jnp
 from torax._src import constants
+from torax._src import math_utils
 from torax._src.edge import collisional_radiative_models
 from torax._src.edge import divertor_sol_1d as divertor_sol_1d_lib
 from torax._src.edge import extended_lengyel_defaults
@@ -101,9 +102,11 @@ def body_fun(_, carry):
     # Solve for the impurity concentration required to achieve the target
     # temperature for a given q_parallel. This also updates the divertor and
     # separatrix Z_eff values in sol_model, used downstream.
-    current_sol_model.state.c_z_prefactor, physics_outcome = (
-        _solve_for_c_z_prefactor(sol_model=current_sol_model)
+    c_z_prefactor, physics_outcome = _solve_for_c_z_prefactor(
+        sol_model=current_sol_model
     )
+    # Clip to physical values (non-negative impurity concentration).
+    current_sol_model.state.c_z_prefactor = jnp.maximum(c_z_prefactor, 0.0)
 
     # Update alpha_t for the next loop iteration.
     current_sol_model.state.alpha_t = divertor_sol_1d_lib.calc_alpha_t(
@@ -246,12 +249,12 @@ def forward_mode_newton_solver(
   """
   # 1. Create initial guess state vector.
   # Uses log space for strictly positive variables and to improve conditioning.
-  # alpha_t is left linear since should always remain O(1) and log steps
-  # can lead to numerical issues due to exponential amplification. Positivity is
-  # enforced via softplus when unpacking.
+  # alpha_t is strictly positive; positivity is enforced via softplus in the
+  # residual. Therefore we must apply inverse softplus to the initial guess to
+  # maintain consistency.
   x0 = jnp.stack([
       jnp.log(initial_sol_model.state.q_parallel),
-      initial_sol_model.state.alpha_t,
+      math_utils.inverse_softplus(initial_sol_model.state.alpha_t),
       jnp.log(initial_sol_model.state.kappa_e),
       jnp.log(initial_sol_model.state.T_e_target),
   ])
@@ -317,13 +320,13 @@ def inverse_mode_newton_solver(
   # 1. Create initial guess state vector.
 
   # Uses log space for strictly positive variables and to improve conditioning.
-  # alpha_t is left linear since should always remain O(1) and log steps
-  # can lead to numerical issues due to exponential amplification. Positivity is
-  # enforced via softplus when unpacking.
+  # alpha_t is strictly positive; positivity is enforced via softplus in the
+  # residual. Therefore we must apply inverse softplus to the initial guess to
+  # maintain consistency.
 
   x0 = jnp.stack([
       jnp.log(initial_sol_model.state.q_parallel),
-      initial_sol_model.state.alpha_t,
+      math_utils.inverse_softplus(initial_sol_model.state.alpha_t),
       jnp.log(initial_sol_model.state.kappa_e),
       initial_sol_model.state.c_z_prefactor,
   ])
@@ -342,11 +345,15 @@ def inverse_mode_newton_solver(
   )
 
   # 4. Construct final model.
+  # Clip c_z_prefactor to 0.0 if it is negative (unphysical solution).
+  # Negative values are allowed during the solve process (to ensure smooth
+  # gradients for the solver), but the final physical state must have
+  # non-negative concentrations.
   final_state = divertor_sol_1d_lib.ExtendedLengyelState(
       q_parallel=jnp.exp(x_root[0]),
       alpha_t=jax.nn.softplus(x_root[1]),
       kappa_e=jnp.exp(x_root[2]),
-      c_z_prefactor=x_root[3],
+      c_z_prefactor=jnp.maximum(x_root[3], 0.0),
       T_e_target=fixed_Tt,
   )
 
@@ -355,6 +362,7 @@ def inverse_mode_newton_solver(
   )
 
   # 5. Re-calculate physics outcome at final state to return the physics_outcome
+  # This uses the clipped (physical) c_z_prefactor.
   _, physics_outcome = _solve_for_c_z_prefactor(sol_model=final_sol_model)
 
   solver_status = ExtendedLengyelSolverStatus(
@@ -454,7 +462,7 @@ def _forward_residual(
   at_calc_safe = jnp.maximum(at_calc, constants.CONSTANTS.eps)
 
   r_qp = jnp.log(qp_calc_safe) - x_vec[0]
-  r_at = at_calc_safe - current_state.alpha_t
+  r_at = math_utils.inverse_softplus(at_calc_safe) - x_vec[1]
   r_ke = jnp.log(ke_calc_safe) - x_vec[2]
   r_Tt = jnp.log(Tt_calc_safe) - x_vec[3]
 
@@ -468,11 +476,15 @@ def _inverse_residual(
 ) -> jax.Array:
   """Calculates the residual vector for Inverse Mode F(x) = 0."""
   # 1. Construct physical state from vector guess.
+  # Note: c_z_prefactor is clipped to be non-negative for physics calculations
+  # to avoid NaNs (e.g. in Z_eff -> alpha_t). The solver is allowed to explore
+  # negative values in x_vec[3] to properly find the root (even if unphysical),
+  # but the state used for consistent physics checks must be valid.
   current_state = divertor_sol_1d_lib.ExtendedLengyelState(
       q_parallel=jnp.exp(x_vec[0]),
       alpha_t=jax.nn.softplus(x_vec[1]),
       kappa_e=jnp.exp(x_vec[2]),
-      c_z_prefactor=x_vec[3],
+      c_z_prefactor=jnp.maximum(x_vec[3], 0.0),
       T_e_target=fixed_Tt,
   )
 
@@ -508,9 +520,12 @@ def _inverse_residual(
   at_calc_safe = jnp.maximum(at_calc, constants.CONSTANTS.eps)
 
   r_qp = jnp.log(qp_calc_safe) - x_vec[0]
-  r_at = at_calc_safe - current_state.alpha_t
+  r_at = math_utils.inverse_softplus(at_calc_safe) - x_vec[1]
   r_ke = jnp.log(ke_calc_safe) - x_vec[2]
-  r_cz = cz_calc - current_state.c_z_prefactor
+  # Residual for c_z compares the calculated required c_z against the
+  # *raw* solver guess x_vec[3], not the clipped state value.
+  # This provides a gradient signal even when x_vec[3] is negative.
+  r_cz = cz_calc - x_vec[3]
 
   return jnp.stack([r_qp, r_at, r_ke, r_cz])
 
@@ -629,12 +644,6 @@ def _solve_for_c_z_prefactor(
       PhysicsOutcome.SUCCESS,
   )
 
-  # c_z is related to impurity density which physically cannot be negative.
-  # The natural floor of c_z_prefactor is zero.
-  c_z_prefactor = jnp.where(
-      status == PhysicsOutcome.SUCCESS, c_z_prefactor, 0.0
-  )
-
   return c_z_prefactor, status
 
 
diff --git a/torax/_src/edge/tests/extended_lengyel_solver_test.py b/torax/_src/edge/tests/extended_lengyel_solver_test.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 from absl.testing import absltest
+from absl.testing import parameterized
 import numpy as np
 from torax._src.edge import divertor_sol_1d
 from torax._src.edge import extended_lengyel_defaults
@@ -23,7 +24,7 @@
 # pylint: disable=invalid-name
 
 
-class ExtendedLengyelSolverInverseTest(absltest.TestCase):
+class ExtendedLengyelSolverInverseTest(parameterized.TestCase):
 
   def setUp(self):
     super().setUp()
@@ -153,15 +154,11 @@ def test_unsuccessful_solve_for_c_z(self):
     calculated_c_z, status = extended_lengyel_solvers._solve_for_c_z_prefactor(
         sol_model=sol_model,
     )
-    expected_c_z = 0.0
 
     self.assertEqual(
         status, extended_lengyel_solvers.PhysicsOutcome.C_Z_PREFACTOR_NEGATIVE
     )
-    np.testing.assert_allclose(
-        calculated_c_z,
-        expected_c_z,
-    )
+    self.assertLess(calculated_c_z, 0.0)
 
   def test_inverse_unsuccessful_newton_solve_but_successful_hybrid_solve(self):
     # The initial guess state is deliberately set far from the solution, by
diff --git a/torax/_src/edge/tests/extended_lengyel_standalone_test.py b/torax/_src/edge/tests/extended_lengyel_standalone_test.py
@@ -14,6 +14,7 @@
 
 from unittest import mock
 from absl.testing import absltest
+from absl.testing import parameterized
 import numpy as np
 from torax._src.edge import extended_lengyel_defaults
 from torax._src.edge import extended_lengyel_enums
@@ -23,7 +24,7 @@
 # pylint: disable=invalid-name
 
 
-class ExtendedLengyelTest(absltest.TestCase):
+class ExtendedLengyelTest(parameterized.TestCase):
 
   def test_run_extended_lengyel_model_inverse_mode_fixed_point(self):
     """Integration test for the full extended_lengyel model in inverse mode."""
@@ -549,6 +550,68 @@ def test_validate_inputs_for_computation_mode(self):
           seed_impurity_weights={},
       )
 
+  @parameterized.named_parameters(
+      ('low_ip', {'plasma_current': 2.0e6, 'power_crossing_separatrix': 10e6}),
+      (
+          'low_power',
+          {'plasma_current': 15.0e6, 'power_crossing_separatrix': 1.0e6},
+      ),
+  )
+  def test_underpowered_scenario(self, inputs_update):
+    """Test scenario where input power is too low to reach target temperature.
+
+    This uses unrealistically low inputs for ITER-like scenarios (Ip or P_SOL)
+    which results in required impurity concentration being negative
+    (physically impossible).
+    The solver should report this via physics_outcome and a non-zero residual.
+
+    Args:
+      inputs_update: Dictionary of input parameters to override defaults.
+    """
+    inputs = {
+        'T_e_target': 5.0,
+        'power_crossing_separatrix': 10e6,
+        'separatrix_electron_density': 3e19,
+        'main_ion_charge': 1.0,
+        'mean_ion_charge_state': 1.0,
+        'fixed_impurity_concentrations': {},
+        'magnetic_field_on_axis': 5.3,
+        'plasma_current': 15.0e6,
+        'connection_length_target': 50.0,
+        'connection_length_divertor': 10.0,
+        'major_radius': 6.2,
+        'minor_radius': 2.0,
+        'elongation_psi95': 1.7,
+        'triangularity_psi95': 0.33,
+        'average_ion_mass': 2.0,
+        'computation_mode': extended_lengyel_enums.ComputationMode.INVERSE,
+        'solver_mode': extended_lengyel_enums.SolverMode.HYBRID,
+        'seed_impurity_weights': {'Ne': 1.0},
+    }
+    inputs.update(inputs_update)
+
+    outputs = extended_lengyel_standalone.run_extended_lengyel_standalone(
+        **inputs
+    )
+
+    numerics = outputs.solver_status.numerics_outcome
+
+    # 1. Assert no NaNs in output.
+    self.assertFalse(np.any(np.isnan(numerics.residual)))
+    self.assertFalse(np.isnan(outputs.Z_eff_separatrix))
+    self.assertFalse(np.isnan(outputs.alpha_t))
+
+    # 2. Physics outcome should flag the issue.
+    self.assertEqual(
+        outputs.solver_status.physics_outcome.item(),
+        extended_lengyel_solvers.PhysicsOutcome.C_Z_PREFACTOR_NEGATIVE,
+    )
+
+    # 3. Impurities should be clamped to 0.
+    np.testing.assert_allclose(
+        outputs.seed_impurity_concentrations['Ne'], 0.0, atol=1e-5
+    )
+
 
 if __name__ == '__main__':
   absltest.main()
diff --git a/torax/_src/math_utils.py b/torax/_src/math_utils.py
@@ -323,3 +323,16 @@ def cumulative_volume_integration(
 
 def safe_divide(y: chex.Array, x: chex.Array) -> chex.Array:
   return y / (x + constants.CONSTANTS.eps)
+
+
+def inverse_softplus(x: jax.Array) -> jax.Array:
+  """Inverse of softplus function."""
+  # Enforce minimum value to avoid log(0) or log(negative).
+  # We want a function that maps x back to y such that softplus(y) = x.
+  # y = log(exp(x) - 1), computed here as log(expm1(x)).
+  # If x -> 0, y -> -inf.
+  # For avoiding overflow/underflow issues with float32:
+  # exp(x) overflows if x > 88.
+  # But for x > 30, softplus(x) ~ x, so x is returned directly.
+  # For x <= 0 we would get log(0) or NaN. Avoid by clipping x to >= 1e-20.
+  return jnp.where(x > 30.0, x, jnp.log(jnp.expm1(jnp.maximum(x, 1e-20))))
diff --git a/torax/_src/tests/math_utils_test.py b/torax/_src/tests/math_utils_test.py
@@ -315,6 +315,35 @@ def test_cumulative_volume_integration(self, num_cell_grid_points: int):
         expected,
     )
 
+  @parameterized.parameters(1e-14, 1e-6, 1e-4, 0.1)
+  def test_inverse_softplus_small_values(self, value):
+    x_val = jnp.array(value)
+    y_val = math_utils.inverse_softplus(x_val)
+    x_rec = jax.nn.softplus(y_val)
+    np.testing.assert_allclose(x_val, x_rec, rtol=1e-6)
+
+  @parameterized.parameters(1.0, 5.0, 10.0)
+  def test_inverse_softplus_medium_values(self, value):
+    x_val = jnp.array(value)
+    y_val = math_utils.inverse_softplus(x_val)
+    x_rec = jax.nn.softplus(y_val)
+    np.testing.assert_allclose(x_val, x_rec, rtol=1e-6)
+
+  @parameterized.parameters(25.0, 50.0, 100.0)
+  def test_inverse_softplus_large_values(self, value):
+    x_val = jnp.array(value)
+    y_val = math_utils.inverse_softplus(x_val)
+    np.testing.assert_allclose(x_val, y_val, rtol=1e-6)
+    x_rec = jax.nn.softplus(y_val)
+    np.testing.assert_allclose(x_val, x_rec, rtol=1e-6)
+
+  @parameterized.parameters(-20, -10, -1, 1e-10, 1e-6, 0.1, 1.0, 10.0, 100.0)
+  def test_softplus_round_trip(self, value):
+    x = jnp.array(value)
+    y = jax.nn.softplus(x)
+    x_rec = math_utils.inverse_softplus(y)
+    np.testing.assert_allclose(x, x_rec, rtol=1e-6)
+
 
 if __name__ == '__main__':
   absltest.main()
diff --git a/torax/tests/sim_test.py b/torax/tests/sim_test.py
@@ -230,6 +230,8 @@ class SimTest(sim_test_case.SimTestCase):
       (
           'test_iterhybrid_predictor_corrector_mavrin_n_e_ratios_lengyel',
           'test_iterhybrid_predictor_corrector_mavrin_n_e_ratios_lengyel.py',
+          _ALL_PROFILES,
+          1e-8,
       ),
       # Predictor-corrector with Mavrin and n_e_ratios_Z_eff impurity mode.
       (

Original file line number	Diff line number	Diff line change
`@@ -230,6 +230,8 @@ class SimTest(sim_test_case.SimTestCase):`
`230`	`230`	`(`
`231`	`231`	`'test_iterhybrid_predictor_corrector_mavrin_n_e_ratios_lengyel',`
`232`	`232`	`'test_iterhybrid_predictor_corrector_mavrin_n_e_ratios_lengyel.py',`
	`233`	`+ _ALL_PROFILES,`
	`234`	`+ 1e-8,`
`233`	`235`	`),`
`234`	`236`	`# Predictor-corrector with Mavrin and n_e_ratios_Z_eff impurity mode.`
`235`	`237`	`(`