
Commit b11ddb1

experimental vmap_method support
1 parent 862da53 commit b11ddb1


7 files changed: +112 −8 lines changed

README.md

Lines changed: 4 additions & 0 deletions

@@ -310,6 +310,10 @@ the GPU.
 
 # Changelog
 
+- version 0.6.1
+  - added `vmap_method=` support for experimental pytorch-side batching,
+    see [https://github.com/rdyro/torch2jax/issues/28](https://github.com/rdyro/torch2jax/issues/28)
+
 - version 0.6.0
   - proper multi-GPU support mostly with `shard_map` but also via `jax.jit` automatic sharding
   - `shard_map` and automatic `jax.jit` device parallelization should work, but `pmap` doesn't work

docs/changelog.md

Lines changed: 4 additions & 0 deletions

@@ -1,5 +1,9 @@
 # Changelog
 
+- version 0.6.1
+  - added `vmap_method=` support for experimental pytorch-side batching,
+    see [https://github.com/rdyro/torch2jax/issues/28](https://github.com/rdyro/torch2jax/issues/28)
+
 - version 0.6.0
   - proper multi-GPU support mostly with `shard_map` but also via `jax.jit` automatic sharding
   - `shard_map` and automatic `jax.jit` device parallelization should work, but `pmap` doesn't work

docs/index.md

Lines changed: 4 additions & 0 deletions

@@ -310,6 +310,10 @@ the GPU.
 
 # Changelog
 
+- version 0.6.1
+  - added `vmap_method=` support for experimental pytorch-side batching,
+    see [https://github.com/rdyro/torch2jax/issues/28](https://github.com/rdyro/torch2jax/issues/28)
+
 - version 0.6.0
   - proper multi-GPU support mostly with `shard_map` but also via `jax.jit` automatic sharding
   - `shard_map` and automatic `jax.jit` device parallelization should work, but `pmap` doesn't work

pyproject.toml

Lines changed: 1 addition & 1 deletion

@@ -1,6 +1,6 @@
 [project]
 name = "torch2jax"
-version = "0.6.0"
+version = "0.6.1"
 authors = [
   { name="Robert Dyro", email="[email protected]" },
 ]

tests/test_vmap.py

Lines changed: 63 additions & 1 deletion

@@ -7,14 +7,15 @@
 import torch
 import jax
 from jax import numpy as jnp
+from jax import random
 from jax.scipy.linalg import cho_factor, cho_solve
 
 paths = [Path(__file__).absolute().parents[1], Path(__file__).absolute().parent]
 for path in paths:
     if str(path) not in sys.path:
         sys.path.append(str(path))
 
-from torch2jax import torch2jax_with_vjp  # noqa: E402
+from torch2jax import torch2jax, torch2jax_with_vjp  # noqa: E402
 from utils import jax_randn  # noqa: E402
 
 ####################################################################################################

@@ -45,6 +46,67 @@ def expected_fn(A, x):
         err = jnp.linalg.norm(sol - sol_expected) / jnp.linalg.norm(sol_expected)
         assert err < 1e-3
 
+    @parameterized.product(device=["cuda", "cpu"], dtype=[jnp.float32, jnp.float64])
+    def test_simple_vmap(self, device, dtype):
+        if device == "cuda" and not torch.cuda.is_available():
+            self.skipTest("Skipping CUDA tests when CUDA is not available")
+
+        device = jax.devices(device)[0]
+        keys = iter(random.split(random.key(17), 1024))
+
+        torch_counter = 0
+
+        def torch_fn(x):
+            nonlocal torch_counter
+            torch_counter += 1
+            print(f"torch_counter: {torch_counter}")
+            return 2 * x
+
+        x = jax_randn((1024,), device=device, dtype=jnp.float32)
+        X = jax_randn((572, 1024), dtype=jnp.float32, device=device)
+
+        # test sequential
+        fn = torch2jax(torch_fn, x, output_shapes=x, vmap_method="sequential")
+        current_counter_val = torch_counter
+        y = fn(x)
+        assert current_counter_val + 1 == torch_counter
+        err = jnp.linalg.norm(y - 2 * x, axis=None)
+        assert err < 1e-6
+
+        current_counter_val = torch_counter
+        Y = jax.vmap(fn)(X)
+        assert current_counter_val + X.shape[0] == torch_counter
+        err = jnp.linalg.norm(Y - 2 * X, axis=None)
+        assert err < 1e-6
+
+        # test broadcast_all
+        fn = torch2jax(torch_fn, x, output_shapes=x, vmap_method="broadcast_all")
+        current_counter_val = torch_counter
+        y = fn(x)
+        assert current_counter_val + 1 == torch_counter
+        err = jnp.linalg.norm(y - 2 * x, axis=None)
+        assert err < 1e-6
+
+        current_counter_val = torch_counter
+        Y = jax.vmap(fn)(X)
+        assert current_counter_val + 1 == torch_counter
+        err = jnp.linalg.norm(Y - 2 * X, axis=None)
+        assert err < 1e-6
+
+        # test expand_dims
+        fn = torch2jax(torch_fn, x, output_shapes=x, vmap_method="expand_dims")
+        current_counter_val = torch_counter
+        y = fn(x)
+        assert current_counter_val + 1 == torch_counter
+        err = jnp.linalg.norm(y - 2 * x, axis=None)
+        assert err < 1e-6
+
+        current_counter_val = torch_counter
+        Y = jax.vmap(fn)(X)
+        assert current_counter_val + 1 == torch_counter
+        err = jnp.linalg.norm(Y - 2 * X, axis=None)
+        assert err < 1e-6
+
 
 if __name__ == "__main__":
     absltest.main()

torch2jax/api.py

Lines changed: 22 additions & 5 deletions

@@ -24,10 +24,15 @@
 from .utils import find_unique_id, dtype_t2j, normalize_shapes, warn_once
 
 
-def _gen_ffi_call(outshapes):
+def _gen_ffi_call(outshapes, vmap_method: str):
     if signature(ffi.ffi_call).return_annotation.startswith("Callable"):
-        fn_ = ffi.ffi_call("torch_call", outshapes, vmap_method="sequential")
+        fn_ = ffi.ffi_call("torch_call", outshapes, vmap_method=vmap_method)
     else:
+        if vmap_method != "sequential":
+            raise ValueError(
+                f"You specified {vmap_method=}, but your jax version {jax.__version__} does not support the new style of"
+                " `vmap_method=` specification. Please upgrade your JAX version to use this feature."
+            )
         fn_ = lambda *args_flat, fn_id: ffi.ffi_call("torch_call", outshapes, *args_flat, vectorized=False, fn_id=fn_id)
     return fn_
 

@@ -37,6 +42,7 @@ def _torch2jax_flat(
     input_shapes: list[jax.Array | Tensor | ShapeDtypeStruct] = None,
     output_shapes: list[jax.Array | Tensor | ShapeDtypeStruct] = None,
     output_sharding_spec: PartitionSpec | None = None,
+    vmap_method: str = "sequential",
 ) -> Callable:
     """Define a jit-compatible JAX function that calls a PyTorch function. Flat
     arguments and outputs.

@@ -69,7 +75,7 @@ def torch_call_fn_(args: list[torch.Tensor]):
     @jax.jit
     def wrapped_flat_fn(*args_flat):
         nonlocal inshapes, outshapes
-        fn_ = _gen_ffi_call(outshapes)
+        fn_ = _gen_ffi_call(outshapes, vmap_method=vmap_method)
 
         if output_sharding_spec is None:
             fn_id = f"{id:d}"

@@ -114,7 +120,7 @@ def _map_outshape(outshape: jax.ShapeDtypeStruct, result_info, result_sharding):
             return jax.ShapeDtypeStruct(new_outshape, dtype=outshape.dtype)
 
         new_outshapes = jax.tree.map(_map_outshape, outshapes, result_info, result_sharding)
-        fn_part_ = _gen_ffi_call(new_outshapes)
+        fn_part_ = _gen_ffi_call(new_outshapes, vmap_method=vmap_method)
         return fn_part_(*args_flat, fn_id=fn_id)
 
     return mesh, _partitioned_fn_, result_sharding, args_sharding

@@ -133,6 +139,7 @@ def torch2jax(
     example_kw: Any | None = None,
     output_shapes: Any = None,
     output_sharding_spec: PartitionSpec | None = None,
+    vmap_method: str = "sequential",
 ) -> Callable:
     """Define a jit-compatible JAX function that calls a PyTorch function. Arbitrary nesting of
     arguments and outputs is supported.

@@ -143,6 +150,12 @@
         example_kw: Example keyword arguments. Defaults to None.
         output_shapes: Output shapes or shapes + dtype struct. Defaults to None.
         output_sharding_spec: jax.sharding.PartitionSpec specifying the sharding spec of the output, uses input mesh.
+        vmap_method: batching method, see
+            [https://docs.jax.dev/en/latest/ffi.html#batching-with-vmap](https://docs.jax.dev/en/latest/ffi.html#batching-with-vmap)
+
+            NOTE: only vmap_method="sequential" is supported non-experimentally
+
+            NOTE: try "expand_dims", "broadcast_all" if you want to experiment with pytorch-side batching
     Returns:
         Callable: JIT-compatible JAX function.
 

@@ -214,7 +227,11 @@ def flat_fn(*args_flat):
 
     # define the wrapped function using flat interface
     wrapped_fn_flat = _torch2jax_flat(
-        flat_fn, input_shapes=None, output_shapes=output_shapes, output_sharding_spec=output_sharding_spec_flat
+        flat_fn,
+        input_shapes=None,
+        output_shapes=output_shapes,
+        output_sharding_spec=output_sharding_spec_flat,
+        vmap_method=vmap_method,
    )
 
     # define the actual wrapper function
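
To make the new keyword concrete, here is a minimal usage sketch (not part of the commit; the PyTorch function, shapes, and variable names are illustrative). It mirrors the added test: with `vmap_method="broadcast_all"` the PyTorch function is invoked once on inputs carrying the batch dimension, so it must handle batched arrays itself, while the default `"sequential"` falls back to one PyTorch call per batch element.

```python
# Usage sketch only (assumed example, not from this commit): torch_fn, softplus,
# and the shapes below are illustrative; the torch2jax signature matches the one
# extended in this file.
import torch
import jax
from jax import numpy as jnp

from torch2jax import torch2jax

def torch_fn(x):
    # elementwise op, so an extra leading batch dimension is handled naturally
    return torch.nn.functional.softplus(x)

x = jnp.zeros(16)  # example input used only for shape/dtype inference
fn = torch2jax(torch_fn, x, output_shapes=x, vmap_method="broadcast_all")

X = jnp.ones((8, 16))   # a batch of 8 inputs
Y = jax.vmap(fn)(X)     # a single PyTorch call for the whole batch
print(Y.shape)          # (8, 16)
```

With `vmap_method="sequential"` the same `jax.vmap` call would instead invoke `torch_fn` eight times, which the new test verifies via a call counter.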

torch2jax/gradients.py

Lines changed: 14 additions & 1 deletion

@@ -27,6 +27,7 @@ def torch2jax_with_vjp(
     use_zeros: bool = True,
     use_torch_vjp: bool = True,
     output_sharding_spec: P | None = None,
+    vmap_method: str = "sequential",
 ) -> Callable:
     """Convert a torch function to a jax function and define a custom vjp rule for it up to `depth` recursively deep.
 

@@ -45,7 +46,12 @@
             library PyTorch code may need this fallback. Defaults to True (i.e., do not use fallback).
         output_sharding_spec: (not supported) sharding spec of the output, use shard_map instead for a device-local
             version of this function
+        vmap_method: batching method, see
+            [https://docs.jax.dev/en/latest/ffi.html#batching-with-vmap](https://docs.jax.dev/en/latest/ffi.html#batching-with-vmap)
 
+            NOTE: only vmap_method="sequential" is supported non-experimentally
+
+            NOTE: try "expand_dims", "broadcast_all" if you want to experiment with pytorch-side batching
     Returns:
         Callable: JIT-compatible JAX version of the torch function (VJP defined up to depth `depth`).
 

@@ -86,7 +92,13 @@
     if output_shapes is None:
         outputs = torch_fn(*example_args)
         output_shapes = tree_map(lambda x: ShapeDtypeStruct(dtype=dtype_t2j(x.dtype), shape=x.shape), outputs)
-    fn = torch2jax(torch_fn, *example_args, output_shapes=output_shapes, output_sharding_spec=output_sharding_spec)
+    fn = torch2jax(
+        torch_fn,
+        *example_args,
+        output_shapes=output_shapes,
+        output_sharding_spec=output_sharding_spec,
+        vmap_method=vmap_method,
+    )
 
     # if this we've reached the requested differentiation depth, refrain from defining a vjp rule ##
     if depth <= 0:

@@ -181,6 +193,7 @@ def bwd_fn_torch(args, gs):
         output_shapes=next_output_shapes,
         depth=depth - 1,
         use_torch_vjp=use_torch_vjp,
+        vmap_method=vmap_method,
     )
     # define the custom vjp using the fwd_fn and bwd_fn ############################################
     fn.defvjp(fwd_fn, bwd_fn)
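
A corresponding sketch for the gradient wrapper (again hypothetical, not from the commit): since `torch2jax_with_vjp` now forwards `vmap_method` to both the forward wrapper and the recursively defined backward function, an elementwise PyTorch op can be vmapped and differentiated together. Keeping reductions on the JAX side avoids shape surprises under `"broadcast_all"`.

```python
# Hypothetical sketch: torch_sin, the shapes, and the loss are illustrative only.
import torch
import jax
from jax import numpy as jnp

from torch2jax import torch2jax_with_vjp

def torch_sin(x):
    return torch.sin(x)  # elementwise, so batched inputs pass straight through

x_example = torch.randn(16)  # example argument for shape/dtype inference
jax_sin = torch2jax_with_vjp(torch_sin, x_example, vmap_method="broadcast_all")

def loss(x):
    return jax_sin(x).sum()  # reduce in JAX, not inside the PyTorch function

x = jnp.linspace(0.0, 1.0, 16)
g = jax.grad(loss)(x)                                # ~ cos(x) via the generated VJP rule
G = jax.vmap(jax.grad(loss))(jnp.stack([x, 2 * x]))  # batched gradients
print(g.shape, G.shape)                              # (16,) (2, 16)
```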
