
Commit b396c75

Merge branch 'master' into elliot-barn/raydepsets-verify-lock-files
2 parents: c729c42 + b6ab542

33 files changed: 2,397 additions & 52 deletions

doc/source/rllib/rllib-examples.rst

Lines changed: 5 additions & 0 deletions
@@ -363,6 +363,11 @@ Multi-agent RL
   Uses OpenSpiel to demonstrate league-based self-play, where agents play against various
   versions of themselves, frozen or in-training, to improve through competitive interaction.
 
+- `Self-play with Footsies and PPO algorithm <https://github.com/ray-project/ray/blob/master/rllib/tuned_examples/ppo/multi_agent_footsies_ppo.py>`__:
+  Implements self-play with the Footsies environment, a two-player zero-sum game.
+  This example highlights RLlib's ability to connect to external binaries running the game engine,
+  as well as to set up a multi-agent self-play training scenario.
+
 - `Self-play with OpenSpiel <https://github.com/ray-project/ray/blob/master/rllib/examples/multi_agent/self_play_with_open_spiel.py>`__:
   Similar to the league-based self-play, but simpler. This script leverages OpenSpiel for two-player games, allowing agents to improve
   through direct self-play without building a complex, structured league.
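For orientation, here is a minimal sketch of how a two-player self-play setup is typically expressed with RLlib's multi-agent API. The registered env name, agent IDs, and module IDs below are illustrative assumptions, not the actual settings of multi_agent_footsies_ppo.py.

```python
# Illustrative sketch only: "footsies" as a registered env name and the
# agent/module IDs are assumptions, not values from the tuned example.
from ray.rllib.algorithms.ppo import PPOConfig

config = (
    PPOConfig()
    .environment("footsies")  # hypothetical registered two-player env
    .multi_agent(
        # One learning module plus a frozen opponent it plays against.
        policies={"main", "frozen_opponent"},
        policy_mapping_fn=lambda agent_id, episode, **kwargs: (
            "main" if agent_id == "p0" else "frozen_opponent"
        ),
        # Only "main" trains; a self-play callback would periodically
        # copy "main"'s weights into "frozen_opponent".
        policies_to_train=["main"],
    )
)
```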

pyproject.toml

Lines changed: 2 additions & 0 deletions
@@ -10,6 +10,8 @@ extend-exclude = [
     "python/build/",
     "python/ray/workflow/tests/mock_server.py",
     "python/ray/serve/tests/test_config_files/syntax_error.py",
+    "rllib/examples/envs/classes/multi_agent/footsies/game/proto/footsies_service_pb2.py",
+    "rllib/examples/envs/classes/multi_agent/footsies/game/proto/footsies_service_pb2_grpc.py",
 ]
 
 [tool.ruff.lint]

python/ray/dashboard/modules/reporter/gpu_profile_manager.py

Lines changed: 1 addition & 1 deletion
@@ -106,7 +106,7 @@ def node_has_gpus(cls) -> bool:
         try:
             subprocess.check_output(["nvidia-smi"], stderr=subprocess.DEVNULL)
             return True
-        except (subprocess.CalledProcessError, FileNotFoundError):
+        except Exception:
             return False
 
     @classmethod
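The widened handler presumably treats any failure of the probe as "no GPUs", covering cases beyond a missing or failing binary (for example, a PermissionError when the file exists but is not executable). A standalone sketch of the resulting pattern:

```python
# Standalone sketch of the detection pattern after this change: any
# exception raised by the `nvidia-smi` probe is interpreted as "no GPUs".
import subprocess

def node_has_gpus() -> bool:
    try:
        subprocess.check_output(["nvidia-smi"], stderr=subprocess.DEVNULL)
        return True
    except Exception:
        return False

print(node_has_gpus())
```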

python/ray/data/BUILD

Lines changed: 1 addition & 0 deletions
@@ -383,6 +383,7 @@ py_test(
     size = "enormous",
     srcs = ["tests/test_groupby_e2e.py"],
     tags = [
+        "data_non_parallel",
         "exclusive",
         "team:data",
     ],

python/ray/train/v2/tests/test_local_mode.py

Lines changed: 4 additions & 0 deletions
@@ -253,6 +253,10 @@ def train_loop():
     assert "val_loss" in results.metrics
 
 
+@pytest.mark.skipif(
+    sys.version_info >= (3, 12),
+    reason="Tensorflow is not installed for Python 3.12 because of keras compatibility.",
+)
 def test_tensorflow_linear_local_mode(ray_start_4_cpus):
     """Also tests air Keras callback."""
     epochs = 1
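As an aside, when several tests share the same gate, pytest lets you define the marker once and reuse it. A generic sketch (the marker name, test, and reason string are illustrative, not from Ray's suite):

```python
# Generic pytest pattern: define a version gate once, reuse it anywhere.
import sys

import pytest

requires_tf = pytest.mark.skipif(
    sys.version_info >= (3, 12),
    reason="TensorFlow is unavailable on Python >= 3.12 in this setup.",
)

@requires_tf
def test_keras_callback():
    tf = pytest.importorskip("tensorflow")  # extra guard if TF is absent
    assert callable(tf.keras.callbacks.Callback)
```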

python/ray/tune/tests/test_commands.py

Lines changed: 1 addition & 1 deletion
@@ -73,7 +73,7 @@ def train_fn(config):
         times += [time.time() - start]
 
     print("Average CLI time: ", sum(times) / len(times))
-    assert sum(times) / len(times) < 2, "CLI is taking too long!"
+    assert sum(times) / len(times) < 5, "CLI is taking too long!"
 
 
 @mock.patch(

rllib/BUILD

Lines changed: 117 additions & 20 deletions
@@ -1538,6 +1538,86 @@ py_test(
     ],
 )
 
+# Footsies
+py_test(
+    name = "learning_tests_multi_agent_footsies_ppo",
+    size = "large",
+    srcs = ["tuned_examples/ppo/multi_agent_footsies_ppo.py"],
+    args = [
+        "--as-test",
+        "--num-env-runners=6",
+        "--evaluation-num-env-runners=2",
+    ],
+    main = "tuned_examples/ppo/multi_agent_footsies_ppo.py",
+    tags = [
+        "exclusive",
+        "learning_tests",
+        "learning_tests_discrete",
+        "team:rllib",
+    ],
+)
+
+py_test(
+    name = "learning_tests_multi_agent_footsies_ppo_gpu",
+    size = "large",
+    srcs = ["tuned_examples/ppo/multi_agent_footsies_ppo.py"],
+    args = [
+        "--as-test",
+        "--num-env-runners=20",
+        "--evaluation-num-env-runners=3",
+        "--num-learners=1",
+        "--num-gpus-per-learner=1",
+    ],
+    main = "tuned_examples/ppo/multi_agent_footsies_ppo.py",
+    tags = [
+        "exclusive",
+        "learning_tests",
+        "learning_tests_discrete",
+        "multi_gpu",
+        "team:rllib",
+    ],
+)
+
+py_test(
+    name = "learning_tests_multi_agent_footsies_ppo_multi_cpu",
+    size = "large",
+    srcs = ["tuned_examples/ppo/multi_agent_footsies_ppo.py"],
+    args = [
+        "--as-test",
+        "--num-env-runners=6",
+        "--evaluation-num-env-runners=2",
+        "--num-learners=2",
+    ],
+    main = "tuned_examples/ppo/multi_agent_footsies_ppo.py",
+    tags = [
+        "exclusive",
+        "learning_tests",
+        "learning_tests_discrete",
+        "team:rllib",
+    ],
+)
+
+py_test(
+    name = "learning_tests_multi_agent_footsies_ppo_multi_gpu",
+    size = "large",
+    srcs = ["tuned_examples/ppo/multi_agent_footsies_ppo.py"],
+    args = [
+        "--as-test",
+        "--num-env-runners=20",
+        "--evaluation-num-env-runners=3",
+        "--num-learners=2",
+        "--num-gpus-per-learner=1",
+    ],
+    main = "tuned_examples/ppo/multi_agent_footsies_ppo.py",
+    tags = [
+        "exclusive",
+        "learning_tests",
+        "learning_tests_discrete",
+        "multi_gpu",
+        "team:rllib",
+    ],
+)
+
 # Pendulum
 py_test(
     name = "learning_tests_pendulum_ppo",
@@ -4084,14 +4164,14 @@ py_test(
 # subdirectory: envs/
 # ....................................
 py_test(
-    name = "examples/envs/agents_act_simultaneously",
+    name = "examples/envs/agents_act_in_sequence",
     size = "medium",
-    srcs = ["examples/envs/agents_act_simultaneously.py"],
+    srcs = ["examples/envs/agents_act_in_sequence.py"],
     args = [
         "--num-agents=2",
         "--stop-iters=3",
     ],
-    main = "examples/envs/agents_act_simultaneously.py",
+    main = "examples/envs/agents_act_in_sequence.py",
     tags = [
         "examples",
         "exclusive",
@@ -4100,14 +4180,14 @@ py_test(
 )
 
 py_test(
-    name = "examples/envs/agents_act_in_sequence",
+    name = "examples/envs/agents_act_simultaneously",
     size = "medium",
-    srcs = ["examples/envs/agents_act_in_sequence.py"],
+    srcs = ["examples/envs/agents_act_simultaneously.py"],
     args = [
         "--num-agents=2",
         "--stop-iters=3",
     ],
-    main = "examples/envs/agents_act_in_sequence.py",
+    main = "examples/envs/agents_act_simultaneously.py",
     tags = [
         "examples",
         "exclusive",
@@ -5014,13 +5094,34 @@ py_test(
 )
 
 py_test(
-    name = "examples/multi_agent/shared_encoder_cartpole",
-    size = "medium",
-    srcs = ["examples/multi_agent/shared_encoder_cartpole.py"],
+    name = "examples/multi_agent/self_play_footsies",
+    size = "large",
+    srcs = ["examples/multi_agent/self_play_footsies.py"],
     args = [
-        "--stop-iter=10",
+        "--as-test",
+        "--num-cpus=4",
     ],
-    main = "examples/multi_agent/shared_encoder_cartpole.py",
+    main = "examples/multi_agent/self_play_footsies.py",
+    tags = [
+        "examples",
+        "examples_use_all_core",
+        "exclusive",
+        "team:rllib",
+    ],
+)
+
+py_test(
+    name = "examples/multi_agent/self_play_league_based_with_open_spiel_connect_4_ppo_torch",
+    size = "large",
+    srcs = ["examples/multi_agent/self_play_league_based_with_open_spiel.py"],
+    args = [
+        "--framework=torch",
+        "--env=connect_four",
+        "--win-rate-threshold=0.8",
+        "--num-episodes-human-play=0",
+        "--min-league-size=8",
+    ],
+    main = "examples/multi_agent/self_play_league_based_with_open_spiel.py",
     tags = [
         "examples",
         "exclusive",
@@ -5090,17 +5191,13 @@ py_test(
 )
 
 py_test(
-    name = "examples/multi_agent/self_play_league_based_with_open_spiel_connect_4_ppo_torch",
-    size = "large",
-    srcs = ["examples/multi_agent/self_play_league_based_with_open_spiel.py"],
+    name = "examples/multi_agent/shared_encoder_cartpole",
+    size = "medium",
+    srcs = ["examples/multi_agent/shared_encoder_cartpole.py"],
     args = [
-        "--framework=torch",
-        "--env=connect_four",
-        "--win-rate-threshold=0.8",
-        "--num-episodes-human-play=0",
-        "--min-league-size=8",
+        "--stop-iter=10",
     ],
-    main = "examples/multi_agent/self_play_league_based_with_open_spiel.py",
+    main = "examples/multi_agent/shared_encoder_cartpole.py",
     tags = [
         "examples",
         "exclusive",

rllib/algorithms/algorithm.py

Lines changed: 4 additions & 4 deletions
@@ -2211,11 +2211,11 @@ def add_module(
                 EnvRunnerGroup (with its o EnvRunners plus the local one).
 
         Returns:
-            The new MultiAgentRLModuleSpec (after the RLModule has been added).
+            The new MultiRLModuleSpec (after the RLModule has been added).
         """
         validate_module_id(module_id, error=True)
 
-        # The to-be-returned new MultiAgentRLModuleSpec.
+        # The to-be-returned new MultiRLModuleSpec.
        multi_rl_module_spec = None
 
         if not self.config.is_multi_agent:
@@ -2337,9 +2337,9 @@ def remove_module(
                 EnvRunnerGroup (with its o EnvRunners plus the local one).
 
         Returns:
-            The new MultiAgentRLModuleSpec (after the RLModule has been removed).
+            The new MultiRLModuleSpec (after the RLModule has been removed).
         """
-        # The to-be-returned new MultiAgentRLModuleSpec.
+        # The to-be-returned new MultiRLModuleSpec.
         multi_rl_module_spec = None
 
         # Remove RLModule from the LearnerGroup.

rllib/algorithms/algorithm_config.py

Lines changed: 1 addition & 0 deletions
@@ -143,6 +143,7 @@ def DEFAULT_AGENT_TO_MODULE_MAPPING_FN(agent_id, episode):
         # Map any agent ID to "default_policy".
         return DEFAULT_MODULE_ID
 
+    # @OldAPIStack
     # TODO (sven): Deprecate in new API stack.
     @staticmethod
     def DEFAULT_POLICY_MAPPING_FN(aid, episode, worker, **kwargs):
Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,10 @@
+# Footsies Environment
+
+This environment implementation is based on the [FootsiesGym project](https://github.com/chasemcd/FootsiesGym),
+specifically the version as of **July 28, 2025**.
+
+## Notes
+
+All examples in the RLlib documentation that use the Footsies environment are self-contained.
+This means that you do not need to install anything from the FootsiesGym repository or elsewhere.
+The examples handle the game binary automatically (downloading, extracting, starting, stopping, etc.).
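A rough sketch of what "handling the binary automatically" can look like; the URL, cache paths, port, and helper name below are placeholders, not the actual RLlib helpers or FootsiesGym artifacts:

```python
# Placeholder sketch: the URL, paths, and port are illustrative
# assumptions, not the real download location or server flags.
import subprocess
import tarfile
import urllib.request
from pathlib import Path

BINARY_URL = "https://example.com/footsies_server.tar.gz"  # placeholder
CACHE_DIR = Path.home() / ".cache" / "footsies"

def ensure_binary() -> Path:
    """Download and extract the game server once, then reuse the cache."""
    binary = CACHE_DIR / "footsies_server"
    if not binary.exists():
        CACHE_DIR.mkdir(parents=True, exist_ok=True)
        archive = CACHE_DIR / "footsies_server.tar.gz"
        urllib.request.urlretrieve(BINARY_URL, archive)
        with tarfile.open(archive) as tar:
            tar.extractall(CACHE_DIR)
    return binary

# Start the engine before training and stop it afterwards.
proc = subprocess.Popen([str(ensure_binary()), "--port", "50051"])
try:
    pass  # connect the RLlib env to the engine and run training here
finally:
    proc.terminate()
    proc.wait()
```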
