Commit 86ba5d5

Authored by kamil-kaczmarek, with aslonnie, avibasnet31, dayshah, and sampan-s-nayak
[RLlib] Add tests for the Footsies environment (#55041)
## Why are these changes needed?

* Adds RLlib tests for Footsies: a multi-agent, self-play reinforcement learning environment for two players.

## Related issue number

n.a.

## Checks

- [x] I've signed off every commit (by using the -s flag, i.e., `git commit -s`) in this PR.
- [x] I've run `scripts/format.sh` to lint the changes in this PR.
- [x] I've included any doc changes needed for https://docs.ray.io/en/master/.
- [ ] I've added any new APIs to the API Reference. For example, if I added a method in Tune, I've added it in `doc/source/tune/api/` under the corresponding `.rst` file.
- [x] I've made sure the tests are passing. Note that there might be a few flaky tests; see the recent failures at https://flakey-tests.ray.io/
- Testing Strategy
  - [ ] Unit tests
  - [x] Release tests
  - [ ] This PR is not tested :(

Signed-off-by: Kamil Kaczmarek <[email protected]>
1 parent fd63d20 commit 86ba5d5

20 files changed: +2,262 −24 lines

doc/source/rllib/rllib-examples.rst

Lines changed: 5 additions & 0 deletions
@@ -363,6 +363,11 @@ Multi-agent RL
   Uses OpenSpiel to demonstrate league-based self-play, where agents play against various
   versions of themselves, frozen or in-training, to improve through competitive interaction.
 
+- `Self-play with Footsies and PPO algorithm <https://github.com/ray-project/ray/blob/master/rllib/tuned_examples/ppo/multi_agent_footsies_ppo.py>`__:
+  Implements self-play with the Footsies environment (a two-player zero-sum game).
+  This example highlights RLlib's ability to connect to external binaries running the game engine, as well as
+  to set up a multi-agent self-play training scenario.
+
 - `Self-play with OpenSpiel <https://github.com/ray-project/ray/blob/master/rllib/examples/multi_agent/self_play_with_open_spiel.py>`__:
   Similar to the league-based self-play, but simpler. This script leverages OpenSpiel for two-player games, allowing agents to improve
   through direct self-play without building a complex, structured league.
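For orientation, here is a minimal sketch of the kind of two-module self-play setup the new doc entry describes, written against RLlib's new API stack. The env ID "footsies", the module IDs, and the agent IDs "p0"/"p1" are illustrative stand-ins, not the names the example actually registers:

```python
from ray.rllib.algorithms.ppo import PPOConfig

config = (
    PPOConfig()
    # "footsies" is a placeholder; the real example registers its own env ID.
    .environment("footsies")
    .multi_agent(
        policies={"main", "frozen_opponent"},
        # Agent "p0" maps to the learning module, "p1" to a frozen snapshot.
        policy_mapping_fn=lambda agent_id, episode, **kwargs: (
            "main" if agent_id == "p0" else "frozen_opponent"
        ),
        # Only "main" keeps learning; the opponent stays frozen.
        policies_to_train=["main"],
    )
)
```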

pyproject.toml

Lines changed: 2 additions & 0 deletions
@@ -10,6 +10,8 @@ extend-exclude = [
     "python/build/",
     "python/ray/workflow/tests/mock_server.py",
     "python/ray/serve/tests/test_config_files/syntax_error.py",
+    "rllib/examples/envs/classes/multi_agent/footsies/game/proto/footsies_service_pb2.py",
+    "rllib/examples/envs/classes/multi_agent/footsies/game/proto/footsies_service_pb2_grpc.py",
 ]
 
 [tool.ruff.lint]
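The two excluded files are machine-generated gRPC stubs, which is why they are kept out of linting. As a hedged aside, stubs named `*_pb2.py`/`*_pb2_grpc.py` are typically produced with `grpcio-tools`; a sketch, assuming a `footsies_service.proto` in the working directory (the actual proto path in the repo may differ):

```python
# Regenerate gRPC stubs from a .proto definition via grpcio-tools.
# The proto filename and paths are assumptions based on the generated names.
from grpc_tools import protoc

exit_code = protoc.main([
    "grpc_tools.protoc",
    "-I.",                  # proto search path
    "--python_out=.",       # emits footsies_service_pb2.py
    "--grpc_python_out=.",  # emits footsies_service_pb2_grpc.py
    "footsies_service.proto",
])
assert exit_code == 0, "protoc failed"
```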

rllib/BUILD

Lines changed: 117 additions & 20 deletions
@@ -1538,6 +1538,86 @@ py_test(
     ],
 )
 
+# Footsies
+py_test(
+    name = "learning_tests_multi_agent_footsies_ppo",
+    size = "large",
+    srcs = ["tuned_examples/ppo/multi_agent_footsies_ppo.py"],
+    args = [
+        "--as-test",
+        "--num-env-runners=6",
+        "--evaluation-num-env-runners=2",
+    ],
+    main = "tuned_examples/ppo/multi_agent_footsies_ppo.py",
+    tags = [
+        "exclusive",
+        "learning_tests",
+        "learning_tests_discrete",
+        "team:rllib",
+    ],
+)
+
+py_test(
+    name = "learning_tests_multi_agent_footsies_ppo_gpu",
+    size = "large",
+    srcs = ["tuned_examples/ppo/multi_agent_footsies_ppo.py"],
+    args = [
+        "--as-test",
+        "--num-env-runners=20",
+        "--evaluation-num-env-runners=3",
+        "--num-learners=1",
+        "--num-gpus-per-learner=1",
+    ],
+    main = "tuned_examples/ppo/multi_agent_footsies_ppo.py",
+    tags = [
+        "exclusive",
+        "learning_tests",
+        "learning_tests_discrete",
+        "multi_gpu",
+        "team:rllib",
+    ],
+)
+
+py_test(
+    name = "learning_tests_multi_agent_footsies_ppo_multi_cpu",
+    size = "large",
+    srcs = ["tuned_examples/ppo/multi_agent_footsies_ppo.py"],
+    args = [
+        "--as-test",
+        "--num-env-runners=6",
+        "--evaluation-num-env-runners=2",
+        "--num-learners=2",
+    ],
+    main = "tuned_examples/ppo/multi_agent_footsies_ppo.py",
+    tags = [
+        "exclusive",
+        "learning_tests",
+        "learning_tests_discrete",
+        "team:rllib",
+    ],
+)
+
+py_test(
+    name = "learning_tests_multi_agent_footsies_ppo_multi_gpu",
+    size = "large",
+    srcs = ["tuned_examples/ppo/multi_agent_footsies_ppo.py"],
+    args = [
+        "--as-test",
+        "--num-env-runners=20",
+        "--evaluation-num-env-runners=3",
+        "--num-learners=2",
+        "--num-gpus-per-learner=1",
+    ],
+    main = "tuned_examples/ppo/multi_agent_footsies_ppo.py",
+    tags = [
+        "exclusive",
+        "learning_tests",
+        "learning_tests_discrete",
+        "multi_gpu",
+        "team:rllib",
+    ],
+)
+
 # Pendulum
 py_test(
     name = "learning_tests_pendulum_ppo",
@@ -4084,14 +4164,14 @@ py_test(
 # subdirectory: envs/
 # ....................................
 py_test(
-    name = "examples/envs/agents_act_simultaneously",
+    name = "examples/envs/agents_act_in_sequence",
     size = "medium",
-    srcs = ["examples/envs/agents_act_simultaneously.py"],
+    srcs = ["examples/envs/agents_act_in_sequence.py"],
     args = [
         "--num-agents=2",
         "--stop-iters=3",
     ],
-    main = "examples/envs/agents_act_simultaneously.py",
+    main = "examples/envs/agents_act_in_sequence.py",
     tags = [
         "examples",
         "exclusive",
@@ -4100,14 +4180,14 @@ py_test(
 )
 
 py_test(
-    name = "examples/envs/agents_act_in_sequence",
+    name = "examples/envs/agents_act_simultaneously",
     size = "medium",
-    srcs = ["examples/envs/agents_act_in_sequence.py"],
+    srcs = ["examples/envs/agents_act_simultaneously.py"],
     args = [
         "--num-agents=2",
         "--stop-iters=3",
     ],
-    main = "examples/envs/agents_act_in_sequence.py",
+    main = "examples/envs/agents_act_simultaneously.py",
     tags = [
         "examples",
         "exclusive",
@@ -5014,13 +5094,34 @@ py_test(
 )
 
 py_test(
-    name = "examples/multi_agent/shared_encoder_cartpole",
-    size = "medium",
-    srcs = ["examples/multi_agent/shared_encoder_cartpole.py"],
+    name = "examples/multi_agent/self_play_footsies",
+    size = "large",
+    srcs = ["examples/multi_agent/self_play_footsies.py"],
     args = [
-        "--stop-iter=10",
+        "--as-test",
+        "--num-cpus=4",
     ],
-    main = "examples/multi_agent/shared_encoder_cartpole.py",
+    main = "examples/multi_agent/self_play_footsies.py",
+    tags = [
+        "examples",
+        "examples_use_all_core",
+        "exclusive",
+        "team:rllib",
+    ],
+)
+
+py_test(
+    name = "examples/multi_agent/self_play_league_based_with_open_spiel_connect_4_ppo_torch",
+    size = "large",
+    srcs = ["examples/multi_agent/self_play_league_based_with_open_spiel.py"],
+    args = [
+        "--framework=torch",
+        "--env=connect_four",
+        "--win-rate-threshold=0.8",
+        "--num-episodes-human-play=0",
+        "--min-league-size=8",
+    ],
+    main = "examples/multi_agent/self_play_league_based_with_open_spiel.py",
     tags = [
         "examples",
         "exclusive",
@@ -5090,17 +5191,13 @@ py_test(
 )
 
 py_test(
-    name = "examples/multi_agent/self_play_league_based_with_open_spiel_connect_4_ppo_torch",
-    size = "large",
-    srcs = ["examples/multi_agent/self_play_league_based_with_open_spiel.py"],
+    name = "examples/multi_agent/shared_encoder_cartpole",
+    size = "medium",
+    srcs = ["examples/multi_agent/shared_encoder_cartpole.py"],
     args = [
-        "--framework=torch",
-        "--env=connect_four",
-        "--win-rate-threshold=0.8",
-        "--num-episodes-human-play=0",
-        "--min-league-size=8",
+        "--stop-iter=10",
     ],
-    main = "examples/multi_agent/self_play_league_based_with_open_spiel.py",
+    main = "examples/multi_agent/shared_encoder_cartpole.py",
     tags = [
         "examples",
         "exclusive",

rllib/algorithms/algorithm.py

Lines changed: 4 additions & 4 deletions
@@ -2211,11 +2211,11 @@ def add_module(
             EnvRunnerGroup (with its o EnvRunners plus the local one).
 
         Returns:
-            The new MultiAgentRLModuleSpec (after the RLModule has been added).
+            The new MultiRLModuleSpec (after the RLModule has been added).
         """
         validate_module_id(module_id, error=True)
 
-        # The to-be-returned new MultiAgentRLModuleSpec.
+        # The to-be-returned new MultiRLModuleSpec.
         multi_rl_module_spec = None
 
         if not self.config.is_multi_agent:
@@ -2337,9 +2337,9 @@ def remove_module(
             EnvRunnerGroup (with its o EnvRunners plus the local one).
 
         Returns:
-            The new MultiAgentRLModuleSpec (after the RLModule has been removed).
+            The new MultiRLModuleSpec (after the RLModule has been removed).
         """
-        # The to-be-returned new MultiAgentRLModuleSpec.
+        # The to-be-returned new MultiRLModuleSpec.
         multi_rl_module_spec = None
 
         # Remove RLModule from the LearnerGroup.
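Both docstrings now name the actual return type, `MultiRLModuleSpec`. A hypothetical usage sketch, assuming `algo` is an already built multi-agent `Algorithm`; the module ID and the empty spec are placeholders:

```python
from ray.rllib.core.rl_module.rl_module import RLModuleSpec

# Add a new RLModule (e.g., a fresh opponent snapshot) at runtime.
# Assumes `algo` is a built multi-agent Algorithm instance.
multi_spec = algo.add_module(
    module_id="opponent_snapshot",  # illustrative ID
    module_spec=RLModuleSpec(),     # fill in module class/spaces as needed
)
# `multi_spec` is the updated MultiRLModuleSpec, per the docstring above.

# Removing the module likewise returns the new MultiRLModuleSpec.
multi_spec = algo.remove_module(module_id="opponent_snapshot")
```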

rllib/algorithms/algorithm_config.py

Lines changed: 1 addition & 0 deletions
@@ -143,6 +143,7 @@ def DEFAULT_AGENT_TO_MODULE_MAPPING_FN(agent_id, episode):
         # Map any agent ID to "default_policy".
         return DEFAULT_MODULE_ID
 
+    # @OldAPIStack
     # TODO (sven): Deprecate in new API stack.
     @staticmethod
     def DEFAULT_POLICY_MAPPING_FN(aid, episode, worker, **kwargs):
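For contrast with the now-annotated old-API-stack default above, a minimal sketch of a custom agent-to-module mapping function on the new API stack; the agent and module IDs are invented:

```python
from ray.rllib.core import DEFAULT_MODULE_ID

def agent_to_module_mapping_fn(agent_id, episode, **kwargs):
    # Route one named agent to a dedicated module, everyone else to default.
    return "main" if agent_id == "player_0" else DEFAULT_MODULE_ID

# Plugged in via: config.multi_agent(policy_mapping_fn=agent_to_module_mapping_fn)
```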
rllib/examples/envs/classes/multi_agent/footsies/README.md

Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,10 @@
+# Footsies Environment
+
+This environment implementation is based on the [FootsiesGym project](https://github.com/chasemcd/FootsiesGym),
+specifically the version as of **July 28, 2025**.
+
+## Notes
+
+All examples in the RLlib documentation that use the Footsies environment are self-contained.
+This means that you do not need to install anything from the FootsiesGym repository or elsewhere.
+The examples handle the game binary automatically (downloading, extracting, starting, stopping, etc.).
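The README's claim that examples manage the game binary themselves suggests a download/extract/start/stop lifecycle. A hypothetical sketch of such a lifecycle; the URL, paths, executable name, and port are all invented for illustration, and the real example code defines its own:

```python
import subprocess
import urllib.request
import zipfile
from pathlib import Path

BINARY_URL = "https://example.com/footsies_binary.zip"  # placeholder URL
WORK_DIR = Path("/tmp/footsies")

def ensure_binary() -> Path:
    """Download and extract the game binary once; reuse it afterwards."""
    WORK_DIR.mkdir(parents=True, exist_ok=True)
    archive = WORK_DIR / "footsies.zip"
    if not archive.exists():
        urllib.request.urlretrieve(BINARY_URL, archive)
        with zipfile.ZipFile(archive) as zf:
            zf.extractall(WORK_DIR)
    return WORK_DIR / "footsies.x86_64"  # assumed executable name

def start_game_server(port: int) -> subprocess.Popen:
    """Start the game engine as a child process; the caller stops it later."""
    return subprocess.Popen([str(ensure_binary()), "--port", str(port)])

if __name__ == "__main__":
    proc = start_game_server(port=50051)  # assumed gRPC-style port
    try:
        pass  # ... connect an env client and run episodes here ...
    finally:
        proc.terminate()
        proc.wait()
```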

rllib/examples/envs/classes/multi_agent/footsies/__init__.py

Whitespace-only changes.
