
Commit b396c75

Merge branch 'master' into elliot-barn/raydepsets-verify-lock-files
2 parents: c729c42 + b6ab542

33 files changed: 2,397 additions & 52 deletions

doc/source/rllib/rllib-examples.rst

Lines changed: 5 additions & 0 deletions
@@ -363,6 +363,11 @@ Multi-agent RL
   Uses OpenSpiel to demonstrate league-based self-play, where agents play against various
   versions of themselves, frozen or in-training, to improve through competitive interaction.
 
+- `Self-play with Footsies and PPO algorithm <https://github.com/ray-project/ray/blob/master/rllib/tuned_examples/ppo/multi_agent_footsies_ppo.py>`__:
+  Implements self-play with the Footsies environment, a two-player zero-sum game.
+  This example highlights RLlib's ability to connect to external binaries running the game engine,
+  as well as to set up a multi-agent self-play training scenario.
+
 - `Self-play with OpenSpiel <https://github.com/ray-project/ray/blob/master/rllib/examples/multi_agent/self_play_with_open_spiel.py>`__:
   Similar to the league-based self-play, but simpler. This script leverages OpenSpiel for two-player games, allowing agents to improve
   through direct self-play without building a complex, structured league.
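For orientation, here is a minimal sketch of how a two-player self-play setup is typically expressed with RLlib's multi-agent API. The registered env name, agent IDs, and module IDs below are illustrative assumptions, not the actual settings of multi_agent_footsies_ppo.py.

```python
# Illustrative sketch only: "footsies" as a registered env name and the
# agent/module IDs are assumptions, not values from the tuned example.
from ray.rllib.algorithms.ppo import PPOConfig

config = (
    PPOConfig()
    .environment("footsies")  # hypothetical registered two-player env
    .multi_agent(
        # One learning module plus a frozen opponent it plays against.
        policies={"main", "frozen_opponent"},
        policy_mapping_fn=lambda agent_id, episode, **kwargs: (
            "main" if agent_id == "p0" else "frozen_opponent"
        ),
        # Only "main" trains; a self-play callback would periodically
        # copy "main"'s weights into "frozen_opponent".
        policies_to_train=["main"],
    )
)
```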

pyproject.toml

Lines changed: 2 additions & 0 deletions
@@ -10,6 +10,8 @@ extend-exclude = [
     "python/build/",
     "python/ray/workflow/tests/mock_server.py",
     "python/ray/serve/tests/test_config_files/syntax_error.py",
+    "rllib/examples/envs/classes/multi_agent/footsies/game/proto/footsies_service_pb2.py",
+    "rllib/examples/envs/classes/multi_agent/footsies/game/proto/footsies_service_pb2_grpc.py",
 ]
 
 [tool.ruff.lint]

python/ray/dashboard/modules/reporter/gpu_profile_manager.py

Lines changed: 1 addition & 1 deletion
@@ -106,7 +106,7 @@ def node_has_gpus(cls) -> bool:
         try:
             subprocess.check_output(["nvidia-smi"], stderr=subprocess.DEVNULL)
             return True
-        except (subprocess.CalledProcessError, FileNotFoundError):
+        except Exception:
             return False
 
     @classmethod
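The widened handler presumably treats any failure of the probe as "no GPUs", covering cases beyond a missing or failing binary (for example, a PermissionError when the file exists but is not executable). A standalone sketch of the resulting pattern:

```python
# Standalone sketch of the detection pattern after this change: any
# exception raised by the `nvidia-smi` probe is interpreted as "no GPUs".
import subprocess

def node_has_gpus() -> bool:
    try:
        subprocess.check_output(["nvidia-smi"], stderr=subprocess.DEVNULL)
        return True
    except Exception:
        return False

print(node_has_gpus())
```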

python/ray/data/BUILD

Lines changed: 1 addition & 0 deletions
@@ -383,6 +383,7 @@ py_test(
     size = "enormous",
     srcs = ["tests/test_groupby_e2e.py"],
     tags = [
+        "data_non_parallel",
         "exclusive",
         "team:data",
     ],

python/ray/train/v2/tests/test_local_mode.py

Lines changed: 4 additions & 0 deletions
@@ -253,6 +253,10 @@ def train_loop():
     assert "val_loss" in results.metrics
 
 
+@pytest.mark.skipif(
+    sys.version_info >= (3, 12),
+    reason="Tensorflow is not installed for Python 3.12 because of keras compatibility.",
+)
 def test_tensorflow_linear_local_mode(ray_start_4_cpus):
     """Also tests air Keras callback."""
     epochs = 1
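As an aside, when several tests share the same gate, pytest lets you define the marker once and reuse it. A generic sketch (the marker name, test, and reason string are illustrative, not from Ray's suite):

```python
# Generic pytest pattern: define a version gate once, reuse it anywhere.
import sys

import pytest

requires_tf = pytest.mark.skipif(
    sys.version_info >= (3, 12),
    reason="TensorFlow is unavailable on Python >= 3.12 in this setup.",
)

@requires_tf
def test_keras_callback():
    tf = pytest.importorskip("tensorflow")  # extra guard if TF is absent
    assert callable(tf.keras.callbacks.Callback)
```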

python/ray/tune/tests/test_commands.py

Lines changed: 1 addition & 1 deletion
@@ -73,7 +73,7 @@ def train_fn(config):
         times += [time.time() - start]
 
     print("Average CLI time: ", sum(times) / len(times))
-    assert sum(times) / len(times) < 2, "CLI is taking too long!"
+    assert sum(times) / len(times) < 5, "CLI is taking too long!"
 
 
 @mock.patch(

rllib/BUILD

Lines changed: 117 additions & 20 deletions
@@ -1538,6 +1538,86 @@ py_test(
     ],
 )
 
+# Footsies
+py_test(
+    name = "learning_tests_multi_agent_footsies_ppo",
+    size = "large",
+    srcs = ["tuned_examples/ppo/multi_agent_footsies_ppo.py"],
+    args = [
+        "--as-test",
+        "--num-env-runners=6",
+        "--evaluation-num-env-runners=2",
+    ],
+    main = "tuned_examples/ppo/multi_agent_footsies_ppo.py",
+    tags = [
+        "exclusive",
+        "learning_tests",
+        "learning_tests_discrete",
+        "team:rllib",
+    ],
+)
+
+py_test(
+    name = "learning_tests_multi_agent_footsies_ppo_gpu",
+    size = "large",
+    srcs = ["tuned_examples/ppo/multi_agent_footsies_ppo.py"],
+    args = [
+        "--as-test",
+        "--num-env-runners=20",
+        "--evaluation-num-env-runners=3",
+        "--num-learners=1",
+        "--num-gpus-per-learner=1",
+    ],
+    main = "tuned_examples/ppo/multi_agent_footsies_ppo.py",
+    tags = [
+        "exclusive",
+        "learning_tests",
+        "learning_tests_discrete",
+        "multi_gpu",
+        "team:rllib",
+    ],
+)
+
+py_test(
+    name = "learning_tests_multi_agent_footsies_ppo_multi_cpu",
+    size = "large",
+    srcs = ["tuned_examples/ppo/multi_agent_footsies_ppo.py"],
+    args = [
+        "--as-test",
+        "--num-env-runners=6",
+        "--evaluation-num-env-runners=2",
+        "--num-learners=2",
+    ],
+    main = "tuned_examples/ppo/multi_agent_footsies_ppo.py",
+    tags = [
+        "exclusive",
+        "learning_tests",
+        "learning_tests_discrete",
+        "team:rllib",
+    ],
+)
+
+py_test(
+    name = "learning_tests_multi_agent_footsies_ppo_multi_gpu",
+    size = "large",
+    srcs = ["tuned_examples/ppo/multi_agent_footsies_ppo.py"],
+    args = [
+        "--as-test",
+        "--num-env-runners=20",
+        "--evaluation-num-env-runners=3",
+        "--num-learners=2",
+        "--num-gpus-per-learner=1",
+    ],
+    main = "tuned_examples/ppo/multi_agent_footsies_ppo.py",
+    tags = [
+        "exclusive",
+        "learning_tests",
+        "learning_tests_discrete",
+        "multi_gpu",
+        "team:rllib",
+    ],
+)
+
 # Pendulum
 py_test(
     name = "learning_tests_pendulum_ppo",
@@ -4084,14 +4164,14 @@ py_test(
 # subdirectory: envs/
 # ....................................
 py_test(
-    name = "examples/envs/agents_act_simultaneously",
+    name = "examples/envs/agents_act_in_sequence",
     size = "medium",
-    srcs = ["examples/envs/agents_act_simultaneously.py"],
+    srcs = ["examples/envs/agents_act_in_sequence.py"],
     args = [
         "--num-agents=2",
         "--stop-iters=3",
     ],
-    main = "examples/envs/agents_act_simultaneously.py",
+    main = "examples/envs/agents_act_in_sequence.py",
     tags = [
         "examples",
         "exclusive",
@@ -4100,14 +4180,14 @@ py_test(
 )
 
 py_test(
-    name = "examples/envs/agents_act_in_sequence",
+    name = "examples/envs/agents_act_simultaneously",
     size = "medium",
-    srcs = ["examples/envs/agents_act_in_sequence.py"],
+    srcs = ["examples/envs/agents_act_simultaneously.py"],
     args = [
         "--num-agents=2",
         "--stop-iters=3",
     ],
-    main = "examples/envs/agents_act_in_sequence.py",
+    main = "examples/envs/agents_act_simultaneously.py",
     tags = [
         "examples",
         "exclusive",
@@ -5014,13 +5094,34 @@ py_test(
 )
 
 py_test(
-    name = "examples/multi_agent/shared_encoder_cartpole",
-    size = "medium",
-    srcs = ["examples/multi_agent/shared_encoder_cartpole.py"],
+    name = "examples/multi_agent/self_play_footsies",
+    size = "large",
+    srcs = ["examples/multi_agent/self_play_footsies.py"],
     args = [
-        "--stop-iter=10",
+        "--as-test",
+        "--num-cpus=4",
     ],
-    main = "examples/multi_agent/shared_encoder_cartpole.py",
+    main = "examples/multi_agent/self_play_footsies.py",
+    tags = [
+        "examples",
+        "examples_use_all_core",
+        "exclusive",
+        "team:rllib",
+    ],
+)
+
+py_test(
+    name = "examples/multi_agent/self_play_league_based_with_open_spiel_connect_4_ppo_torch",
+    size = "large",
+    srcs = ["examples/multi_agent/self_play_league_based_with_open_spiel.py"],
+    args = [
+        "--framework=torch",
+        "--env=connect_four",
+        "--win-rate-threshold=0.8",
+        "--num-episodes-human-play=0",
+        "--min-league-size=8",
+    ],
+    main = "examples/multi_agent/self_play_league_based_with_open_spiel.py",
     tags = [
         "examples",
         "exclusive",
@@ -5090,17 +5191,13 @@ py_test(
 )
 
 py_test(
-    name = "examples/multi_agent/self_play_league_based_with_open_spiel_connect_4_ppo_torch",
-    size = "large",
-    srcs = ["examples/multi_agent/self_play_league_based_with_open_spiel.py"],
+    name = "examples/multi_agent/shared_encoder_cartpole",
+    size = "medium",
+    srcs = ["examples/multi_agent/shared_encoder_cartpole.py"],
     args = [
-        "--framework=torch",
-        "--env=connect_four",
-        "--win-rate-threshold=0.8",
-        "--num-episodes-human-play=0",
-        "--min-league-size=8",
+        "--stop-iter=10",
     ],
-    main = "examples/multi_agent/self_play_league_based_with_open_spiel.py",
+    main = "examples/multi_agent/shared_encoder_cartpole.py",
     tags = [
         "examples",
         "exclusive",

rllib/algorithms/algorithm.py

Lines changed: 4 additions & 4 deletions
@@ -2211,11 +2211,11 @@ def add_module(
                 EnvRunnerGroup (with its o EnvRunners plus the local one).
 
         Returns:
-            The new MultiAgentRLModuleSpec (after the RLModule has been added).
+            The new MultiRLModuleSpec (after the RLModule has been added).
         """
         validate_module_id(module_id, error=True)
 
-        # The to-be-returned new MultiAgentRLModuleSpec.
+        # The to-be-returned new MultiRLModuleSpec.
        multi_rl_module_spec = None
 
         if not self.config.is_multi_agent:
@@ -2337,9 +2337,9 @@ def remove_module(
                 EnvRunnerGroup (with its o EnvRunners plus the local one).
 
         Returns:
-            The new MultiAgentRLModuleSpec (after the RLModule has been removed).
+            The new MultiRLModuleSpec (after the RLModule has been removed).
         """
-        # The to-be-returned new MultiAgentRLModuleSpec.
+        # The to-be-returned new MultiRLModuleSpec.
         multi_rl_module_spec = None
 
         # Remove RLModule from the LearnerGroup.

rllib/algorithms/algorithm_config.py

Lines changed: 1 addition & 0 deletions
@@ -143,6 +143,7 @@ def DEFAULT_AGENT_TO_MODULE_MAPPING_FN(agent_id, episode):
         # Map any agent ID to "default_policy".
         return DEFAULT_MODULE_ID
 
+    # @OldAPIStack
     # TODO (sven): Deprecate in new API stack.
     @staticmethod
     def DEFAULT_POLICY_MAPPING_FN(aid, episode, worker, **kwargs):
Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,10 @@
+# Footsies Environment
+
+This environment implementation is based on the [FootsiesGym project](https://github.com/chasemcd/FootsiesGym),
+specifically the version as of **July 28, 2025**.
+
+## Notes
+
+All examples in the RLlib documentation that use the Footsies environment are self-contained.
+This means that you do not need to install anything from the FootsiesGym repository or elsewhere.
+The examples handle the game binary automatically (downloading, extracting, starting, stopping, etc.).
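A rough sketch of what "handling the binary automatically" can look like; the URL, cache paths, port, and helper name below are placeholders, not the actual RLlib helpers or FootsiesGym artifacts:

```python
# Placeholder sketch: the URL, paths, and port are illustrative
# assumptions, not the real download location or server flags.
import subprocess
import tarfile
import urllib.request
from pathlib import Path

BINARY_URL = "https://example.com/footsies_server.tar.gz"  # placeholder
CACHE_DIR = Path.home() / ".cache" / "footsies"

def ensure_binary() -> Path:
    """Download and extract the game server once, then reuse the cache."""
    binary = CACHE_DIR / "footsies_server"
    if not binary.exists():
        CACHE_DIR.mkdir(parents=True, exist_ok=True)
        archive = CACHE_DIR / "footsies_server.tar.gz"
        urllib.request.urlretrieve(BINARY_URL, archive)
        with tarfile.open(archive) as tar:
            tar.extractall(CACHE_DIR)
    return binary

# Start the engine before training and stop it afterwards.
proc = subprocess.Popen([str(ensure_binary()), "--port", "50051"])
try:
    pass  # connect the RLlib env to the engine and run training here
finally:
    proc.terminate()
    proc.wait()
```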
