Merge pull request #73 from moratodpg/tests
Tests
moratodpg committed Jan 8, 2024
2 parents ea4c4a7 + 9488d62 commit 3bb8c12
Showing 29 changed files with 156 additions and 24 deletions.
4 changes: 3 additions & 1 deletion .gitignore
@@ -332,7 +332,9 @@ results_scripts/MARL_logs.zip
/pymarl/results/
/pymarl/imp_marl_venv

imp_env/pomdp_models/model.npz
environments/pomdp_models/model.npz
/heuristics/heuristic_logs/
/heuristics/heur_logs.zip
/imp_wrappers/marllib/examples/exp_results/

.DS_Store
8 changes: 4 additions & 4 deletions README.md
@@ -4,7 +4,7 @@
**IMP-MARL** offers a platform for benchmarking the scalability of cooperative MARL methods in real-world engineering applications.

In IMP-MARL, you can:
- [Implement your own infrastructure management planning (IMP) environment or execute an available IMP environment](./imp_env/).
- [Implement your own infrastructure management planning (IMP) environment or execute an available IMP environment](environments/).
- [Train IMP policies through state-of-the-art MARL methods. The environments can be integrated with typical ecosystems via wrappers](imp_wrappers/).
- [Compute expert-based heuristic policies](./heuristics/)

@@ -26,16 +26,16 @@ To work with our environments, one only needs to install [Numpy](https://numpy.o
However, to reproduce our results, more packages are required and installation instructions are provided [here](pymarl/README.md).

## Tutorials
- [Create your own IMP environment scenario](imp_env/new_imp_env_tutorial.ipynb)
- [Create your own IMP environment scenario](environments/new_imp_env_tutorial.ipynb)
- [IMP's API explained](imp_wrappers/README.md)
- [Train agents like in the paper and/or **reproduce** the results](pymarl/README.md)
- [Retrieve the results of the paper and execute the plot scripts](results_scripts/README.md)
- [Train your own MARL agents with PyMarl](pymarl/EXEC_PYMARL.md)
- [Reproduce the results of the paper](REPROD_EXP.md)

## Sets of environments available:
- [(Correlated and uncorrelated) k-out-of-n system with components subject to fatigue deterioration.](./imp_env/struct_env.py)
- [Offshore wind structural system with components subject to fatigue deterioration.](./imp_env/owf_env.py)
- [(Correlated and uncorrelated) k-out-of-n system with components subject to fatigue deterioration.](environments/struct_env.py)
- [Offshore wind structural system with components subject to fatigue deterioration.](environments/owf_env.py)

**Note: A campaign cost can be activated in any environment.**

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion imp_env/owf_env.py → environments/owf_env.py
@@ -2,7 +2,7 @@

import os
import numpy as np
from imp_env.imp_env import ImpEnv
from environments.imp_env import ImpEnv

class Struct_owf(ImpEnv):

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
31 changes: 28 additions & 3 deletions imp_env/struct_env.py → environments/struct_env.py
@@ -2,7 +2,7 @@

import numpy as np
import os
from imp_env.imp_env import ImpEnv
from environments.imp_env import ImpEnv


class Struct(ImpEnv):
@@ -40,6 +40,31 @@ class Struct(ImpEnv):
immediate_cost
belief_update_uncorrelated
belief_update_correlated
Examples:
>>> from environments.struct_env import Struct
>>> import numpy as np
>>> env = Struct()
>>> obs = env.reset()
>>> print(obs.keys())
dict_keys(['agent_0', 'agent_1'])
>>> obs["agent_0"]
array([1.052000e-04, 5.500000e-05, 8.660000e-05, 1.261000e-04,
2.006000e-04, 3.173000e-04, 4.853000e-04, 7.444000e-04,
1.138400e-03, 1.783100e-03, 2.713600e-03, 4.235700e-03,
6.473200e-03, 1.002420e-02, 1.530330e-02, 2.316180e-02,
3.453640e-02, 5.087030e-02, 7.324320e-02, 1.008326e-01,
1.309823e-01, 1.539425e-01, 1.567708e-01, 1.275575e-01,
7.401660e-02, 2.583390e-02, 4.230100e-03, 2.268000e-04,
3.200000e-06, 0.000000e+00, 0.000000e+00])
>>> actions = {}
>>> for agent_id in env.agent_list:
... actions[agent_id] = np.random.randint(0, env.actions_per_agent)
>>> next_obs, rewards, done, info = env.step(actions)
>>> print(rewards.keys())
dict_keys(['agent_0', 'agent_1'])
>>> print(done)
False
"""
def __init__(self, config=None):
""" Initialises the class according to the provided config instructions.
@@ -48,7 +73,7 @@
config: Dictionary containing config parameters.
Keys:
n_comp: Number of components.
discount_reward: Discount factor.
discount_reward: Discount factor.
k_comp: Number of components required to not fail.
env_correlation: Whether the damage probability is correlated or not.
campaign_cost: Whether to include campaign cost in reward.
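To make these config keys concrete, here is a hedged sketch of constructing the environment with an explicit config. The values mirror the defaults asserted in tests/test_struct_env.py below, with campaign_cost switched on; whether partial configs are accepted is not confirmed by this diff, so all documented keys are given.

```python
from environments.struct_env import Struct

# Illustrative config only: keys come from the docstring above, values mirror
# the defaults checked in tests/test_struct_env.py, except campaign_cost=True.
config = {
    "n_comp": 2,
    "discount_reward": 1.0,
    "k_comp": 1,
    "env_correlation": False,
    "campaign_cost": True,
}
env = Struct(config)
obs = env.reset()  # dict with one belief vector per agent, e.g. "agent_0", "agent_1"
```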
@@ -207,7 +232,7 @@ def step(self, action: dict):
# An episode is done if the agent has reached the target
done = self.time_step >= self.ep_length

return self.observations, rewards, done, inspection
return self.observations, rewards, done, {"inspection":inspection}

def pf_sys(self, pf, k):
""" Computes the system failure probability pf_sys for k-out-of-n components
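With this change, the fourth value returned by step() is a proper info dict rather than the bare inspection outcome. A minimal sketch of how calling code reads it now; action 1 is taken to mean "inspect", matching test_only_inspect below.

```python
from environments.struct_env import Struct

env = Struct()
obs = env.reset()
actions = {agent_id: 1 for agent_id in env.agent_list}  # 1 = inspect, as in the tests
next_obs, rewards, done, info = env.step(actions)
inspection = info["inspection"]  # previously returned directly as the fourth value
```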
2 changes: 1 addition & 1 deletion heuristics/heuristics_interval_owf.py
@@ -2,7 +2,7 @@
from datetime import datetime
from os import path, makedirs

from imp_env.owf_env import Struct_owf
from environments.owf_env import Struct_owf


class HeuristicsOwf():
2 changes: 1 addition & 1 deletion heuristics/heuristics_intervals_struct.py
@@ -2,7 +2,7 @@
from datetime import datetime
from os import path, makedirs

from imp_env.struct_env import Struct
from environments.struct_env import Struct


class HeuristicsStruct():
2 changes: 1 addition & 1 deletion imp_wrappers/README.md
@@ -4,7 +4,7 @@ This package contains wrappers for the IMP environments.

Due to the variety of methods and frameworks used to train agents, we here provide a wrapper for most of them and showcase the flexibility provided by `imp_marl`.

The wrappers simply use the interface defined in [imp_env.py](../imp_env/imp_env.py) in order to plug any IMP environment into any framework.
The wrappers simply use the interface defined in [imp_env.py](../environments/imp_env.py) in order to plug any IMP environment into any framework.

Examples of the use of these wrappers can be found in the [examples](examples) directories.

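As a hedged sketch of what that interface amounts to: both environments in this diff expose dict-based reset() and step(), so a new IMP environment only has to follow the same contract. The exact abstract method set of ImpEnv is not shown in this diff; reset/step is assumed here, and the class and shapes below are purely illustrative.

```python
import numpy as np

from environments.imp_env import ImpEnv


class MyImpEnv(ImpEnv):
    """Hypothetical environment following the dict-based reset()/step() contract."""

    def __init__(self, n_comp=2, ep_length=30):
        self.n_comp = n_comp
        self.ep_length = ep_length
        self.agent_list = ["agent_" + str(i) for i in range(n_comp)]
        self.time_step = 0

    def reset(self):
        self.time_step = 0
        # One observation vector per agent, keyed by agent name.
        return {agent: np.zeros(31) for agent in self.agent_list}

    def step(self, action: dict):
        self.time_step += 1
        observations = {agent: np.zeros(31) for agent in self.agent_list}
        rewards = {agent: 0.0 for agent in self.agent_list}
        done = self.time_step >= self.ep_length
        return observations, rewards, done, {}
```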
4 changes: 2 additions & 2 deletions imp_wrappers/gym/gym_wrap_sa_struct.py
@@ -6,8 +6,8 @@
import numpy as np
from gym import spaces

from imp_env.owf_env import Struct_owf
from imp_env.struct_env import Struct
from environments.owf_env import Struct_owf
from environments.struct_env import Struct


class GymSaStruct(gym.Env):
4 changes: 2 additions & 2 deletions imp_wrappers/gymnasium/gymnasium_wrap_sa_struct.py
@@ -6,8 +6,8 @@
import numpy as np
from gymnasium import spaces

from imp_env.owf_env import Struct_owf
from imp_env.struct_env import Struct
from environments.owf_env import Struct_owf
from environments.struct_env import Struct


class GymnasiumSaStruct(gym.Env):
4 changes: 2 additions & 2 deletions imp_wrappers/pettingzoo/pettingzoo_wrap_struct.py
@@ -2,8 +2,8 @@
import numpy as np
from gymnasium import spaces

from imp_env.owf_env import Struct_owf
from imp_env.struct_env import Struct
from environments.owf_env import Struct_owf
from environments.struct_env import Struct
from pettingzoo.utils.env import ParallelEnv


4 changes: 2 additions & 2 deletions imp_wrappers/pymarl_wrapper/pymarl_wrap_ma_struct.py
@@ -3,8 +3,8 @@
import numpy as np
import torch

from imp_env.owf_env import Struct_owf
from imp_env.struct_env import Struct
from environments.owf_env import Struct_owf
from environments.struct_env import Struct
from imp_wrappers.pymarl_wrapper.MultiAgentEnv import MultiAgentEnv


2 changes: 1 addition & 1 deletion pymarl/EXEC_PYMARL.md
@@ -47,7 +47,7 @@ We provide the complete list of configurations used in our paper and you can cre
# Train agents
Command to train agents, after activating your virtual environment:
```
python pymarl/train_with_pymarl.py --config=alg_config_file --env-config=env_config_file with name=alg_name_in_env_name
python pymarl/pymarl_train.py --config=alg_config_file --env-config=env_config_file with name=alg_name_in_env_name
```

# Train agents like in the paper
2 changes: 1 addition & 1 deletion pymarl/test_with_pymarl.py → pymarl/pymarl_test.py
@@ -23,7 +23,7 @@
from components.episode_buffer import ReplayBuffer
from components.transforms import OneHot

from train_with_pymarl import _get_config, recursive_dict_update
from pymarl_train import _get_config, recursive_dict_update
from run import args_sanity_check, evaluate_sequential
from sacred import SETTINGS
SETTINGS.CONFIG.READ_ONLY_CONFIG = False
File renamed without changes.
2 changes: 1 addition & 1 deletion pymarl/run.sh
@@ -5,7 +5,7 @@

alg=$1
env=$2
python pymarl/train_with_pymarl.py --config=${alg} --env-config=${env} with name=${alg}_${env} test_nepisode=-1
python pymarl/pymarl_train.py --config=${alg} --env-config=${env} with name=${alg}_${env} test_nepisode=-1

#conda deactivate
#deactivate
2 changes: 1 addition & 1 deletion pymarl/run_test.sh
@@ -15,6 +15,6 @@ n_env=2
name=test_${n_test}_${exp_name}
echo $alg
echo $env
python pymarl/test_with_pymarl.py --config=${alg} --env-config=${env} with test_nepisode=${n_test} checkpoint_path=${path} runner=parallel batch_size_run=${n_env} use_cuda=True name=${name}
python pymarl/pymarl_test.py --config=${alg} --env-config=${env} with test_nepisode=${n_test} checkpoint_path=${path} runner=parallel batch_size_run=${n_env} use_cuda=True name=${name}
deactivate

5 changes: 5 additions & 0 deletions pytest.ini
@@ -0,0 +1,5 @@
[pytest]
addopts = --doctest-modules
testpaths =
tests
environments
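With addopts = --doctest-modules and testpaths covering both tests and environments, a plain `pytest` run from the repository root collects the unit tests below as well as the docstring example added to Struct above. A hedged sketch of an equivalent programmatic invocation:

```python
# Sketch: same effect as running `pytest` from the repository root;
# pytest.ini supplies --doctest-modules and the tests/environments test paths.
import sys

import pytest

if __name__ == "__main__":
    sys.exit(pytest.main([]))
```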
Empty file added tests/__init__.py
Empty file.
100 changes: 100 additions & 0 deletions tests/test_struct_env.py
@@ -0,0 +1,100 @@
import numpy as np
from numpy.testing import assert_array_equal

from environments.struct_env import Struct

def test_default_constructor():
env = Struct()
assert env.n_comp == 2
assert env.discount_reward == 1
assert env.k_comp == 1
assert env.env_correlation == False
assert env.campaign_cost == False

obs = env.reset()
assert type(obs) is dict
assert len(obs) == 2
assert "agent_0" in obs
assert "agent_1" in obs
assert len(obs["agent_0"]) == 31
assert len(obs["agent_1"]) == 31

actions = {}
actions["agent_0"] = 0
actions["agent_1"] = 0
next_obs, rewards, done, info = env.step(actions)
assert type(rewards) is dict
assert type(done) is bool
assert type(info) is dict
assert len(next_obs) == 2
assert "agent_0" in next_obs
assert "agent_1" in next_obs
assert len(next_obs["agent_0"]) == 31
assert len(next_obs["agent_1"]) == 31
assert len(rewards) == 2
assert "agent_0" in rewards
assert "agent_1" in rewards
assert done == False


def test_terminal_state():
env = Struct()
cpt = 0
done = False
while not done:
_, rewards, done, _ = env.step({"agent_0": np.random.randint(0, 3), "agent_1": np.random.randint(0, 3)})
assert rewards["agent_0"] == rewards["agent_1"]
cpt += 1
assert cpt == 30

def test_repair_initial_distrib():
env = Struct()
init_distrib = env.initial_damage_proba
obs = env.reset()

# we remove the last element which is the time
assert_array_equal(init_distrib[0],obs["agent_0"][:-1])
assert_array_equal(init_distrib[1], obs["agent_1"][:-1])

actions = {}
actions["agent_0"] = 0
actions["agent_1"] = 0
_, _, done, _ = env.step(actions)

# check repair gives the initial distribution
actions["agent_0"] = 2
actions["agent_1"] = 2
_, _, done, _ = env.step(actions)

assert_array_equal(init_distrib[0], obs["agent_0"][:-1])
assert_array_equal(init_distrib[1], obs["agent_1"][:-1])

def test_only_nothing():
np.random.seed(42)
env = Struct()
done = False
total_reward = 0
while not done:
_, rewards, done, _ = env.step({"agent_0": 0, "agent_1": 0})
total_reward += rewards["agent_0"]
assert total_reward == -40.24126096000002

def test_only_inspect():
np.random.seed(42)
env = Struct()
done = False
total_reward = 0
while not done:
_, rewards, done, _ = env.step({"agent_0": 1, "agent_1": 1})
total_reward += rewards["agent_0"]
assert total_reward == -60.14257297286546

def test_only_repair():
np.random.seed(42)
env = Struct()
done = False
total_reward = 0
while not done:
_, rewards, done, _ = env.step({"agent_0": 2, "agent_1": 2})
total_reward += rewards["agent_0"]
assert total_reward == -1200.0
