Merge branch 'master' into nn_tnpg_docs
nicolengsy authored Oct 29, 2020
2 parents 89b8316 + 4312678 commit 4017aef
Showing 76 changed files with 2,596 additions and 491 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/ci.yml
@@ -129,7 +129,7 @@ jobs:
"${DOCKER_TAG}" \
/bin/bash -c \
'[ ! -f ${MJKEY_PATH} ] || mv ${MJKEY_PATH} ${MJKEY_PATH}.bak &&
pytest --cov=garage --cov-report=xml -m \
pytest --cov=garage --cov-report=xml --reruns 1 -m \
"not nightly and not huge and not flaky and not large and not mujoco and not mujoco_long" --durations=20 &&
for i in {1..5}; do
bash <(curl -s https://codecov.io/bash --retry 5) -Z && break
@@ -171,7 +171,7 @@ jobs:
"${DOCKER_TAG}" \
/bin/bash -c \
'[ ! -f ${MJKEY_PATH} ] || mv ${MJKEY_PATH} ${MJKEY_PATH}.bak &&
pytest --cov=garage --cov-report=xml -m "large and not flaky" --durations=20 &&
pytest --cov=garage --cov-report=xml --reruns 1 -m "large and not flaky" --durations=20 &&
for i in {1..5}; do
bash <(curl -s https://codecov.io/bash --retry 5) -Z && break
if [ $i == 5 ]; then
@@ -211,7 +211,7 @@ jobs:
--memory-swap 6500m \
"${DOCKER_TAG}" \
/bin/bash -c \
'pytest --cov=garage --cov-report=xml -m "mujoco and not flaky" --durations=20 &&
'pytest --cov=garage --cov-report=xml --reruns 1 -m "mujoco and not flaky" --durations=20 &&
for i in {1..5}; do
bash <(curl -s https://codecov.io/bash --retry 5) -Z && break
if [ $i == 5 ]; then
@@ -251,7 +251,7 @@ jobs:
--memory-swap 6500m \
"${DOCKER_TAG}" \
/bin/bash -c \
'pytest --cov=garage --cov-report=xml -m "mujoco_long and not flaky" --durations=20 &&
'pytest --cov=garage --cov-report=xml --reruns 1 -m "mujoco_long and not flaky" --durations=20 &&
for i in {1..5}; do
bash <(curl -s https://codecov.io/bash --retry 5) -Z && break
if [ $i == 5 ]; then
@@ -290,7 +290,7 @@ jobs:
$ci_env\
--memory 6500m \
--memory-swap 6500m \
"${DOCKER_TAG}" pytest -v -m nightly
"${DOCKER_TAG}" pytest -v --reruns 1 -m nightly
verify_envs_conda:
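
The --reruns 1 flag added to every pytest invocation above comes from the pytest-rerunfailures plugin: a failing test is retried the given number of times before it is reported as a failure, which absorbs transient CI flakes. A minimal sketch of the effect, assuming the plugin is installed; the test below and its failure rate are purely illustrative:

import random


def test_transient_failure():
    # Fails about once in a hundred runs. Invoked as
    #   pytest --reruns 1 test_transient_example.py
    # (pytest-rerunfailures), a single transient failure is retried once,
    # so the test usually still passes.
    assert random.random() > 0.01
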
21 changes: 16 additions & 5 deletions Makefile
@@ -49,41 +49,52 @@ ci-job-precommit: assert-docker

ci-job-normal: assert-docker
[ ! -f $(MJKEY_PATH) ] || mv $(MJKEY_PATH) $(MJKEY_PATH).bak
pytest --cov=garage --cov-report=xml -m \
pytest --cov=garage --cov-report=xml --reruns 1 -m \
'not nightly and not huge and not flaky and not large and not mujoco and not mujoco_long' --durations=20
for i in {1..5}; do \
bash <(curl -s https://codecov.io/bash --retry 5) -Z && break \
|| echo 'Retrying...' && sleep 30 && continue; \
exit 1; \
done

# Need to be able to access $!, a special bash variable
define LARGE_TEST
pytest --cov=garage --cov-report=xml --reruns 1 -m 'large and not flaky' --durations=20 &
PYTEST_PID=$$!
while ps -p $$PYTEST_PID > /dev/null ; do
echo 'Still running'
sleep 60
done
endef
export LARGE_TEST

ci-job-large: assert-docker
[ ! -f $(MJKEY_PATH) ] || mv $(MJKEY_PATH) $(MJKEY_PATH).bak
pytest --cov=garage --cov-report=xml -m 'large and not flaky' --durations=20
bash -c "$$LARGE_TEST"
for i in {1..5}; do \
bash <(curl -s https://codecov.io/bash --retry 5) -Z && break \
|| echo 'Retrying...' && sleep 30 && continue; \
exit 1; \
done

ci-job-mujoco: assert-docker
pytest --cov=garage --cov-report=xml -m 'mujoco and not flaky' --durations=20
pytest --cov=garage --cov-report=xml --reruns 1 -m 'mujoco and not flaky' --durations=20
for i in {1..5}; do \
bash <(curl -s https://codecov.io/bash --retry 5) -Z && break \
|| echo 'Retrying...' && sleep 30 && continue; \
exit 1; \
done

ci-job-mujoco-long: assert-docker
pytest --cov=garage --cov-report=xml -m 'mujoco_long and not flaky' --durations=20
pytest --cov=garage --cov-report=xml --reruns 1 -m 'mujoco_long and not flaky' --durations=20
for i in {1..5}; do \
bash <(curl -s https://codecov.io/bash --retry 5) -Z && break \
|| echo 'Retrying...' && sleep 30 && continue; \
exit 1; \
done

ci-job-nightly: assert-docker
pytest -m nightly
pytest --reruns 1 -m nightly

ci-job-verify-envs: assert-docker ci-job-verify-envs-pipenv ci-job-verify-envs-conda

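
The new LARGE_TEST definition backgrounds pytest, captures its PID via $!, and polls it with ps -p so the job keeps printing a heartbeat while the large suite runs; ci-job-large executes it through bash -c because $! is a bash-only variable. A rough Python rendering of the same keep-alive pattern, for illustration only; the command line and interval are placeholders:

import subprocess
import time

# Launch the long-running suite in the background, like `pytest ... &`
# followed by PYTEST_PID=$! in LARGE_TEST.
proc = subprocess.Popen(
    ['pytest', '--cov=garage', '-m', 'large and not flaky', '--durations=20'])

# Poll the process, the Python analogue of `while ps -p $PYTEST_PID`,
# emitting a heartbeat so the CI runner does not treat the job as hung.
while proc.poll() is None:
    print('Still running')
    time.sleep(60)
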
4 changes: 2 additions & 2 deletions README.md
@@ -56,13 +56,13 @@ The table below summarizes the algorithms available in garage.
| CMA-ES | numpy |
| REINFORCE (a.k.a. VPG) | PyTorch, TensorFlow |
| DDPG | PyTorch, TensorFlow |
| DQN | TensorFlow |
| DQN | PyTorch, TensorFlow |
| DDQN | TensorFlow |
| ERWR | TensorFlow |
| NPO | TensorFlow |
| PPO | PyTorch, TensorFlow |
| REPS | TensorFlow |
| TD3 | TensorFlow |
| TD3 | PyTorch, TensorFlow |
| TNPG | TensorFlow |
| TRPO | PyTorch, TensorFlow |
| MAML | PyTorch |
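
The updated rows record that DQN and TD3 now have PyTorch implementations in addition to the TensorFlow ones. A two-line sketch of where the PyTorch classes live; the TD3 import matches the one used by the new benchmark script later in this diff, and the DQN path is stated on the same assumption:

# PyTorch backends behind the updated DQN and TD3 rows.
from garage.torch.algos import DQN, TD3
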
6 changes: 3 additions & 3 deletions benchmarks/src/garage_benchmarks/benchmark_algos.py
@@ -1,9 +1,9 @@
"""Benchmarking for algorithms."""
# yapf: disable
from garage_benchmarks.experiments.algos import (ddpg_garage_tf,
her_garage_tf,
from garage_benchmarks.experiments.algos import (ddpg_garage_tf, her_garage_tf,
ppo_garage_pytorch,
ppo_garage_tf,
td3_garage_pytorch,
td3_garage_tf,
trpo_garage_pytorch,
trpo_garage_tf,
@@ -40,7 +40,7 @@ def td3_benchmarks():
td3_env_ids = [
env_id for env_id in MuJoCo1M_ENV_SET if env_id != 'Reacher-v2'
]

iterate_experiments(td3_garage_pytorch, td3_env_ids)
iterate_experiments(td3_garage_tf, td3_env_ids)


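
td3_benchmarks now runs the PyTorch TD3 experiment alongside the TensorFlow one over the MuJoCo1M environments, with Reacher-v2 excluded. Because the benchmark decorator (see helper.py further down) wraps each function into a no-argument callable, a usage sketch is just a direct call; treat it as illustrative rather than the project's documented entry point:

# Hypothetical invocation: the decorated benchmark takes no arguments and
# runs both the PyTorch and TensorFlow TD3 experiments when called.
from garage_benchmarks.benchmark_algos import td3_benchmarks

td3_benchmarks()
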
29 changes: 14 additions & 15 deletions benchmarks/src/garage_benchmarks/benchmark_auto.py
@@ -2,8 +2,7 @@
# yapf: disable
from garage_benchmarks.experiments.algos import (ddpg_garage_tf,
ppo_garage_pytorch,
ppo_garage_tf,
td3_garage_tf,
ppo_garage_tf, td3_garage_tf,
trpo_garage_pytorch,
trpo_garage_tf,
vpg_garage_pytorch,
@@ -15,53 +14,53 @@


@benchmark(plot=False, auto=True)
def auto_ddpg_benchmarks(snapshot_config):
def auto_ddpg_benchmarks():
"""Run experiments for DDPG benchmarking."""
iterate_experiments(ddpg_garage_tf,
MuJoCo1M_ENV_SET,
snapshot_config=snapshot_config)
snapshot_config={'snapshot_mode': 'none'})


@benchmark(plot=False, auto=True)
def auto_ppo_benchmarks(snapshot_config):
def auto_ppo_benchmarks():
"""Run experiments for PPO benchmarking."""
iterate_experiments(ppo_garage_pytorch,
MuJoCo1M_ENV_SET,
snapshot_config=snapshot_config)
snapshot_config={'snapshot_mode': 'none'})
iterate_experiments(ppo_garage_tf,
MuJoCo1M_ENV_SET,
snapshot_config=snapshot_config)
snapshot_config={'snapshot_mode': 'none'})


@benchmark(plot=False, auto=True)
def auto_td3_benchmarks(snapshot_config):
def auto_td3_benchmarks():
"""Run experiments for TD3 benchmarking."""
td3_env_ids = [
env_id for env_id in MuJoCo1M_ENV_SET if env_id != 'Reacher-v2'
]

iterate_experiments(td3_garage_tf,
td3_env_ids,
snapshot_config=snapshot_config)
snapshot_config={'snapshot_mode': 'none'})


@benchmark(plot=False, auto=True)
def auto_trpo_benchmarks(snapshot_config):
def auto_trpo_benchmarks():
"""Run experiments for TRPO benchmarking."""
iterate_experiments(trpo_garage_pytorch,
MuJoCo1M_ENV_SET,
snapshot_config=snapshot_config)
snapshot_config={'snapshot_mode': 'none'})
iterate_experiments(trpo_garage_tf,
MuJoCo1M_ENV_SET,
snapshot_config=snapshot_config)
snapshot_config={'snapshot_mode': 'none'})


@benchmark(plot=False, auto=True)
def auto_vpg_benchmarks(snapshot_config):
def auto_vpg_benchmarks():
"""Run experiments for VPG benchmarking."""
iterate_experiments(vpg_garage_pytorch,
MuJoCo1M_ENV_SET,
snapshot_config=snapshot_config)
snapshot_config={'snapshot_mode': 'none'})
iterate_experiments(vpg_garage_tf,
MuJoCo1M_ENV_SET,
snapshot_config=snapshot_config)
snapshot_config={'snapshot_mode': 'none'})
benchmarks/src/garage_benchmarks/experiments/algos/__init__.py
@@ -4,6 +4,8 @@
from garage_benchmarks.experiments.algos.ppo_garage_pytorch import (
ppo_garage_pytorch)
from garage_benchmarks.experiments.algos.ppo_garage_tf import ppo_garage_tf
from garage_benchmarks.experiments.algos.td3_garage_pytorch import (
td3_garage_pytorch)
from garage_benchmarks.experiments.algos.td3_garage_tf import td3_garage_tf
from garage_benchmarks.experiments.algos.trpo_garage_pytorch import (
trpo_garage_pytorch)
@@ -14,6 +16,6 @@

__all__ = [
'ddpg_garage_tf', 'her_garage_tf', 'ppo_garage_pytorch', 'ppo_garage_tf',
'td3_garage_tf', 'trpo_garage_pytorch', 'trpo_garage_tf',
'vpg_garage_pytorch', 'vpg_garage_tf'
'td3_garage_pytorch', 'td3_garage_tf', 'trpo_garage_pytorch',
'trpo_garage_tf', 'vpg_garage_pytorch', 'vpg_garage_tf'
]
benchmarks/src/garage_benchmarks/experiments/algos/td3_garage_pytorch.py
@@ -0,0 +1,112 @@
"""A regression test for automatic benchmarking garage-Pytorch-TD3."""
import torch
from torch.nn import functional as F

from garage import wrap_experiment
from garage.envs import GymEnv, normalize
from garage.experiment import deterministic
from garage.np.exploration_policies import AddGaussianNoise
from garage.np.policies import UniformRandomPolicy
from garage.replay_buffer import PathBuffer
from garage.torch import prefer_gpu
from garage.torch.algos import TD3
from garage.torch.policies import DeterministicMLPPolicy
from garage.torch.q_functions import ContinuousMLPQFunction
from garage.trainer import TFTrainer

hyper_parameters = {
'policy_lr': 1e-3,
'qf_lr': 1e-3,
'policy_hidden_sizes': [256, 256],
'qf_hidden_sizes': [256, 256],
'n_epochs': 250,
'steps_per_epoch': 40,
'batch_size': 100,
'start_steps': 1000,
'update_after': 1000,
'grad_steps_per_env_step': 50,
'discount': 0.99,
'target_update_tau': 0.005,
'replay_buffer_size': int(1e6),
'sigma': 0.1,
'policy_noise': 0.2,
'policy_noise_clip': 0.5,
'buffer_batch_size': 100,
'min_buffer_size': int(1e4),
}


@wrap_experiment(snapshot_mode='last')
def td3_garage_pytorch(ctxt, env_id, seed):
"""Create garage TensorFlow TD3 model and training.
Args:
ctxt (garage.experiment.ExperimentContext): The experiment
configuration used by Localtrainer to create the
snapshotter.
env_id (str): Environment id of the task.
seed (int): Random positive integer for the trial.
"""
deterministic.set_seed(seed)

with TFTrainer(ctxt) as trainer:
num_timesteps = hyper_parameters['n_epochs'] * hyper_parameters[
'steps_per_epoch'] * hyper_parameters['batch_size']
env = normalize(GymEnv(env_id))

policy = DeterministicMLPPolicy(
env_spec=env.spec,
hidden_sizes=hyper_parameters['policy_hidden_sizes'],
hidden_nonlinearity=F.relu,
output_nonlinearity=torch.tanh)

exploration_policy = AddGaussianNoise(
env.spec,
policy,
total_timesteps=num_timesteps,
max_sigma=hyper_parameters['sigma'],
min_sigma=hyper_parameters['sigma'])

uniform_random_policy = UniformRandomPolicy(env.spec)

qf1 = ContinuousMLPQFunction(
env_spec=env.spec,
hidden_sizes=hyper_parameters['qf_hidden_sizes'],
hidden_nonlinearity=F.relu)

qf2 = ContinuousMLPQFunction(
env_spec=env.spec,
hidden_sizes=hyper_parameters['qf_hidden_sizes'],
hidden_nonlinearity=F.relu)

replay_buffer = PathBuffer(
capacity_in_transitions=hyper_parameters['replay_buffer_size'])

td3 = TD3(env_spec=env.spec,
policy=policy,
qf1=qf1,
qf2=qf2,
exploration_policy=exploration_policy,
uniform_random_policy=uniform_random_policy,
replay_buffer=replay_buffer,
steps_per_epoch=hyper_parameters['steps_per_epoch'],
policy_lr=hyper_parameters['policy_lr'],
qf_lr=hyper_parameters['qf_lr'],
target_update_tau=hyper_parameters['target_update_tau'],
discount=hyper_parameters['discount'],
grad_steps_per_env_step=hyper_parameters[
'grad_steps_per_env_step'],
start_steps=hyper_parameters['start_steps'],
min_buffer_size=hyper_parameters['min_buffer_size'],
buffer_batch_size=hyper_parameters['buffer_batch_size'],
policy_optimizer=torch.optim.Adam,
qf_optimizer=torch.optim.Adam,
policy_noise_clip=hyper_parameters['policy_noise_clip'],
policy_noise=hyper_parameters['policy_noise'])

prefer_gpu()
td3.to()
trainer.setup(td3, env)
trainer.train(n_epochs=hyper_parameters['n_epochs'],
batch_size=hyper_parameters['batch_size'])
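
The new td3_garage_pytorch experiment is intended to be driven through iterate_experiments, exactly like its TensorFlow counterpart in benchmark_algos.py. A usage sketch follows; only the experiment module itself appears in this diff, so the import locations of iterate_experiments and MuJoCo1M_ENV_SET are assumptions:

# Usage sketch (helper and parameter import paths assumed).
from garage_benchmarks.experiments.algos import td3_garage_pytorch
from garage_benchmarks.helper import iterate_experiments
from garage_benchmarks.parameters import MuJoCo1M_ENV_SET

# Reacher-v2 is excluded, as in td3_benchmarks and auto_td3_benchmarks.
td3_env_ids = [e for e in MuJoCo1M_ENV_SET if e != 'Reacher-v2']
iterate_experiments(td3_garage_pytorch, td3_env_ids)
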
benchmarks/src/garage_benchmarks/experiments/algos/td3_garage_tf.py
@@ -14,12 +14,12 @@
hyper_parameters = {
'policy_lr': 1e-3,
'qf_lr': 1e-3,
'policy_hidden_sizes': [400, 300],
'qf_hidden_sizes': [400, 300],
'n_epochs': 8,
'steps_per_epoch': 20,
'n_exploration_steps': 250,
'n_train_steps': 1,
'policy_hidden_sizes': [256, 256],
'qf_hidden_sizes': [256, 256],
'n_epochs': 250,
'steps_per_epoch': 40,
'n_exploration_steps': 100,
'n_train_steps': 50,
'discount': 0.99,
'tau': 0.005,
'replay_buffer_size': int(1e6),
8 changes: 3 additions & 5 deletions benchmarks/src/garage_benchmarks/helper.py
@@ -80,15 +80,12 @@ def wrapper_func():
count += 1
_log_dir = _log_dir + '_' + str(count)

snapshot_config = {}

if auto:
_auto = auto
auto_dir = os.path.join(_log_dir, 'auto')
os.makedirs(auto_dir)
snapshot_config['snapshot_mode'] = 'none'

exec_func(snapshot_config)
exec_func()

if plot:
plot_dir = os.path.join(_log_dir, 'plot')
@@ -148,7 +145,8 @@ def iterate_experiments(func,
tf.compat.v1.reset_default_graph()

ctxt = dict(log_dir=sub_log_dir)
ctxt.update(snapshot_config)
if snapshot_config:
ctxt.update(snapshot_config)
func(ctxt, env_id=env_id, seed=seed)

if _plot is not None or _auto:
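
After this change, iterate_experiments merges a snapshot_config into the experiment context only when one is supplied, so callers that omit it fall back to wrap_experiment's default snapshotting, while the auto benchmarks pass {'snapshot_mode': 'none'} explicitly. A small sketch of the resulting merge; _build_ctxt is a hypothetical helper written only to illustrate the logic:

def _build_ctxt(sub_log_dir, snapshot_config=None):
    # Mirrors the updated lines in iterate_experiments: the log dir is
    # always set, snapshot settings only when explicitly provided.
    ctxt = dict(log_dir=sub_log_dir)
    if snapshot_config:
        ctxt.update(snapshot_config)
    return ctxt


# e.g. _build_ctxt('/data/local/td3/HalfCheetah-v2_1',
#                  {'snapshot_mode': 'none'})
# -> {'log_dir': '/data/local/td3/HalfCheetah-v2_1',
#     'snapshot_mode': 'none'}
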