Skip to content

Commit

Permalink
Preserve sac log alpha when moving between CPU and GPU (#2260)
Browse files Browse the repository at this point in the history
  • Loading branch information
ziyiwu9494 authored Apr 11, 2021
1 parent f056fb8 commit 3fb4f50
Show file tree
Hide file tree
Showing 8 changed files with 161 additions and 14 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ jobs:
/bin/bash -c \
'[ ! -f ${MJKEY_PATH} ] || mv ${MJKEY_PATH} ${MJKEY_PATH}.bak &&
pytest --cov=garage --cov-report=xml --reruns 1 -m \
"not nightly and not huge and not flaky and not large and not mujoco and not mujoco_long" --durations=20 &&
"not gpu and not nightly and not huge and not flaky and not large and not mujoco and not mujoco_long" --durations=20 &&
for i in {1..5}; do
bash <(curl -s https://codecov.io/bash --retry 5) -Z && break
if [ $i == 5 ]; then
Expand Down
1 change: 1 addition & 0 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@ scipy
setproctitle
tensorflow
tensorflow-probability
docutils<0.17
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ markers =
serial
mujoco
mujoco_long
gpu

[yapf]
based_on_style = pep8
Expand Down
28 changes: 21 additions & 7 deletions src/garage/torch/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,28 @@
NonLinearity, output_height_2d,
output_width_2d, pad_to_last, prefer_gpu,
product_of_gaussians, set_gpu_mode,
soft_update_model, torch_to_np,
update_module_params)
soft_update_model, state_dict_to,
torch_to_np, update_module_params)

# yapf: enable
__all__ = [
'compute_advantages', 'as_torch_dict', 'filter_valids', 'flatten_batch',
'global_device', 'as_torch', 'pad_to_last', 'prefer_gpu',
'product_of_gaussians', 'set_gpu_mode', 'soft_update_model', 'torch_to_np',
'update_module_params', 'NonLinearity', 'flatten_to_single_vector',
'output_width_2d', 'output_height_2d', 'expand_var'
'compute_advantages',
'as_torch_dict',
'filter_valids',
'flatten_batch',
'global_device',
'as_torch',
'pad_to_last',
'prefer_gpu',
'product_of_gaussians',
'set_gpu_mode',
'soft_update_model',
'torch_to_np',
'update_module_params',
'NonLinearity',
'flatten_to_single_vector',
'output_width_2d',
'output_height_2d',
'expand_var',
'state_dict_to',
]
18 changes: 18 additions & 0 deletions src/garage/torch/_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,24 @@ def product_of_gaussians(mus, sigmas_squared):
return mu, sigma_squared


def state_dict_to(state_dict, device):
"""Move optimizer to a specified device.
Args:
state_dict (dict): state dictionary to be moved
device (str): ID of GPU or CPU.
Returns:
dict: state dictionary moved to device
"""
for param in state_dict.values():
if isinstance(param, torch.Tensor):
param.data = param.data.to(device)
elif isinstance(param, dict):
state_dict_to(param, device)
return state_dict


# pylint: disable=W0223
class NonLinearity(nn.Module):
"""Wrapper class for non linear function or module.
Expand Down
14 changes: 11 additions & 3 deletions src/garage/torch/algos/sac.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from garage import log_performance, obtain_evaluation_episodes, StepType
from garage.np.algos import RLAlgorithm
from garage.torch import as_torch_dict, global_device
from garage.torch import as_torch_dict, global_device, state_dict_to

# yapf: enable

Expand Down Expand Up @@ -524,7 +524,15 @@ def to(self, device=None):
self._log_alpha = torch.Tensor([self._fixed_alpha
]).log().to(device)
else:
self._log_alpha = torch.Tensor([self._initial_log_entropy
]).to(device).requires_grad_()
self._log_alpha = self._log_alpha.detach().to(
device).requires_grad_()
self._alpha_optimizer = self._optimizer([self._log_alpha],
lr=self._policy_lr)
self._alpha_optimizer.load_state_dict(
state_dict_to(self._alpha_optimizer.state_dict(), device))
self._qf1_optimizer.load_state_dict(
state_dict_to(self._qf1_optimizer.state_dict(), device))
self._qf2_optimizer.load_state_dict(
state_dict_to(self._qf2_optimizer.state_dict(), device))
self._policy_optimizer.load_state_dict(
state_dict_to(self._policy_optimizer.state_dict(), device))
55 changes: 55 additions & 0 deletions tests/garage/torch/algos/test_sac.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,3 +284,58 @@ def test_fixed_alpha():
trainer.train(n_epochs=1, batch_size=100, plot=False)
assert torch.allclose(torch.Tensor([0.5]), sac._log_alpha.cpu())
assert not sac._use_automatic_entropy_tuning


@pytest.mark.gpu
def test_sac_to():
"""Test moving Sac between CPU and GPU."""
env = normalize(GymEnv('InvertedDoublePendulum-v2',
max_episode_length=100))
deterministic.set_seed(0)
policy = TanhGaussianMLPPolicy(
env_spec=env.spec,
hidden_sizes=[32, 32],
hidden_nonlinearity=torch.nn.ReLU,
output_nonlinearity=None,
min_std=np.exp(-20.),
max_std=np.exp(2.),
)

qf1 = ContinuousMLPQFunction(env_spec=env.spec,
hidden_sizes=[32, 32],
hidden_nonlinearity=F.relu)

qf2 = ContinuousMLPQFunction(env_spec=env.spec,
hidden_sizes=[32, 32],
hidden_nonlinearity=F.relu)
replay_buffer = PathBuffer(capacity_in_transitions=int(1e6), )
trainer = Trainer(snapshot_config=snapshot_config)
sampler = LocalSampler(agents=policy,
envs=env,
max_episode_length=env.spec.max_episode_length,
worker_class=FragmentWorker)
sac = SAC(env_spec=env.spec,
policy=policy,
qf1=qf1,
qf2=qf2,
sampler=sampler,
gradient_steps_per_itr=100,
replay_buffer=replay_buffer,
min_buffer_size=1e3,
target_update_tau=5e-3,
discount=0.99,
buffer_batch_size=64,
reward_scale=1.,
steps_per_epoch=2)
trainer.setup(sac, env)
if torch.cuda.is_available():
set_gpu_mode(True)
else:
set_gpu_mode(False)
sac.to()
trainer.setup(algo=sac, env=env)
trainer.train(n_epochs=1, batch_size=100)
log_alpha = torch.clone(sac._log_alpha).cpu()
set_gpu_mode(False)
sac.to()
assert torch.allclose(log_alpha, sac._log_alpha)
56 changes: 53 additions & 3 deletions tests/garage/torch/test_functions.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,21 @@
"""Module to test garage.torch._functions."""
# yapf: disable
import collections

import numpy as np
import pytest
import torch
from torch import tensor
import torch.nn.functional as F

from garage.envs import GymEnv, normalize
from garage.experiment.deterministic import set_seed
from garage.torch import (as_torch_dict, compute_advantages,
flatten_to_single_vector, global_device, pad_to_last,
product_of_gaussians, set_gpu_mode, torch_to_np)
product_of_gaussians, set_gpu_mode, state_dict_to,
torch_to_np)
import garage.torch._functions as tu
from garage.torch.policies import DeterministicMLPPolicy

from tests.fixtures import TfGraphTestCase

Expand Down Expand Up @@ -56,8 +63,8 @@ def test_as_torch_dict():
"""Test if dict whose values are tensors can be converted to np arrays."""
dic = {'a': np.zeros(1), 'b': np.ones(1)}
as_torch_dict(dic)
for tensor in dic.values():
assert isinstance(tensor, torch.Tensor)
for dic_value in dic.values():
assert isinstance(dic_value, torch.Tensor)


def test_product_of_gaussians():
Expand All @@ -80,6 +87,49 @@ def test_flatten_to_single_vector():
assert expected.shape == flatten_tensor.shape


@pytest.mark.gpu
def test_state_dict_to():
"""Test state_dict_to"""
set_seed(42)
# Using tensor instead of Tensor so it can be declared on GPU
# pylint: disable=not-callable
expected = collections.OrderedDict([
('_module._layers.0.linear.weight',
tensor([[
0.13957974, -0.2693157, -0.19351028, 0.09471931, -0.43573233,
0.03590716, -0.4272097, -0.13935488, -0.35843086, -0.25814268,
0.03060348
],
[
0.20623916, -0.1914061, 0.46729338, -0.5437773,
-0.50449526, -0.55039907, 0.0141218, -0.02489783,
0.26499796, -0.03836302, 0.7235093
]],
device='cuda:0')),
('_module._layers.0.linear.bias', tensor([0., 0.], device='cuda:0')),
('_module._layers.1.linear.weight',
tensor([[-0.7181905, -0.6284401], [0.10591025, -0.14771031]],
device='cuda:0')),
('_module._layers.1.linear.bias', tensor([0., 0.], device='cuda:0')),
('_module._output_layers.0.linear.weight',
tensor([[-0.29133463, 0.58353233]], device='cuda:0')),
('_module._output_layers.0.linear.bias', tensor([0.], device='cuda:0'))
])
# pylint: enable=not-callable
env = normalize(GymEnv('InvertedDoublePendulum-v2'))
policy = DeterministicMLPPolicy(env_spec=env.spec,
hidden_sizes=[2, 2],
hidden_nonlinearity=F.relu,
output_nonlinearity=torch.tanh)
moved_state_dict = state_dict_to(policy.state_dict(), 'cuda')
assert np.all([
torch.allclose(expected[key], moved_state_dict[key])
for key in expected.keys()
])
assert np.all(
[moved_state_dict[key].is_cuda for key in moved_state_dict.keys()])


class TestTorchAlgoUtils(TfGraphTestCase):
"""Test class for torch algo utility functions."""
# yapf: disable
Expand Down

0 comments on commit 3fb4f50

Please sign in to comment.