
Commit 86f9670

Add noisy MLP
1 parent 10015e6 commit 86f9670

7 files changed: 338 additions & 41 deletions

examples/torch/dqn_atari.py

Lines changed: 15 additions & 8 deletions
@@ -42,7 +42,9 @@
     buffer_batch_size=32,
     max_epsilon=1.0,
     double=True,
-    dueling=True,
+    dueling=False,
+    noisy=False,
+    noisy_sigma=0.5,
     min_epsilon=0.01,
     decay_ratio=0.1,
     buffer_size=int(1e4),
@@ -162,19 +164,24 @@ def dqn_atari(ctxt=None,
         kernel_sizes=hyperparams['kernel_sizes'],
         strides=hyperparams['strides'],
         dueling=hyperparams['dueling'],
+        noisy=hyperparams['noisy'],
+        noisy_sigma=hyperparams['noisy_sigma'],
         hidden_w_init=(
             lambda x: torch.nn.init.orthogonal_(x, gain=np.sqrt(2))),
         hidden_sizes=hyperparams['hidden_sizes'],
         is_image=True)

     policy = DiscreteQFArgmaxPolicy(env_spec=env.spec, qf=qf)
-    exploration_policy = EpsilonGreedyPolicy(
-        env_spec=env.spec,
-        policy=policy,
-        total_timesteps=num_timesteps,
-        max_epsilon=hyperparams['max_epsilon'],
-        min_epsilon=hyperparams['min_epsilon'],
-        decay_ratio=hyperparams['decay_ratio'])
+
+    exploration_policy = policy
+    if not hyperparams['noisy']:
+        exploration_policy = EpsilonGreedyPolicy(
+            env_spec=env.spec,
+            policy=policy,
+            total_timesteps=num_timesteps,
+            max_epsilon=hyperparams['max_epsilon'],
+            min_epsilon=hyperparams['min_epsilon'],
+            decay_ratio=hyperparams['decay_ratio'])

     algo = DQN(env_spec=env.spec,
                policy=policy,
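
Why the exploration change works: a NoisyNet Q-function resamples its parameter noise on every forward pass, so acting greedily with respect to the noisy Q-values is itself an exploration strategy, and the annealed epsilon schedule is only needed for a deterministic network. The same selection logic, restated as a plain if/else sketch (assuming the garage classes and hyperparams dict from this example):

# Sketch only, not part of the diff: choosing the exploration policy.
policy = DiscreteQFArgmaxPolicy(env_spec=env.spec, qf=qf)
if hyperparams['noisy']:
    # Parameter noise inside the Q-network drives exploration directly.
    exploration_policy = policy
else:
    # Deterministic Q-network: anneal epsilon from max to min.
    exploration_policy = EpsilonGreedyPolicy(
        env_spec=env.spec,
        policy=policy,
        total_timesteps=num_timesteps,
        max_epsilon=hyperparams['max_epsilon'],
        min_epsilon=hyperparams['min_epsilon'],
        decay_ratio=hyperparams['decay_ratio'])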

src/garage/torch/algos/dqn.py

Lines changed: 3 additions & 0 deletions
@@ -222,6 +222,9 @@ def log_eval_results(self, epoch):
         tabular.record('QFunction/MaxY', np.max(self._epoch_ys))
         tabular.record('QFunction/AverageAbsY',
                        np.mean(np.abs(self._epoch_ys)))
+        # log noise levels if using a NoisyNet.
+        # If NoisyNet is not used, this does nothing.
+        self._qf.log_noise('QFunction/Noisy-Sigma')

     def optimize_qf(self, samples_data):
         """Perform algorithm optimizing.

src/garage/torch/modules/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -10,6 +10,7 @@
 from garage.torch.modules.gaussian_mlp_module import GaussianMLPModule
 from garage.torch.modules.mlp_module import MLPModule
 from garage.torch.modules.multi_headed_mlp_module import MultiHeadedMLPModule
+from garage.torch.modules.noisy_mlp_module import NoisyMLPModule
 # DiscreteCNNModule must go after MLPModule
 from garage.torch.modules.discrete_cnn_module import DiscreteCNNModule
 # yapf: enable
@@ -20,6 +21,7 @@
     'DiscreteCNNModule',
     'MLPModule',
     'MultiHeadedMLPModule',
+    'NoisyMLPModule',
     'GaussianMLPModule',
     'GaussianMLPIndependentStdModule',
     'GaussianMLPTwoHeadedModule',

src/garage/torch/modules/discrete_cnn_module.py

Lines changed: 98 additions & 31 deletions
@@ -1,8 +1,9 @@
 """Discrete CNN Q Function."""
+from dowel import tabular
 import torch
 from torch import nn

-from garage.torch.modules import CNNModule, MLPModule
+from garage.torch.modules import CNNModule, MLPModule, NoisyMLPModule


 # pytorch v1.6 issue, see https://github.com/pytorch/pytorch/issues/42305
@@ -33,6 +34,13 @@ class DiscreteCNNModule(nn.Module):
             of two hidden layers, each with 32 hidden units.
         dueling (bool): Whether to use a dueling architecture for the
             fully-connected layer.
+        noisy (bool): Whether to use parameter noise for the fully-connected
+            layers. If True, hidden_w_init, hidden_b_init, output_w_init, and
+            output_b_init are ignored.
+        noisy_sigma (float): Level of scaling to apply to the parameter noise.
+            This is ignored if noisy is set to False.
+        std_noise (float): Standard deviation of the Gaussian parameter noise.
+            This is ignored if noisy is set to False.
         mlp_hidden_nonlinearity (callable): Activation function for
             intermediate dense layer(s) in the MLP. It should return
             a torch.Tensor. Set it to None to maintain a linear activation.
@@ -81,6 +89,9 @@ def __init__(self,
                  hidden_w_init=nn.init.xavier_uniform_,
                  hidden_b_init=nn.init.zeros_,
                  paddings=0,
+                 noisy=True,
+                 noisy_sigma=0.5,
+                 std_noise=1.,
                  padding_mode='zeros',
                  max_pool=False,
                  pool_shape=None,
@@ -94,6 +105,8 @@
         super().__init__()

         self._dueling = dueling
+        self._noisy = noisy
+        self._noisy_layers = None

         input_var = torch.zeros(input_shape)
         cnn_module = CNNModule(input_var=input_var,
@@ -116,26 +129,49 @@
         flat_dim = torch.flatten(cnn_out, start_dim=1).shape[1]

         if dueling:
-            self._val = MLPModule(flat_dim,
-                                  1,
-                                  hidden_sizes,
-                                  hidden_nonlinearity=mlp_hidden_nonlinearity,
-                                  hidden_w_init=hidden_w_init,
-                                  hidden_b_init=hidden_b_init,
-                                  output_nonlinearity=output_nonlinearity,
-                                  output_w_init=output_w_init,
-                                  output_b_init=output_b_init,
-                                  layer_normalization=layer_normalization)
-            self._act = MLPModule(flat_dim,
-                                  output_dim,
-                                  hidden_sizes,
-                                  hidden_nonlinearity=mlp_hidden_nonlinearity,
-                                  hidden_w_init=hidden_w_init,
-                                  hidden_b_init=hidden_b_init,
-                                  output_nonlinearity=output_nonlinearity,
-                                  output_w_init=output_w_init,
-                                  output_b_init=output_b_init,
-                                  layer_normalization=layer_normalization)
+            if noisy:
+                self._val = NoisyMLPModule(
+                    flat_dim,
+                    1,
+                    hidden_sizes,
+                    sigma_naught=noisy_sigma,
+                    std_noise=std_noise,
+                    hidden_nonlinearity=mlp_hidden_nonlinearity,
+                    output_nonlinearity=output_nonlinearity)
+                self._act = NoisyMLPModule(
+                    flat_dim,
+                    output_dim,
+                    hidden_sizes,
+                    sigma_naught=noisy_sigma,
+                    std_noise=std_noise,
+                    hidden_nonlinearity=mlp_hidden_nonlinearity,
+                    output_nonlinearity=output_nonlinearity)
+                self._noisy_layers = [self._val, self._act]
+            else:
+                self._val = MLPModule(
+                    flat_dim,
+                    1,
+                    hidden_sizes,
+                    hidden_nonlinearity=mlp_hidden_nonlinearity,
+                    hidden_w_init=hidden_w_init,
+                    hidden_b_init=hidden_b_init,
+                    output_nonlinearity=output_nonlinearity,
+                    output_w_init=output_w_init,
+                    output_b_init=output_b_init,
+                    layer_normalization=layer_normalization)
+
+                self._act = MLPModule(
+                    flat_dim,
+                    output_dim,
+                    hidden_sizes,
+                    hidden_nonlinearity=mlp_hidden_nonlinearity,
+                    hidden_w_init=hidden_w_init,
+                    hidden_b_init=hidden_b_init,
+                    output_nonlinearity=output_nonlinearity,
+                    output_w_init=output_w_init,
+                    output_b_init=output_b_init,
+                    layer_normalization=layer_normalization)
+
         if mlp_hidden_nonlinearity is None:
             self._module = nn.Sequential(cnn_module, nn.Flatten())
         else:
@@ -144,16 +180,29 @@
                                          nn.Flatten())

         else:
-            mlp_module = MLPModule(flat_dim,
-                                   output_dim,
-                                   hidden_sizes,
-                                   hidden_nonlinearity=mlp_hidden_nonlinearity,
-                                   hidden_w_init=hidden_w_init,
-                                   hidden_b_init=hidden_b_init,
-                                   output_nonlinearity=output_nonlinearity,
-                                   output_w_init=output_w_init,
-                                   output_b_init=output_b_init,
-                                   layer_normalization=layer_normalization)
+            mlp_module = None
+            if noisy:
+                mlp_module = NoisyMLPModule(
+                    flat_dim,
+                    output_dim,
+                    hidden_sizes,
+                    sigma_naught=noisy_sigma,
+                    std_noise=std_noise,
+                    hidden_nonlinearity=mlp_hidden_nonlinearity,
+                    output_nonlinearity=output_nonlinearity)
+                self._noisy_layers = [mlp_module]
+            else:
+                mlp_module = MLPModule(
+                    flat_dim,
+                    output_dim,
+                    hidden_sizes,
+                    hidden_nonlinearity=mlp_hidden_nonlinearity,
+                    hidden_w_init=hidden_w_init,
+                    hidden_b_init=hidden_b_init,
+                    output_nonlinearity=output_nonlinearity,
+                    output_w_init=output_w_init,
+                    output_b_init=output_b_init,
+                    layer_normalization=layer_normalization)

         if mlp_hidden_nonlinearity is None:
             self._module = nn.Sequential(cnn_module, nn.Flatten(),
@@ -182,3 +231,21 @@ def forward(self, inputs):
             return val + act

         return self._module(inputs)
+
+    def log_noise(self, key):
+        """Log sigma levels for noisy layers.
+
+        Args:
+            key (str): Prefix to use for logging.
+
+        """
+        if self._noisy:
+            layer_num = 0
+            for layer in self._noisy_layers:
+                for name, param in layer.named_parameters():
+                    if name.endswith('weight_sigma'):
+                        layer_num += 1
+                        sigma_mean = float(
+                            (param**2).mean().sqrt().data.cpu().numpy())
+                        tabular.record(key + '_layer_' + str(layer_num),
+                                       sigma_mean)
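
NoisyMLPModule itself lands in the new noisy_mlp_module.py, which this view does not expand. For orientation, here is a minimal sketch of the kind of factorised-Gaussian noisy linear layer such a module typically stacks (Fortunato et al., 2017). The names sigma_naught, std_noise, and weight_sigma mirror this diff; everything else is an assumption, not garage's actual implementation.

# Hypothetical sketch of a factorised-Gaussian noisy linear layer.
import math

import torch
from torch import nn


class NoisyLinear(nn.Module):
    """Sketch of a NoisyNet linear layer (assumed design, not garage's)."""

    def __init__(self, in_dim, out_dim, sigma_naught=0.5, std_noise=1.):
        super().__init__()
        self._in_dim, self._out_dim = in_dim, out_dim
        self._std_noise = std_noise
        self.weight_mu = nn.Parameter(torch.empty(out_dim, in_dim))
        # 'weight_sigma' is the learnable noise scale that log_noise()
        # above summarizes as a per-layer RMS.
        self.weight_sigma = nn.Parameter(torch.empty(out_dim, in_dim))
        self.bias_mu = nn.Parameter(torch.empty(out_dim))
        self.bias_sigma = nn.Parameter(torch.empty(out_dim))
        bound = 1. / math.sqrt(in_dim)
        self.weight_mu.data.uniform_(-bound, bound)
        self.bias_mu.data.uniform_(-bound, bound)
        # sigma_0 / sqrt(p) initialization from the NoisyNet paper.
        self.weight_sigma.data.fill_(sigma_naught / math.sqrt(in_dim))
        self.bias_sigma.data.fill_(sigma_naught / math.sqrt(in_dim))

    @staticmethod
    def _scale(x):
        # Factorised-noise transform f(x) = sign(x) * sqrt(|x|).
        return x.sign() * x.abs().sqrt()

    def forward(self, x):
        # Fresh factorised Gaussian noise on every forward pass; this is
        # what lets the greedy policy explore without epsilon-greedy.
        eps_in = self._scale(torch.randn(self._in_dim) * self._std_noise)
        eps_out = self._scale(torch.randn(self._out_dim) * self._std_noise)
        weight = self.weight_mu + self.weight_sigma * torch.outer(eps_out, eps_in)
        bias = self.bias_mu + self.bias_sigma * eps_out
        return nn.functional.linear(x, weight, bias)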

src/garage/torch/modules/multi_headed_mlp_module.py

Lines changed: 2 additions & 2 deletions
@@ -7,6 +7,8 @@
 from garage.torch import NonLinearity


+# pytorch v1.6 issue, see https://github.com/pytorch/pytorch/issues/42305
+# pylint: disable=abstract-method
 class MultiHeadedMLPModule(nn.Module):
     """MultiHeadedMLPModule Model.

@@ -71,8 +73,6 @@ def __init__(self,
         output_nonlinearities = self._check_parameter_for_output_layer(
             'output_nonlinearities', output_nonlinearities, n_heads)

-        self._layers = nn.ModuleList()
-
         prev_size = input_dim
         for size in hidden_sizes:
             hidden_layers = nn.Sequential()
