-
Notifications
You must be signed in to change notification settings - Fork 817
/
utils.py
73 lines (61 loc) · 2.06 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# Third party code
#
# The following code is copied or modified from:
# https://github.com/openai/evolution-strategies-starter.
import numpy as np
def compute_ranks(x):
    """Return the rank of every element of a 1-D array, in [0, len(x)).

    Note: unlike scipy.stats.rankdata, whose ranks lie in [1, len(x)], the
    ranks produced here start at 0.

    Args:
        x(np.array): a one-dimensional array.

    Return:
        ranks(np.array): integer array where ranks[i] is the position of
            x[i] in the ascending sort order of x.
    """
    assert x.ndim == 1
    count = len(x)
    order = x.argsort()
    ranks = np.empty(count, dtype=int)
    # order[k] is the index of the k-th smallest element, so scattering
    # 0..count-1 through it assigns each element its rank.
    ranks[order] = np.arange(count)
    return ranks
def compute_centered_ranks(x):
    """Return the ranks of the rewards, normalized to [-0.5, 0.5].

    Args:
        x(np.array): an array of rewards; it may have any shape.

    Return:
        y(np.array): float32 array with the same shape as `x`. The smallest
            reward maps to -0.5 and the largest to 0.5. An input with fewer
            than two elements yields zeros, since there is nothing to rank
            it against.
    """
    if x.size <= 1:
        # With a single element the normalization below would compute
        # 0 / 0 and return NaN; place a lone reward at the center instead.
        return np.zeros(x.shape, dtype=np.float32)
    # Rank over the flattened rewards, then restore the original shape.
    y = compute_ranks(x.ravel()).reshape(x.shape).astype(np.float32)
    # Scale ranks from [0, size - 1] to [0, 1], then center around zero.
    y /= (x.size - 1)
    y -= 0.5
    return y
def itergroups(items, group_size):
    """Yield the elements of `items` as tuples of `group_size` elements.

    The final tuple holds the leftover elements and may be shorter than
    `group_size`; nothing is yielded for an empty input.

    Args:
        items: any iterable.
        group_size(int): number of elements per tuple, at least 1.
    """
    assert group_size >= 1
    pending = []
    for item in items:
        pending.append(item)
        if len(pending) != group_size:
            continue
        yield tuple(pending)
        pending = []
    if pending:
        yield tuple(pending)
def batched_weighted_sum(weights, vecs, batch_size):
    """Compute the gradients for updating the parameters.

    The weighted sum of `vecs` is accumulated `batch_size` items at a time,
    so `vecs` may be a lazy iterable and never has to be materialized whole.

    Args:
        weights(np.array): the normalized rewards computed by the function `compute_centered_ranks`.
        vecs(np.array): the noise added to the parameters.
        batch_size(int): the batch_size for speeding up the computation.

    Return:
        total(np.array): aggregated gradient (the int 0 when both inputs
            are empty).

    Raises:
        AssertionError: if `weights` and `vecs` do not contain the same
            number of items.
    """
    weight_batches = itergroups(weights, batch_size)
    vec_batches = itergroups(vecs, batch_size)
    total = 0
    # Walk both batch streams explicitly instead of zip(), which stops at
    # the shorter stream and would silently drop whole extra batches.
    for batch_weights in weight_batches:
        # If `vecs` ran out first, the empty default fails the check below.
        batch_vecs = next(vec_batches, ())
        assert len(batch_weights) == len(batch_vecs) <= batch_size
        total += np.dot(
            np.asarray(batch_weights, dtype=np.float32),
            np.asarray(batch_vecs, dtype=np.float32))
    # `weights` is exhausted; any remaining batch means `vecs` was longer.
    assert next(vec_batches, None) is None, \
        "vecs contains more items than weights"
    return total
def unflatten(flat_array, array_shapes):
    """Split a flat array into consecutive arrays with the given shapes.

    Args:
        flat_array(np.array): one-dimensional array whose length equals the
            total number of elements described by `array_shapes`.
        array_shapes(list): the shapes to carve out of `flat_array`, in order.

    Return:
        arrays(list): one array per entry of `array_shapes`, each a reshaped
            slice of `flat_array`.
    """
    pieces = []
    offset = 0
    for shape in array_shapes:
        end = offset + np.prod(shape, dtype=np.int32)
        pieces.append(flat_array[offset:end].reshape(shape))
        offset = end
    # Every element of the flat array must have been consumed.
    assert offset == len(flat_array)
    return pieces