This repository was archived by the owner on Jun 13, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathutil.py
190 lines (170 loc) · 7.22 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
#!/usr/bin/env python
import datetime, os, time, yaml, sys
import json
import matplotlib.pyplot as plt
import numpy as np
import StringIO
import tensorflow as tf
import time
def add_opts(parser):
    """Register the shared optimiser / regularisation command-line flags.

    parser: an argparse.ArgumentParser (mutated in place).
    """
    # (flag, kwargs) table keeps the registrations uniform and easy to scan;
    # order matters only for --help output and matches the original
    flag_specs = [
        ('--gradient-clip',
         dict(type=float, default=5,
              help="do global clipping to this norm")),
        ('--print-gradients',
         dict(action='store_true',
              help="whether to verbose print all gradients and l2 norms")),
        ('--optimiser',
         dict(type=str, default="GradientDescent",
              help="tf.train.XXXOptimizer to use")),
        ('--optimiser-args',
         dict(type=str, default="{\"learning_rate\": 0.001}",
              help="json serialised args for optimiser constructor")),
        ('--use-dropout',
         dict(action='store_true',
              help="include a dropout layers after each fully connected layer")),
    ]
    for flag, kwargs in flag_specs:
        parser.add_argument(flag, **kwargs)
class StopWatch:
    """Minimal wall-clock timer: reset() (re)starts it, time() reads elapsed seconds."""

    def __init__(self):
        # start the clock immediately so time() is valid even if the caller
        # never calls reset() first (previously that raised AttributeError)
        self.start = time.time()

    def reset(self):
        """(Re)start the timer."""
        self.start = time.time()

    def time(self):
        """Seconds elapsed since construction or the most recent reset()."""
        return time.time() - self.start
def l2_norm(tensor):
    """l2 (Euclidean) norm over ALL elements of `tensor`; returns a scalar.

    NOTE(review): the old docstring said "(row wise)", but reduce_sum is
    called with no axis argument, so this reduces over every element of the
    tensor, not per row.
    """
    return tf.sqrt(tf.reduce_sum(tf.pow(tensor, 2)))
def standardise(tensor):
    """Shift and scale `tensor` to zero mean and unit standard deviation."""
    # tf (at the time) exposed no std-dev op, so build it from first
    # principles: sigma = sqrt(E[(x - mu)^2])
    mu = tf.reduce_mean(tensor)
    sigma = tf.sqrt(tf.reduce_mean(tf.square(tensor - mu)))
    return (tensor - mu) / sigma
def clip_and_debug_gradients(gradients, opts):
    """Optionally clip gradients by global norm and/or wrap them in debug prints.

    gradients: sequence of (gradient, variable) pairs, as returned by an
               optimiser's compute_gradients.
    opts:      namespace with .gradient_clip (float or None) and
               .print_gradients (bool) -- see add_opts.
    returns:   list of (gradient, variable) pairs.
    """
    # materialise as a list: under python3 zip() returns a lazy iterator,
    # which the index-assignment in the print branch below cannot handle
    gradients = list(gradients)
    # extract just the gradients temporarily for global clipping and then rezip
    if opts.gradient_clip is not None:
        just_gradients, variables = zip(*gradients)
        just_gradients, _ = tf.clip_by_global_norm(just_gradients, opts.gradient_clip)
        gradients = list(zip(just_gradients, variables))
    # verbose debugging: wrap each gradient in a tf.Print of its l2 norm
    if opts.print_gradients:
        for i, (gradient, variable) in enumerate(gradients):
            if gradient is not None:
                gradients[i] = (tf.Print(gradient, [l2_norm(gradient)],
                                         "gradient %s l2_norm " % variable.name), variable)
    # done
    return gradients
def collapsed_successive_ranges(values):
    """reduce an array, e.g. [2,3,4,5,13,14,15], to its successive ranges "2-5, 13-15"

    values:  iterable of ints, assumed sorted ascending.
    returns: comma separated "start-end" strings; "" for empty input
             (previously an empty input crashed formatting None).
    """
    last, start, out = None, None, []
    for value in values:
        if start is None:
            # first value opens the first range
            start = value
        elif value != last + 1:
            # gap found: close the current range and open a new one
            out.append("%d-%d" % (start, last))
            start = value
        last = value
    if start is not None:  # guard: nothing to close for empty input
        out.append("%d-%d" % (start, last))
    return ", ".join(out)
def construct_optimiser(opts):
    """Build a tf.train optimiser from command-line opts (see add_opts).

    opts.optimiser:      class-name prefix, e.g. "GradientDescent" selects
                         tf.train.GradientDescentOptimizer.
    opts.optimiser_args: json dict of constructor kwargs,
                         e.g. '{"learning_rate": 0.001}'.
    raises: AttributeError for an unknown optimiser name, ValueError for
            malformed json.
    """
    # getattr instead of eval: same attribute lookup, but cannot execute
    # arbitrary code if the flag value is ever attacker controlled
    optimiser_cstr = getattr(tf.train, "%sOptimizer" % opts.optimiser)
    args = json.loads(opts.optimiser_args)
    return optimiser_cstr(**args)
def shape_and_product_of(t):
    """Describe tensor `t` as "<shape> #<n>", where n is the product of all
    statically-known dimensions."""
    shape = t.get_shape()
    product = 1
    for dim in shape:
        try:
            product *= int(dim)
        except TypeError:
            # Dimension(None): size unknown at graph-construction time; skip
            pass
    return "%s #%s" % (shape, product)
class SaverUtil(object):
    """Wraps a tf.train.Saver with time-based periodic checkpointing.

    On construction, restores the most recent checkpoint in ckpt_dir if one
    exists; otherwise initialises all variables and writes a first
    checkpoint immediately.
    """

    def __init__(self, sess, ckpt_dir="/tmp", save_freq=60):
        # sess:      tf session used for the restore / init / save ops
        # ckpt_dir:  checkpoint directory; created if it doesn't exist
        # save_freq: minimum seconds between scheduled saves (must be > 0)
        self.sess = sess
        # exclude replay-memory variables from checkpoints; presumably too
        # large to be worth persisting -- TODO confirm intent
        var_list = [v for v in tf.all_variables() if not "replay_memory" in v.name]
        self.saver = tf.train.Saver(var_list=var_list, max_to_keep=1000)
        self.ckpt_dir = ckpt_dir
        if not os.path.exists(self.ckpt_dir):
            os.makedirs(self.ckpt_dir)
        assert save_freq > 0
        self.save_freq = save_freq
        # restore-or-init; also sets self.next_scheduled_save_time
        self.load_latest_ckpt_or_init_if_none()

    def load_latest_ckpt_or_init_if_none(self):
        """Restore the latest checkpoint if one exists, else init vars and save."""
        # tf maintains a "checkpoint" index file naming the latest model file
        ckpt_info_file = "%s/checkpoint" % self.ckpt_dir
        if os.path.isfile(ckpt_info_file):
            # the index file is yaml-parsable key: value lines.
            # NOTE(review): yaml.load without an explicit Loader can execute
            # arbitrary tags (safe_load would be safer), and the file handle
            # is never closed (relies on GC) -- low risk for a local,
            # tf-written file, but worth fixing.
            info = yaml.load(open(ckpt_info_file, "r"))
            assert 'model_checkpoint_path' in info
            most_recent_ckpt = "%s/%s" % (self.ckpt_dir, info['model_checkpoint_path'])
            sys.stderr.write("loading ckpt %s\n" % most_recent_ckpt)
            self.saver.restore(self.sess, most_recent_ckpt)
            self.next_scheduled_save_time = time.time() + self.save_freq
        else:
            # no latest ckpts, init and force a save now
            sys.stderr.write("no latest ckpt in %s, just initing vars...\n" % self.ckpt_dir)
            self.sess.run(tf.initialize_all_variables())
            self.force_save()

    def force_save(self):
        """Save a checkpoint now, unconditionally, and reschedule the next save."""
        # timestamped suffix keeps every checkpoint file distinct
        dts = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
        new_ckpt = "%s/ckpt.%s" % (self.ckpt_dir, dts)
        sys.stderr.write("saving ckpt %s\n" % new_ckpt)
        start_time = time.time()
        self.saver.save(self.sess, new_ckpt)
        print "save_took", time.time() - start_time
        self.next_scheduled_save_time = time.time() + self.save_freq

    def save_if_required(self):
        """Save iff save_freq seconds have passed since the last save."""
        if time.time() >= self.next_scheduled_save_time:
            self.force_save()
class OrnsteinUhlenbeckNoise(object):
    """generate time correlated noise for action exploration"""

    def __init__(self, dim, theta=0.01, sigma=0.2, max_magnitude=1.5):
        # dim: dimensionality of returned noise
        # theta: how quickly the value moves; near zero => slow, near one => fast
        #   0.01 gives very roughly 2/3 peaks troughs over ~1000 samples
        # sigma: maximum range of values; 0.2 gives approximately the range
        #   (-1.5, 1.5) which is useful for shifting the output of a tanh
        #   which is (-1, 1)
        # max_magnitude: max +ve / -ve value to clip at. dft clip at 1.5
        #   (again for adding to output from tanh). we do this since sigma
        #   gives no guarantees regarding min/max values.
        self.dim = dim
        self.theta = theta
        self.sigma = sigma
        self.max_magnitude = max_magnitude
        self.state = np.zeros(self.dim)

    def sample(self):
        """Return the next noise vector, shape (dim,), clipped to
        [-max_magnitude, max_magnitude]."""
        self.state += self.theta * -self.state             # decay towards zero
        self.state += self.sigma * np.random.randn(self.dim)  # random step
        # BUGFIX: np.clip's signature is (a, a_min, a_max). The original call
        # np.clip(self.max_magnitude, -self.max_magnitude, self.state) clipped
        # the *scalar* against the state, which capped the state from above
        # but never from below, letting large negative values through.
        self.state = np.clip(self.state, -self.max_magnitude, self.max_magnitude)
        return np.copy(self.state)
def write_img_to_png_file(img, filename):
    """Encode `img` (an HxWx3 array) as png via matplotlib and write it to
    `filename`, logging the filename to stdout."""
    # render into an in-memory buffer first (python2 StringIO), then dump
    png_bytes = StringIO.StringIO()
    plt.imsave(png_bytes, img)
    print "writing", filename
    # NOTE(review): binary data written through a text-mode ("w") handle;
    # fine on python2/unix, would need "wb" + io.BytesIO under python3
    with open(filename, "w") as f:
        f.write(png_bytes.getvalue())
# some hacks for writing state to disk for visualisation
def render_state_to_png(step, state, split_channels=False):
    """Dump every camera/repeat slice of a state tensor to /tmp as pngs.

    `state` is indexed (height, width, channel, camera, repeat). With
    split_channels each channel is written as its own (otherwise black)
    image; otherwise the three channels are combined into a single rgb
    image per camera/repeat pair.
    """
    height, width, num_channels, num_cameras, num_repeats = state.shape
    for cam in range(num_cameras):
        for rep in range(num_repeats):
            if not split_channels:
                # combine the three channels into one rgb image
                rgb = np.empty((height, width, 3))
                for ch in range(3):
                    rgb[:, :, ch] = state[:, :, ch, cam, rep]
                write_img_to_png_file(rgb, "/tmp/state_s%03d_c%s_r%s.png" % (step, cam, rep))
                continue
            # one image per channel, placed in its own rgb slot
            for ch in range(num_channels):
                img = np.zeros((height, width, 3))
                img[:, :, ch] = state[:, :, ch, cam, rep]
                write_img_to_png_file(img, "/tmp/state_s%03d_ch%s_c%s_r%s.png" % (step, ch, cam, rep))
def render_action_to_png(step, action):
    """Visualise a 2d action as a line from the centre of a 50x50 grey png,
    saved to /tmp/action_<step>.png.

    action: indexable as action[0][0], action[0][1]; components presumably in
    roughly [-1, 1] given the *25 scaling -- TODO confirm against caller.
    """
    import Image, ImageDraw  # python2-era PIL; deferred so PIL stays optional
    img = Image.new('RGB', (50, 50), (50, 50, 50))
    canvas = ImageDraw.Draw(img)
    # map action components to pixel coordinates around the centre (25, 25)
    lx, ly = int(25+(action[0][0]*25)), int(25+(action[0][1]*25))
    canvas.line((25,25, lx,ly), fill="black")
    img.save("/tmp/action_%03d.png" % step)