-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathanalyze_chance.py
171 lines (135 loc) · 5.8 KB
/
analyze_chance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
from task import Task
import numpy as np
import bound_utils as bu
from scipy.stats import binom
from cem import CEM
# bisection search for p_
def inf_p(k, n, delta):
p_min = 0
p_max = 1
while (p_max - p_min > 0.0001):
p = (p_min + p_max) / 2
val = binom.cdf(k-1, n, p)
if val < 1 - delta: # val decreases as p increases
p_max = p
else:
p_min = p
return p
# get theoretical curve, return xs, ys
def get_curve(n, delta, epsilon, grid_density):
eps = np.linspace(0.001, 0.999, num=grid_density)
probs = np.zeros(len(eps))
for i, eps_prime in enumerate(eps):
cum_prob = 0
for k in range(n, 0, -1):
p = inf_p(k,n,delta)
if p >= 1-epsilon:
cum_prob += binom.pmf(k,n,1-eps_prime)
else:
probs[i] = cum_prob
break
return 1-eps, probs
def valid_chance_theory(config):
delta = config["bounds"]["delta"]
epsilon = config["bounds"]["epsilon"]
num_envs_range = config["simulation"]["num_envs_range_theory"]
grid_density = config["bounds"]["grid_density"]
theory_prob_success = np.zeros((len(num_envs_range), grid_density))
theory_prob_accept = np.zeros((len(num_envs_range), grid_density))
for i,n in enumerate(num_envs_range):
print("\nOn theoretical num_envs: ", n)
one_minus_eps, probs = get_curve(n, delta, epsilon, grid_density)
theory_prob_success[i,:] = one_minus_eps
theory_prob_accept[i,:] = probs
return theory_prob_success, theory_prob_accept
def valid_chance_empirical(config):
# simulation parameters
task_name = config["simulation"]["task_name"]
noise = config["simulation"]["noise"]
num_reps = config["simulation"]["num_reps"]
num_envs_range = config["simulation"]["num_envs_range_empirical"]
# CEM parameters
num_samples = config["plans"]["num_samples"]
horizon = config["plans"]["horizon"]
# bounds parameters
num_theory = config["bounds"]["num_theory"]
healthy_low = config["bounds"]["healthy_low"]
healthy_high = config["bounds"]["healthy_high"]
delta = config["bounds"]["delta"]
epsilon = config["bounds"]["epsilon"]
tau = 1-epsilon
# make task
task = Task(task_name, noise)
# Get the noiseless initial position
temp_task = Task(task_name, 0)
temp_env = temp_task.create_envs(1)
init_q_pos = temp_env.env_fns[0]().data.qpos # initial position
init_q_vel = temp_env.env_fns[0]().data.qvel # initial velocity
temp_env.close()
# Set the chance constraint function
# Should take in a sequence of observations in shape (# observations/step, horizon)
# In our case, we require that the ant be "extra" healthy i.e. have torso z value between [0.5, 1]
# across the horizon (standard definition of healthy is between 0.2, 1)
constr_func = lambda obs : np.all((obs[2,:] >= healthy_low) * (obs[2,:] <= healthy_high))
# Generate the action sequences that will consider
plans = task.sample_plans(num_samples, horizon)
min_envs = bu.min_n_chance(delta, tau)
if min(num_envs_range) < min_envs:
raise ValueError("Not enough sim environments for certifying chance constraint!")
# initialize arrays for (x,y) values when plotting: (empirical_prob_success, empirical_prob_accept)
empirical_prob_success = np.zeros((len(num_envs_range), num_samples))
empirical_prob_accept = np.zeros((len(num_envs_range), num_samples))
for i, num_envs in enumerate(num_envs_range):
print('\n\nOn num_envs ', num_envs)
for j in range(num_samples):
print('\nOn plan ' + str(j))
action_seq = plans[j]
theory_val, theory_holds, accept_stats, accept_frac = bu.analyze_chance_without_control(num_reps,
action_seq, task, init_q_pos, init_q_vel, num_theory, num_envs, constr_func, tau, delta, verbose=False)
print('theory_val', theory_val)
print('theory_holds', theory_holds)
print('accept_frac', accept_frac)
empirical_prob_success[i,j] = theory_val
empirical_prob_accept[i,j] = accept_frac
return empirical_prob_success, empirical_prob_accept
def valid_pr_empirical(config):
# simulation parameters
task_name = config["simulation"]["task_name"]
noise = config["simulation"]["noise"]
num_reps = config["simulation"]["num_reps"]
# solver parameters
horizon = config["CEM"]["horizon"]
max_gen = config["CEM"]["max_gen"]
num_samples = config["CEM"]["num_samples"]
num_elite = config["CEM"]["num_elite"]
rand_envs = config["CEM"]["rand_envs"]
processes = config["CEM"]["processes"]
verbose = config["CEM"]["verbose"]
# bounds parameters
num_theory = config["bounds"]["num_theory"]
num_envs = config["bounds"]["num_envs"]
healthy_low = config["bounds"]["healthy_low"]
healthy_high = config["bounds"]["healthy_high"]
delta = config["bounds"]["delta"]
feas_stat = None
num_chance = None
# make task
task = Task(task_name, noise)
# Get the noiseless initial position
temp_task = Task(task_name, 0)
temp_env = temp_task.create_envs(1)
init_q_pos = temp_env.env_fns[0]().data.qpos # initial position
init_q_vel = temp_env.env_fns[0]().data.qvel # initial velocity
temp_env.close()
# Set the chance constraint function
# Should take in a sequence of observations in shape (# observations/step, horizon)
# In our case, we require that the ant be "extra" healthy i.e. have torso z value between [0.5, 1]
# across the horizon (standard definition of healthy is between 0.2, 1)
constr_func = lambda obs : np.all((obs[2,:] >= healthy_low) * (obs[2,:] <= healthy_high))
# Solve for the fixed control sequence to use in later analysis
solver = CEM(max_gen, num_samples, num_elite, 1, task, lambda costs : np.mean(costs), feas_stat, num_chance, rand_envs, processes, False)
action_seq, _ = solver.solve(init_q_pos, init_q_vel, horizon)
# action_seq = np.squeeze(task.sample_plans(1, horizon))
cost_samples, true_pr, bound_samples = bu.analyze_pr_without_control(num_reps, action_seq, task, init_q_pos, init_q_vel, num_theory, num_envs, constr_func, delta, verbose)
bound_quantile = np.quantile(bound_samples, delta)
return cost_samples, bound_samples, true_pr, bound_quantile