|
| 1 | +#!/usr/bin/env python |
| 2 | +# -*- coding: utf-8 -*- |
| 3 | + |
| 4 | +import math |
| 5 | +import numpy as np |
| 6 | +import copy |
| 7 | + |
| 8 | +# evaluation value of the infeasible solution |
| 9 | +INFEASIBLE = np.inf |
| 10 | + |
| 11 | + |
| 12 | +def get_h_inv(dim): |
| 13 | + f = lambda a, b: ((1. + a * a) * math.exp(a * a / 2.) / 0.24) - 10. - dim |
| 14 | + f_prime = lambda a: (1. / 0.24) * a * math.exp(a * a / 2.) * (3. + a * a) |
| 15 | + h_inv = 1.0 |
| 16 | + while abs(f(h_inv, dim)) > 1e-10: |
| 17 | + h_inv = h_inv - 0.5 * (f(h_inv, dim) / f_prime(h_inv)) |
| 18 | + return h_inv |
| 19 | + |
| 20 | + |
| 21 | +def sort_indices_by(evals, z): |
| 22 | + lam = evals.size |
| 23 | + sorted_indices = np.argsort(evals) |
| 24 | + sorted_evals = evals[sorted_indices] |
| 25 | + no_of_feasible_solutions = np.where(sorted_evals != INFEASIBLE)[0].size |
| 26 | + if no_of_feasible_solutions != lam: |
| 27 | + infeasible_z = z[:, np.where(evals == INFEASIBLE)[0]] |
| 28 | + distances = np.sum(infeasible_z ** 2, axis=0) |
| 29 | + infeasible_indices = sorted_indices[no_of_feasible_solutions:] |
| 30 | + indices_sorted_by_distance = np.argsort(distances) |
| 31 | + sorted_indices[no_of_feasible_solutions:] = infeasible_indices[indices_sorted_by_distance] |
| 32 | + return sorted_indices |
| 33 | + |
| 34 | + |
| 35 | +def calc_constraint_violation(x, lamb): |
| 36 | + constraint_violation = np.zeros(lamb) |
| 37 | + for i in range(lamb): |
| 38 | + for j in range(x[:, i].size): |
| 39 | + constraint_violation[i] += (-min(0.0, x[:, i][j]) + max(0.0, x[:, i][j] - 1.0)) * 1e5 |
| 40 | + return constraint_violation |
| 41 | + |
| 42 | + |
| 43 | +class CRFMNES: |
| 44 | + def __init__(self, dim, f, m, sigma, lamb, **kwargs): |
| 45 | + |
| 46 | + if 'seed' in kwargs.keys(): |
| 47 | + np.random.seed(kwargs['seed']) |
| 48 | + self.dim = dim |
| 49 | + self.f = f |
| 50 | + self.m = m |
| 51 | + self.sigma = sigma |
| 52 | + self.lamb = lamb |
| 53 | + |
| 54 | + self.v = kwargs.get('v', np.random.randn(dim, 1) / np.sqrt(dim)) |
| 55 | + self.D = np.ones([dim, 1]) |
| 56 | + self.constraint = kwargs.get('constraint', [[- np.inf, np.inf] for _ in range(dim)]) |
| 57 | + self.penalty_coef = kwargs.get('penalty_coef', 1e5) |
| 58 | + self.use_constraint_violation = True |
| 59 | + |
| 60 | + self.w_rank_hat = (np.log(self.lamb / 2 + 1) - np.log(np.arange(1, self.lamb + 1))).reshape(self.lamb, 1) |
| 61 | + self.w_rank_hat[np.where(self.w_rank_hat < 0)] = 0 |
| 62 | + self.w_rank = self.w_rank_hat / sum(self.w_rank_hat) - (1. / self.lamb) |
| 63 | + self.mueff = 1 / ((self.w_rank + (1 / self.lamb)).T @ (self.w_rank + (1 / self.lamb)))[0][0] |
| 64 | + self.cs = (self.mueff + 2.) / (self.dim + self.mueff + 5.) |
| 65 | + self.cc = (4. + self.mueff / self.dim) / (self.dim + 4. + 2. * self.mueff / self.dim) |
| 66 | + self.c1_cma = 2. / (math.pow(self.dim + 1.3, 2) + self.mueff) |
| 67 | + # initialization |
| 68 | + self.chiN = np.sqrt(self.dim) * (1. - 1. / (4. * self.dim) + 1. / (21. * self.dim * self.dim)) |
| 69 | + self.pc = np.zeros([self.dim, 1]) |
| 70 | + self.ps = np.zeros([self.dim, 1]) |
| 71 | + # distance weight parameter |
| 72 | + self.h_inv = get_h_inv(self.dim) |
| 73 | + self.alpha_dist = lambda lambF: self.h_inv * min(1., math.sqrt(float(self.lamb) / self.dim)) * math.sqrt( |
| 74 | + float(lambF) / self.lamb) |
| 75 | + self.w_dist_hat = lambda z, lambF: math.exp(self.alpha_dist(lambF) * np.linalg.norm(z)) |
| 76 | + # learning rate |
| 77 | + self.eta_m = 1.0 |
| 78 | + self.eta_move_sigma = 1. |
| 79 | + self.eta_stag_sigma = lambda lambF: math.tanh((0.024 * lambF + 0.7 * self.dim + 20.) / (self.dim + 12.)) |
| 80 | + self.eta_conv_sigma = lambda lambF: 2. * math.tanh((0.025 * lambF + 0.75 * self.dim + 10.) / (self.dim + 4.)) |
| 81 | + self.c1 = lambda lambF: self.c1_cma * (self.dim - 5) / 6 * (float(lambF) / self.lamb) |
| 82 | + self.eta_B = lambda lambF: np.tanh((min(0.02 * lambF, 3 * np.log(self.dim)) + 5) / (0.23 * self.dim + 25)) |
| 83 | + |
| 84 | + self.g = 0 |
| 85 | + self.no_of_evals = 0 |
| 86 | + |
| 87 | + self.idxp = np.arange(self.lamb / 2, dtype=int) |
| 88 | + self.idxm = np.arange(self.lamb / 2, self.lamb, dtype=int) |
| 89 | + self.z = np.zeros([self.dim, self.lamb]) |
| 90 | + |
| 91 | + self.f_best = float('inf') |
| 92 | + self.x_best = np.empty(self.dim) |
| 93 | + |
| 94 | + def calc_violations(self, x): |
| 95 | + violations = np.zeros(self.lamb) |
| 96 | + for i in range(self.lamb): |
| 97 | + for j in range(self.dim): |
| 98 | + violations[i] += (- min(0, x[j][i] - self.constraint[j][0]) + max(0, x[j][i] - self.constraint[j][ |
| 99 | + 1])) * self.penalty_coef |
| 100 | + return violations |
| 101 | + |
| 102 | + def optimize(self, iterations): |
| 103 | + for _ in range(iterations): |
| 104 | + _ = self.one_iteration() |
| 105 | + return self.x_best, self.f_best |
| 106 | + |
| 107 | + def one_iteration(self): |
| 108 | + d = self.dim |
| 109 | + lamb = self.lamb |
| 110 | + zhalf = np.random.randn(d, int(lamb / 2)) # dim x lamb/2 |
| 111 | + self.z[:, self.idxp] = zhalf |
| 112 | + self.z[:, self.idxm] = -zhalf |
| 113 | + normv = np.linalg.norm(self.v) |
| 114 | + normv2 = normv ** 2 |
| 115 | + vbar = self.v / normv |
| 116 | + y = self.z + (np.sqrt(1 + normv2) - 1) * vbar @ (vbar.T @ self.z) |
| 117 | + x = self.m + self.sigma * y * self.D |
| 118 | + evals_no_sort = np.array([self.f(np.array(x[:, i].reshape(self.dim, 1))) for i in range(self.lamb)]) |
| 119 | + xs_no_sort = [x[:, i] for i in range(lamb)] |
| 120 | + |
| 121 | + violations = np.zeros(lamb) |
| 122 | + if self.use_constraint_violation: |
| 123 | + violations = calc_constraint_violation(x, self.lamb) |
| 124 | + sorted_indices = sort_indices_by(evals_no_sort + violations, self.z) |
| 125 | + else: |
| 126 | + sorted_indices = sort_indices_by(evals_no_sort, self.z) |
| 127 | + best_eval_id = sorted_indices[0] |
| 128 | + f_best = evals_no_sort[best_eval_id] |
| 129 | + x_best = x[:, best_eval_id] |
| 130 | + self.z = self.z[:, sorted_indices] |
| 131 | + y = y[:, sorted_indices] |
| 132 | + x = x[:, sorted_indices] |
| 133 | + |
| 134 | + self.no_of_evals += self.lamb |
| 135 | + self.g += 1 |
| 136 | + if f_best < self.f_best: |
| 137 | + self.f_best = f_best |
| 138 | + self.x_best = x_best |
| 139 | + |
| 140 | + lambF = np.sum(evals_no_sort > np.finfo(float).max) |
| 141 | + |
| 142 | + # evolution path p_sigma |
| 143 | + self.ps = (1 - self.cs) * self.ps + np.sqrt(self.cs * (2. - self.cs) * self.mueff) * (self.z @ self.w_rank) |
| 144 | + ps_norm = np.linalg.norm(self.ps) |
| 145 | + # distance weight |
| 146 | + w_tmp = np.array( |
| 147 | + [self.w_rank_hat[i] * self.w_dist_hat(np.array(self.z[:, i]), lambF) for i in range(self.lamb)]).reshape( |
| 148 | + self.lamb, 1) |
| 149 | + weights_dist = w_tmp / sum(w_tmp) - 1. / self.lamb |
| 150 | + # switching weights and learning rate |
| 151 | + weights = weights_dist if ps_norm >= self.chiN else self.w_rank |
| 152 | + eta_sigma = self.eta_move_sigma if ps_norm >= self.chiN else self.eta_stag_sigma( |
| 153 | + lambF) if ps_norm >= 0.1 * self.chiN else self.eta_conv_sigma(lambF) |
| 154 | + l_c = 1.0 if ps_norm >= self.chiN else 0.0 |
| 155 | + # update pc, m |
| 156 | + wxm = (x - self.m) @ weights |
| 157 | + self.pc = (1. - self.cc) * self.pc + np.sqrt(self.cc * (2. - self.cc) * self.mueff) * wxm / self.sigma |
| 158 | + self.m += self.eta_m * wxm |
| 159 | + # calculate s, t |
| 160 | + # step1 |
| 161 | + normv4 = normv2 ** 2 |
| 162 | + exY = np.append(y, self.pc / self.D, axis=1) # dim x lamb+1 |
| 163 | + yy = exY * exY # dim x lamb+1 |
| 164 | + ip_yvbar = vbar.T @ exY |
| 165 | + yvbar = exY * vbar # dim x lamb+1. exYのそれぞれの列にvbarがかかる |
| 166 | + gammav = 1. + normv2 |
| 167 | + vbarbar = vbar * vbar |
| 168 | + alphavd = np.min( |
| 169 | + [1, np.sqrt(normv4 + (2 * gammav - np.sqrt(gammav)) / np.max(vbarbar)) / (2 + normv2)]) # scalar |
| 170 | + t = exY * ip_yvbar - vbar * (ip_yvbar ** 2 + gammav) / 2 # dim x lamb+1 |
| 171 | + b = -(1 - alphavd ** 2) * normv4 / gammav + 2 * alphavd ** 2 |
| 172 | + H = np.ones([self.dim, 1]) * 2 - (b + 2 * alphavd ** 2) * vbarbar # dim x 1 |
| 173 | + invH = H ** (-1) |
| 174 | + s_step1 = yy - normv2 / gammav * (yvbar * ip_yvbar) + np.ones([self.dim, self.lamb + 1]) # dim x lamb+1 |
| 175 | + ip_vbart = vbar.T @ t # 1 x lamb+1 |
| 176 | + s_step2 = s_step1 - alphavd / gammav * ((2 + normv2) * (t * vbar) - normv2 * vbarbar @ ip_vbart) # dim x lamb+1 |
| 177 | + invHvbarbar = invH * vbarbar |
| 178 | + ip_s_step2invHvbarbar = invHvbarbar.T @ s_step2 # 1 x lamb+1 |
| 179 | + s = (s_step2 * invH) - b / ( |
| 180 | + 1 + b * vbarbar.T @ invHvbarbar) * invHvbarbar @ ip_s_step2invHvbarbar # dim x lamb+1 |
| 181 | + ip_svbarbar = vbarbar.T @ s # 1 x lamb+1 |
| 182 | + t = t - alphavd * ((2 + normv2) * (s * vbar) - vbar @ ip_svbarbar) # dim x lamb+1 |
| 183 | + # update v, D |
| 184 | + exw = np.append(self.eta_B(lambF) * weights, np.array([l_c * self.c1(lambF)]).reshape(1, 1), |
| 185 | + axis=0) # lamb+1 x 1 |
| 186 | + oldv = copy.deepcopy(self.v) |
| 187 | + self.v = self.v + (t @ exw) / normv |
| 188 | + oldD = copy.deepcopy(self.D) |
| 189 | + self.D = self.D + (s @ exw) * self.D |
| 190 | + # calculate detAold, detA |
| 191 | + nthrootdetAold = np.exp(np.sum(np.log(oldD)) / self.dim + np.log(1 + oldv.T @ oldv) / (2 * self.dim))[0][0] |
| 192 | + nthrootdetA = np.exp(np.sum(np.log(self.D)) / self.dim + np.log(1 + self.v.T @ self.v) / (2 * self.dim))[0][0] |
| 193 | + # update s, D |
| 194 | + G_s = np.sum((self.z * self.z - np.ones([self.dim, self.lamb])) @ weights) / self.dim |
| 195 | + l_s = 1.0 if ps_norm >= self.chiN and G_s < 0 else 0.0 |
| 196 | + self.sigma = self.sigma * np.exp((1 - l_s) * eta_sigma / 2 * G_s) * nthrootdetAold |
| 197 | + self.D = self.D / nthrootdetA |
| 198 | + |
| 199 | + return xs_no_sort, evals_no_sort, violations |
0 commit comments