From c272896c4e3f75ebd3b09b092180f5ef5b12692e Mon Sep 17 00:00:00 2001 From: Deathn0t Date: Thu, 31 Mar 2022 11:57:33 +0200 Subject: [PATCH 1/4] update with max size sample --- skopt/optimizer/optimizer.py | 49 ++++++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/skopt/optimizer/optimizer.py b/skopt/optimizer/optimizer.py index fe423039..6e79f349 100644 --- a/skopt/optimizer/optimizer.py +++ b/skopt/optimizer/optimizer.py @@ -198,6 +198,8 @@ def __init__( acq_func_kwargs=None, acq_optimizer_kwargs=None, model_sdv=None, + sample_max_size=-1, + sample_strategy="quantile" ): args = locals().copy() del args["self"] @@ -371,6 +373,10 @@ def __init__( self._min_value = 0 self._max_value = 0 + # parameters to stabilize the size of the dataset used to fit the surrogate model + self._sample_max_size = sample_max_size + self._sample_strategy = sample_strategy + def copy(self, random_state=None): """Create a shallow copy of an instance of the optimizer. @@ -392,7 +398,9 @@ def copy(self, random_state=None): acq_func_kwargs=self.acq_func_kwargs, acq_optimizer_kwargs=self.acq_optimizer_kwargs, random_state=random_state, - model_sdv=self.model_sdv + model_sdv=self.model_sdv, + sample_max_size=self._sample_max_size, + sample_strategy=self._sample_strategy ) optimizer._initial_samples = self._initial_samples @@ -631,6 +639,40 @@ def _filter_failures(self, yi): return yi + def _sample(self, X, y): + + X = np.asarray(X) + y = np.asarray(y) + size = y.shape[0] + + if self._sample_max_size > 0 and size > self._sample_max_size: + + if self._sample_strategy == "quantile": + quantiles = np.quantile(y, [0.10, 0.25, 0.50, 0.75, 0.90]) + int_size = self._sample_max_size // (len(quantiles) + 1) + + Xs, ys = [], [] + for i in range(len(quantiles) + 1): + if i == 0: + s = y < quantiles[i] + elif i == len(quantiles): + s = quantiles[i-1] <= y + else: + s = (quantiles[i - 1] <= y) & (y < quantiles[i]) + + + idx = np.where(s)[0] + idx = np.random.choice(idx, size=int_size, replace=True) + Xi = X[idx] + yi = y[idx] + Xs.append(Xi) + ys.append(yi) + + X = np.concatenate(Xs, axis=0) + y = np.concatenate(ys, axis=0) + + return X, y + def _ask_random_points(self, size=None): samples = self.space.rvs(n_samples=self.n_points, random_state=self.rng) @@ -764,9 +806,12 @@ def _tell(self, x, y, fit=True): # handle failures yi = self._filter_failures(self.yi) + # handle size of the sample fit to the estimator + Xi, yi = self._sample(self.Xi, self.yi) + with warnings.catch_warnings(): warnings.simplefilter("ignore") - Xtt = self.space.imp_const.fit_transform(self.space.transform(self.Xi)) + Xtt = self.space.imp_const.fit_transform(self.space.transform(Xi)) est.fit(Xtt, yi) # for qLCB save the fitted estimator and skip the selection From 162507894b619d3174000581152c0241f2071b33 Mon Sep 17 00:00:00 2001 From: Deathn0t Date: Mon, 11 Apr 2022 11:47:52 +0200 Subject: [PATCH 2/4] avoid transformations of types in _sample --- skopt/optimizer/optimizer.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/skopt/optimizer/optimizer.py b/skopt/optimizer/optimizer.py index 6e79f349..c0430edc 100644 --- a/skopt/optimizer/optimizer.py +++ b/skopt/optimizer/optimizer.py @@ -641,12 +641,11 @@ def _filter_failures(self, yi): def _sample(self, X, y): - X = np.asarray(X) + X = np.asarray(X, dtype="O") y = np.asarray(y) size = y.shape[0] if self._sample_max_size > 0 and size > self._sample_max_size: - if self._sample_strategy == "quantile": quantiles = np.quantile(y, [0.10, 0.25, 0.50, 0.75, 0.90]) int_size = self._sample_max_size // (len(quantiles) + 1) @@ -671,6 +670,8 @@ def _sample(self, X, y): X = np.concatenate(Xs, axis=0) y = np.concatenate(ys, axis=0) + X = X.tolist() + y = y.tolist() return X, y def _ask_random_points(self, size=None): From 9234f8c495e1c809cfb16d47d5d14f87487dd476 Mon Sep 17 00:00:00 2001 From: Deathn0t Date: Tue, 12 Apr 2022 13:50:16 +0200 Subject: [PATCH 3/4] adding ConfigSpace as a dependency --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index c2565530..6775432f 100644 --- a/setup.py +++ b/setup.py @@ -161,6 +161,7 @@ def run(self): "scipy>=0.19.1", "scikit-learn>=0.20.0", "pandas", + "ConfigSpace" ], extras_require={"plots": ["matplotlib>=2.0.0"]}, cmdclass={ From 85e81c5edb7d8cd40dc6920acd72316ad316ce23 Mon Sep 17 00:00:00 2001 From: Deathn0t Date: Tue, 12 Apr 2022 13:51:23 +0200 Subject: [PATCH 4/4] version set to 0.9.7 --- skopt/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skopt/__init__.py b/skopt/__init__.py index fce24088..d2a4bc4d 100644 --- a/skopt/__init__.py +++ b/skopt/__init__.py @@ -29,7 +29,7 @@ # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer. # 'X.Y.dev0' is the canonical version of 'X.Y.dev' # -__version__ = "0.9.6" +__version__ = "0.9.7" if __SKOPT_SETUP__: import sys