diff --git a/setup.py b/setup.py index c2565530..6775432f 100644 --- a/setup.py +++ b/setup.py @@ -161,6 +161,7 @@ def run(self): "scipy>=0.19.1", "scikit-learn>=0.20.0", "pandas", + "ConfigSpace" ], extras_require={"plots": ["matplotlib>=2.0.0"]}, cmdclass={ diff --git a/skopt/__init__.py b/skopt/__init__.py index fce24088..d2a4bc4d 100644 --- a/skopt/__init__.py +++ b/skopt/__init__.py @@ -29,7 +29,7 @@ # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer. # 'X.Y.dev0' is the canonical version of 'X.Y.dev' # -__version__ = "0.9.6" +__version__ = "0.9.7" if __SKOPT_SETUP__: import sys diff --git a/skopt/optimizer/optimizer.py b/skopt/optimizer/optimizer.py index fe423039..c0430edc 100644 --- a/skopt/optimizer/optimizer.py +++ b/skopt/optimizer/optimizer.py @@ -198,6 +198,8 @@ def __init__( acq_func_kwargs=None, acq_optimizer_kwargs=None, model_sdv=None, + sample_max_size=-1, + sample_strategy="quantile" ): args = locals().copy() del args["self"] @@ -371,6 +373,10 @@ def __init__( self._min_value = 0 self._max_value = 0 + # parameters to stabilize the size of the dataset used to fit the surrogate model + self._sample_max_size = sample_max_size + self._sample_strategy = sample_strategy + def copy(self, random_state=None): """Create a shallow copy of an instance of the optimizer. 
def _sample(self, X, y):
    """Subsample the observations used to fit the surrogate model.

    When ``self._sample_max_size`` is positive, the number of observations
    exceeds it and ``self._sample_strategy`` is ``"quantile"``, the targets
    are split into bins delimited by their 10/25/50/75/90 % quantiles and
    the same number of observations is drawn (with replacement) from every
    non-empty bin. In every other case the data is returned unchanged; in
    particular an unrecognised strategy is a silent pass-through.

    Parameters
    ----------
    X : sequence
        Evaluated points, one entry per observation.
    y : sequence
        Objective values matching ``X``. They must be orderable numbers
        because quantiles are computed on them.

    Returns
    -------
    X, y : tuple of list
        The (possibly subsampled) points and targets as plain lists.

    Notes
    -----
    NOTE(review): the visible ``_tell`` call site passes ``self.yi`` here
    instead of the ``yi`` returned by ``_filter_failures``, which discards
    the failure handling. The call should pass the filtered targets.
    """
    X = np.asarray(X, dtype="O")
    y = np.asarray(y)
    max_size = self._sample_max_size

    if (
        max_size > 0
        and y.shape[0] > max_size
        and self._sample_strategy == "quantile"
    ):
        edges = np.quantile(y, [0.10, 0.25, 0.50, 0.75, 0.90])
        # side="right" reproduces the half-open bins [edge[i-1], edge[i]):
        # 0 is "below the first edge", len(edges) is "at or above the last".
        bin_of = np.searchsorted(edges, y, side="right")

        # Tied targets can make several edges coincide and leave bins
        # empty. Only occupied bins are sampled (drawing from an empty bin
        # raises), and the budget is shared between them so the result
        # stays close to the requested size.
        occupied = np.unique(bin_of)
        per_bin = max(1, max_size // len(occupied))

        # Draw from the optimizer's own random state (the one
        # _ask_random_points uses) so results follow ``random_state``
        # instead of the global NumPy RNG.
        picked = [
            self.rng.choice(
                np.flatnonzero(bin_of == b), size=per_bin, replace=True
            )
            for b in occupied
        ]

        # The slice only matters when the cap is smaller than the number
        # of occupied bins; it then keeps the lowest-valued bins so the cap
        # is honoured without ever returning an empty training set.
        idx = np.concatenate(picked)[:max_size]
        X = X[idx]
        y = y[idx]

    return X.tolist(), y.tolist()