diff --git a/examples/transfer-learn.py b/examples/transfer-learn.py index da10dfa4..e24d31d8 100644 --- a/examples/transfer-learn.py +++ b/examples/transfer-learn.py @@ -1,46 +1,54 @@ import numpy as np -from skopt import Optimizer import pandas as pd -from sdv.tabular import GaussianCopula -from skopt.space import Real, Integer -from skopt.utils import use_named_args -from sdv.tabular import TVAE from sdv.evaluation import evaluate +from sdv.tabular import TVAE +from skopt import Optimizer +from skopt.space import Integer, Real +from skopt.utils import use_named_args -import sys def f(x): res = 0 for i in range(len(x)): - res = res + (np.sin(5 * x[i]) * (1 - np.tanh(x[i] ** 2)) + np.random.randn() * 0.1) + res = res + ( + np.sin(5 * x[i]) * (1 - np.tanh(x[i] ** 2)) + np.random.randn() * 0.1 + ) return res -df = pd.read_csv('skopt_result.csv',header=0) +df = pd.read_csv("skopt_result.csv", header=0) print(df) q_10 = np.quantile(df.objective.values, 0.10) -req_df = df.loc[df['objective'] < q_10] +req_df = df.loc[df["objective"] < q_10] print(req_df.shape) -req_df = req_df.drop(columns=['objective']) +req_df = req_df.drop(columns=["objective"]) print(req_df.shape) -space = [Integer(1, 20, name='epochs'), - #Integer(1, np.floor(req_df.shape[0]/10), name='batch_size'), - Integer(1, 8, name='embedding_dim'), - Integer(1, 8, name= 'compress_dims'), - Integer(1, 8, name= 'decompress_dims'), - Real(10**-8, 10**-4, "log-uniform", name='l2scale'), - Integer(1, 5, name= 'loss_factor') - ] +space = [ + Integer(1, 20, name="epochs"), + # Integer(1, np.floor(req_df.shape[0]/10), name='batch_size'), + Integer(1, 8, name="embedding_dim"), + Integer(1, 8, name="compress_dims"), + Integer(1, 8, name="decompress_dims"), + Real(10**-8, 10**-4, "log-uniform", name="l2scale"), + Integer(1, 5, name="loss_factor"), +] + @use_named_args(space) def objective(**params): - params['epochs'] = 10*params['epochs'] - #params['batch_size'] = 10*params['batch_size'] - params['embedding_dim'] = 2**params['embedding_dim'] - params['compress_dims'] = [2**params['compress_dims'],2**params['compress_dims']] - params['decompress_dims'] = [2**params['decompress_dims'],2**params['decompress_dims']] + params["epochs"] = 10 * params["epochs"] + # params['batch_size'] = 10*params['batch_size'] + params["embedding_dim"] = 2 ** params["embedding_dim"] + params["compress_dims"] = [ + 2 ** params["compress_dims"], + 2 ** params["compress_dims"], + ] + params["decompress_dims"] = [ + 2 ** params["decompress_dims"], + 2 ** params["decompress_dims"], + ] print(params) model = TVAE(**params) model.fit(req_df) @@ -52,11 +60,17 @@ def objective(**params): @use_named_args(space) def model_fit(**params): - params['epochs'] = 10*params['epochs'] - #params['batch_size'] = 10*params['batch_size'] - params['embedding_dim'] = 2**params['embedding_dim'] - params['compress_dims'] = [2**params['compress_dims'],2**params['compress_dims']] - params['decompress_dims'] = [2**params['decompress_dims'],2**params['decompress_dims']] + params["epochs"] = 10 * params["epochs"] + # params['batch_size'] = 10*params['batch_size'] + params["embedding_dim"] = 2 ** params["embedding_dim"] + params["compress_dims"] = [ + 2 ** params["compress_dims"], + 2 ** params["compress_dims"], + ] + params["decompress_dims"] = [ + 2 ** params["decompress_dims"], + 2 ** params["decompress_dims"], + ] print(params) model = TVAE(**params) model.fit(req_df) @@ -66,16 +80,16 @@ def model_fit(**params): return -score, model -opt = Optimizer(space, tl_sdv=None) +opt = Optimizer(space, model_sdv=None) for i in range(30): suggested = opt.ask() y = objective(suggested) opt.tell(suggested, y) - print('iteration:', i, suggested, y) + print("iteration:", i, suggested, y) print(opt.yi) -min_value = min(opt.yi) +min_value = min(opt.yi) min_index = opt.yi.index(min_value) print(min_value) best_params = opt.Xi[min_index] @@ -83,20 +97,20 @@ def model_fit(**params): score, model = model_fit(best_params) print(score) -opt = Optimizer([(-3.0, 3.0),(-3.0, 3.0),(-3.0, 3.0),(-3.0, 3.0),(-3.0, 3.0)], tl_sdv = model) +opt = Optimizer( + [(-3.0, 3.0), (-3.0, 3.0), (-3.0, 3.0), (-3.0, 3.0), (-3.0, 3.0)], model_sdv=model +) if 1: for i in range(100): suggested = opt.ask() y = f(suggested) opt.tell(suggested, y) - print('iteration:', i, y) + print("iteration:", i, y) df = pd.DataFrame(opt.Xi) print(df) print(opt.yi) - df['yi'] = opt.yi + df["yi"] = opt.yi print(df) - df.to_csv('tl-skopt_result.csv',index=False) - - + df.to_csv("tl-skopt_result.csv", index=False) diff --git a/skopt/optimizer/optimizer.py b/skopt/optimizer/optimizer.py index f7844c66..7cd9ffbd 100644 --- a/skopt/optimizer/optimizer.py +++ b/skopt/optimizer/optimizer.py @@ -161,6 +161,9 @@ class Optimizer(object): Keeps list of models only as long as the argument given. In the case of None, the list has no capped length. + model_sdv : Model or None, default None + A Model from Synthetic-Data-Vault. + Attributes ---------- Xi : list @@ -190,14 +193,12 @@ def __init__( model_queue_size=None, acq_func_kwargs=None, acq_optimizer_kwargs=None, - tl_sdv=None, + model_sdv=None, ): args = locals().copy() del args["self"] self.specs = {"args": args, "function": "Optimizer"} self.rng = check_random_state(random_state) - print(tl_sdv) - self.tl_sdv = tl_sdv # Configure acquisition function @@ -313,7 +314,10 @@ def __init__( if isinstance(self.base_estimator_, GaussianProcessRegressor): dimensions = normalize_dimensions(dimensions) - self.space = Space(dimensions, tl_sdv) + # keep track of the generative model from sdv + self.model_sdv = model_sdv + + self.space = Space(dimensions, model_sdv=self.model_sdv) self._initial_samples = None self._initial_point_generator = cook_initial_point_generator( @@ -382,15 +386,16 @@ def copy(self, random_state=None): acq_func_kwargs=self.acq_func_kwargs, acq_optimizer_kwargs=self.acq_optimizer_kwargs, random_state=random_state, - tl_sdv=self.tl_sdv + model_sdv=self.model_sdv ) optimizer._initial_samples = self._initial_samples optimizer.sampled = self.sampled[:] - if hasattr(self, "tl_sdv"): - optimizer.tl_sdv = self.tl_sdv + # TODO: commented because possibly not necessary + # if hasattr(self, "tl_sdv"): + # optimizer.model_sdv = self.model_sdv if hasattr(self, "gains_"): optimizer.gains_ = np.copy(self.gains_) diff --git a/skopt/space/space.py b/skopt/space/space.py index e7df8347..35b92ba7 100644 --- a/skopt/space/space.py +++ b/skopt/space/space.py @@ -888,18 +888,24 @@ class Space(object): dimensions. """ - def __init__(self, dimensions, tl_sdv=None): + def __init__(self, dimensions, model_sdv=None): + + # attributes used when a ConfigurationSpace from ConfigSpace is given self.is_config_space = False self.config_space_samples = None self.config_space_explored = False + self.imp_const = SimpleImputer( missing_values=np.nan, strategy="constant", fill_value=-1000 ) self.imp_const_inv = SimpleImputer( missing_values=-1000, strategy="constant", fill_value=np.nan ) + + # attribute used when a generative model is used to sample + self.model_sdv = model_sdv + self.hps_names = [] - self.tl_sdv = tl_sdv if isinstance(dimensions, CS.ConfigurationSpace): self.is_config_space = True @@ -1082,57 +1088,50 @@ def rvs(self, n_samples=1, random_state=None): Points sampled from the space. """ - #n_samples = 100 - rng = check_random_state(random_state) if self.is_config_space: req_points = [] - if self.tl_sdv is None: - confs = self.config_space.sample_configuration(n_samples) - else: - confs = self.tl_sdv.sample(n_samples) - print('successfully sampling with tl_sdv! ') - if n_samples == 1: - confs = [confs] + hps_names = self.config_space.get_hyperparameter_names() - #print(confs) + if self.model_sdv is None: + confs = self.config_space.sample_configuration(n_samples) - hps_names = self.config_space.get_hyperparameter_names() - sdv_names = confs.columns + if n_samples == 1: + confs = [confs] + else: + confs = self.model_sdv.sample(n_samples) - new_hps_names = list(set(hps_names)-set(sdv_names)) - #print(new_hps_names) - - rs = np.random.RandomState() - - # randomly sample the new hyperparameters - for name in new_hps_names: - hp = self.config_space.get_hyperparameter(name) - rvs = [] - for i in range(n_samples): - v = hp._sample(rs) - rv = hp._transform(v) - rvs.append(rv) - confs[name] = rvs - - # reoder the column names - confs = confs[hps_names] - #print(confs) - - confs = confs.to_dict('records') - for idx, conf in enumerate(confs): - cf = deactivate_inactive_hyperparameters(conf,self.config_space) - confs[idx] = cf.get_dictionary() - - # check if other conditions are not met; generate valid 1-exchange neighbor; need to test and develop the logic - if 0: - print('conf invalid...generating valid 1-exchange neighbor') - neighborhood = get_one_exchange_neighbourhood(cf,1) - for new_config in neighborhood: - print(new_config) - print(new_config.is_valid_configuration()) - confs[idx] = new_config.get_dictionary() + sdv_names = confs.columns + + new_hps_names = list(set(hps_names)-set(sdv_names)) + + # randomly sample the new hyperparameters + for name in new_hps_names: + hp = self.config_space.get_hyperparameter(name) + rvs = [] + for i in range(n_samples): + v = hp._sample(rng) + rv = hp._transform(v) + rvs.append(rv) + confs[name] = rvs + + # reoder the column names + confs = confs[hps_names] + + confs = confs.to_dict('records') + for idx, conf in enumerate(confs): + cf = deactivate_inactive_hyperparameters(conf,self.config_space) + confs[idx] = cf.get_dictionary() + + # TODO: remove because debug instructions + # check if other conditions are not met; generate valid 1-exchange neighbor; need to test and develop the logic + # print('conf invalid...generating valid 1-exchange neighbor') + # neighborhood = get_one_exchange_neighbourhood(cf,1) + # for new_config in neighborhood: + # print(new_config) + # print(new_config.is_valid_configuration()) + # confs[idx] = new_config.get_dictionary() for idx, conf in enumerate(confs): point = [] @@ -1144,19 +1143,19 @@ def rvs(self, n_samples=1, random_state=None): val = conf[hps_name] point.append(val) req_points.append(point) - #print(req_points[0]) return req_points else: - if self.tl_sdv is None: + if self.model_sdv is None: # Draw columns = [] for dim in self.dimensions: columns.append(dim.rvs(n_samples=n_samples, random_state=rng)) + # Transpose return _transpose_list_array(columns) else: - confs = self.tl_sdv.sample(n_samples) + confs = self.model_sdv.sample(n_samples) return confs.values def set_transformer(self, transform):