Skip to content

Commit

Permalink
debug when space is configspace and no model_sdv is provided
Browse files Browse the repository at this point in the history
  • Loading branch information
Deathn0t committed Feb 14, 2022
1 parent b2b0ba2 commit 163e378
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 93 deletions.
88 changes: 51 additions & 37 deletions examples/transfer-learn.py
Original file line number Diff line number Diff line change
@@ -1,46 +1,54 @@
import numpy as np
from skopt import Optimizer
import pandas as pd
from sdv.tabular import GaussianCopula
from skopt.space import Real, Integer
from skopt.utils import use_named_args
from sdv.tabular import TVAE
from sdv.evaluation import evaluate
from sdv.tabular import TVAE
from skopt import Optimizer
from skopt.space import Integer, Real
from skopt.utils import use_named_args

import sys

def f(x):
res = 0
for i in range(len(x)):
res = res + (np.sin(5 * x[i]) * (1 - np.tanh(x[i] ** 2)) + np.random.randn() * 0.1)
res = res + (
np.sin(5 * x[i]) * (1 - np.tanh(x[i] ** 2)) + np.random.randn() * 0.1
)
return res


df = pd.read_csv('skopt_result.csv',header=0)
df = pd.read_csv("skopt_result.csv", header=0)
print(df)
q_10 = np.quantile(df.objective.values, 0.10)
req_df = df.loc[df['objective'] < q_10]
req_df = df.loc[df["objective"] < q_10]
print(req_df.shape)
req_df = req_df.drop(columns=['objective'])
req_df = req_df.drop(columns=["objective"])
print(req_df.shape)


space = [Integer(1, 20, name='epochs'),
#Integer(1, np.floor(req_df.shape[0]/10), name='batch_size'),
Integer(1, 8, name='embedding_dim'),
Integer(1, 8, name= 'compress_dims'),
Integer(1, 8, name= 'decompress_dims'),
Real(10**-8, 10**-4, "log-uniform", name='l2scale'),
Integer(1, 5, name= 'loss_factor')
]
space = [
Integer(1, 20, name="epochs"),
# Integer(1, np.floor(req_df.shape[0]/10), name='batch_size'),
Integer(1, 8, name="embedding_dim"),
Integer(1, 8, name="compress_dims"),
Integer(1, 8, name="decompress_dims"),
Real(10**-8, 10**-4, "log-uniform", name="l2scale"),
Integer(1, 5, name="loss_factor"),
]


@use_named_args(space)
def objective(**params):
params['epochs'] = 10*params['epochs']
#params['batch_size'] = 10*params['batch_size']
params['embedding_dim'] = 2**params['embedding_dim']
params['compress_dims'] = [2**params['compress_dims'],2**params['compress_dims']]
params['decompress_dims'] = [2**params['decompress_dims'],2**params['decompress_dims']]
params["epochs"] = 10 * params["epochs"]
# params['batch_size'] = 10*params['batch_size']
params["embedding_dim"] = 2 ** params["embedding_dim"]
params["compress_dims"] = [
2 ** params["compress_dims"],
2 ** params["compress_dims"],
]
params["decompress_dims"] = [
2 ** params["decompress_dims"],
2 ** params["decompress_dims"],
]
print(params)
model = TVAE(**params)
model.fit(req_df)
Expand All @@ -52,11 +60,17 @@ def objective(**params):

@use_named_args(space)
def model_fit(**params):
params['epochs'] = 10*params['epochs']
#params['batch_size'] = 10*params['batch_size']
params['embedding_dim'] = 2**params['embedding_dim']
params['compress_dims'] = [2**params['compress_dims'],2**params['compress_dims']]
params['decompress_dims'] = [2**params['decompress_dims'],2**params['decompress_dims']]
params["epochs"] = 10 * params["epochs"]
# params['batch_size'] = 10*params['batch_size']
params["embedding_dim"] = 2 ** params["embedding_dim"]
params["compress_dims"] = [
2 ** params["compress_dims"],
2 ** params["compress_dims"],
]
params["decompress_dims"] = [
2 ** params["decompress_dims"],
2 ** params["decompress_dims"],
]
print(params)
model = TVAE(**params)
model.fit(req_df)
Expand All @@ -66,37 +80,37 @@ def model_fit(**params):
return -score, model


opt = Optimizer(space, tl_sdv=None)
opt = Optimizer(space, model_sdv=None)
for i in range(30):
suggested = opt.ask()
y = objective(suggested)
opt.tell(suggested, y)
print('iteration:', i, suggested, y)
print("iteration:", i, suggested, y)

print(opt.yi)

min_value = min(opt.yi)
min_value = min(opt.yi)
min_index = opt.yi.index(min_value)
print(min_value)
best_params = opt.Xi[min_index]

score, model = model_fit(best_params)
print(score)

opt = Optimizer([(-3.0, 3.0),(-3.0, 3.0),(-3.0, 3.0),(-3.0, 3.0),(-3.0, 3.0)], tl_sdv = model)
opt = Optimizer(
[(-3.0, 3.0), (-3.0, 3.0), (-3.0, 3.0), (-3.0, 3.0), (-3.0, 3.0)], model_sdv=model
)

if 1:
for i in range(100):
suggested = opt.ask()
y = f(suggested)
opt.tell(suggested, y)
print('iteration:', i, y)
print("iteration:", i, y)

df = pd.DataFrame(opt.Xi)
print(df)
print(opt.yi)
df['yi'] = opt.yi
df["yi"] = opt.yi
print(df)
df.to_csv('tl-skopt_result.csv',index=False)


df.to_csv("tl-skopt_result.csv", index=False)
19 changes: 12 additions & 7 deletions skopt/optimizer/optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,9 @@ class Optimizer(object):
Keeps list of models only as long as the argument given. In the
case of None, the list has no capped length.
model_sdv : Model or None, default None
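A generative model from the Synthetic Data Vault (SDV) library. When given, points are drawn from it via ``model_sdv.sample`` instead of the default sampling of the search space.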
Attributes
----------
Xi : list
Expand Down Expand Up @@ -190,14 +193,12 @@ def __init__(
model_queue_size=None,
acq_func_kwargs=None,
acq_optimizer_kwargs=None,
tl_sdv=None,
model_sdv=None,
):
args = locals().copy()
del args["self"]
self.specs = {"args": args, "function": "Optimizer"}
self.rng = check_random_state(random_state)
print(tl_sdv)
self.tl_sdv = tl_sdv

# Configure acquisition function

Expand Down Expand Up @@ -313,7 +314,10 @@ def __init__(
if isinstance(self.base_estimator_, GaussianProcessRegressor):
dimensions = normalize_dimensions(dimensions)

self.space = Space(dimensions, tl_sdv)
# keep track of the generative model from sdv
self.model_sdv = model_sdv

self.space = Space(dimensions, model_sdv=self.model_sdv)

self._initial_samples = None
self._initial_point_generator = cook_initial_point_generator(
Expand Down Expand Up @@ -382,15 +386,16 @@ def copy(self, random_state=None):
acq_func_kwargs=self.acq_func_kwargs,
acq_optimizer_kwargs=self.acq_optimizer_kwargs,
random_state=random_state,
tl_sdv=self.tl_sdv
model_sdv=self.model_sdv
)

optimizer._initial_samples = self._initial_samples

optimizer.sampled = self.sampled[:]

if hasattr(self, "tl_sdv"):
optimizer.tl_sdv = self.tl_sdv
# TODO: commented out because it is possibly not necessary
# if hasattr(self, "tl_sdv"):
# optimizer.model_sdv = self.model_sdv

if hasattr(self, "gains_"):
optimizer.gains_ = np.copy(self.gains_)
Expand Down
97 changes: 48 additions & 49 deletions skopt/space/space.py
Original file line number Diff line number Diff line change
Expand Up @@ -888,18 +888,24 @@ class Space(object):
dimensions.
"""

def __init__(self, dimensions, tl_sdv=None):
def __init__(self, dimensions, model_sdv=None):

# attributes used when a ConfigurationSpace from ConfigSpace is given
self.is_config_space = False
self.config_space_samples = None
self.config_space_explored = False

self.imp_const = SimpleImputer(
missing_values=np.nan, strategy="constant", fill_value=-1000
)
self.imp_const_inv = SimpleImputer(
missing_values=-1000, strategy="constant", fill_value=np.nan
)

# attribute used when a generative model is used to sample
self.model_sdv = model_sdv

self.hps_names = []
self.tl_sdv = tl_sdv

if isinstance(dimensions, CS.ConfigurationSpace):
self.is_config_space = True
Expand Down Expand Up @@ -1082,57 +1088,50 @@ def rvs(self, n_samples=1, random_state=None):
Points sampled from the space.
"""

#n_samples = 100

rng = check_random_state(random_state)
if self.is_config_space:
req_points = []
if self.tl_sdv is None:
confs = self.config_space.sample_configuration(n_samples)
else:
confs = self.tl_sdv.sample(n_samples)
print('successfully sampling with tl_sdv! ')

if n_samples == 1:
confs = [confs]
hps_names = self.config_space.get_hyperparameter_names()

#print(confs)
if self.model_sdv is None:
confs = self.config_space.sample_configuration(n_samples)

hps_names = self.config_space.get_hyperparameter_names()
sdv_names = confs.columns
if n_samples == 1:
confs = [confs]
else:
confs = self.model_sdv.sample(n_samples)

new_hps_names = list(set(hps_names)-set(sdv_names))
#print(new_hps_names)

rs = np.random.RandomState()

# randomly sample the new hyperparameters
for name in new_hps_names:
hp = self.config_space.get_hyperparameter(name)
rvs = []
for i in range(n_samples):
v = hp._sample(rs)
rv = hp._transform(v)
rvs.append(rv)
confs[name] = rvs

# reorder the column names
confs = confs[hps_names]
#print(confs)

confs = confs.to_dict('records')
for idx, conf in enumerate(confs):
cf = deactivate_inactive_hyperparameters(conf,self.config_space)
confs[idx] = cf.get_dictionary()

# check if other conditions are not met; generate valid 1-exchange neighbor; need to test and develop the logic
if 0:
print('conf invalid...generating valid 1-exchange neighbor')
neighborhood = get_one_exchange_neighbourhood(cf,1)
for new_config in neighborhood:
print(new_config)
print(new_config.is_valid_configuration())
confs[idx] = new_config.get_dictionary()
sdv_names = confs.columns

new_hps_names = list(set(hps_names)-set(sdv_names))

# randomly sample the new hyperparameters
for name in new_hps_names:
hp = self.config_space.get_hyperparameter(name)
rvs = []
for i in range(n_samples):
v = hp._sample(rng)
rv = hp._transform(v)
rvs.append(rv)
confs[name] = rvs

# reorder the column names
confs = confs[hps_names]

confs = confs.to_dict('records')
for idx, conf in enumerate(confs):
cf = deactivate_inactive_hyperparameters(conf,self.config_space)
confs[idx] = cf.get_dictionary()

# TODO: remove the commented-out block below (debug instructions)
# check if other conditions are not met; generate valid 1-exchange neighbor; need to test and develop the logic
# print('conf invalid...generating valid 1-exchange neighbor')
# neighborhood = get_one_exchange_neighbourhood(cf,1)
# for new_config in neighborhood:
# print(new_config)
# print(new_config.is_valid_configuration())
# confs[idx] = new_config.get_dictionary()

for idx, conf in enumerate(confs):
point = []
Expand All @@ -1144,19 +1143,19 @@ def rvs(self, n_samples=1, random_state=None):
val = conf[hps_name]
point.append(val)
req_points.append(point)
#print(req_points[0])

return req_points
else:
if self.tl_sdv is None:
if self.model_sdv is None:
# Draw
columns = []
for dim in self.dimensions:
columns.append(dim.rvs(n_samples=n_samples, random_state=rng))

# Transpose
return _transpose_list_array(columns)
else:
confs = self.tl_sdv.sample(n_samples)
confs = self.model_sdv.sample(n_samples)
return confs.values

def set_transformer(self, transform):
Expand Down

0 comments on commit 163e378

Please sign in to comment.