Skip to content

Commit

Permalink
Merge pull request #705 from automl/development
Browse files Browse the repository at this point in the history
Release 0.13.1
  • Loading branch information
mfeurer authored Oct 29, 2020
2 parents 9d7d09d + 3c463dc commit 6058475
Show file tree
Hide file tree
Showing 17 changed files with 646 additions and 46 deletions.
10 changes: 10 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
# 0.13.1

## Minor Changes
* Improve error message for first run crashed (#694).
* Experimental: add callback mechanism (#703).

## Bug fixes
* Fix a bug which could make successive halving fail if run in parallel (#695).
* Fix a bug which could cause hyperband to ignore the lowest budget (#701).

# 0.13.0

## Major Changes
Expand Down
1 change: 1 addition & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ API Documentation

.. toctree::

apidoc/smac.callbacks
apidoc/smac.configspace
apidoc/smac.epm
apidoc/smac.facade
Expand Down
2 changes: 1 addition & 1 deletion smac/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import lazy_import
from smac.utils import dependencies

__version__ = '0.13.0'
__version__ = '0.13.1'
__author__ = 'Marius Lindauer, Matthias Feurer, Katharina Eggensperger, Joshua Marben, ' \
'André Biedenkapp, Francisco Rivera, Ashwin Raaghav, Aaron Klein, Stefan Falkner ' \
'and Frank Hutter'
Expand Down
38 changes: 38 additions & 0 deletions smac/callbacks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from smac.optimizer.smbo import SMBO
from smac.runhistory.runhistory import RunInfo, RunValue

"""Callbacks for SMAC.
Callbacks allow customizing the behavior of SMAC to one's needs. Currently, the list of implemented callbacks is
very limited, but new ones can easily be added.
How to add a new callback
=========================
1. Implement a callback class in this module. There are no restrictions on what such a callback must look like,
but it is recommended to implement the main logic inside the `__call__` function, as is done for example in
``IncorporateRunResultCallback``.
2. Add your callback to ``smac.optimizer.smbo.SMBO._callbacks``, using the name of your callback as the key,
and an empty list as the value.
3. Add your callback to ``smac.optimizer.smbo.SMBO._callback_to_key``, using the callback class as the key,
and the name as the value (the name used in 2.).
4. Implement calling all registered callbacks at the correct place. This is as simple as
``for callback in self._callbacks['your_callback']: callback(*args, **kwargs)``, where you need to
change the callback name and signature to match your callback.
"""


class IncorporateRunResultCallback:
    """Callback to react on a new run result.

    Called after the finished run is added to the runhistory. This base
    implementation does nothing; subclass it and override ``__call__`` to
    add behavior.

    All project types in the signature are written as string (forward
    reference) annotations so that defining this class never requires the
    annotated names to be importable at runtime (``SMBO`` is only imported
    under ``typing.TYPE_CHECKING``).
    """

    def __call__(
        self,
        smbo: 'SMBO',
        run_info: 'RunInfo',
        result: 'RunValue',
        time_left: float,
    ) -> None:
        """React to a finished run.

        Parameters
        ----------
        smbo : SMBO
            The SMBO object passed by the caller (presumably the optimizer
            that invokes the callback -- confirm at the call site).
        run_info : RunInfo
            Description of the run that finished.
        result : RunValue
            Result of the run that finished.
        time_left : float
            Presumably the remaining optimization time; the unit is not
            visible from this module -- confirm at the call site.

        Returns
        -------
        None
        """
        pass
25 changes: 25 additions & 0 deletions smac/facade/smac_ac_facade.py
Original file line number Diff line number Diff line change
Expand Up @@ -692,3 +692,28 @@ def get_trajectory(self) -> List[TrajEntry]:
raise ValueError('SMAC was not fitted yet. Call optimize() prior '
'to accessing the runhistory.')
return self.trajectory

def register_callback(self, callback: Callable) -> None:
    """Register an instantiated callback object with the solver.

    The callback's class hierarchy (its MRO) is walked in order, and the
    first class that appears in ``self.solver._callback_to_key`` decides
    under which key the callback is appended to ``self.solver._callbacks``.
    A callback therefore has to be an instance of (a subclass of) one of the
    classes known to the solver, e.g. a class from ``smac.callbacks``.

    Parameters
    ----------
    callback : Callable
        Instantiated callback object.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If no class in the callback's MRO is a known callback type.
    """
    # Lazily look up every class of the MRO; stop at the first known one.
    candidate_keys = (
        self.solver._callback_to_key.get(klass)
        for klass in callback.__class__.__mro__
    )
    key = next((name for name in candidate_keys if name is not None), None)
    if key is None:
        raise ValueError('Cannot register callback of type %s' % type(callback))
    self.solver._callbacks[key].append(callback)
3 changes: 2 additions & 1 deletion smac/facade/smac_bohb_facade.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ def __init__(self, **kwargs: typing.Any):

# Intensification parameters
# select Hyperband as the intensifier and ensure the respective parameters are provided
kwargs['intensifier'] = Hyperband
if kwargs.get('intensifier') is None:
kwargs['intensifier'] = Hyperband

# set Hyperband parameters if not given
intensifier_kwargs = kwargs.get('intensifier_kwargs', dict())
Expand Down
6 changes: 6 additions & 0 deletions smac/intensification/hyperband.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,10 @@ def _update_stage(self, run_history: RunHistory = None) -> None:
# sample challengers for next iteration (based on HpBandster package)
n_challengers = int(np.floor((self.s_max + 1) / (self.s + 1)) * self.eta ** self.s)

# Compute this for the next round
n_configs_in_stage = n_challengers * np.power(self.eta, -np.linspace(0, self.s, self.s + 1))
n_configs_in_stage = np.array(np.round(n_configs_in_stage), dtype=int).tolist()

self.logger.info('Hyperband iteration-step: %d-%d with initial budget: %d' % (
self.hb_iters + 1, self.s_max - self.s + 1, sh_initial_budget))

Expand All @@ -287,6 +291,8 @@ def _update_stage(self, run_history: RunHistory = None) -> None:
initial_budget=sh_initial_budget,
max_budget=self.max_budget,
eta=self.eta,
_all_budgets=self.all_budgets[(-self.s - 1):],
_n_configs_in_stage=n_configs_in_stage,
num_initial_challengers=n_challengers,
run_obj_time=self.run_obj_time,
n_seeds=self.n_seeds,
Expand Down
134 changes: 109 additions & 25 deletions smac/intensification/successive_halving.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ class _SuccessiveHalving(AbstractRacer):
maximum budget allowed for 1 run of successive halving
eta : float
'halving' factor after each iteration in a successive halving run. Defaults to 3
_all_budgets: typing.Optional[typing.List[float]] = None
Used internally when HB uses SH as a subroutine
_n_configs_in_stage: typing.Optional[typing.List[int]] = None
Used internally when HB uses SH as a subroutine
num_initial_challengers : typing.Optional[int]
number of challengers to consider for the initial budget. If None, calculated internally
run_obj_time : bool
Expand Down Expand Up @@ -111,6 +115,8 @@ def __init__(self,
initial_budget: typing.Optional[float] = None,
max_budget: typing.Optional[float] = None,
eta: float = 3,
_all_budgets: typing.Optional[typing.List[float]] = None,
_n_configs_in_stage: typing.Optional[typing.List[int]] = None,
num_initial_challengers: typing.Optional[int] = None,
run_obj_time: bool = True,
n_seeds: typing.Optional[int] = None,
Expand Down Expand Up @@ -175,7 +181,9 @@ def __init__(self,
self.inst_seed_pairs = inst_seed_pairs

# successive halving parameters
self._init_sh_params(initial_budget, max_budget, eta, num_initial_challengers)
self._init_sh_params(initial_budget=initial_budget, max_budget=max_budget, eta=eta,
num_initial_challengers=num_initial_challengers,
_all_budgets=_all_budgets, _n_configs_in_stage=_n_configs_in_stage)

# adaptive capping
if self.instance_as_budget and self.instance_order != 'shuffle' and self.run_obj_time:
Expand Down Expand Up @@ -213,13 +221,16 @@ def __init__(self,
# run history, does not have this information and so we track locally. That way,
# when we access the complete list of configs from the run history, we filter
# the ones launched by the current successive halver using self.run_tracker
self.run_tracker = [] # type: typing.List[typing.Tuple[Configuration, str, int]]
self.run_tracker = {} # type: typing.Dict[typing.Tuple[Configuration, str, int, float], bool]

def _init_sh_params(self,
initial_budget: typing.Optional[float],
max_budget: typing.Optional[float],
eta: float,
num_initial_challengers: typing.Optional[int]) -> None:
num_initial_challengers: typing.Optional[int] = None,
_all_budgets: typing.Optional[typing.List[float]] = None,
_n_configs_in_stage: typing.Optional[typing.List[int]] = None,
) -> None:
"""
initialize Successive Halving parameters
Expand All @@ -233,6 +244,10 @@ def _init_sh_params(self,
'halving' factor after each iteration in a successive halving run
num_initial_challengers : typing.Optional[int]
number of challengers to consider for the initial budget
_all_budgets: typing.Optional[typing.List[float]] = None
Used internally when HB uses SH as a subroutine
_n_configs_in_stage: typing.Optional[typing.List[int]] = None
Used internally when HB uses SH as a subroutine
"""

if eta <= 1:
Expand Down Expand Up @@ -280,14 +295,21 @@ def _init_sh_params(self,
# max. no. of SH iterations possible given the budgets
max_sh_iter = int(np.floor(np.log(self.max_budget / self.initial_budget) / np.log(self.eta)))
# initial number of challengers to sample
if not num_initial_challengers:
if num_initial_challengers is None:
num_initial_challengers = int(self.eta ** max_sh_iter)
# budgets to consider in each stage
self.all_budgets = self.max_budget * np.power(self.eta, -np.linspace(max_sh_iter, 0, max_sh_iter + 1))
# number of challengers to consider in each stage
self.n_configs_in_stage = num_initial_challengers * np.power(self.eta,
-np.linspace(0, max_sh_iter, max_sh_iter + 1))
self.n_configs_in_stage = self.n_configs_in_stage.tolist()

if _all_budgets is not None and _n_configs_in_stage is not None:
# Assert we use the given numbers to avoid rounding issues, see #701
self.all_budgets = _all_budgets
self.n_configs_in_stage = _n_configs_in_stage
else:
# budgets to consider in each stage
self.all_budgets = self.max_budget * np.power(self.eta, -np.linspace(max_sh_iter, 0,
max_sh_iter + 1))
# number of challengers to consider in each stage
n_configs_in_stage = num_initial_challengers * \
np.power(self.eta, -np.linspace(0, max_sh_iter, max_sh_iter + 1))
self.n_configs_in_stage = np.array(np.round(n_configs_in_stage), dtype=int).tolist()

def process_results(self,
run_info: RunInfo,
Expand Down Expand Up @@ -328,6 +350,9 @@ def process_results(self,
empirical performance of incumbent configuration
"""

# Mark the fact that we processed this configuration
self.run_tracker[(run_info.config, run_info.instance, run_info.seed, run_info.budget)] = True

# If The incumbent is None and it is the first run, we use the challenger
if not incumbent and self.first_run:
self.logger.info(
Expand All @@ -336,14 +361,9 @@ def process_results(self,
incumbent = run_info.config
self.first_run = False

# selecting instance-seed subset for this budget, depending on the kind of budget
curr_budget = self.all_budgets[self.stage]
if self.instance_as_budget:
prev_budget = int(self.all_budgets[self.stage - 1]) if self.stage > 0 else 0
curr_insts = self.inst_seed_pairs[int(prev_budget):int(curr_budget)]
else:
curr_insts = self.inst_seed_pairs
n_insts_remaining = len(curr_insts) - self.curr_inst_idx - 1
# Account for running instances across configurations, not only on the
# running configuration
n_insts_remaining = self._get_pending_instances_for_stage(run_history)

# Make sure that there is no Budget exhausted
if result.status == StatusType.CAPPED:
Expand All @@ -367,8 +387,18 @@ def process_results(self,
else:
self.fail_challengers.add(run_info.config) # capped/crashed/do not advance configs

# We need to update the incumbent if the config we are processing
# has completed all of its scheduled instance-seed pairs.
# Every config/seed/instance was previously scheduled by get_next_run and
# marked False in the run tracker, indicating that it has not been processed yet.
# On entering process_results(), this config/seed/instance is marked True
# as an indication that it has finished and is being processed.
# So if all runs of this configuration are marked True, this new result
# was the missing piece needed to compare the config against the incumbent.
update_incumbent = all([v for k, v in self.run_tracker.items() if k[0] == run_info.config])

# get incumbent if all instances have been evaluated
if n_insts_remaining <= 0:
if n_insts_remaining <= 0 or update_incumbent:
incumbent = self._compare_configs(challenger=run_info.config,
incumbent=incumbent,
run_history=run_history,
Expand Down Expand Up @@ -582,15 +612,17 @@ def get_next_run(self,
if (self.cutoff is not None) and (cutoff < self.cutoff): # type: ignore[operator] # noqa F821
capped = True

self.run_tracker.append((challenger, instance, seed))
budget = 0.0 if self.instance_as_budget else curr_budget

self.run_tracker[(challenger, instance, seed, budget)] = False
return RunInfoIntent.RUN, RunInfo(
config=challenger,
instance=instance,
instance_specific=self.instance_specifics.get(instance, "0"),
seed=seed,
cutoff=cutoff,
capped=capped,
budget=0.0 if self.instance_as_budget else curr_budget,
budget=budget,
source_id=self.identifier,
)

Expand Down Expand Up @@ -674,7 +706,7 @@ def _update_stage(self, run_history: RunHistory) -> None:
self.iteration_done = True
self.sh_iters += 1
self.stage = 0
self.run_tracker = []
self.run_tracker = {}
self.configs_to_run = []
self.fail_chal_offset = 0

Expand Down Expand Up @@ -881,14 +913,18 @@ def _top_k(self,
for c in configs:
# ensuring that all configurations being compared are run on the same set of instance, seed & budget
cur_run_key = run_history.get_runs_for_config(c, only_max_observed_budget=True)
if cur_run_key != run_key:

# Compare as sets -- get_runs_for_config queries from a dictionary,
# which is not an ordered structure. Some queries to that dictionary returned
# differently ordered lists, which wrongly triggered the check below
if set(cur_run_key) != set(run_key):
raise ValueError(
'Cannot compare configs that were run on different instances-seeds-budgets: %s vs %s'
% (run_key, cur_run_key)
)
config_costs[c] = run_history.get_cost(c)

configs_sorted = sorted(config_costs, key=config_costs.get)
configs_sorted = [k for k, v in sorted(config_costs.items(), key=lambda item: item[1])]
# select top configurations only
top_configs = configs_sorted[:k]
return top_configs
Expand All @@ -913,6 +949,38 @@ def _count_running_instances_for_challenger(self, run_history: RunHistory) -> in

return running_instances

def _get_pending_instances_for_stage(self, run_history: RunHistory) -> int:
    """Count the work of the current stage that has not been processed yet.

    When running SH, M configs might require N instances. Before moving to
    the next stage, all MxN jobs have to be completed. The count returned
    here is the sum of

    * the instance-seed pairs of the current stage that lie beyond
      ``self.curr_inst_idx`` (never negative), and
    * the entries of ``self.run_tracker`` whose value is still falsy, i.e.
      runs that were handed out but whose result was not processed yet.

    Parameters
    ----------
    run_history : RunHistory
        stores all runs we ran so far (not read by this method)

    Returns
    -------
    int: All the instances that have not yet been processed
    """
    stage_budget = self.all_budgets[self.stage]
    if not self.instance_as_budget:
        stage_insts = self.inst_seed_pairs
    else:
        # With instances as budget, a stage only covers the slice of
        # instance-seed pairs between the previous and the current budget.
        lower = int(self.all_budgets[self.stage - 1]) if self.stage > 0 else 0
        stage_insts = self.inst_seed_pairs[lower:int(stage_budget)]

    # len() is one-based while self.curr_inst_idx is a zero-based index, hence
    # the extra "- 1". Once everything has been launched this would become -1,
    # which is confusing, so the value is capped at zero.
    not_yet_launched = len(stage_insts) - self.curr_inst_idx - 1
    if not_yet_launched < 0:
        not_yet_launched = 0

    # Runs that were scheduled (possibly for an earlier config) but whose
    # results were not processed yet also have to be waited for.
    unprocessed = sum(1 for processed in self.run_tracker.values() if not processed)
    return not_yet_launched + unprocessed

def _launched_all_configs_for_current_stage(self, run_history: RunHistory) -> bool:
"""
This procedure queries if the addition of currently finished configs
Expand Down Expand Up @@ -943,9 +1011,25 @@ def _launched_all_configs_for_current_stage(self, run_history: RunHistory) -> bo
n_insts_remaining = len(curr_insts) - (self.curr_inst_idx + running_instances)

# Check which of the current configs is running
my_configs = [c for c, i, s in self.run_tracker]
my_configs = [c for c, i, s, b in self.run_tracker]
running_configs = set()
tracked_configs = self.success_challengers.union(
self.fail_challengers).union(self.do_not_advance_challengers)
for k, v in run_history.data.items():
# Our goal here is to account for the number of challengers available.
# We care whether a challenger is running only if it is not already tracked in
# success/fails/do not advance.
# In other words, in each SH iteration we have to run N configs on
# M instance/seed pairs. This part of the code makes sure that N different
# configurations are launched (we only move to a new config after M
# instance-seed pairs on that config are launched).
# Notice that this number N of configs, tracked in num_chal_available,
# is the set of processed configurations plus the running challengers,
# so we do not want to double count configurations.
# The n_insts_remaining variable above accounts for the last active configuration only.
if run_history.ids_config[k.config_id] in tracked_configs:
continue

if v.status == StatusType.RUNNING:
if run_history.ids_config[k.config_id] in my_configs:
running_configs.add(k.config_id)
Expand Down
Loading

0 comments on commit 6058475

Please sign in to comment.