Merge pull request #705 from automl/development

Release 0.13.1
automl · Oct 29, 2020 · 6058475 · 6058475
2 parents 9d7d09d + 3c463dc
commit 6058475
Show file tree

Hide file tree

Showing 17 changed files with 646 additions and 46 deletions.
diff --git a/changelog.md b/changelog.md
@@ -1,3 +1,13 @@
+# 0.13.1
+
+## Minor Changes
+* Improve the error message shown when the first run crashes (#694).
+* Experimental: add callback mechanism (#703).
+
+## Bug fixes
+* Fix a bug which could make successive halving fail if run in parallel (#695).
+* Fix a bug which could cause hyperband to ignore the lowest budget (#701).
+
 # 0.13.0
 
 ## Major Changes

diff --git a/doc/api.rst b/doc/api.rst
@@ -8,6 +8,7 @@ API Documentation
 
 .. toctree::
 
+    apidoc/smac.callbacks
     apidoc/smac.configspace
     apidoc/smac.epm
     apidoc/smac.facade

diff --git a/smac/__init__.py b/smac/__init__.py
@@ -5,7 +5,7 @@
 import lazy_import
 from smac.utils import dependencies
 
-__version__ = '0.13.0'
+__version__ = '0.13.1'
 __author__ = 'Marius Lindauer, Matthias Feurer, Katharina Eggensperger, Joshua Marben, ' \
              'André Biedenkapp, Francisco Rivera, Ashwin Raaghav, Aaron Klein, Stefan Falkner ' \
              'and Frank Hutter'

diff --git a/smac/callbacks.py b/smac/callbacks.py
@@ -0,0 +1,38 @@
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from smac.optimizer.smbo import SMBO
+from smac.runhistory.runhistory import RunInfo, RunValue
+
+"""Callbacks for SMAC.
+
+Callbacks allow customizing the behavior of SMAC to one's needs. Currently, the list of implemented callbacks is
+very limited, but new ones can easily be added.
+
+How to add a new callback
+=========================
+
+1. Implement a callback class in this module. There are no restrictions on what such a callback must look like,
+   but it is recommended to implement the main logic inside the ``__call__`` function, as is done for example in
+   ``IncorporateRunResultCallback``.
+2. Add your callback to ``smac.optimizer.smbo.SMBO._callbacks``, using the name of your callback as the key,
+   and an empty list as the value.
+3. Add your callback to ``smac.optimizer.smbo.SMBO._callback_to_key``, using the callback class as the key,
+   and the name as value (the name used in 2.).
+4. Implement calling all registered callbacks at the correct place. This is as simple as
+   ``for callback in self._callbacks['your_callback']: callback(*args, **kwargs)``, where you obviously need to
+   change the callback name and signature.
+"""
+
+
+class IncorporateRunResultCallback:
+
+    """Callback to react to a new run result. Called after the finished run is added to the runhistory."""
+
+    def __call__(
+            self, smbo: 'SMBO',
+            run_info: RunInfo,
+            result: RunValue,
+            time_left: float,
+    ) -> None:
+        pass
diff --git a/smac/facade/smac_ac_facade.py b/smac/facade/smac_ac_facade.py
@@ -692,3 +692,28 @@ def get_trajectory(self) -> List[TrajEntry]:
             raise ValueError('SMAC was not fitted yet. Call optimize() prior '
                              'to accessing the runhistory.')
         return self.trajectory
+
+    def register_callback(self, callback: Callable) -> None:
+        """Register a callback function.
+
+        A callback must be an instantiated object whose class is (or derives from) one of the
+        callback classes in ``smac.callbacks``. It will automatically be registered within SMAC
+        based on which callback class from ``smac.callbacks`` it implements.
+
+        Parameters
+        ----------
+        callback : Callable
+
+        Returns
+        -------
+        None
+        """
+        types_to_check = callback.__class__.__mro__
+        key = None
+        for type_to_check in types_to_check:
+            key = self.solver._callback_to_key.get(type_to_check)
+            if key is not None:
+                break
+        if key is None:
+            raise ValueError('Cannot register callback of type %s' % type(callback))
+        self.solver._callbacks[key].append(callback)
diff --git a/smac/facade/smac_bohb_facade.py b/smac/facade/smac_bohb_facade.py
@@ -42,7 +42,8 @@ def __init__(self, **kwargs: typing.Any):
 
         # Intensification parameters
         # select Hyperband as the intensifier ensure respective parameters are provided
-        kwargs['intensifier'] = Hyperband
+        if kwargs.get('intensifier') is None:
+            kwargs['intensifier'] = Hyperband
 
         # set Hyperband parameters if not given
         intensifier_kwargs = kwargs.get('intensifier_kwargs', dict())

diff --git a/smac/intensification/hyperband.py b/smac/intensification/hyperband.py
@@ -272,6 +272,10 @@ def _update_stage(self, run_history: RunHistory = None) -> None:
         # sample challengers for next iteration (based on HpBandster package)
         n_challengers = int(np.floor((self.s_max + 1) / (self.s + 1)) * self.eta ** self.s)
 
+        # Compute this for the next round
+        n_configs_in_stage = n_challengers * np.power(self.eta, -np.linspace(0, self.s, self.s + 1))
+        n_configs_in_stage = np.array(np.round(n_configs_in_stage), dtype=int).tolist()
+
         self.logger.info('Hyperband iteration-step: %d-%d  with initial budget: %d' % (
             self.hb_iters + 1, self.s_max - self.s + 1, sh_initial_budget))
 
@@ -287,6 +291,8 @@ def _update_stage(self, run_history: RunHistory = None) -> None:
             initial_budget=sh_initial_budget,
             max_budget=self.max_budget,
             eta=self.eta,
+            _all_budgets=self.all_budgets[(-self.s - 1):],
+            _n_configs_in_stage=n_configs_in_stage,
             num_initial_challengers=n_challengers,
             run_obj_time=self.run_obj_time,
             n_seeds=self.n_seeds,

diff --git a/smac/intensification/successive_halving.py b/smac/intensification/successive_halving.py
@@ -71,6 +71,10 @@ class _SuccessiveHalving(AbstractRacer):
         maximum budget allowed for 1 run of successive halving
     eta : float
         'halving' factor after each iteration in a successive halving run. Defaults to 3
+    _all_budgets: typing.Optional[typing.List[float]] = None
+        Used internally when HB uses SH as a subroutine
+    _n_configs_in_stage: typing.Optional[typing.List[int]] = None
+        Used internally when HB uses SH as a subroutine
     num_initial_challengers : typing.Optional[int]
         number of challengers to consider for the initial budget. If None, calculated internally
     run_obj_time : bool
@@ -111,6 +115,8 @@ def __init__(self,
                  initial_budget: typing.Optional[float] = None,
                  max_budget: typing.Optional[float] = None,
                  eta: float = 3,
+                 _all_budgets: typing.Optional[typing.List[float]] = None,
+                 _n_configs_in_stage: typing.Optional[typing.List[int]] = None,
                  num_initial_challengers: typing.Optional[int] = None,
                  run_obj_time: bool = True,
                  n_seeds: typing.Optional[int] = None,
@@ -175,7 +181,9 @@ def __init__(self,
             self.inst_seed_pairs = inst_seed_pairs
 
         # successive halving parameters
-        self._init_sh_params(initial_budget, max_budget, eta, num_initial_challengers)
+        self._init_sh_params(initial_budget=initial_budget, max_budget=max_budget, eta=eta,
+                             num_initial_challengers=num_initial_challengers,
+                             _all_budgets=_all_budgets, _n_configs_in_stage=_n_configs_in_stage)
 
         # adaptive capping
         if self.instance_as_budget and self.instance_order != 'shuffle' and self.run_obj_time:
@@ -213,13 +221,16 @@ def __init__(self,
         # run history, does not have this information and so we track locally. That way,
         # when we access the complete list of configs from the run history, we filter
         # the ones launched by the current succesive halver using self.run_tracker
-        self.run_tracker = []  # type: typing.List[typing.Tuple[Configuration, str, int]]
+        self.run_tracker = {}  # type: typing.Dict[typing.Tuple[Configuration, str, int, float], bool]
 
     def _init_sh_params(self,
                         initial_budget: typing.Optional[float],
                         max_budget: typing.Optional[float],
                         eta: float,
-                        num_initial_challengers: typing.Optional[int]) -> None:
+                        num_initial_challengers: typing.Optional[int] = None,
+                        _all_budgets: typing.Optional[typing.List[float]] = None,
+                        _n_configs_in_stage: typing.Optional[typing.List[int]] = None,
+                        ) -> None:
         """
         initialize Successive Halving parameters
 
@@ -233,6 +244,10 @@ def _init_sh_params(self,
             'halving' factor after each iteration in a successive halving run
         num_initial_challengers : typing.Optional[int]
             number of challengers to consider for the initial budget
+        _all_budgets: typing.Optional[typing.List[float]] = None
+            Used internally when HB uses SH as a subroutine
+        _n_configs_in_stage: typing.Optional[typing.List[int]] = None
+            Used internally when HB uses SH as a subroutine
         """
 
         if eta <= 1:
@@ -280,14 +295,21 @@ def _init_sh_params(self,
         # max. no. of SH iterations possible given the budgets
         max_sh_iter = int(np.floor(np.log(self.max_budget / self.initial_budget) / np.log(self.eta)))
         # initial number of challengers to sample
-        if not num_initial_challengers:
+        if num_initial_challengers is None:
             num_initial_challengers = int(self.eta ** max_sh_iter)
-        # budgets to consider in each stage
-        self.all_budgets = self.max_budget * np.power(self.eta, -np.linspace(max_sh_iter, 0, max_sh_iter + 1))
-        # number of challengers to consider in each stage
-        self.n_configs_in_stage = num_initial_challengers * np.power(self.eta,
-                                                                     -np.linspace(0, max_sh_iter, max_sh_iter + 1))
-        self.n_configs_in_stage = self.n_configs_in_stage.tolist()
+
+        if _all_budgets is not None and _n_configs_in_stage is not None:
+            # Use the given numbers as-is to avoid rounding issues, see #701
+            self.all_budgets = _all_budgets
+            self.n_configs_in_stage = _n_configs_in_stage
+        else:
+            # budgets to consider in each stage
+            self.all_budgets = self.max_budget * np.power(self.eta, -np.linspace(max_sh_iter, 0,
+                                                                                 max_sh_iter + 1))
+            # number of challengers to consider in each stage
+            n_configs_in_stage = num_initial_challengers * \
+                np.power(self.eta, -np.linspace(0, max_sh_iter, max_sh_iter + 1))
+            self.n_configs_in_stage = np.array(np.round(n_configs_in_stage), dtype=int).tolist()
 
     def process_results(self,
                         run_info: RunInfo,
@@ -328,6 +350,9 @@ def process_results(self,
             empirical performance of incumbent configuration
         """
 
+        # Mark the fact that we processed this configuration
+        self.run_tracker[(run_info.config, run_info.instance, run_info.seed, run_info.budget)] = True
+
         # If The incumbent is None and it is the first run, we use the challenger
         if not incumbent and self.first_run:
             self.logger.info(
@@ -336,14 +361,9 @@ def process_results(self,
             incumbent = run_info.config
             self.first_run = False
 
-        # selecting instance-seed subset for this budget, depending on the kind of budget
-        curr_budget = self.all_budgets[self.stage]
-        if self.instance_as_budget:
-            prev_budget = int(self.all_budgets[self.stage - 1]) if self.stage > 0 else 0
-            curr_insts = self.inst_seed_pairs[int(prev_budget):int(curr_budget)]
-        else:
-            curr_insts = self.inst_seed_pairs
-        n_insts_remaining = len(curr_insts) - self.curr_inst_idx - 1
+        # Account for running instances across configurations, not only on the
+        # running configuration
+        n_insts_remaining = self._get_pending_instances_for_stage(run_history)
 
         # Make sure that there is no Budget exhausted
         if result.status == StatusType.CAPPED:
@@ -367,8 +387,18 @@ def process_results(self,
         else:
             self.fail_challengers.add(run_info.config)  # capped/crashed/do not advance configs
 
+        # We need to update the incumbent if this config we are processing
+        # completes all scheduled instance-seed pairs.
+        # Here, a config/seed/instance is going to be processed for the first time
+        # (it has been previously scheduled by get_next_run and marked False, indicating
+        # that it has not been processed yet. Entering process_results() this config/seed/instance
+        # is marked as TRUE as an indication that it has finished and should be processed)
+        # so if all configurations runs are marked as TRUE it means that this new config
+        # was the missing piece to have everything needed to compare against the incumbent
+        update_incumbent = all([v for k, v in self.run_tracker.items() if k[0] == run_info.config])
+
         # get incumbent if all instances have been evaluated
-        if n_insts_remaining <= 0:
+        if n_insts_remaining <= 0 or update_incumbent:
             incumbent = self._compare_configs(challenger=run_info.config,
                                               incumbent=incumbent,
                                               run_history=run_history,
@@ -582,15 +612,17 @@ def get_next_run(self,
         if (self.cutoff is not None) and (cutoff < self.cutoff):  # type: ignore[operator] # noqa F821
             capped = True
 
-        self.run_tracker.append((challenger, instance, seed))
+        budget = 0.0 if self.instance_as_budget else curr_budget
+
+        self.run_tracker[(challenger, instance, seed, budget)] = False
         return RunInfoIntent.RUN, RunInfo(
             config=challenger,
             instance=instance,
             instance_specific=self.instance_specifics.get(instance, "0"),
             seed=seed,
             cutoff=cutoff,
             capped=capped,
-            budget=0.0 if self.instance_as_budget else curr_budget,
+            budget=budget,
             source_id=self.identifier,
         )
 
@@ -674,7 +706,7 @@ def _update_stage(self, run_history: RunHistory) -> None:
                 self.iteration_done = True
                 self.sh_iters += 1
                 self.stage = 0
-                self.run_tracker = []
+                self.run_tracker = {}
                 self.configs_to_run = []
                 self.fail_chal_offset = 0
 
@@ -881,14 +913,18 @@ def _top_k(self,
         for c in configs:
             # ensuring that all configurations being compared are run on the same set of instance, seed & budget
             cur_run_key = run_history.get_runs_for_config(c, only_max_observed_budget=True)
-            if cur_run_key != run_key:
+
+            # Compare as sets -- get_runs_for_config queries from a dictionary
+            # which is not an ordered structure. Some queries to that dictionary returned lists in a
+            # different order, which wrongly triggered the check below
+            if set(cur_run_key) != set(run_key):
                 raise ValueError(
                     'Cannot compare configs that were run on different instances-seeds-budgets: %s vs %s'
                     % (run_key, cur_run_key)
                 )
             config_costs[c] = run_history.get_cost(c)
 
-        configs_sorted = sorted(config_costs, key=config_costs.get)
+        configs_sorted = [k for k, v in sorted(config_costs.items(), key=lambda item: item[1])]
         # select top configurations only
         top_configs = configs_sorted[:k]
         return top_configs
@@ -913,6 +949,38 @@ def _count_running_instances_for_challenger(self, run_history: RunHistory) -> in
 
         return running_instances
 
+    def _get_pending_instances_for_stage(self, run_history: RunHistory) -> int:
+        """
+        When running SH, M configs might require N instances. Before moving to the
+        next stage, we need to make sure that all MxN jobs are completed
+
+        We use the run tracker to make sure we processed all configurations.
+
+        Parameters
+        ----------
+        run_history : RunHistory
+            stores all runs we ran so far
+
+        Returns
+        -------
+            int: All the instances that have not yet been processed
+        """
+        curr_budget = self.all_budgets[self.stage]
+        if self.instance_as_budget:
+            prev_budget = int(self.all_budgets[self.stage - 1]) if self.stage > 0 else 0
+            curr_insts = self.inst_seed_pairs[int(prev_budget):int(curr_budget)]
+        else:
+            curr_insts = self.inst_seed_pairs
+
+        # The minus one here accounts for the fact that len(curr_insts) is a length starting at 1
+        # and self.curr_inst_idx is a zero based index
+        # But when all configurations have been launched and are running in run history
+        # n_insts_remaining becomes -1, which is confusing. Cap to zero
+        n_insts_remaining = max(len(curr_insts) - self.curr_inst_idx - 1, 0)
+        # If there are pending runs from a past config, wait for them
+        pending_to_process = [k for k, v in self.run_tracker.items() if not v]
+        return n_insts_remaining + len(pending_to_process)
+
     def _launched_all_configs_for_current_stage(self, run_history: RunHistory) -> bool:
         """
         This procedure queries if the addition of currently finished configs
@@ -943,9 +1011,25 @@ def _launched_all_configs_for_current_stage(self, run_history: RunHistory) -> bo
         n_insts_remaining = len(curr_insts) - (self.curr_inst_idx + running_instances)
 
         # Check which of the current configs is running
-        my_configs = [c for c, i, s in self.run_tracker]
+        my_configs = [c for c, i, s, b in self.run_tracker]
         running_configs = set()
+        tracked_configs = self.success_challengers.union(
+            self.fail_challengers).union(self.do_not_advance_challengers)
         for k, v in run_history.data.items():
+            # Our goal here is to account for number of challengers available
+            # We care if the challenger is running only if it is not tracked in
+            # success/fails/do not advance
+            # In other words, in each SH iteration we have to run N configs on
+            # M instance/seed pairs. This part of the code makes sure that N different
+            # configurations are launched (we only move to a new config after M
+            # instance-seed pairs on that config are launched)
+            # Notice that this number N of configs tracked in num_chal_available
+            # is a set of processed configurations + the running challengers
+            # so we do not want to double count configurations
+            # n_insts_remaining variable above accounts for the last active configuration only
+            if run_history.ids_config[k.config_id] in tracked_configs:
+                continue
+
             if v.status == StatusType.RUNNING:
                 if run_history.ids_config[k.config_id] in my_configs:
                     running_configs.add(k.config_id)