Commit d4aabbc
Merge pull request scikit-optimize#48 from scikit-optimize/master
Merge upstream
holgern authored Feb 19, 2020
2 parents aea940f + 887818e commit d4aabbc
Showing 28 changed files with 2,088 additions and 83 deletions.
24 changes: 24 additions & 0 deletions doc/modules/classes.rst
@@ -221,6 +221,28 @@ details.
utils.point_aslist
utils.use_named_args

.. _sampler_ref:

:mod:`skopt.sampler`: Samplers
==============================

.. automodule:: skopt.sampler
:no-members:
:no-inherited-members:

**User guide:** See the :ref:`sampler` section for further details.

.. currentmodule:: skopt

.. autosummary::
:toctree: generated/
:template: class.rst

sampler.Lhs
sampler.Sobol
sampler.Halton
sampler.Hammersly
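
The four samplers share a common ``generate`` interface. A minimal usage
sketch, based on the example files added in this commit (the two-dimensional
space below is illustrative, not part of the library)::

    import numpy as np
    from skopt.space import Space
    from skopt.sampler import Lhs, Sobol, Halton, Hammersly

    space = Space([(0.0, 1.0), (0.0, 1.0)])  # two real dimensions

    # every sampler maps "n points in this space" to an evenly spread design
    for sampler in [Lhs(), Sobol(), Halton(), Hammersly()]:
        x = sampler.generate(space.dimensions, 5)
        print(type(sampler).__name__, np.round(x, 2))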


.. _space_ref:

@@ -274,5 +296,7 @@ details.
space.transformers.Normalize
space.transformers.Pipeline
space.transformers.Transformer
space.transformers.LabelEncoder
space.transformers.StringEncoder


6 changes: 6 additions & 0 deletions doc/modules/sampler.rst
@@ -0,0 +1,6 @@
.. currentmodule:: skopt.sampler

.. _sampler:

Sampling methods
================
2 changes: 1 addition & 1 deletion doc/themes/scikit-learn-modern/javascript.html
@@ -10,7 +10,7 @@

<script>
$(document).ready(function() {
-  /* Add a [>>>] button on the top-right corner of code samples to hide
+  /* Add a [>>>] button on the top-right corner of code sampler to hide
* the >>> and ... prompts and the output and thus make the code
* copyable. */
var div = $('.highlight-python .highlight,' +
6 changes: 6 additions & 0 deletions examples/sampler/README.txt
@@ -0,0 +1,6 @@
.. _sampler_examples:

Initial sampling functions
--------------------------

Examples concerning the :mod:`skopt.sampler` module.
167 changes: 167 additions & 0 deletions examples/sampler/initial-sampling-method-integer.py
@@ -0,0 +1,167 @@
"""
===================================================
Comparing initial sampling methods on integer space
===================================================
Holger Nahrstaedt 2020 Sigurd Carlsen October 2019
.. currentmodule:: skopt
When doing baysian optimization we often want to reserve some of the
early part of the optimization to pure exploration. By default the
optimizer suggests purely random samples for the first n_initial_points
(10 by default). The downside to this is that there is no guarantee that
these samples are spread out evenly across all the dimensions.
Sampling methods as Latin hypercube, Sobol, Halton and Hammersly
take advantage of the fact that we know beforehand how many random
points we want to sample. Then these points can be "spread out" in
such a way that each dimension is explored.
See also the example on a real space
:ref:`sphx_glr_auto_examples_initial_sampling_method.py`
"""

print(__doc__)
import numpy as np
np.random.seed(1234)
import matplotlib.pyplot as plt
from skopt.space import Space
from skopt.sampler import Sobol
from skopt.sampler import Lhs
from skopt.sampler import Halton
from skopt.sampler import Hammersly
from scipy.spatial.distance import pdist

#############################################################################

def plot_searchspace(x, title):
    fig, ax = plt.subplots()
    plt.plot(np.array(x)[:, 0], np.array(x)[:, 1], 'bo', label='samples')
    plt.plot(np.array(x)[:, 0], np.array(x)[:, 1], 'bs', markersize=40, alpha=0.5)
    # ax.legend(loc="best", numpoints=1)
    ax.set_xlabel("X1")
    ax.set_xlim([0, 5])
    ax.set_ylabel("X2")
    ax.set_ylim([0, 5])
    plt.title(title)
    ax.grid(True)


n_samples = 10
space = Space([(0, 5), (0, 5)])
space.set_transformer("normalize")

#############################################################################
# Random sampling
# ---------------
x = space.rvs(n_samples)
plot_searchspace(x, "Random samples")
pdist_data = []
x_label = []
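# the 6 x 6 integer grid has 36 distinct points; "empty fields" counts how
# many of them received no sample (duplicates reduce coverage)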
print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0)))
pdist_data.append(pdist(x).flatten())
x_label.append("random")

#############################################################################
# Sobol
# -----
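#
# Sobol is a quasi-random low-discrepancy sequence; successive points are
# constructed to fill the space evenly rather than independently at random.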

sobol = Sobol()
x = sobol.generate(space.dimensions, n_samples)
plot_searchspace(x, 'Sobol')
print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0)))
pdist_data.append(pdist(x).flatten())
x_label.append("sobol")

#############################################################################
# Classic latin hypercube sampling
# --------------------------------
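#
# Classic LHS splits every dimension into ``n_samples`` equal bins and
# places exactly one sample, at a random position, in each bin.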

lhs = Lhs(lhs_type="classic", criterion=None)
x = lhs.generate(space.dimensions, n_samples)
plot_searchspace(x, 'classic LHS')
print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0)))
pdist_data.append(pdist(x).flatten())
x_label.append("lhs")

#############################################################################
# Centered latin hypercube sampling
# ---------------------------------
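#
# Centered LHS also uses one bin per sample and dimension, but places each
# sample at the center of its bin instead of at a random position.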

lhs = Lhs(lhs_type="centered", criterion=None)
x = lhs.generate(space.dimensions, n_samples)
plot_searchspace(x, 'centered LHS')
print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0)))
pdist_data.append(pdist(x).flatten())
x_label.append("center")

#############################################################################
# Maximin optimized hypercube sampling
# ------------------------------------
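#
# The maximin criterion keeps, out of ``iterations`` candidate designs,
# the one that maximizes the minimal pairwise point distance.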

lhs = Lhs(criterion="maximin", iterations=10000)
x = lhs.generate(space.dimensions, n_samples)
plot_searchspace(x, 'maximin LHS')
print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0)))
pdist_data.append(pdist(x).flatten())
x_label.append("maximin")

#############################################################################
# Correlation optimized hypercube sampling
# ----------------------------------------
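#
# The correlation criterion keeps the candidate design with the lowest
# correlation between its dimensions.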

lhs = Lhs(criterion="correlation", iterations=10000)
x = lhs.generate(space.dimensions, n_samples)
plot_searchspace(x, 'correlation LHS')
print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0)))
pdist_data.append(pdist(x).flatten())
x_label.append("corr")

#############################################################################
# Ratio optimized hypercube sampling
# ----------------------------------
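#
# The ratio criterion optimizes the ratio between the largest and the
# smallest pairwise point distance; values closer to 1 give more regular
# designs.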

lhs = Lhs(criterion="ratio", iterations=10000)
x = lhs.generate(space.dimensions, n_samples)
plot_searchspace(x, 'ratio LHS')
print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0)))
pdist_data.append(pdist(x).flatten())
x_label.append("ratio")

#############################################################################
# Halton sampling
# ---------------
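#
# Halton builds each dimension from a van der Corput sequence with a
# distinct prime base (2, 3, 5, ...), giving deterministic, evenly spread
# points.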

halton = Halton()
x = halton.generate(space.dimensions, n_samples)
plot_searchspace(x, 'Halton')
print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0)))
pdist_data.append(pdist(x).flatten())
x_label.append("halton")

#############################################################################
# Hammersly sampling
# ------------------
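#
# Hammersly is a variant of Halton that exploits the known sample count:
# one coordinate is set to the evenly spaced sequence i/n and the remaining
# dimensions follow Halton, typically improving uniformity.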

hammersly = Hammersly()
x = hammersly.generate(space.dimensions, n_samples)
plot_searchspace(x, 'Hammersly')
print("empty fields: %d" % (36 - np.size(np.unique(x, axis=0), 0)))
pdist_data.append(pdist(x).flatten())
x_label.append("hammersly")

#############################################################################
# Pdist boxplot of all methods
# ----------------------------
#
# This boxplot shows the pairwise Euclidean distances between all generated
# points. The higher the values, the better the sampling method spreads the
# points apart. Random sampling clearly performs worst.

fig, ax = plt.subplots()
ax.boxplot(pdist_data)
plt.grid(True)
plt.ylabel("pdist")
_ = ax.set_ylim(0, 6)
_ = ax.set_xticklabels(x_label, rotation=45, fontsize=8)
158 changes: 158 additions & 0 deletions examples/sampler/initial-sampling-method.py
@@ -0,0 +1,158 @@
"""
==================================
Comparing initial sampling methods
==================================
Holger Nahrstaedt 2020 Sigurd Carlsen October 2019
.. currentmodule:: skopt
When doing baysian optimization we often want to reserve some of the
early part of the optimization to pure exploration. By default the
optimizer suggests purely random samples for the first n_initial_points
(10 by default). The downside to this is that there is no guarantee that
these samples are spread out evenly across all the dimensions.
Sampling methods as Latin hypercube, Sobol, Halton and Hammersly
take advantage of the fact that we know beforehand how many random
points we want to sample. Then these points can be "spread out" in
such a way that each dimension is explored.
See also the example on an integer space
:ref:`sphx_glr_auto_examples_initial_sampling_method_integer.py`
"""

print(__doc__)
import numpy as np
np.random.seed(123)
import matplotlib.pyplot as plt
from skopt.space import Space
from skopt.sampler import Sobol
from skopt.sampler import Lhs
from skopt.sampler import Halton
from skopt.sampler import Hammersly
from scipy.spatial.distance import pdist

#############################################################################

def plot_searchspace(x, title):
    fig, ax = plt.subplots()
    plt.plot(np.array(x)[:, 0], np.array(x)[:, 1], 'bo', label='samples')
    plt.plot(np.array(x)[:, 0], np.array(x)[:, 1], 'bo', markersize=80, alpha=0.5)
    # ax.legend(loc="best", numpoints=1)
    ax.set_xlabel("X1")
    ax.set_xlim([-5, 10])
    ax.set_ylabel("X2")
    ax.set_ylim([0, 15])
    plt.title(title)

n_samples = 10

space = Space([(-5., 10.), (0., 15.)])
space.set_transformer("normalize")

#############################################################################
# Random sampling
# ---------------
x = space.rvs(n_samples)
plot_searchspace(x, "Random samples")
pdist_data = []
x_label = []
pdist_data.append(pdist(x).flatten())
x_label.append("random")

#############################################################################
# Sobol
# -----

sobol = Sobol()
x = sobol.generate(space.dimensions, n_samples)
plot_searchspace(x, 'Sobol')
pdist_data.append(pdist(x).flatten())
x_label.append("sobol")

#############################################################################
# Classic Latin hypercube sampling
# --------------------------------

lhs = Lhs(lhs_type="classic", criterion=None)
x = lhs.generate(space.dimensions, n_samples)
plot_searchspace(x, 'classic LHS')
pdist_data.append(pdist(x).flatten())
x_label.append("lhs")

#############################################################################
# Centered Latin hypercube sampling
# ---------------------------------

lhs = Lhs(lhs_type="centered", criterion=None)
x = lhs.generate(space.dimensions, n_samples)
plot_searchspace(x, 'centered LHS')
pdist_data.append(pdist(x).flatten())
x_label.append("center")

#############################################################################
# Maximin optimized hypercube sampling
# ------------------------------------

lhs = Lhs(criterion="maximin", iterations=10000)
x = lhs.generate(space.dimensions, n_samples)
plot_searchspace(x, 'maximin LHS')
pdist_data.append(pdist(x).flatten())
x_label.append("maximin")

#############################################################################
# Correlation optimized hypercube sampling
# ----------------------------------------

lhs = Lhs(criterion="correlation", iterations=10000)
x = lhs.generate(space.dimensions, n_samples)
plot_searchspace(x, 'correlation LHS')
pdist_data.append(pdist(x).flatten())
x_label.append("corr")

#############################################################################
# Ratio optimized hypercube sampling
# ----------------------------------

lhs = Lhs(criterion="ratio", iterations=10000)
x = lhs.generate(space.dimensions, n_samples)
plot_searchspace(x, 'ratio LHS')
pdist_data.append(pdist(x).flatten())
x_label.append("ratio")

#############################################################################
# Halton sampling
# ---------------

halton = Halton()
x = halton.generate(space.dimensions, n_samples)
plot_searchspace(x, 'Halton')
pdist_data.append(pdist(x).flatten())
x_label.append("halton")

#############################################################################
# Hammersly sampling
# ------------------

hammersly = Hammersly()
x = hammersly.generate(space.dimensions, n_samples)
plot_searchspace(x, 'Hammersly')
pdist_data.append(pdist(x).flatten())
x_label.append("hammersly")

#############################################################################
# Pdist boxplot of all methods
# ----------------------------
#
# This boxplot shows the pairwise Euclidean distances between all generated
# points. The higher the values, the better the sampling method spreads the
# points apart. Random sampling clearly performs worst.

fig, ax = plt.subplots()
ax.boxplot(pdist_data)
plt.grid(True)
plt.ylabel("pdist")
_ = ax.set_ylim(0, 12)
_ = ax.set_xticklabels(x_label, rotation=45, fontsize=8)
