Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Noisy Optimization: OCBA and Incumbent #430

Open
wants to merge 33 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
6a6db9b
handle multiple noisy instances
jakob-r Feb 1, 2018
6f0a537
fix tests
jakob-r Feb 5, 2018
d8dc631
error handling
jakob-r Feb 6, 2018
b1a79a3
initial test
ja-thomas Jun 21, 2018
06c7a7d
resolved merge conflicts master
juliambr Jun 22, 2018
ab6fa36
implementation of ocba and incumbent
juliambr Jun 25, 2018
2dd3c91
modified noisy control object
juliambr Jun 25, 2018
ab9cb52
added aggregation functionality
juliambr Jun 25, 2018
2190a10
corrected typos
juliambr Jun 25, 2018
2772d12
added ocba and incumbent replication plus tests
juliambr Jun 25, 2018
1e973fd
printing control object in noisy case
juliambr Jun 25, 2018
3cc45eb
make noisy function to make smoof function noisy
juliambr Jun 25, 2018
02dd45d
remove makenoisy function
juliambr Jun 26, 2018
cbfaffe
fixed test for incumbent
juliambr Jun 27, 2018
3067bef
more documentation
juliambr Jun 27, 2018
b9be9ba
fixed test for incumbent
juliambr Jun 27, 2018
773c4ef
bug usage data.table
juliambr Jun 27, 2018
7ac3f6c
fixed tests
juliambr Jun 27, 2018
677d211
Merge branch 'master' into feature_noisy
jakob-r Jun 28, 2018
34b9fdc
error handling
juliambr Jun 28, 2018
6e35606
smbo noisy fix
jakob-r Jun 29, 2018
17d33d0
Merge branch 'master' of https://github.com/mlr-org/mlrMBO into featu…
juliambr Jul 20, 2018
e820fe0
Merge branch 'feature_noisy' of https://github.com/mlr-org/mlrMBO int…
juliambr Jul 20, 2018
3755cd4
fixed OCBA divide by 0 bug
juliambr Jul 20, 2018
ae2fd31
correct for numerical error in OCBA
juliambr Sep 3, 2018
4009dbc
adding second stage identification for mlrMBO
juliambr Sep 4, 2018
f5e6a40
identification phase bugs
juliambr Sep 5, 2018
af13fc3
tests for identification
juliambr Sep 5, 2018
507431f
fixing bug in identification
juliambr Sep 6, 2018
1fcc40d
documentation of identification method
juliambr Sep 7, 2018
da0a514
adapt identification strategy
juliambr Sep 12, 2018
901e4c4
identification termination by max evals
juliambr Sep 24, 2018
0337c31
merge master
juliambr Dec 12, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ export(renderExampleRunPlot)
export(setMBOControlInfill)
export(setMBOControlMultiObj)
export(setMBOControlMultiPoint)
export(setMBOControlNoisy)
export(setMBOControlTermination)
export(trafoLog)
export(trafoSqrt)
Expand Down
5 changes: 5 additions & 0 deletions R/OptState_getter.R
Original file line number Diff line number Diff line change
Expand Up @@ -144,3 +144,8 @@ getOptStateValidTerminationStates = function() {
c("term.iter", "term.time", "term.exectime", "term.yval", "term.feval", "term.custom")
}

# Tells whether an intensification step has to be run for this opt.state.
# @param opt.state The optimization state; its control object is read via
#   getOptStateOptProblem() / getOptProblemControl().
# @return TRUE if control$noisy.method is set and is either "incumbent" or
#   "ocba", FALSE otherwise (including when no noisy method is configured).
getOptStateIntensification = function(opt.state) {
  control = getOptProblemControl(getOptStateOptProblem(opt.state))
  method = control$noisy.method
  # no noisy handling configured at all
  if (is.null(method)) {
    return(FALSE)
  }
  return(method %in% c("incumbent", "ocba"))
}
39 changes: 37 additions & 2 deletions R/evalTargetFun.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,30 @@ evalTargetFun.OptState = function(opt.state, xs, extras) {
# short names and so on
nevals = length(xs)
ny = control$n.objectives

# trafo X points
xs.trafo = lapply(xs, trafoValue, par = par.set)

# handle noisy instances
if (isTRUE(control$noisy.instances > 1L)) {
nevals = nevals * control$noisy.instances
xs = rep(xs, each = control$noisy.instances)
extras = rep(extras, each = control$noisy.instances)
if (!control$noisy.self.replicating) {
xs.trafo = rep(xs.trafo, each = control$noisy.instances)
if (!is.na(control$noisy.instance.param)) {
inst.param = lapply(seq_len(control$noisy.instances), function(x) setNames(list(x), control$noisy.instance.param))
xs.trafo = Map(c, xs.trafo, inst.param)
}
}
}


num.format = control$output.num.format
num.format.string = paste("%s = ", num.format, sep = "")
dobs = ensureVector(asInteger(getOptStateLoop(opt.state)), n = nevals, cl = "integer")
imputeY = control$impute.y.fun

# trafo X points
xs.trafo = lapply(xs, trafoValue, par = par.set)

# function to measure of fun call
wrapFun = function(x) {
Expand All @@ -43,6 +60,9 @@ evalTargetFun.OptState = function(opt.state, xs, extras) {
user.extras = attr(y, "extras")
y = setAttribute(y, "extras", NULL)
}
if (!is.null(control$noisy.instance.param) && !is.na(control$noisy.instance.param) && !control$noisy.self.replicating) {
user.extras = c(user.extras, x[control$noisy.instance.param])
}
st = proc.time() - st
list(y = y, time = st[3], user.extras = user.extras)
}
Expand All @@ -56,6 +76,21 @@ evalTargetFun.OptState = function(opt.state, xs, extras) {
res = parallelMap(wrapFun, xs.trafo, level = "mlrMBO.feval",
impute.error = if (is.null(imputeY)) NULL else identity)

# handle noisy instances of self.replicating functions
if (isTRUE(control$noisy.instances > 1L) && control$noisy.self.replicating) {
xs.trafo = rep(xs.trafo, each = control$noisy.instances)
res = lapply(res, function(r) {
if (is.error(r)) {
rep(list(r), control$noisy.instances)
} else {
lapply(seq_along(r$y), function(i) {
list(y = r$y[i], time = r$time / length(r$y), user.extras = c(r$user.extras, setNames(list(i), control$noisy.instance.param)))
})
}
})
res = unlist(res, recursive = FALSE)
}

# loop evals and to some post-processing
for (i in seq_len(nevals)) {
r = res[[i]]; x = xs[[i]]; x.trafo = xs.trafo[[i]]; dob = dobs[i]
Expand Down
170 changes: 170 additions & 0 deletions R/intensifyOptState.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
# Dispatches to the intensification routine selected by control$noisy.method.
# @param opt.state The optimization state.
# @return The result of intensifyIncumbent() for "incumbent", of
#   intensifyOCBA() for "ocba"; NULL (invisibly) for any other method.
intensifyOptState = function(opt.state) {
  control = getOptProblemControl(getOptStateOptProblem(opt.state))
  method = control$noisy.method

  if (method == "incumbent") {
    intensifyIncumbent(opt.state)
  } else if (method == "ocba") {
    intensifyOCBA(opt.state)
  }
}

# Intensification step of the "incumbent" replication strategy.
#
# The design is summarised per distinct point (mean y, sd, number of runs).
# The incumbent (best mean among all points except the newest one) receives
# one more evaluation. Then a set of challengers (always the newest point,
# plus up to control$noisy.incumbent.nchallengers randomly drawn older points)
# is raced against the incumbent: each challenger gets 1, 2, 4, ... further
# evaluations as long as it has fewer runs than the incumbent and a better
# (smaller) mean.
#
# @param opt.state The optimization state.
# @return opt.state. The return value of replicatePoint() is not used here.
#   NOTE(review): presumably the evaluations are recorded in opt.state by
#   reference - confirm.
intensifyIncumbent = function(opt.state) {

  opt.problem = getOptStateOptProblem(opt.state)
  control = getOptProblemControl(opt.problem)
  op = as.data.table(getOptStateOptPath(opt.state))
  # every column left of "y" is treated as a parameter column;
  # seq_len() avoids the c(1, 0) trap of 1:(k - 1) when "y" is the first column
  par.names = colnames(op)[seq_len(which(colnames(op) == "y") - 1L)] #FIXME: This sucks

  # get a summary of the design
  ds = getOptPathSummary(opt.state, par.names)
  nds = nrow(ds)

  # incumbent: current best point w. r. t. mean over all function evaluations
  # the newest point cannot be the incumbent, it is always a challenger
  # DOES NOT WORK FOR MULTIPOINT PROPOSAL YET
  inc = which.min(ds$y[-nds])
  # incumbent is replicated once in each iteration
  replicatePoint(opt.state, x = ds[inc, ..par.names], type = "incumbent", reps = 1L)

  # determine a set of challengers
  if (control$noisy.incumbent.nchallengers == 0L) {
    cls = nds
  } else {
    # candidates: every point except the incumbent (cannot be challenged
    # against itself) and the newest point (which is always a challenger)
    cand = setdiff(seq_len(nds), c(inc, nds))
    p = min(control$noisy.incumbent.nchallengers, length(cand))
    if (p > 0L) {
      # draw without replacement with probability proportional to exp(-y);
      # shifting by min(y) leaves the probabilities unchanged but prevents
      # exp() from over-/underflowing to Inf/0 (which would give NaN weights)
      w = exp(-(ds$y[cand] - min(ds$y[cand])))
      probs = w / sum(w)
      # sample positions, not values: sample(cand, ...) would treat a single
      # candidate index k >= 2 as the range 1:k
      picked = cand[sample.int(length(cand), size = p, replace = FALSE, prob = probs)]
    } else {
      picked = integer(0L)
    }
    cls = c(picked, nds)
  }

  # start the race
  for (cl in cls) {

    r = 1L
    replicatePoint(opt.state, x = ds[cl, ..par.names], type = "challenger", reps = r)
    ds = getOptPathSummary(opt.state, par.names)

    # proceed as long as challenger has less runs than incumbent and is better than incumbent
    while (ds$runs[cl] < ds$runs[inc] && ds$y[cl] < ds$y[inc]) {
      # double the number of replications in every round
      r = 2L * r
      replicatePoint(opt.state, x = ds[cl, ..par.names], type = "challenger", reps = r)
      ds = getOptPathSummary(opt.state, par.names)
    }

  }
  return(opt.state)
}

# Evaluates the given point(s) again and records them with the given type.
#
# @param opt.state The optimization state.
# @param x Table of points (one row per point, parameter columns only).
# @param type [character(1)] Proposal type recorded for every replicated row
#   (passed to makeProposal() as prop.type).
# @param reps Number of times the rows of x are repeated (passed to rep()).
# @return opt.state. If nothing is to be replicated (x has no rows or reps is
#   0) the function returns immediately without proposing or evaluating.
replicatePoint = function(opt.state, x, type, reps = 1L) {

  # row indices, repeated according to the number of desired replicates
  idx = rep(seq_len(nrow(x)), reps)

  # nothing to evaluate: do not build and evaluate an empty proposal
  if (length(idx) == 0L) {
    return(opt.state)
  }
  xrep = x[idx, ]

  opt.problem = getOptStateOptProblem(opt.state)
  control = getOptProblemControl(opt.problem)

  prop = makeProposal(control, xrep, prop.type = rep(type, nrow(xrep)))
  evalProposedPoints.OptState(opt.state, prop)

  return(opt.state)
}

# Aggregates the optimization path per distinct point.
# @param opt.state The optimization state whose opt path is summarised.
# @param par.names [character] Names of the columns identifying a point.
# @return A data.table with one row per distinct combination of par.names and
#   the columns y (mean of y), ysd (standard deviation of y) and runs (number
#   of evaluations).
getOptPathSummary = function(opt.state, par.names) {
  evals = as.data.table(getOptStateOptPath(opt.state))
  summary.tab = evals[, list(y = mean(y), ysd = sd(y), runs = .N), by = par.names]
  return(summary.tab)
}


# Intensification step of the "ocba" replication strategy.
#
# First every distinct point is brought up to a minimum number of evaluations
# (so that a standard deviation is available for each point), then
# control$noisy.ocba.budget additional evaluations are distributed over the
# points as computed by distributeOCBA().
#
# @param opt.state The optimization state.
# @return opt.state.
intensifyOCBA = function(opt.state) {

  # some initialization
  opt.problem = getOptStateOptProblem(opt.state)
  control = getOptProblemControl(opt.problem)

  op = as.data.table(getOptStateOptPath(opt.state))
  par.names = colnames(op)[1:(which(colnames(op) == "y") - 1)] #FIXME: This sucks

  # minimum number of replicates at each point (at least 2)
  minrep = max(control$noisy.ocba.initial, 2L)

  # calculate summary of the design
  ds = getOptPathSummary(opt.state, par.names)
  nds = nrow(ds)

  # make sure that initially, each point is evaluated at least minrep times
  xinit = rep(seq_len(nds), pmax(minrep - ds$runs, 0))
  # skip the call if every point already has minrep runs
  if (length(xinit) > 0L) {
    opt.state = replicatePoint(opt.state, x = ds[xinit, ..par.names], type = "initeval")
  }

  # distribute the per-iteration budget based on the refreshed summary
  ds = getOptPathSummary(opt.state, par.names)
  add = distributeOCBA(ds, budget = control$noisy.ocba.budget)
  reps = rep(seq_len(nds), add)

  # skip the call if no additional replication was allocated
  if (length(reps) > 0L) {
    replicatePoint(opt.state, x = ds[reps, ..par.names], type = "OCBA")
  }

  return(opt.state)
}


# Computes how many additional replications each design gets from a budget.
#
# @param ds Design summary with one row per design and the columns
#   y (mean), ysd (standard deviation) and runs (evaluations so far),
#   as used below.
# @param budget Number of additional evaluations to distribute.
# @return Numeric vector of length nrow(ds): additional replications per
#   design (target allocation minus the runs already performed).
#
# NOTE(review): the ratios divide by ds$ysd and by differences of ds$y; a zero
# standard deviation or tied means would produce non-finite ratios - confirm
# how callers guarantee this cannot happen.
distributeOCBA = function(ds, budget) {

nds = nrow(ds)

# TODO: until now only minimization possible
# total budget = new budget plus the evaluations that were already spent
tbudget = budget + sum(ds$runs)

# search for the best and second-best design (smallest and second-smallest mean)
b = order(ds$y)[1]
s = order(ds$y)[2]

# vector of ratios, expressed relative to the second-best design (ratio = 1)
ratio = rep(0, nds)
ratio[s] = 1

# calculate ratios
# all other designs: squared ratio of the mean differences to the best design,
# scaled by the variance ratio w. r. t. the second-best design
tmp = (ds[b, ]$y - ds[s, ]$y) / (ds[b, ]$y - ds[- c(s, b), ]$y)
ratio[- c(s, b)] = tmp^2 * ds[- c(s, b), ]$ysd^2 / ds[s, ]$ysd^2
# best design: ratio[b] is still 0 at this point, so the sum effectively runs
# over all other designs
ratio[b] = ds[b, ]$ysd * sqrt(sum(ratio^2 / ds$ysd^2))

# additional replications
add = rep(0, nds)

# initially no design is disabled
disabled = rep(FALSE, nds)

more_alloc = TRUE

# re-allocate until no active design is assigned fewer runs than it already has
while (more_alloc) {

# split the remaining total budget proportionally to the ratios of the active designs
add[!disabled] = roundPreserveSum(tbudget / sum(ratio[!disabled]) * ratio[!disabled])

# disable designs that have been run too much
disabled = disabled | (ds$runs > add)
more_alloc = any(ds$runs > add)

# set additional replications s.t. already run replications are set
add[disabled] = ds[disabled, ]$runs

# decrease total budget correspondingly
tbudget = budget + sum(ds$runs) - sum(add[disabled])
}

# convert the target allocation into additional replications
add = add - ds$runs

return(add)
}

# Rounds the elements of x to the given number of digits such that the rounded
# values add up to the rounded sum of x.
# @param x Numeric vector.
# @param digits Number of decimal digits to keep, defaults to 0.
# @return Numeric vector of the same length as x: every element is rounded
#   down, then the elements with the largest fractional parts are increased by
#   one unit until the rounded total is reached.
roundPreserveSum = function(x, digits = 0) {
  scale = 10 ^ digits
  scaled = x * scale
  floored = floor(scaled)
  # number of units that are lost by rounding everything down
  n.up = round(sum(scaled)) - sum(floored)
  # positions ordered by increasing fractional part; the last n.up get the units
  by.fraction = order(scaled - floored)
  bump = tail(by.fraction, n.up)
  floored[bump] = floored[bump] + 1
  floored / scale
}
10 changes: 10 additions & 0 deletions R/makeMBOControl.R
Original file line number Diff line number Diff line change
Expand Up @@ -187,11 +187,21 @@ print.MBOControl = function(x, ...) {
} else {
if (x$propose.points == 1) {
print(x$infill.crit)
catf("")
catf("Infill optimizer : %s", x$infill.opt)
catf("Infill optimizer restarts : %i", x$infill.opt.restarts)
} else {
catf("Multi-point method : %s", x$multipoint.method)
}
catf("Final point by : %s", x$final.method)
}

# if (!is.null(x$noisy.method))
# if (x$noisy.method == "fixed")
# catf("Replication strategy : fixed (noisy instances = %i)", x$noisy.instances)
# if (x$noisy.method == "ocba")
# catf("Replication strategy : ocba (initial budget = %i, replication budget = %i)",
# x$noisy.ocba.budget, x$noisy.ocba.initial)
# if(x$noisy.method == "incumbent")
# catf("Replication strategy : incumbent (number of challengers = (1 + %i)", x$noisy.incumbent.nchallengers)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These changes look like an error @juliambr

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oops, that shouldn't be there - removed it. Thanks!

}
8 changes: 8 additions & 0 deletions R/makeTaskSingleObj.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,13 @@ makeTaskSingleObj = function(opt.path, control) {
data[[y.name]] = trafo.y.fun(data[[y.name]])
}

agg = control$noisy.instance.aggregation

if (!is.null(agg)) {
par.names = colnames(data)[1:(which(colnames(data) == "y") - 1)] #FIXME: This sucks
data = setDT(data)[, .(y = agg(y)), by = par.names]
data = data.frame(data)
}

makeRegrTask(target = control$y.name, data = data)
}
4 changes: 4 additions & 0 deletions R/mboTemplate.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ mboTemplate.OptState = function(obj) {
repeat {
prop = proposePoints(opt.state)
evalProposedPoints.OptState(opt.state, prop)
intensify = getOptStateIntensification(opt.state)
if (intensify) {
intensifyOptState(opt.state)
}
finalizeMboLoop(opt.state)
terminate = getOptStateTermination(opt.state)
if (terminate$term) {
Expand Down
57 changes: 57 additions & 0 deletions R/setMBOControlNoisy.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#' @title Set options for handling noisy functions.
#' @description
#' Extends an MBO control object with options for handling noisy functions.
#' Arguments that are left at \code{NULL} keep the value already stored in
#' \code{control}; if nothing is stored yet, the documented default is used.
#' @template arg_control
#' @param method [\code{character(1)}]\cr
#'   Which of the replication strategies should be used? Possible values are:
#'   \dQuote{fixed}: Every point is evaluated \code{instances} times. \cr
#'   \dQuote{incumbent}: Use an incumbent strategy as intensification strategy.
#'   The size of the set of additional challengers (apart from the incumbent) can be specified in \code{incumbent.nchallengers}. \cr
#'   \dQuote{ocba}: Distribute the replication budget according to OCBA.
#'   The replication budget per iteration is specified in \code{ocba.budget},
#'   the initial number of replications per point is specified in \code{ocba.initial}. \cr
#'   Defaults to \dQuote{fixed}.
#' @param instances [\code{integer(1)}]\cr
#'   How many instances of one parameter will be calculated? Defaults to \code{1}.
#' @param instance.param [\code{character(1)}]\cr
#'   What is the name of the function param that defines the instance?
#'   Defaults to \code{NA}, or to \dQuote{noisy.repl} for self replicating
#'   functions (for which it can not be changed).
#' @param self.replicating [\code{logical(1)}]\cr
#'   TRUE if the function returns a vector of noisy results for one input. Then \code{instances} specifies the length of the result we expect.
#'   Defaults to \code{FALSE}.
#' @param incumbent.nchallengers [\code{integer(1)}]\cr
#'   The size of the set of additional challengers (apart from the incumbent), defaults to \code{0}.
#' @param ocba.budget [\code{integer(1)}]\cr
#'   The budget that is allocated in each iteration per Optimal Computing Budget Allocation (OCBA) rule, defaults to 10.
#' @param ocba.initial [\code{integer(1)}]\cr
#'   The number of initial replications at each new point, defaults to \code{3}.
#'   This needs to be larger than 1, since OCBA requires an initial variance estimate at each point.
#' @param instance.aggregation [\code{function}]\cr
#'   Should data be aggregated per instance? If yes, a function (e. g. mean) needs to be specified.
#' @return [\code{\link{MBOControl}}].
#' @family MBOControl
#' @export
setMBOControlNoisy = function(control,
  method = NULL,
  instances = NULL,
  instance.param = NULL,
  instance.aggregation = NULL,
  self.replicating = NULL,
  incumbent.nchallengers = NULL,
  ocba.budget = NULL,
  ocba.initial = NULL) {

  assertClass(control, "MBOControl")
  control$noisy.method = coalesce(method, control$noisy.method, "fixed")
  assertChoice(control$noisy.method, choices = c("fixed", "incumbent", "ocba"))
  control$noisy.instances = assertInt(instances, lower = 1L, null.ok = TRUE, na.ok = FALSE) %??% control$noisy.instances %??% 1L
  control$noisy.self.replicating = assertFlag(self.replicating, null.ok = TRUE, na.ok = FALSE) %??% control$noisy.self.replicating %??% FALSE

  # explicit argument first, then the value already stored in the control
  inst.param = assertString(instance.param, null.ok = TRUE, na.ok = TRUE) %??% control$noisy.instance.param
  # (re-)derive the default if nothing is stored yet, or if only the NA default
  # of an earlier call is stored and the caller did not set instance.param now;
  # otherwise enabling self.replicating in a later call would keep the stale NA
  if (is.null(inst.param) || (is.null(instance.param) && is.na(inst.param))) {
    inst.param = if (control$noisy.self.replicating) "noisy.repl" else NA_character_
  }
  control$noisy.instance.param = inst.param

  control$noisy.instance.aggregation = assertClass(instance.aggregation, "function", null.ok = TRUE) %??% control$noisy.instance.aggregation
  control$noisy.ocba.budget = assertInt(ocba.budget, lower = 1L, null.ok = TRUE, na.ok = FALSE) %??% control$noisy.ocba.budget %??% 10L
  control$noisy.ocba.initial = assertInt(ocba.initial, lower = 2L, null.ok = TRUE, na.ok = FALSE) %??% control$noisy.ocba.initial %??% 3L
  control$noisy.incumbent.nchallengers = assertInt(incumbent.nchallengers, lower = 0L, null.ok = TRUE, na.ok = FALSE) %??% control$noisy.incumbent.nchallengers %??% 0L

  # NA-safe comparison: `NA != "noisy.repl"` is NA and would make if() fail
  # with "missing value where TRUE/FALSE needed" instead of the message below
  if (control$noisy.self.replicating && !isTRUE(control$noisy.instance.param == "noisy.repl")) {
    stop("You can not change the instance.param for self replicating functions.")
  }

  return(control)
}
1 change: 1 addition & 0 deletions man/makeMBOControl.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/setMBOControlInfill.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/setMBOControlMultiObj.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/setMBOControlMultiPoint.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading