mlr-org · ja-thomas · Feb 1, 2018 · Feb 5, 2018 · Feb 6, 2018 · Jun 21, 2018
diff --git a/NAMESPACE b/NAMESPACE
@@ -58,6 +58,7 @@ export(renderExampleRunPlot)
 export(setMBOControlInfill)
 export(setMBOControlMultiObj)
 export(setMBOControlMultiPoint)
+export(setMBOControlNoisy)
 export(setMBOControlTermination)
 export(trafoLog)
 export(trafoSqrt)

diff --git a/R/OptState_getter.R b/R/OptState_getter.R
@@ -144,3 +144,8 @@ getOptStateValidTerminationStates = function() {
  c("term.iter", "term.time", "term.exectime", "term.yval", "term.feval", "term.custom")
 }
 
+# Tells whether the configured noise handling needs an intensification step.
+# Looks up control$noisy.method on the control object of the opt.state's
+# problem and yields TRUE only for the methods "incumbent" and "ocba";
+# an unset method yields FALSE.
+getOptStateIntensification = function(opt.state) {
+  control = getOptProblemControl(getOptStateOptProblem(opt.state))
+  method = control$noisy.method
+  if (is.null(method)) {
+    return(FALSE)
+  }
+  method %in% c("incumbent", "ocba")
+}
diff --git a/R/evalTargetFun.R b/R/evalTargetFun.R
@@ -25,13 +25,30 @@ evalTargetFun.OptState = function(opt.state, xs, extras) {
  # short names and so on
  nevals = length(xs)
  ny = control$n.objectives
+
+ # trafo X points
+ xs.trafo = lapply(xs, trafoValue, par = par.set)
+
+ # handle noisy instances
+ if (isTRUE(control$noisy.instances > 1L)) {
+ nevals = nevals * control$noisy.instances
+ xs = rep(xs, each = control$noisy.instances)
+ extras = rep(extras, each = control$noisy.instances)
+ if (!control$noisy.self.replicating) {
+ xs.trafo = rep(xs.trafo, each = control$noisy.instances)
+ if (!is.na(control$noisy.instance.param)) {
+ inst.param = lapply(seq_len(control$noisy.instances), function(x) setNames(list(x), control$noisy.instance.param))
+ xs.trafo = Map(c, xs.trafo, inst.param)
+ }
+ }
+ }
+
+
  num.format = control$output.num.format
  num.format.string = paste("%s = ", num.format, sep = "")
  dobs = ensureVector(asInteger(getOptStateLoop(opt.state)), n = nevals, cl = "integer")
  imputeY = control$impute.y.fun
 
- # trafo X points
- xs.trafo = lapply(xs, trafoValue, par = par.set)
 
  # function to measure of fun call
  wrapFun = function(x) {
@@ -43,6 +60,9 @@ evalTargetFun.OptState = function(opt.state, xs, extras) {
  user.extras = attr(y, "extras")
  y = setAttribute(y, "extras", NULL)
  }
+ if (!is.null(control$noisy.instance.param) && !is.na(control$noisy.instance.param) && !control$noisy.self.replicating) {
+ user.extras = c(user.extras, x[control$noisy.instance.param])
+ }
  st = proc.time() - st
  list(y = y, time = st[3], user.extras = user.extras)
  }
@@ -56,6 +76,21 @@ evalTargetFun.OptState = function(opt.state, xs, extras) {
  res = parallelMap(wrapFun, xs.trafo, level = "mlrMBO.feval",
  impute.error = if (is.null(imputeY)) NULL else identity)
 
+ # handle noisy instances of self.replicating functions
+ if (isTRUE(control$noisy.instances > 1L) && control$noisy.self.replicating) {
+ xs.trafo = rep(xs.trafo, each = control$noisy.instances)
+ res = lapply(res, function(r) {
+ if (is.error(r)) {
+ rep(list(r), control$noisy.instances)
+ } else {
+ lapply(seq_along(r$y), function(i) {
+ list(y = r$y[i], time = r$time / length(r$y), user.extras = c(r$user.extras, setNames(list(i), control$noisy.instance.param)))
+ })
+ }
+ })
+ res = unlist(res, recursive = FALSE)
+ }
+
  # loop evals and to some post-processing
  for (i in seq_len(nevals)) {
  r = res[[i]]; x = xs[[i]]; x.trafo = xs.trafo[[i]]; dob = dobs[i]

diff --git a/R/intensifyOptState.R b/R/intensifyOptState.R
@@ -0,0 +1,170 @@
+# Runs the intensification routine that belongs to control$noisy.method:
+# "incumbent" -> intensifyIncumbent(), "ocba" -> intensifyOCBA().
+# Any other method name selects nothing and the result is an invisible NULL.
+intensifyOptState = function(opt.state) {
+  control = getOptProblemControl(getOptStateOptProblem(opt.state))
+
+  # lookup table instead of a switch; `[[` matches names exactly
+  handlers = list(
+    incumbent = intensifyIncumbent,
+    ocba = intensifyOCBA
+  )
+  handler = handlers[[control$noisy.method]]
+  if (is.null(handler)) {
+    return(invisible(NULL))
+  }
+  handler(opt.state)
+}
+
+# Intensification by racing challengers against an incumbent (minimization).
+#
+# The incumbent is the design point with the lowest mean y among all points
+# except the newest one (last row of the summary); it receives one additional
+# evaluation. The newest point and up to control$noisy.incumbent.nchallengers
+# randomly drawn older points are then raced against the incumbent: a
+# challenger is re-evaluated with a doubling number of replications for as
+# long as it has fewer runs than the incumbent and a lower mean y.
+#
+# @param opt.state The optimization state; new evaluations are triggered
+#   through replicatePoint().
+# @return The opt.state that was passed in.
+intensifyIncumbent = function(opt.state) {
+
+  opt.problem = getOptStateOptProblem(opt.state)
+  control = getOptProblemControl(opt.problem)
+  op = as.data.table(getOptStateOptPath(opt.state))
+  # FIXME: treats every column left of "y" as a parameter column
+  par.names = colnames(op)[seq_len(which(colnames(op) == "y") - 1L)]
+
+  # one row per distinct parameter configuration (mean y, sd, number of runs)
+  ds = getOptPathSummary(opt.state, par.names)
+  nds = nrow(ds)
+
+  # incumbent: current best point w.r.t. the mean over all its evaluations.
+  # The newest point (row nds) cannot be the incumbent, it is always a challenger.
+  # NOTE: does not work for multi-point proposals yet.
+  inc = which.min(ds$y[-nds])
+  # the incumbent is replicated once in each iteration
+  replicatePoint(opt.state, x = ds[inc, ..par.names], type = "incumbent", reps = 1L)
+
+  # candidate challengers: everything except the incumbent and the newest point
+  cands = setdiff(seq_len(nds), c(inc, nds))
+  p = min(control$noisy.incumbent.nchallengers, length(cands))
+
+  if (p > 0L) {
+    # draw p candidates without replacement, weighted by a softmax of -y.
+    # Shifting by min(y) leaves the probabilities unchanged but avoids overflow in exp().
+    ycand = ds$y[cands]
+    w = exp(-(ycand - min(ycand)))
+    # sample positions, not values: sample(k, ...) with a single candidate k
+    # would draw from 1:k and fail on the length of prob
+    picked = cands[sample.int(length(cands), size = p, replace = FALSE, prob = w / sum(w))]
+  } else {
+    picked = integer(0)
+  }
+  # the new point is always a challenger and races last
+  cls = c(picked, nds)
+
+  # start the race
+  for (cl in cls) {
+
+    r = 1L
+    replicatePoint(opt.state, x = ds[cl, ..par.names], type = "challenger", reps = r)
+    ds = getOptPathSummary(opt.state, par.names)
+
+    # proceed as long as the challenger has fewer runs than the incumbent and is better
+    while (ds$runs[cl] < ds$runs[inc] && ds$y[cl] < ds$y[inc]) {
+      r = 2L * r
+      replicatePoint(opt.state, x = ds[cl, ..par.names], type = "challenger", reps = r)
+      ds = getOptPathSummary(opt.state, par.names)
+    }
+
+  }
+  return(opt.state)
+}
+
+# Evaluates the rows of x again.
+#
+# The row index sequence of x is repeated `reps` times, the resulting rows are
+# wrapped into a proposal whose prop.type is `type` for every row, and that
+# proposal is handed to evalProposedPoints.OptState().
+#
+# @param opt.state The optimization state to evaluate on.
+# @param x Table of points, one row per point.
+# @param type Label stored as prop.type for every replicated row.
+# @param reps Number of repetitions of the row sequence.
+# @return The opt.state that was passed in.
+replicatePoint = function(opt.state, x, type, reps = 1L) {
+
+  control = getOptProblemControl(getOptStateOptProblem(opt.state))
+
+  # build the replicated set of rows
+  row.idx = rep(seq_len(nrow(x)), reps)
+  points = x[row.idx, ]
+
+  proposal = makeProposal(control, points, prop.type = rep(type, nrow(points)))
+  evalProposedPoints.OptState(opt.state, proposal)
+
+  return(opt.state)
+}
+
+# Summarises the optimization path per distinct parameter configuration.
+# Groups the path by the columns in par.names and returns, per group, the mean
+# of y (y), the standard deviation of y (ysd) and the number of rows (runs).
+getOptPathSummary = function(opt.state, par.names) {
+  path = as.data.table(getOptStateOptPath(opt.state))
+  summary = path[, list(y = mean(y), ysd = sd(y), runs = .N), by = par.names]
+  return(summary)
+}
+
+
+# Intensification by Optimal Computing Budget Allocation (OCBA).
+#
+# First every design point is brought up to at least `minrep` evaluations
+# (control$noisy.ocba.initial, but never fewer than 2, because an sd estimate
+# per point is needed). Afterwards control$noisy.ocba.budget additional
+# replications are distributed over the points by distributeOCBA().
+#
+# @param opt.state The optimization state; new evaluations are triggered
+#   through replicatePoint().
+# @return The opt.state that was passed in.
+intensifyOCBA = function(opt.state) {
+
+  # some initialization
+  opt.problem = getOptStateOptProblem(opt.state)
+  control = getOptProblemControl(opt.problem)
+
+  op = as.data.table(getOptStateOptPath(opt.state))
+  # FIXME: treats every column left of "y" as a parameter column
+  par.names = colnames(op)[seq_len(which(colnames(op) == "y") - 1L)]
+
+  # minimum number of replicates at each point
+  minrep = max(control$noisy.ocba.initial, 2L)
+
+  # calculate summary of the design
+  ds = getOptPathSummary(opt.state, par.names)
+  nds = nrow(ds)
+
+  # make sure that initially, each point is evaluated at least minrep times;
+  # row i appears once per missing replication
+  xinit = rep(seq_len(nds), pmax(minrep - ds$runs, 0))
+  if (length(xinit) > 0L) {
+    replicatePoint(opt.state, x = ds[xinit, ..par.names], type = "initeval")
+  }
+
+  # distribute the per-iteration budget based on the refreshed summary
+  ds = getOptPathSummary(opt.state, par.names)
+  add = distributeOCBA(ds, budget = control$noisy.ocba.budget)
+  reps = rep(seq_len(nds), add)
+
+  # nothing to evaluate if no replication was allocated
+  if (length(reps) > 0L) {
+    replicatePoint(opt.state, x = ds[reps, ..par.names], type = "OCBA")
+  }
+
+  return(opt.state)
+}
+
+
+# Distributes a replication budget over design points following the OCBA rule.
+#
+# @param ds Summary table with one row per design point and the columns
+#   y (mean response), ysd (standard deviation of the response) and
+#   runs (number of evaluations so far).
+# @param budget Number of additional replications to distribute.
+# @return Numeric vector with one entry per row of ds: the number of
+#   additional replications for that design point.
+#
+# All computations work on plain column vectors. Indexing the table itself
+# with an expression such as ds[-c(s, b), ] would, for a data.table, evaluate
+# `s` and `b` with the table's columns in scope, so a parameter column named
+# "s" or "b" would silently replace the row indices.
+distributeOCBA = function(ds, budget) {
+
+  nds = nrow(ds)
+  y = ds$y
+  ysd = ds$ysd
+  runs = ds$runs
+
+  # with fewer than two designs there is no best/second-best pair:
+  # the whole budget goes to the only design (or nowhere for an empty table)
+  if (nds < 2L) {
+    return(rep(budget, nds))
+  }
+
+  # TODO: until now only minimization possible
+  total = budget + sum(runs)
+  tbudget = total
+
+  # search for the best and second-best design
+  ord = order(y)
+  b = ord[1]
+  s = ord[2]
+  rest = setdiff(seq_len(nds), c(s, b))
+
+  # allocation ratios relative to the second-best design
+  ratio = rep(0, nds)
+  ratio[s] = 1
+
+  # non-best designs: squared ratio of mean gaps to the best, scaled by the variances
+  tmp = (y[b] - y[s]) / (y[b] - y[rest])
+  ratio[rest] = tmp^2 * ysd[rest]^2 / ysd[s]^2
+  # best design: ratio[b] is still 0 here, so the sum runs over all other designs
+  ratio[b] = ysd[b] * sqrt(sum(ratio^2 / ysd^2))
+
+  # target number of replications per design (existing runs included)
+  add = rep(0, nds)
+
+  # designs that already ran more often than their target get disabled
+  disabled = rep(FALSE, nds)
+
+  more_alloc = TRUE
+
+  while (more_alloc) {
+
+    active = !disabled
+    add[active] = roundPreserveSum(tbudget / sum(ratio[active]) * ratio[active])
+
+    # disable designs that have been run too much
+    overrun = runs > add
+    more_alloc = any(overrun)
+    disabled = disabled | overrun
+
+    # a disabled design keeps exactly the replications it already has
+    add[disabled] = runs[disabled]
+
+    # decrease total budget correspondingly
+    tbudget = total - sum(add[disabled])
+  }
+
+  # convert targets into additional replications
+  add - runs
+}
+
+# Rounds x to `digits` decimal places such that the rounded values add up to
+# the rounded sum of x: every value is floored first and the missing units are
+# handed to the entries with the largest fractional parts.
+roundPreserveSum = function(x, digits = 0) {
+  scale = 10 ^ digits
+  scaled = x * scale
+  lower = floor(scaled)
+  # number of units lost by flooring
+  n.bump = round(sum(scaled)) - sum(lower)
+  # positions ordered by fractional part; the last n.bump ones are rounded up
+  by.fraction = order(scaled - lower)
+  bump = tail(by.fraction, n.bump)
+  lower[bump] = lower[bump] + 1
+  lower / scale
+}
diff --git a/R/makeMBOControl.R b/R/makeMBOControl.R
@@ -187,11 +187,21 @@ print.MBOControl = function(x, ...) {
  } else {
  if (x$propose.points == 1) {
  print(x$infill.crit)
+ catf("")
  catf("Infill optimizer : %s", x$infill.opt)
  catf("Infill optimizer restarts : %i", x$infill.opt.restarts)
  } else {
  catf("Multi-point method : %s", x$multipoint.method)
  }
  catf("Final point by : %s", x$final.method)
  }
+
+ # if (!is.null(x$noisy.method))
+ # if (x$noisy.method == "fixed")
+ # catf("Replication strategy : fixed (noisy instances = %i)", x$noisy.instances)
+ # if (x$noisy.method == "ocba")
+ # catf("Replication strategy : ocba (initial budget = %i, replication budget = %i)", 
+ # x$noisy.ocba.budget, x$noisy.ocba.initial)
+ # if(x$noisy.method == "incumbent")
+ # catf("Replication strategy : incumbent (number of challengers = (1 + %i)", x$noisy.incumbent.nchallengers)
 }
diff --git a/R/makeTaskSingleObj.R b/R/makeTaskSingleObj.R
@@ -21,5 +21,13 @@ makeTaskSingleObj = function(opt.path, control) {
  data[[y.name]] = trafo.y.fun(data[[y.name]])
  }
 
+ agg = control$noisy.instance.aggregation
+
+ if (!is.null(agg)) {
+ par.names = colnames(data)[1:(which(colnames(data) == "y") - 1)] #FIXME: This sucks
+ data = setDT(data)[, .(y = agg(y)), by = par.names]
+ data = data.frame(data)
+ }
+
  makeRegrTask(target = control$y.name, data = data)
 }
diff --git a/R/mboTemplate.R b/R/mboTemplate.R
@@ -37,6 +37,10 @@ mboTemplate.OptState = function(obj) {
  repeat {
  prop = proposePoints(opt.state)
  evalProposedPoints.OptState(opt.state, prop)
+ intensify = getOptStateIntensification(opt.state)
+ if (intensify) {
+ intensifyOptState(opt.state)
+ }
  finalizeMboLoop(opt.state)
  terminate = getOptStateTermination(opt.state)
  if (terminate$term) {

diff --git a/R/setMBOControlNoisy.R b/R/setMBOControlNoisy.R
@@ -0,0 +1,57 @@
+#' @title Set options for handling noisy functions.
+#' @description
+#' Extends an MBO control object with options for handling noisy functions.
+#' Arguments that are left at \code{NULL} keep the value already stored in
+#' \code{control}, or fall back to the documented default.
+#' @template arg_control
+#' @param method [\code{character(1)}]\cr
+#'   Which of the replication strategies should be used? Possible values are:
+#'   \dQuote{fixed}: Every point is evaluated \code{instances} times. \cr
+#'   \dQuote{incumbent}: Use an incumbent strategy as intensification strategy.
+#'   The size of the set of additional challengers (apart from the incumbent) can be specified in \code{incumbent.nchallengers}. \cr
+#'   \dQuote{ocba}: Distribute the replication budget according to OCBA.
+#'   The replication budget per iteration is specified in \code{ocba.budget},
+#'   the initial number of replications per point is specified in \code{ocba.initial}. \cr
+#'   Defaults to \dQuote{fixed}.
+#' @param instances [\code{integer(1)}]\cr
+#'   How many instances of one parameter will be calculated? Defaults to \code{1}.
+#' @param instance.param [\code{character(1)}]\cr
+#'   What is the name of the function param that defines the instance?
+#'   Defaults to \code{NA} (no instance parameter). For self replicating functions
+#'   the name is fixed to \dQuote{noisy.repl} and cannot be changed.
+#' @param instance.aggregation [\code{function}]\cr
+#'   Should data be aggregated per instance? If yes, a function (e. g. mean) needs to be specified.
+#' @param self.replicating [\code{logical(1)}]\cr
+#'   TRUE if the function returns a vector of noisy results for one input. Then \code{instances} specifies the length of the result we expect.
+#'   Defaults to \code{FALSE}.
+#' @param incumbent.nchallengers [\code{integer(1)}]\cr
+#'   The size of the set of additional challengers (apart from the incumbent), defaults to \code{0}.
+#' @param ocba.budget [\code{integer(1)}]\cr
+#'   The budget that is allocated in each iteration per Optimal Computing Budget Allocation (OCBA) rule, defaults to 10.
+#' @param ocba.initial [\code{integer(1)}]\cr
+#'   The number of initial replications at each new point, defaults to \code{3}.
+#'   This needs to be larger than 1, since OCBA requires an initial variance estimate at each point.
+#' @return [\code{\link{MBOControl}}].
+#' @family MBOControl
+#' @export
+setMBOControlNoisy = function(control,
+  method = NULL,
+  instances = NULL,
+  instance.param = NULL,
+  instance.aggregation = NULL,
+  self.replicating = NULL,
+  incumbent.nchallengers = NULL,
+  ocba.budget = NULL,
+  ocba.initial = NULL) {
+
+  assertClass(control, "MBOControl")
+  control$noisy.method = coalesce(method, control$noisy.method, "fixed")
+  assertChoice(control$noisy.method, choices = c("fixed", "incumbent", "ocba"))
+  control$noisy.instances = assertInt(instances, lower = 1L, null.ok = TRUE, na.ok = FALSE) %??% control$noisy.instances %??% 1L
+  control$noisy.self.replicating = assertFlag(self.replicating, null.ok = TRUE, na.ok = FALSE) %??% control$noisy.self.replicating %??% FALSE
+
+  # A stored NA only means "no instance parameter" from an earlier call with a
+  # non self replicating function. It must not survive a switch to a self
+  # replicating function, where the parameter name is fixed.
+  stored.param = control$noisy.instance.param
+  if (control$noisy.self.replicating && isTRUE(is.na(stored.param))) {
+    stored.param = NULL
+  }
+  default.param = if (control$noisy.self.replicating) "noisy.repl" else NA_character_
+  control$noisy.instance.param = assertString(instance.param, null.ok = TRUE, na.ok = TRUE) %??% stored.param %??% default.param
+
+  control$noisy.instance.aggregation = assertClass(instance.aggregation, "function", null.ok = TRUE) %??% control$noisy.instance.aggregation
+  control$noisy.ocba.budget = assertInt(ocba.budget, lower = 1L, null.ok = TRUE, na.ok = FALSE) %??% control$noisy.ocba.budget %??% 10L
+  control$noisy.ocba.initial = assertInt(ocba.initial, lower = 2L, null.ok = TRUE, na.ok = FALSE) %??% control$noisy.ocba.initial %??% 3L
+  control$noisy.incumbent.nchallengers = assertInt(incumbent.nchallengers, lower = 0L, null.ok = TRUE, na.ok = FALSE) %??% control$noisy.incumbent.nchallengers %??% 0L
+
+  # isTRUE() keeps the check well defined for an NA instance.param, which
+  # would otherwise make the condition NA and crash the if()
+  if (control$noisy.self.replicating && !isTRUE(control$noisy.instance.param == "noisy.repl")) {
+    stop("You can not change the instance.param for self replicating functions.")
+  }
+
+  return(control)
+}
diff --git a/man/makeMBOControl.Rd b/man/makeMBOControl.Rd
diff --git a/man/setMBOControlInfill.Rd b/man/setMBOControlInfill.Rd
diff --git a/man/setMBOControlMultiObj.Rd b/man/setMBOControlMultiObj.Rd
diff --git a/man/setMBOControlMultiPoint.Rd b/man/setMBOControlMultiPoint.Rd