mlr-org · ja-thomas · Feb 1, 2018 · Feb 5, 2018 · Feb 6, 2018 · Jun 21, 2018
diff --git a/NAMESPACE b/NAMESPACE
@@ -58,6 +58,7 @@ export(renderExampleRunPlot)
 export(setMBOControlInfill)
 export(setMBOControlMultiObj)
 export(setMBOControlMultiPoint)
+export(setMBOControlNoisy)
 export(setMBOControlTermination)
 export(trafoLog)
 export(trafoSqrt)

diff --git a/R/OptState.R b/R/OptState.R
@@ -50,7 +50,8 @@ NULL
 
 makeOptState = function(opt.problem, loop = 0L, tasks = NULL, models = NULL,
   time.model = NULL, opt.result = NULL, state = "init", opt.path = NULL,
-  time.last.saved = Sys.time(), loop.starttime = Sys.time(), time.used = 0L, progress = 0, time.created = Sys.time()) {
+  time.last.saved = Sys.time(), loop.starttime = Sys.time(), time.used = 0L, progress = 0, time.created = Sys.time(),
+  identification.time.used = 0L) {
 
   opt.state = new.env()
 
@@ -68,6 +69,7 @@ makeOptState = function(opt.problem, loop = 0L, tasks = NULL, models = NULL,
   opt.state$loop.starttime = loop.starttime
   opt.state$time.used = time.used
   opt.state$progress = progress
+  opt.state$identification.time.used = identification.time.used
 
   opt.state$random.seed = getRandomSeed()
   opt.state$time.created = time.created

diff --git a/R/OptState_getter.R b/R/OptState_getter.R
@@ -144,3 +144,38 @@ getOptStateValidTerminationStates = function() {
   c("term.iter", "term.time", "term.exectime", "term.yval", "term.feval", "term.custom")
 }
 
+getOptStateIntensification = function(opt.state) {
+  # TRUE if the control object of the opt problem sets noisy.method to "incumbent" or "ocba"
+  method = getOptProblemControl(getOptStateOptProblem(opt.state))$noisy.method
+  !is.null(method) && method %in% c("incumbent", "ocba")
+}
+
+getOptStateIdentification = function(opt.state) {
+  # TRUE if a positive identification time or evaluation budget is set; isTRUE() maps NULL/NA budgets to FALSE
+  control = getOptProblemControl(getOptStateOptProblem(opt.state))
+  return(isTRUE(control$identification.time.budget > 0) || isTRUE(control$identification.max.evals > 0))
+}
+
+getOptStatePCS = function(opt.state) {
+  # noisy.identification.pcs as stored in the control object of the opt problem
+  ctrl = getOptProblemControl(getOptStateOptProblem(opt.state))
+  ctrl$noisy.identification.pcs
+}
+
+getOptStateTerminationIdentification = function(opt.state) {
+  # evaluate the identification termination criterion and store its progress
+  term.info = shouldTerminateIdentification.OptState(opt.state)
+  setOptStateProgress(opt.state, term.info$progress)
+  # the state is only overwritten once the criterion reports termination
+  if (term.info$term)
+    setOptStateState(opt.state, term.info$code)
+  term.info
+}
+
+getOptStateStartTimeIdentification = function(opt.state) {
+  return(opt.state$identification.starttime) # written by setOptStateIdentificationStarttime()
+}
+
+getOptStateTimeUsedIdentification = function(opt.state) {
+  return(opt.state$identification.time.used) # maintained by setOptStateTimeUsedIdentification()
+}
diff --git a/R/OptState_setter.R b/R/OptState_setter.R
@@ -41,6 +41,11 @@ setOptStateLoopStarttime = function(opt.state) {
   invisible()
 }
 
+setOptStateIdentificationStarttime = function(opt.state) {
+  opt.state$identification.starttime = Sys.time() # read back via getOptStateStartTimeIdentification()
+  invisible(NULL)
+}
+
 setOptStateTimeUsed = function(opt.state, time.used = NULL, time.add = NULL) {
   if (!is.null(time.used)) {
     opt.state$time.used = time.used
@@ -64,3 +69,15 @@ setOptStateProgress = function(opt.state, progress) {
   opt.state$progress = progress
   invisible()
 }
+
+setOptStateTimeUsedIdentification = function(opt.state, time.used = NULL, time.add = NULL) {
+  if (is.null(time.used)) { # no explicit total given: accumulate onto the stored value
+    if (is.null(time.add)) { # no increment given either: use the time elapsed since the stored start time
+      time.add = difftime(Sys.time(), getOptStateStartTimeIdentification(opt.state), units = "secs")
+      setOptStateIdentificationStarttime(opt.state) # restart the clock for the next measurement
+    }
+    time.used = getOptStateTimeUsedIdentification(opt.state) + time.add
+  }
+  opt.state$identification.time.used = time.used
+  invisible()
+}
diff --git a/R/SMBO.R b/R/SMBO.R
@@ -49,6 +49,8 @@ initSMBO = function(par.set, design, learner = NULL, control, minimize = rep(TRU
 #'   Outcome of the optimization.
 #'   For multiple results use a list.
 #'   For a result of a multi-objective function use a numeric vector.
+#'   For multiple results or for noisy instances use a list.
+#'   Each list element should correspond to one x value.
 #'
 #' @return [\code{\link{OptState}}]
 #' @export

diff --git a/R/evalTargetFun.R b/R/evalTargetFun.R
@@ -25,13 +25,30 @@ evalTargetFun.OptState = function(opt.state, xs, extras) {
   # short names and so on
   nevals = length(xs)
   ny = control$n.objectives
+
+  # trafo X points
+  xs.trafo = lapply(xs, trafoValue, par = par.set)
+
+  # handle noisy instances
+  if (isTRUE(control$noisy.instances > 1L)) {
+    nevals = nevals * control$noisy.instances
+    xs = rep(xs, each = control$noisy.instances)
+    extras = rep(extras, each = control$noisy.instances)
+    if (!control$noisy.self.replicating) {
+      xs.trafo = rep(xs.trafo, each = control$noisy.instances)
+      if (!is.na(control$noisy.instance.param)) {
+        inst.param = lapply(seq_len(control$noisy.instances), function(x) setNames(list(x), control$noisy.instance.param))
+        xs.trafo = Map(c, xs.trafo, inst.param)
+      }
+    }
+  }
+
+
   num.format = control$output.num.format
   num.format.string = paste("%s = ", num.format, sep = "")
   dobs = ensureVector(asInteger(getOptStateLoop(opt.state)), n = nevals, cl = "integer")
   imputeY = control$impute.y.fun
 
-  # trafo X points
-  xs.trafo = lapply(xs, trafoValue, par = par.set)
 
   # function to measure of fun call
     wrapFun = function(x) {
@@ -43,6 +60,9 @@ evalTargetFun.OptState = function(opt.state, xs, extras) {
         user.extras = attr(y, "extras")
         y = setAttribute(y, "extras", NULL)
       }
+      if (!is.null(control$noisy.instance.param) && !is.na(control$noisy.instance.param) && !control$noisy.self.replicating) {
+        user.extras = c(user.extras, x[control$noisy.instance.param])
+      }
       st = proc.time() - st
       list(y = y, time = st[3], user.extras = user.extras)
     }
@@ -56,6 +76,21 @@ evalTargetFun.OptState = function(opt.state, xs, extras) {
   res = parallelMap(wrapFun, xs.trafo, level = "mlrMBO.feval",
     impute.error = if (is.null(imputeY)) NULL else identity)
 
+  # handle noisy instances of self.replicating functions
+  if (isTRUE(control$noisy.instances > 1L) && control$noisy.self.replicating) {
+    xs.trafo = rep(xs.trafo, each = control$noisy.instances)
+    res = lapply(res, function(r) {
+      if (is.error(r)) {
+        rep(list(r), control$noisy.instances)
+      } else {
+        lapply(seq_along(r$y), function(i) {
+          list(y = r$y[i], time = r$time / length(r$y), user.extras = c(r$user.extras, setNames(list(i), control$noisy.instance.param)))
+        })
+      }
+    })
+    res = unlist(res, recursive = FALSE)
+  }
+
   # loop evals and to some post-processing
   for (i in seq_len(nevals)) {
     r = res[[i]]; x = xs[[i]]; x.trafo = xs.trafo[[i]]; dob = dobs[i]

diff --git a/R/identifyFinalPoints.R b/R/identifyFinalPoints.R
@@ -0,0 +1,68 @@
+identifyFinalPoints = function(opt.state, min.pcs = NULL, time.budget = NULL) {
+  # Replicate already evaluated points until the probability of correct selection
+  # reaches min.pcs or the identification termination criterion is met.
+  # opt.state: OptState environment; it is returned after the replications were added.
+  # min.pcs: target P(CS); if NULL, getOptStatePCS(opt.state) is used.
+  # NOTE(review): time.budget is accepted but never read in this function.
+  opt.problem = getOptStateOptProblem(opt.state)
+  op = as.data.table(getOptStateOptPath(opt.state))
+  par.names = colnames(op)[1:(which(colnames(op) == "y") - 1)] #FIXME: This sucks
+  min.pcs = min.pcs %??% getOptStatePCS(opt.state)
+
+  # calculate summary of the design
+  ds = getOptPathSummary(opt.state, par.names)
+  nds = nrow(ds)
+
+  # start the identification clock; setOptStateTimeUsedIdentification() measures from it
+  setOptStateIdentificationStarttime(opt.state)
+  # make sure that initially, each point is evaluated at least twice
+  xinit = rep(seq_len(nds), pmax(2 - ds$runs, 0))
+  opt.state = replicatePoint(opt.state, x = ds[xinit, ..par.names], type = "initeval")
+
+  setOptStateTimeUsedIdentification(opt.state)
+  terminate = getOptStateTerminationIdentification(opt.state)
+
+  pcs = calculatePCS(opt.state)
+  # also honour a termination criterion that already fired during the initial replications
+  while (pcs < min.pcs && !terminate$term) {
+    ds = getOptPathSummary(opt.state, par.names)
+    add = distributeOCBA(ds, budget = 3)
+    reps = rep(seq_len(nrow(ds)), add)
+    replicatePoint(opt.state, x = ds[reps, ..par.names], type = "identification")
+    setOptStateTimeUsedIdentification(opt.state)
+    terminate = getOptStateTerminationIdentification(opt.state)
+    # recompute first so that the logged P(CS) includes the new replications
+    pcs = calculatePCS(opt.state)
+    showInfo(getOptProblemShowInfo(opt.problem), "[mbo] identification: P(CS) %.3f / %.3f", pcs, min.pcs)
+  }
+
+  return(opt.state)
+}
+
+
+calculatePCS = function(opt.state) {
+  # Approximate probability of correct selection for the design with the smallest y
+  # in the opt path summary. NOTE(review): this assumes minimization - confirm.
+  op = as.data.table(getOptStateOptPath(opt.state))
+  par.names = colnames(op)[1:(which(colnames(op) == "y") - 1)] #FIXME: This sucks
+  ds = getOptPathSummary(opt.state, par.names)
+
+  ds = ds[order(ds$y), ]
+  b = 1
+
+  # mean and covariance for the vector of differences (yb - yi) for all i != b
+  k = nrow(ds) - 1
+  # transformation matrix A; -diag(k) because diag(-vf) fails when there is only one competitor
+  trafo.mat = cbind(rep(1, k), - diag(k))
+
+  m = ds[b, ]$y - ds[- b, ]$y
+
+  # covariance for the vector of differences (yb - yi)
+  # is obtained by multiplication with matrix A = [(1 -1, 0), (1, 0, -1)]
+  # rule is cov(A * Y) = A * cov(Y) * t(A)
+  sigma = trafo.mat %*% diag(ds$ysd^2 / ds$runs) %*% t(trafo.mat)
+
+  # P(yb - yi < 0 for all i != b)
+  pcs = pmvnorm(upper = 0, mean = m, sigma = sigma)
+  return(pcs[1])
+}