mlr-org · be-marc · Dec 10, 2024 · Dec 5, 2024 · Dec 6, 2024 · Dec 6, 2024
diff --git a/NAMESPACE b/NAMESPACE
@@ -3,6 +3,7 @@
 S3method(as.data.table,ArchiveBatchFSelect)
 S3method(as.data.table,DictionaryFSelector)
 S3method(as.data.table,EnsembleFSResult)
+S3method(c,EnsembleFSResult)
 S3method(extract_inner_fselect_archives,BenchmarkResult)
 S3method(extract_inner_fselect_archives,ResampleResult)
 S3method(extract_inner_fselect_results,BenchmarkResult)

diff --git a/NEWS.md b/NEWS.md
@@ -1,8 +1,9 @@
 # mlr3fselect (development version)
 
-* Use [fastVoteR](https://github.com/bblodfon/fastVoteR) for feature ranking in `EnsembleFSResult()` objects
-* Add embedded ensemble feature selection `embedded_ensemble_fselect()`
-* Refactor `ensemble_fselect()` and `EnsembleFSResult()`
+* refactor: Use [fastVoteR](https://github.com/bblodfon/fastVoteR) for feature ranking in `EnsembleFSResult()` objects
+* feat: Add embedded ensemble feature selection `embedded_ensemble_fselect()`
+* refactor/perf: Refactor `ensemble_fselect()` and `EnsembleFSResult()`
+* feat: Add `c.EnsembleFSResult(...)` and `EnsembleFSResult$combine(...)` methods
 
 # mlr3fselect 1.2.1
 

diff --git a/R/EnsembleFSResult.R b/R/EnsembleFSResult.R
@@ -15,6 +15,9 @@
 #'     * `x` ([EnsembleFSResult])
 #'     * `benchmark_result` (`logical(1)`)\cr
 #'       Whether to add the learner, task and resampling information from the benchmark result.
+#' * `c(...)`\cr
+#'   ([EnsembleFSResult], ...) -> [EnsembleFSResult]\cr
+#'   Combines multiple [EnsembleFSResult] objects into a new [EnsembleFSResult].
 #'
 #' @references
 #' `r format_bib("das1999", "meinshausen2010")`
@@ -166,6 +169,75 @@ EnsembleFSResult = R6Class("EnsembleFSResult",
       private$.active_measure = which
     },
 
+    #' @description
+    #' Combines a second [EnsembleFSResult] into the current object, modifying it **in-place**.
+    #' If the second [EnsembleFSResult] (`efsr`) is `NULL`, the method returns the object unmodified.
+    #'
+    #' Both objects must have the same task features and `measure`.
+    #' If the `inner_measure` differs between the objects or is `NULL` in either, it will be set to `NULL` in the combined object and the associated inner measure score columns (`<measure id>_inner`) will be removed.
+    #' Additionally, the `importance` column will be removed if it is missing in either object.
+    #' If both objects contain a `benchmark_result`, these will be combined.
+    #' Otherwise, the combined object will have a `NULL` value for `benchmark_result`.
+    #'
+    #' This method modifies the object by reference.
+    #' To preserve the original state, explicitly `$clone()` the object beforehand.
+    #' Alternatively, you can use the [c()] function, which internally calls this method.
+    #'
+    #' @param efsr ([EnsembleFSResult])\cr
+    #'   A second [EnsembleFSResult] object to combine with the current object.
+    #'
+    #' @return
+    #' Returns the object itself, but modified **by reference**.
+    combine = function(efsr) {
+      if (is.null(efsr)) {
+        return(invisible(self))
+      }
+      assert_class(efsr, "EnsembleFSResult")
+      other = get_private(efsr)
+
+      # both objects must share the task features and the (outer) measure
+      assert_set_equal(private$.features, other$.features)
+      assert_set_equal(private$.measure$id, other$.measure$id)
+
+      # stage all changes in local tables, so that the object stays untouched if binding fails
+      result1 = private$.result
+      result2 = other$.result
+
+      # the inner measure is only kept if both objects have one and the ids agree
+      inner_msr = private$.inner_measure
+      inner_msr2 = other$.inner_measure
+      keep_inner = !is.null(inner_msr) && !is.null(inner_msr2) && inner_msr$id == inner_msr2$id
+      if (!keep_inner) {
+        # drop the associated inner measure score columns
+        if (!is.null(inner_msr)) result1[[sprintf("%s_inner", inner_msr$id)]] = NULL
+        if (!is.null(inner_msr2)) result2[[sprintf("%s_inner", inner_msr2$id)]] = NULL
+      }
+
+      # importance scores are only kept if present in both objects
+      has_imp = "importance" %in% names(result1)
+      has_imp2 = "importance" %in% names(result2)
+      if (has_imp && !has_imp2) result1[["importance"]] = NULL
+      if (has_imp2 && !has_imp) result2[["importance"]] = NULL
+
+      # bind by column name: the default `use.names = "check"` would fall back to binding by
+      # position (with a warning) if the column order differs between the two tables
+      combined = data.table::rbindlist(list(result1, result2), use.names = TRUE, fill = FALSE)
+
+      # commit the staged changes only after the bind succeeded
+      private$.result = combined
+      if (!keep_inner) private$.inner_measure = NULL
+      # NOTE(review): `private$.active_measure` is not updated here - confirm it cannot still refer to a removed inner measure
+
+      # merge the benchmark results if available in both objects, otherwise drop them
+      if (!is.null(self$benchmark_result) && !is.null(efsr$benchmark_result)) {
+        self$benchmark_result = self$benchmark_result$combine(efsr$benchmark_result)
+      } else {
+        self$benchmark_result = NULL
+      }
+
+      invisible(self)
+    },
+
     #' @description
     #' Calculates the feature ranking via [fastVoteR::rank_candidates()].
     #'
@@ -499,3 +571,20 @@ EnsembleFSResult = R6Class("EnsembleFSResult",
 as.data.table.EnsembleFSResult = function(x, ...) {
   x$result
 }
+
+#' @export
+c.EnsembleFSResult = function(...) {
+  objects = list(...)
+
+  # Start from a deep clone of the first object: `$combine()` modifies its
+  # receiver by reference, and the object passed in must stay as it is.
+  accumulator = objects[[1L]]$clone(deep = TRUE)
+
+  # Fold every further object into the clone, one after the other.
+  # With a single input `Reduce()` just hands back the clone itself.
+  Reduce(
+    function(combined, nxt) combined$combine(nxt),
+    objects[-1L],
+    accumulator
+  )
+}
diff --git a/R/embedded_ensemble_fselect.R b/R/embedded_ensemble_fselect.R
@@ -88,6 +88,8 @@ embedded_ensemble_fselect = function(
 
   # extract scores on the test sets
   scores = bmr$score(measure)
+  # remove `bmr_score` class
+  class(scores) = c("data.table", "data.frame")
 
   set(scores, j = "features", value = features)
   set(scores, j = "n_features", value = n_features)

diff --git a/R/ensemble_fselect.R b/R/ensemble_fselect.R
@@ -136,6 +136,8 @@ ensemble_fselect = function(
 
   # extract scores on the test sets
   scores = bmr$score(measure)
+  # remove `bmr_score` class
+  class(scores) = c("data.table", "data.frame")
 
   set(scores, j = "features", value = features)
   set(scores, j = "n_features", value = n_features)

diff --git a/man/ensemble_fs_result.Rd b/man/ensemble_fs_result.Rd
diff --git a/tests/testthat/test_embedded_ensemble_fselect.R b/tests/testthat/test_embedded_ensemble_fselect.R
@@ -62,3 +62,44 @@ test_that("embedded efs works", {
   expect_data_table(feature_ranking, nrows = length(task$feature_names))
   expect_equal(names(feature_ranking), c("feature", "score", "norm_score", "borda_score"))
 })
+
+test_that("combine embedded efs results", {
+  task = tsk("sonar")
+  # first run: two learners, two subsampling repeats
+  with_seed(42, {
+    efsr1 = embedded_ensemble_fselect(
+      task = task,
+      learners = lrns(c("classif.rpart", "classif.featureless")),
+      init_resampling = rsmp("subsampling", repeats = 2),
+      measure = msr("classif.ce")
+    )
+  })
+
+  # second run: same learners, three subsampling repeats
+  with_seed(43, {
+    efsr2 = embedded_ensemble_fselect(
+      task = task,
+      learners = lrns(c("classif.rpart", "classif.featureless")),
+      init_resampling = rsmp("subsampling", repeats = 3),
+      measure = msr("classif.ce")
+    )
+  })
+
+  # in-place `$combine()` on a deep clone and `c()` must give equivalent objects
+  combined = list(
+    efsr1$clone(deep = TRUE)$combine(efsr2),
+    c(efsr1, efsr2)
+  )
+
+  for (comb in combined) {
+    expect_class(comb, "EnsembleFSResult")
+    expect_data_table(comb$result, nrows = 10L)
+    expect_equal(comb$n_learners, 2L)
+    expect_equal(get_private(comb)$.measure$id, "classif.ce")
+    expect_null(get_private(comb)$.inner_measure)
+    # the benchmark results are expected to be merged, not dropped
+    expect_class(comb$benchmark_result, "BenchmarkResult")
+    expect_equal(comb$benchmark_result$n_resample_results, 4L)
+    expect_equal(nrow(get_private(comb$benchmark_result)$.data$data$fact), 10L)
+  }
+})
diff --git a/tests/testthat/test_ensemble_fselect.R b/tests/testthat/test_ensemble_fselect.R
@@ -219,6 +219,117 @@ test_that("EnsembleFSResult initialization", {
   expect_false(efsr$measure$minimize)
 })
 
+test_that("combining EnsembleFSResult objects", {
+  selected_features = list(
+    c("V3", "V20"),
+    c("V3", "V5", "V19", "V15"),
+    c("V11", "V7", "V6", "V8"),
+    c("V11"),
+    c("V17", "V2", "V12", "V9", "V1"),
+    c("V11", "V18", "V9")
+  )
+  feats = paste0("V", 1:20)
+
+  res1 = data.table(
+    resampling_iteration = c(1, 1, 2, 2, 3, 3),
+    learner_id = rep(c("lrn1", "lrn2"), 3),
+    n_features = c(2, 4, 4, 1, 5, 3),
+    features = selected_features,
+    classif.ce = runif(6),
+    classif.acc_inner = runif(6) # inner measure scores carry the `_inner` suffix
+  )
+
+  # same result, just different learners
+  res2 = data.table(
+    resampling_iteration = c(1, 1, 2, 2, 3, 3),
+    learner_id = rep(c("lrn3", "lrn4"), 3),
+    n_features = c(2, 4, 4, 1, 5, 3),
+    features = selected_features,
+    classif.ce = runif(6),
+    classif.acc_inner = runif(6) # inner measure scores carry the `_inner` suffix
+  )
+
+  # no `inner_measure`
+  res3 = res2[, -c("classif.acc_inner")]
+  # different `measure`
+  res4 = setnames(copy(res3), "classif.ce", "classif.auc")
+  # different `inner_measure`
+  res5 = setnames(copy(res2), "classif.acc_inner", "classif.ce_inner")
+
+  # initialize efsr objects
+  m1 = msr("classif.ce")
+  m2 = msr("classif.acc")
+  m3 = msr("classif.auc")
+  efsr1 = EnsembleFSResult$new(res1, features = feats, measure = m1, inner_measure = m2)
+  efsr2 = EnsembleFSResult$new(res2, features = feats, measure = m1, inner_measure = m2)
+  efsr3 = EnsembleFSResult$new(res3, features = feats, measure = m1)
+  efsr4 = EnsembleFSResult$new(res4, features = feats, measure = m3)
+  efsr5 = EnsembleFSResult$new(res5, features = feats, measure = m1, inner_measure = m1)
+
+  # combining a single efsr gives back a deep clone: equal content, distinct object
+  efsr11 = c(efsr1)
+  expect_class(efsr11, "EnsembleFSResult")
+  expect_false(identical(efsr11, efsr1))
+  expect_equal(efsr1$result$classif.ce, efsr11$result$classif.ce)
+
+  # combine efsrs with same inner and outer measures
+  comb1 = efsr1$clone(deep = TRUE)$combine(efsr2)
+  comb11 = c(efsr1, efsr2) # same as above
+  # efsr1 doesn't change
+  expect_data_table(efsr1$result, nrows = 6L)
+  expect_equal(efsr1$n_learners, 2L)
+  expect_equal(get_private(efsr1)$.measure$id, "classif.ce")
+  expect_equal(get_private(efsr1)$.inner_measure$id, "classif.acc")
+  # efsr2 doesn't change either
+  expect_data_table(efsr2$result, nrows = 6L)
+  expect_equal(efsr2$n_learners, 2L)
+  expect_equal(get_private(efsr2)$.measure$id, "classif.ce")
+  expect_equal(get_private(efsr2)$.inner_measure$id, "classif.acc")
+  # combined objects have the rows and learners of both inputs
+  for (comb in list(comb1, comb11)) {
+    expect_data_table(comb$result, nrows = 12L)
+    expect_equal(comb$n_learners, 4L)
+    expect_equal(get_private(comb)$.measure$id, "classif.ce")
+    expect_equal(get_private(comb)$.inner_measure$id, "classif.acc")
+  }
+
+  # no `inner_measure` in the 2nd efsr
+  comb2 = efsr1$clone(deep = TRUE)$combine(efsr3)
+  comb22 = c(efsr1, efsr3)
+  expect_equal(get_private(efsr1)$.measure$id, "classif.ce")
+  expect_equal(get_private(efsr1)$.inner_measure$id, "classif.acc")
+  expect_null(get_private(efsr3)$.inner_measure)
+  for (comb in list(comb2, comb22)) {
+    expect_data_table(comb$result, nrows = 12L)
+    expect_equal(comb$n_learners, 4L)
+    expect_equal(get_private(comb)$.measure$id, "classif.ce")
+    expect_null(get_private(comb)$.inner_measure)
+    # the inner scores of the 1st efsr must be dropped together with the inner measure
+    expect_false("classif.acc_inner" %in% names(get_private(comb)$.result))
+  }
+
+  # different (outer) measure => not possible to combine
+  expect_error(efsr1$clone(deep = TRUE)$combine(efsr4))
+
+  # different `inner_measure`
+  comb3 = efsr1$clone(deep = TRUE)$combine(efsr5)
+  expect_data_table(comb3$result, nrows = 12L)
+  expect_equal(comb3$n_learners, 4L)
+  expect_equal(get_private(comb3)$.measure$id, "classif.ce")
+  expect_null(get_private(comb3)$.inner_measure)
+  expect_false(any(c("classif.acc_inner", "classif.ce_inner") %in% names(get_private(comb3)$.result)))
+  # `inner_measure`s of the individual objects did not change
+  expect_equal(get_private(efsr1)$.inner_measure$id, "classif.acc")
+  expect_equal(get_private(efsr5)$.inner_measure$id, "classif.ce")
+
+  # multi-combine works
+  comb_all = c(efsr1, efsr2, efsr3, efsr5)
+  expect_data_table(comb_all$result, nrows = 24L)
+  expect_equal(comb_all$n_learners, 4L)
+  expect_equal(get_private(comb_all)$.measure$id, "classif.ce")
+  expect_null(get_private(comb_all)$.inner_measure)
+})
+
 test_that("different callbacks can be set", {
   callback_test = callback_batch_fselect("mlr3fselect.test",
     on_eval_before_archive = function(callback, context) {