Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

combine EnsembleFSResult objects #128

Merged
merged 6 commits into from
Dec 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
S3method(as.data.table,ArchiveBatchFSelect)
S3method(as.data.table,DictionaryFSelector)
S3method(as.data.table,EnsembleFSResult)
S3method(c,EnsembleFSResult)
S3method(extract_inner_fselect_archives,BenchmarkResult)
S3method(extract_inner_fselect_archives,ResampleResult)
S3method(extract_inner_fselect_results,BenchmarkResult)
Expand Down
7 changes: 4 additions & 3 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# mlr3fselect (development version)

* Use [fastVoteR](https://github.com/bblodfon/fastVoteR) for feature ranking in `EnsembleFSResult()` objects
* Add embedded ensemble feature selection `embedded_ensemble_fselect()`
* Refactor `ensemble_fselect()` and `EnsembleFSResult()`
* refactor: Use [fastVoteR](https://github.com/bblodfon/fastVoteR) for feature ranking in `EnsembleFSResult()` objects
* feat: Add embedded ensemble feature selection `embedded_ensemble_fselect()`
* refactor/perf: `ensemble_fselect()` and `EnsembleFSResult()`
* feat: Add `c.EnsembleFSResult(...)` and `EnsembleFSResult$combine(...)` methods

# mlr3fselect 1.2.1

Expand Down
89 changes: 89 additions & 0 deletions R/EnsembleFSResult.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
#' * `x` ([EnsembleFSResult])
#' * `benchmark_result` (`logical(1)`)\cr
#' Whether to add the learner, task and resampling information from the benchmark result.
#' * `c(...)`\cr
#' ([EnsembleFSResult], ...) -> [EnsembleFSResult]\cr
#' Combines multiple [EnsembleFSResult] objects into a new [EnsembleFSResult].
#'
#' @references
#' `r format_bib("das1999", "meinshausen2010")`
Expand Down Expand Up @@ -166,6 +169,75 @@ EnsembleFSResult = R6Class("EnsembleFSResult",
private$.active_measure = which
},

#' @description
#' Combines a second [EnsembleFSResult] into the current object, modifying it **in-place**.
#' If the second [EnsembleFSResult] (`efsr`) is `NULL`, the method returns the object unmodified.
#'
#' Both objects must have the same task features and `measure`.
#' If the `inner_measure` differs between the objects or is `NULL` in either, it will be set to `NULL` in the combined object.
#' Additionally, the `importance` column will be removed if it is missing in either object.
#' If both objects contain a `benchmark_result`, these will be combined.
#' Otherwise, the combined object will have a `NULL` value for `benchmark_result`.
#'
#' This method modifies the object by reference.
#' To preserve the original state, explicitly `$clone()` the object beforehand.
#' Alternatively, you can use the [c()] function, which internally calls this method.
#'
#' @param efsr ([EnsembleFSResult])\cr
#' A second [EnsembleFSResult] object to combine with the current object.
#'
#' @return
#' Returns the object itself, but modified **by reference**.
combine = function(efsr) {
  # Nothing to combine: hand back the object unmodified.
  if (is.null(efsr)) {
    return(invisible(self))
  }
  assert_class(efsr, "EnsembleFSResult")
  other = get_private(efsr)

  # Both objects must refer to the same task features and the same (outer) measure.
  assert_set_equal(private$.features, other$.features)
  assert_set_equal(private$.measure$id, other$.measure$id)

  # Work on local copies of both result tables. The column removals below use
  # `[[<-`, which follows copy-on-modify semantics, so neither `efsr` nor this
  # object's private state is touched until the combined table has been built.
  result1 = private$.result
  result2 = other$.result

  # The inner measure is only kept if both objects have one and the ids match.
  inner_msr = private$.inner_measure
  inner_msr2 = other$.inner_measure
  keep_inner = !is.null(inner_msr) && !is.null(inner_msr2) && inner_msr$id == inner_msr2$id
  if (!keep_inner) {
    # Remove the associated `<id>_inner` score columns from the results
    if (!is.null(inner_msr)) {
      result1[[sprintf("%s_inner", inner_msr$id)]] = NULL
    }
    if (!is.null(inner_msr2)) {
      result2[[sprintf("%s_inner", inner_msr2$id)]] = NULL
    }
  }

  # Remove importance scores if they are missing in either object
  has_imp = "importance" %in% names(result1)
  has_imp2 = "importance" %in% names(result2)
  if (!has_imp || !has_imp2) {
    if (has_imp) result1[["importance"]] = NULL
    if (has_imp2) result2[["importance"]] = NULL
  }

  # Bind by column *name*: with the default `use.names = "check"` two tables
  # holding the same columns in a different order would be bound by position
  # (with a warning), silently putting values into the wrong columns.
  # `fill = FALSE` keeps a mismatch in the set of columns an error.
  combined = data.table::rbindlist(list(result1, result2), use.names = TRUE, fill = FALSE)

  # Commit only after the bind succeeded, so a failure above leaves the
  # result table and inner measure of this object untouched.
  private$.result = combined
  if (!keep_inner) {
    private$.inner_measure = NULL
    # NOTE(review): `private$.active_measure` is not updated here. If it can
    # currently point at the inner measure, it may need resetting - confirm.
  }

  # Merge benchmark results if available in both objects, otherwise drop them
  has_bmr = !is.null(self$benchmark_result)
  has_bmr2 = !is.null(efsr$benchmark_result)
  if (has_bmr && has_bmr2) {
    self$benchmark_result = self$benchmark_result$combine(efsr$benchmark_result)
  } else {
    self$benchmark_result = NULL
  }

  invisible(self)
},

#' @description
#' Calculates the feature ranking via [fastVoteR::rank_candidates()].
#'
Expand Down Expand Up @@ -499,3 +571,20 @@ EnsembleFSResult = R6Class("EnsembleFSResult",
as.data.table.EnsembleFSResult = function(x, ...) {
  # The conversion simply exposes the `result` field of the object;
  # additional arguments in `...` are accepted but not used.
  x[["result"]]
}

# Combine several EnsembleFSResult objects into a new one.
# The first argument is deep cloned and every further argument is folded into
# that clone via `$combine()`, so none of the inputs is modified.
#' @export
c.EnsembleFSResult = function(...) {
  inputs = list(...)

  # Start from a deep clone of the first object; with a single input this
  # clone is the result, because the loop below has nothing to iterate over.
  combined = inputs[[1L]]$clone(deep = TRUE)

  # Fold the remaining objects into the clone, left to right
  for (other in inputs[-1L]) {
    combined = combined$combine(other)
  }

  combined
}
2 changes: 2 additions & 0 deletions R/embedded_ensemble_fselect.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ embedded_ensemble_fselect = function(

# extract scores on the test sets
scores = bmr$score(measure)
# remove `bmr_score` class
class(scores) = c("data.table", "data.frame")

set(scores, j = "features", value = features)
set(scores, j = "n_features", value = n_features)
Expand Down
2 changes: 2 additions & 0 deletions R/ensemble_fselect.R
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,8 @@ ensemble_fselect = function(

# extract scores on the test sets
scores = bmr$score(measure)
# remove `bmr_score` class
class(scores) = c("data.table", "data.frame")

set(scores, j = "features", value = features)
set(scores, j = "n_features", value = n_features)
Expand Down
36 changes: 36 additions & 0 deletions man/ensemble_fs_result.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

41 changes: 41 additions & 0 deletions tests/testthat/test_embedded_ensemble_fselect.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,44 @@ test_that("embedded efs works", {
expect_data_table(feature_ranking, nrows = length(task$feature_names))
expect_equal(names(feature_ranking), c("feature", "score", "norm_score", "borda_score"))
})

test_that("combine embedded efs results", {
  task = tsk("sonar")

  # Runs embedded ensemble feature selection with two learners under a fixed
  # seed; only the seed and the number of subsampling repeats vary per run.
  run_embedded_efs = function(seed, repeats) {
    with_seed(seed, {
      embedded_ensemble_fselect(
        task = task,
        learners = lrns(c("classif.rpart", "classif.featureless")),
        init_resampling = rsmp("subsampling", repeats = repeats),
        measure = msr("classif.ce")
      )
    })
  }

  efsr1 = run_embedded_efs(seed = 42, repeats = 2)
  efsr2 = run_embedded_efs(seed = 43, repeats = 3)

  # The in-place method (on a clone) and `c()` must yield equivalent results
  comb1 = efsr1$clone(deep = TRUE)$combine(efsr2)
  comb2 = c(efsr1, efsr2)

  for (comb in list(comb1, comb2)) {
    expect_class(comb, "EnsembleFSResult")
    # expected: 2 learners x (2 + 3) subsampling repeats = 10 rows
    expect_data_table(comb$result, nrows = 10L)
    expect_equal(comb$n_learners, 2L)
    expect_equal(get_private(comb)$.measure$id, "classif.ce")
    expect_null(get_private(comb)$.inner_measure)

    # both inputs carry a benchmark result, so the combined object keeps one
    bmr = comb$benchmark_result
    assert_benchmark_result(bmr)
    expect_equal(bmr$n_resample_results, 4L)
    expect_equal(nrow(get_private(bmr)$.data$data$fact), 10L)
  }
})
111 changes: 111 additions & 0 deletions tests/testthat/test_ensemble_fselect.R
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,117 @@ test_that("EnsembleFSResult initialization", {
expect_false(efsr$measure$minimize)
})

test_that("combining EnsembleFSResult objects", {
  selected_features = list(
    c("V3", "V20"),
    c("V3", "V5", "V19", "V15"),
    c("V11", "V7", "V6", "V8"),
    c("V11"),
    c("V17", "V2", "V12", "V9", "V1"),
    c("V11", "V18", "V9")
  )
  feats = paste0("V", 1:20)

  res1 = data.table(
    resampling_iteration = c(1, 1, 2, 2, 3, 3),
    learner_id = rep(c("lrn1", "lrn2"), 3),
    n_features = c(2, 4, 4, 1, 5, 3),
    features = selected_features,
    classif.ce = runif(6),
    classif.acc_inner = runif(6) # inner measure column carries the `_inner` suffix
  )

  # same result, just different learners
  res2 = data.table(
    resampling_iteration = c(1, 1, 2, 2, 3, 3),
    learner_id = rep(c("lrn3", "lrn4"), 3),
    n_features = c(2, 4, 4, 1, 5, 3),
    features = selected_features,
    classif.ce = runif(6),
    classif.acc_inner = runif(6) # inner measure column carries the `_inner` suffix
  )

  # no `inner_measure`
  res3 = res2[, -c("classif.acc_inner")]
  # different `measure`
  res4 = setnames(copy(res3), "classif.ce", "classif.auc")
  # different `inner_measure`
  res5 = setnames(copy(res2), "classif.acc_inner", "classif.ce_inner")

  # initialize efsr objects
  m1 = msr("classif.ce")
  m2 = msr("classif.acc")
  m3 = msr("classif.auc")
  efsr1 = EnsembleFSResult$new(res1, features = feats, measure = m1, inner_measure = m2)
  efsr2 = EnsembleFSResult$new(res2, features = feats, measure = m1, inner_measure = m2)
  efsr3 = EnsembleFSResult$new(res3, features = feats, measure = m1)
  efsr4 = EnsembleFSResult$new(res4, features = feats, measure = m3)
  efsr5 = EnsembleFSResult$new(res5, features = feats, measure = m1, inner_measure = m1)

  # combining an efsr with nothing gives an equivalent, but distinct (cloned) object
  efsr11 = c(efsr1)
  # `expect_class()` (not `assert_class()`) so the check is recorded as a test expectation
  expect_class(efsr11, "EnsembleFSResult")
  expect_false(identical(efsr1, efsr11))
  expect_equal(efsr1$result$classif.ce, efsr11$result$classif.ce)

  # combine efsrs with same inner and outer measures
  comb1 = efsr1$clone(deep = TRUE)$combine(efsr2)
  comb11 = c(efsr1, efsr2) # same as above
  # efsr1 doesn't change
  expect_data_table(efsr1$result, nrows = 6L)
  expect_equal(efsr1$n_learners, 2L)
  expect_equal(get_private(efsr1)$.measure$id, "classif.ce")
  expect_equal(get_private(efsr1)$.inner_measure$id, "classif.acc")
  # efsr2 doesn't change either
  expect_data_table(efsr2$result, nrows = 6L)
  expect_equal(efsr2$n_learners, 2L)
  expect_equal(get_private(efsr2)$.measure$id, "classif.ce")
  expect_equal(get_private(efsr2)$.inner_measure$id, "classif.acc")
  # combined object has more rows
  expect_data_table(comb1$result, nrows = 12L)
  expect_data_table(comb11$result, nrows = 12L)
  expect_equal(comb1$n_learners, 4L)
  expect_equal(comb11$n_learners, 4L)
  expect_equal(get_private(comb1)$.measure$id, "classif.ce")
  expect_equal(get_private(comb11)$.measure$id, "classif.ce")
  expect_equal(get_private(comb1)$.inner_measure$id, "classif.acc")
  expect_equal(get_private(comb11)$.inner_measure$id, "classif.acc")

  # no `inner_measure` in the 2nd efsr
  comb2 = efsr1$clone(deep = TRUE)$combine(efsr3)
  comb22 = c(efsr1, efsr3)
  expect_equal(get_private(efsr1)$.measure$id, "classif.ce")
  expect_equal(get_private(efsr1)$.inner_measure$id, "classif.acc")
  expect_null(get_private(efsr3)$.inner_measure)
  expect_data_table(comb2$result, nrows = 12L)
  expect_data_table(comb22$result, nrows = 12L)
  expect_equal(comb2$n_learners, 4L)
  expect_equal(comb22$n_learners, 4L)
  expect_equal(get_private(comb2)$.measure$id, "classif.ce")
  expect_equal(get_private(comb22)$.measure$id, "classif.ce")
  expect_null(get_private(comb2)$.inner_measure$id)
  expect_null(get_private(comb22)$.inner_measure$id)

  # different (outer) measure => not possible to combine
  expect_error(efsr1$clone(deep = TRUE)$combine(efsr4))

  # different `inner_measure`
  comb3 = efsr1$clone(deep = TRUE)$combine(efsr5)
  expect_data_table(comb3$result, nrows = 12L)
  expect_equal(comb3$n_learners, 4L)
  expect_equal(get_private(comb3)$.measure$id, "classif.ce")
  expect_null(get_private(comb3)$.inner_measure$id)
  # `inner_measure`s of the individual objects did not change
  expect_equal(get_private(efsr1)$.inner_measure$id, "classif.acc")
  expect_equal(get_private(efsr5)$.inner_measure$id, "classif.ce")

  # multi-combine works
  comb_all = c(efsr1, efsr2, efsr3, efsr5)
  expect_data_table(comb_all$result, nrows = 24L)
  expect_equal(comb_all$n_learners, 4L)
  expect_equal(get_private(comb_all)$.measure$id, "classif.ce")
  expect_null(get_private(comb_all)$.inner_measure$id)
})

test_that("different callbacks can be set", {
callback_test = callback_batch_fselect("mlr3fselect.test",
on_eval_before_archive = function(callback, context) {
Expand Down
Loading