Skip to content

Commit

Permalink
refactor boot_bw and boot_bw_estimate to account for multiple strata; f…
Browse files Browse the repository at this point in the history
…ix #50
  • Loading branch information
ernestguevarra committed Jan 9, 2025
1 parent 84d3c7f commit a95c80f
Show file tree
Hide file tree
Showing 14 changed files with 118 additions and 38 deletions.
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,11 @@ Imports:
cli,
doParallel,
foreach,
methods,
parallel,
parallelly,
stats,
stringr,
withr
Suggests:
knitr,
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,13 @@ importFrom(foreach,"%:%")
importFrom(foreach,"%do%")
importFrom(foreach,"%dopar%")
importFrom(foreach,foreach)
importFrom(methods,is)
importFrom(parallel,makeCluster)
importFrom(parallelly,availableCores)
importFrom(stats,na.omit)
importFrom(stats,pnorm)
importFrom(stats,quantile)
importFrom(stats,runif)
importFrom(stats,sd)
importFrom(stringr,str_split)
importFrom(withr,with_options)
2 changes: 2 additions & 0 deletions R/bbw.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@
#' @importFrom cli cli_abort cli_bullets cli_alert_success cli_progress_bar
#' cli_progress_message cli_alert_info cli_progress_update cli_progress_done
#' cli_h2 cli_h3
#' @importFrom methods is
#' @importFrom stringr str_split
#'
"_PACKAGE"

Expand Down
45 changes: 36 additions & 9 deletions R/boot_bw.R
Original file line number Diff line number Diff line change
Expand Up @@ -120,13 +120,15 @@ boot_bw_parallel <- function(x, w, statistic,
"Resampling by {.strong {strata}} - {.strong {replicates}} replicates in parallel"
)

boot <- foreach::foreach(i = unique(w[[strata]])) %:%
.by <- apply(X = x[strata], MARGIN = 1, FUN = paste, collapse = ".")

boot <- foreach::foreach(i = get_strata(x, strata = strata)) %:%
foreach::foreach(seq_len(replicates), .combine = rbind) %dopar% {
## Subset x to strata ----
y <- x[which(x[[strata]] == i), ]
y <- x[which(.by == i), ]

## Subset df_weighted to strata ----replicates = 400,
z <- w[which(w[[strata]] == i), ]
## Subset df_weighted to strata ----
z <- w[which(w$psu %in% y$psu), ]

## Sample clusters ----
sampled_clusters <- boot_bw_sample_clusters(x = y, w = z)
Expand All @@ -141,14 +143,25 @@ boot_bw_parallel <- function(x, w, statistic,
## Re-structure boot to identify outputs list and rename data.frames ----
cli::cli_progress_step("Tidying up resampling outputs")
boot <- tidy_boot(
boot, w = w, strata = strata, outputColumns = outputColumns
boot, x = x, strata = strata, outputColumns = outputColumns
)
}

## Stop parallelism ----
cli::cli_progress_step("Closing {.strong {cores}} parallel operations")
parallel::stopCluster(cl)

## Create list output and append class ----
boot <- list(
statistic = deparse(substitute(statistic)),
params = params,
replicates = replicates,
strata = strata,
boot_data = boot
)

class(boot) <- "boot_bw"

## Return boot ----
boot
}
Expand All @@ -171,6 +184,7 @@ boot_bw_sequential <- function(x, w, statistic,
if (is.null(strata)) {
cli::cli_h3("Resampling with {.strong {replicates}} replicates")
cli::cli_progress_bar("Resampling", total = replicates, clear = FALSE)

boot <- foreach::foreach(
replicate = seq_len(replicates), .combine = rbind
) %do% {
Expand All @@ -195,18 +209,20 @@ boot_bw_sequential <- function(x, w, statistic,
cli::cli_h3(
"Resampling by {.strong {strata}} with {.strong {replicates}} replicates"
)

.by <- apply(X = x[strata], MARGIN = 1, FUN = paste, collapse = ".")

boot <- foreach::foreach(i = unique(w[[strata]])) %:%
boot <- foreach::foreach(i = get_strata(x, strata = strata)) %:%
foreach::foreach(replicate = seq_len(replicates), .combine = rbind) %do% {
cli::cli_progress_message(
"Resampling {.strong {strata} -} {.strong {i}} sequentially: replicate {.strong {replicate}}"
)

## Subset x to strata ----
y <- x[which(x[[strata]] == i), ]
y <- x[which(.by == i), ]

## Subset df_weighted to strata ----
z <- w[which(w[[strata]] == i), ]
z <- w[which(w$psu %in% y$psu), ]

## Sample clusters ----
sampled_clusters <- boot_bw_sample_clusters(x = y, w = z)
Expand All @@ -222,10 +238,21 @@ boot_bw_sequential <- function(x, w, statistic,
## Re-structure boot to identify outputs list and rename data.frames ----
cli::cli_progress_step("Tidying up resampling outputs")
boot <- tidy_boot(
boot, w = w, strata = strata, outputColumns = outputColumns
boot, x = x, strata = strata, outputColumns = outputColumns
)
}

## Create list output and append class ----
boot <- list(
statistic = deparse(substitute(statistic)),
params = params,
replicates = replicates,
strata = strata,
boot_data = boot
)

class(boot) <- "boot_bw"

## Return boot ----
boot
}
Expand Down
21 changes: 14 additions & 7 deletions R/boot_bw_estimate.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,20 @@
#'

boot_bw_estimate <- function(boot_df) {
## Check that boot_df is class boot_bw ----
if (!is(boot_df, "boot_bw"))
cli::cli_abort(
"{.arg boot_df} is not a {.strong {.var boot_bw}} object"
)

## Get estimates ----
est <- lapply(
X = boot_df,
X = boot_df$boot_data,
FUN = boot_percentile
)

## Structure list names ----
if (!is.data.frame(boot_df)) {
if (!is.data.frame(boot_df$boot_data)) {
if (nrow(est[[1]]) == 1) {
names(est) <- paste(
names(est),
Expand All @@ -42,11 +48,12 @@ boot_bw_estimate <- function(boot_df) {
do.call(rbind, args = _)

## Re-structure results ----
est <- data.frame(
strata = gsub("\\.[^\\.]{1,}", "", row.names(est)),
indicator = gsub("[^\\.]{1,}\\.", "", row.names(est)),
est
)
est <- stringr::str_split(
row.names(est), pattern = "\\.", simplify = TRUE
) |>
as.data.frame() |>
(\(x) { names(x) <- c(boot_df$strata, "indicator"); x })() |>
data.frame(est)
} else {
## Flatten list ----
est <- est |>
Expand Down
9 changes: 6 additions & 3 deletions R/post_strat_estimation.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#' @param pop_df A [data.frame()] with at least two variables: `strata` for the
#' stratification/grouping information that matches `strata` in `est_df` and
#' `pop` for information on population for the given `strata`.
#' @param strata A character value of the variable name in `est_df` that
#' corresponds to the `strata` values to match with values in `pop_df`
#'
#' @returns A vector of values for the overall estimate, overall 95% lower
#' confidence limit, and overall 95% upper confidence limit for each of the
Expand All @@ -29,17 +31,18 @@
#'
#' names(pop_df) <- c("strata", "pop")
#'
#' estimate_total(est_df, pop_df)
#' estimate_total(est_df, pop_df, strata = "region")
#'
#' @export
#'

estimate_total <- function(est_df, pop_df) {
estimate_total <- function(est_df, pop_df, strata) {
## Check the data ----
check_est_df(est_df)
check_est_df(est_df, strata = strata)
check_pop_df(pop_df)

## Merge estimates with population data ----
est_df$strata <- est_df[[strata]]
est_pop_df <- merge(pop_df, est_df, by = "strata", all.y = TRUE)

## Get total estimates ----
Expand Down
28 changes: 22 additions & 6 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ check_data <- function(x) {
#' @keywords internal
#'

tidy_boot <- function(boot, w, strata, outputColumns) {
tidy_boot <- function(boot, x, strata, outputColumns) {
if (is.list(boot)) {
boot <- lapply(
X = boot,
Expand All @@ -107,7 +107,7 @@ tidy_boot <- function(boot, w, strata, outputColumns) {
x
}
) |>
(\(x) { names(x) <- unique(w[[strata]]); x })()
(\(y) { names(y) <- get_strata(x, strata); y })()
} else {
boot <- as.data.frame(boot)
row.names(boot) <- NULL
Expand All @@ -125,10 +125,10 @@ tidy_boot <- function(boot, w, strata, outputColumns) {
#' @keywords internal
#'

check_est_df <- function(est_df) {
data_name_check <- c("strata", "est", "se") %in% names(est_df)
data_name_in <- c("strata", "est", "se")[which(data_name_check)]
data_name_out <- c("strata", "est", "se")[which(!data_name_check)]
check_est_df <- function(est_df, strata) {
data_name_check <- c(strata, "est", "se") %in% names(est_df)
data_name_in <- c(strata, "est", "se")[which(data_name_check)]
data_name_out <- c(strata, "est", "se")[which(!data_name_check)]

arg_name <- deparse(substitute(est_df))

Expand Down Expand Up @@ -174,4 +174,20 @@ check_pop_df <- function(pop_df) {
} else {
cli::cli_abort(message_out)
}
}


#'
#' Get levels of stratification
#'
#' @keywords internal
#'

get_strata <- function(x, strata) {
y <- lapply(
X = x[strata],
FUN = factor
)

names(split(x, y))
}
2 changes: 1 addition & 1 deletion man/check_est_df.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 5 additions & 2 deletions man/estimate_total.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions man/get_strata.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/tidy_boot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions tests/testthat/test-boot_bw.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ mean_boot <- boot_bw(
)

test_that("boot_bw works as expected", {
expect_s3_class(mean_boot, "data.frame")
expect_s3_class(mean_boot$boot_data, "data.frame")
})

mean_boot <- boot_bw(
Expand All @@ -32,7 +32,7 @@ mean_boot <- boot_bw(
)

test_that("boot_bw works as expected", {
expect_s3_class(mean_boot, "data.frame")
expect_s3_class(mean_boot$boot_data, "data.frame")
})


Expand All @@ -43,7 +43,7 @@ mean_boot <- boot_bw(
)

test_that("boot_bw works as expected", {
expect_type(mean_boot, "list")
expect_type(mean_boot$boot_data, "list")
})

mean_boot <- boot_bw(
Expand All @@ -53,7 +53,7 @@ mean_boot <- boot_bw(
)

test_that("boot_bw works as expected", {
expect_type(mean_boot, "list")
expect_type(mean_boot$boot_data, "list")
})


Expand Down
4 changes: 3 additions & 1 deletion tests/testthat/test-boot_bw_estimate.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,6 @@ test_that("boot_bw_estimate works as expected", {
)

expect_s3_class(boot_bw_estimate(boot_df), "data.frame")
})

expect_error(boot_bw_estimate(boot_df$boot_data))
})
12 changes: 8 additions & 4 deletions tests/testthat/test-post_strat_estimation.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ test_that("estimate_total works as expected", {

names(pop_df) <- c("strata", "pop")

expect_s3_class(estimate_total(est_df, pop_df), "data.frame")
expect_s3_class(
estimate_total(est_df, pop_df, strata = "region"), "data.frame"
)

boot_df <- boot_bw(
indicatorsHH, villageData, statistic = bootClassic,
Expand All @@ -22,14 +24,16 @@ test_that("estimate_total works as expected", {

est_df <- boot_bw_estimate(boot_df)

expect_s3_class(estimate_total(est_df, pop_df), "data.frame")
expect_s3_class(
estimate_total(est_df, pop_df, strata = "region"), "data.frame"
)

est_dfx <- est_df
names(est_dfx) <- c("strat", "indicator", "es", "lcl", "ucl", "sse")

pop_dfx <- pop_df
names(pop_dfx) <- c("strat", "pop")

expect_error(estimate_total(est_dfx, pop_df))
expect_error(estimate_total(est_df, pop_dfx))
expect_error(estimate_total(est_dfx, pop_df, strata = "region"))
expect_error(estimate_total(est_df, pop_dfx, strata = "region"))
})

0 comments on commit a95c80f

Please sign in to comment.