diff --git a/NAMESPACE b/NAMESPACE index 44d5b6a..4d5292b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,9 +1,11 @@ # Generated by roxygen2: do not edit by hand S3method(plot,SUMMERproj) +S3method(plot,svysae) S3method(print,SUMMERmodel) S3method(print,SUMMERmodel.svy) S3method(print,SUMMERprojlist) +S3method(print,svysae) S3method(summary,SUMMERmodel) S3method(summary,SUMMERmodel.svy) S3method(summary,SUMMERprojlist) @@ -13,6 +15,7 @@ export(adjustPopMat) export(aggregateSurvey) export(areaPopToArea) export(calibrateByRegion) +export(compareEstimates) export(expit) export(fitGeneric) export(fitINLA) @@ -32,6 +35,7 @@ export(hatchPlot) export(logit) export(logitNormMean) export(makePopIntegrationTab) +export(mapEstimates) export(mapPlot) export(mapPoints) export(pixelPopToArea) @@ -98,6 +102,7 @@ importFrom(stats,model.frame) importFrom(stats,model.matrix) importFrom(stats,model.response) importFrom(stats,na.pass) +importFrom(stats,qnorm) importFrom(stats,quantile) importFrom(stats,rbinom) importFrom(stats,runif) @@ -106,4 +111,5 @@ importFrom(stats,setNames) importFrom(stats,var) importFrom(terra,extract) importFrom(terra,gdal) +importFrom(utils,combn) importFrom(viridis,viridis_pal) diff --git a/R/check_global.R b/R/check_global.R new file mode 100644 index 0000000..3a51314 --- /dev/null +++ b/R/check_global.R @@ -0,0 +1,16 @@ +# Satisfy the global variable check +utils::globalVariables(c("Domain1", + "Domain2", + "Prob", + "est", + ".data", + "domain", + "method", + "lower", + "upper", + "svytotal", + "svyby", + "svymean" + )) + + \ No newline at end of file diff --git a/R/smoothArea.R b/R/smoothArea.R index 21334fa..7fd14f3 100644 --- a/R/smoothArea.R +++ b/R/smoothArea.R @@ -1,160 +1,204 @@ -#' Smooth via area level model +#' Small area estimation via basic area level model #' -#' Generates small area estimates by smoothing direct estimates using an area -#' level model +#' Generates small area estimates by smoothing direct estimates using an +#' area level model #' -#' @param formula an object of class 'formula' describing the model to be fitted. +#' @param formula An object of class 'formula' describing the model to be fitted. #' If direct.est is specified, the right hand side of the formula is not necessary. -#' @param domain formula specifying variable containing domain labels -#' @param design an object of class "svydesign" containing the data for the model -#' @param responseType of the response variable, currently supports 'binary' (default with logit link function) or 'gaussian'. -#' @param Amat adjacency matrix for the regions. If set to NULL, the IID spatial effect will be used. -#' @param direct.est data frame of direct estimates, with first column containing domain, second column containing direct estimate, and third column containing variance of direct estimate. -#' @param X.area areal covariates data frame. One of the column names needs to match 'domain', in order to be linked to the data input. Currently only supporting time-invariant domain-level covariates. -#' @param domain.size domain size data frame. One of the column names needs to match 'domain' in order to be linked to the data input and there must be a size column containing domain sizes. -#' @param pc.u hyperparameter U for the PC prior on precisions. -#' @param pc.alpha hyperparameter alpha for the PC prior on precisions. -#' @param pc.u.phi hyperparameter U for the PC prior on the mixture probability phi in BYM2 model. -#' @param pc.alpha.phi hyperparameter alpha for the PC prior on the mixture probability phi in BYM2 model. -#' @param CI the desired posterior credible interval to calculate -#' @param n.sample number of draws from posterior used to compute summaries -#' @param var.tol tolerance parameter; if variance of an area's direct estimator is below this value, that direct estimator is dropped from model +#' @param domain One-sided formula specifying factors containing domain labels +#' @param design An object of class "svydesign" containing the data for the model +#' @param adj.mat Adjacency matrix with rownames matching the domain labels. If set to NULL, the IID spatial effect will be used. +#' @param X.domain Data frame of areal covariates. One of the column names needs to match the name of the domain variable, in order to be linked to the data input. Currently only supporting time-invariant covariates. +#' @param direct.est Data frame of direct estimates, with first column containing the domain variable, second column containing direct estimate, and third column containing the variance of direct estimate. +#' @param domain.size Data frame of domain sizes. One of the column names needs to match the name of the domain variable, in order to be linked to the data input and there must be a column names 'size' containing domain sizes. +#' @param transform Optional transformation applied to the direct estimates before fitting area level model. The default option is no transformation, but logit and log are implemented. +#' @param pc.u Hyperparameter U for the PC prior on precisions. See the INLA documentation for more details on the parameterization. +#' @param pc.alpha Hyperparameter alpha for the PC prior on precisions. +#' @param pc.u.phi Hyperparameter U for the PC prior on the mixture probability phi in BYM2 model. +#' @param pc.alpha.phi Hyperparameter alpha for the PC prior on the mixture probability phi in BYM2 model. +#' @param level The specified level for the posterior credible intervals +#' @param n.sample Number of draws from posterior used to compute summaries +#' @param var.tol Tolerance parameter; if variance of an area's direct estimator is below this value, that direct estimator is dropped from model +#' @param return.samples If TRUE, return matrix of posterior samples of area level quantities #' -#' @return A list with elements -#' \item{direct.est}{direct estimates} -#' \item{s.dir.iid.fit}{fitted INLA object for iid domain effects model} -#' \item{s.dir.iid.est}{non-spatial smoothed estimates} -#' \item{s.dir.sp.fit}{fitted INLA object for spatial domain effects model} -#' \item{s.dir.sp.est}{spatially smoothed estimates (if adjacency matrix provided)} +#' @importFrom stats model.frame model.matrix model.response qnorm +#' @importFrom utils combn +#' +#' @return A svysae object #' #' @export #' #' @examples #' \dontrun{ -#' library(survey) #' data(DemoData2) #' data(DemoMap2) +#' library(survey) #' des0 <- svydesign(ids = ~clustid+id, strata = ~strata, #' weights = ~weights, data = DemoData2, nest = T) #' Xmat <- aggregate(age~region, data = DemoData2, FUN = mean) #' -#' EXAMPLE 1: Continuous response model -#' cts.res <- smoothArea(tobacco.use ~ 1, domain = ~region, -#' Amat = DemoMap2$Amat, design = des0, +#' # EXAMPLE 1: Continuous response model +#' cts.res <- smoothArea(tobacco.use ~ 1, +#' domain = ~region, +#' design = des0, +#' adj.mat = DemoMap2$Amat, #' pc.u = 1, #' pc.alpha = 0.01, #' pc.u.phi = 0.5, #' pc.alpha.phi = 2/3) -#' -#' EXAMPLE 2: Including area level covariates -#' cts.cov.res <- smoothArea(tobacco.use ~ age, domain = ~region, -#' Amat = DemoMap2$Amat, design = des0, -#' X.area = Xmat, +#' +#' # EXAMPLE 2: Including area level covariates +#' cts.cov.res <- smoothArea(tobacco.use ~ age, +#' domain = ~region, +#' design = des0, +#' adj.mat = DemoMap2$Amat, +#' X.domain = Xmat, #' pc.u = 1, #' pc.alpha = 0.01, #' pc.u.phi = 0.5, #' pc.alpha.phi = 2/3) -#' -#' EXAMPLE 3: Binary response model -#' bin.res <- smoothArea(tobacco.use ~ 1, domain = ~region, -#' responseType = "binary", -#' Amat = DemoMap2$Amat, design = des0, +#' +#' # EXAMPLE 3: Binary response model +#' bin.res <- smoothArea(tobacco.use ~ 1, +#' domain = ~region, +#' design = des0, +#' adj.mat = DemoMap2$Amat, +#' transform = "logit", #' pc.u = 1, #' pc.alpha = 0.01, #' pc.u.phi = 0.5, #' pc.alpha.phi = 2/3) -#' -#' EXAMPLE 4: Including area level covariates in binary response model -#' bin.cov.res <- smoothArea(tobacco.use ~ age, domain = ~region, -#' responseType = "binary", -#' Amat = DemoMap2$Amat, design = des0, -#' X.area = Xmat, +#' +#' # EXAMPLE 4: Including area level covariates in binary response model +#' bin.cov.res <- smoothArea(tobacco.use ~ age, +#' domain = ~region, +#' design = des0, +#' adj.mat = DemoMap2$Amat, +#' transform = "logit", +#' X.domain = Xmat, #' pc.u = 1, #' pc.alpha = 0.01, #' pc.u.phi = 0.5, #' pc.alpha.phi = 2/3) #'} -#' smoothArea <- function(formula, domain, design = NULL, - responseType = c("gaussian", "binary")[1], - Amat = NULL, + adj.mat = NULL, + X.domain = NULL, direct.est = NULL, - X.area = NULL, domain.size = NULL, + transform = c("identity", "logit", "log"), pc.u = 1, pc.alpha = 0.01, pc.u.phi = 0.5, pc.alpha.phi = 2/3, - CI = .95, + level = .95, n.sample = 250, - var.tol = 1e-10) { + var.tol = 1e-10, + return.samples = F) { - - # SETUP - domain.var <- all.vars(domain)[1] + # setup output + out <- list() + attr(out, "inla.fitted") <- c() + + # identify domain variable + domain.var <- all.vars(domain) + if (length(domain.var) != 1) { + stop("Domain labels must be contained in a single column. Spatio-temporal not supported yet.") + } + + # response formula resp.frm <- as.formula(paste0("~", all.vars(update(formula, . ~ NULL))[1])) + # covariate formula cov.frm <- update(formula, NULL ~ .) - out <- list() - + # calculate direct estimates via svyby if (is.null(direct.est)) { - # calculate direct estimates + # known domain sizes if (!is.null(domain.size)) { - dir.est <- survey::svyby(resp.frm, domain, design = design, survey::svytotal, na.rm = T) - dir.est$domain.size <- domain.size$size[match(dir.est[, 1], domain.size[[domain.var]])] - dir.est[, 2] = dir.est[, 2] / dir.est$domain.size - dir.est[, 3] = (dir.est[, 3] / dir.est$domain.size) ^ 2 - dir.est <- dir.est[, 1:3] + direct.est <- + survey::svyby(resp.frm, domain, design = design, svytotal, na.rm = T) + direct.est$domain.size <- + domain.size[match(direct.est[, 1], domain.size[, 1]), 2] + direct.est[, 2] = direct.est[, 2] / direct.est$domain.size + direct.est[, 3] = (direct.est[, 3] / direct.est$domain.size) ^ 2 + direct.est <- direct.est[, 1:3] } else { - dir.est <- survey::svyby(resp.frm, domain, design = design, survey::svymean, na.rm = T) - dir.est[, 3] <- dir.est[, 3] ^ 2 + direct.est <- + survey::svyby(resp.frm, domain, design = design, svymean, na.rm = T) + direct.est[, 3] <- direct.est[, 3] ^ 2 } - - } else { - # assumes structure of direct.est!!!! - dir.est <- direct.est + # assumes structure of direct.est + direct.est <- direct.est } - rownames(dir.est) <- NULL - colnames(dir.est) <- c("domain", "dir.est", "dir.est.var") - out$direct.est <- dir.est - - # remove any areas with zero sampling variances - mod.dat <- dir.est - mod.dat$dir.est <- ifelse(mod.dat$dir.est.var > var.tol, mod.dat$dir.est, NA) - mod.dat$dir.est.var <- ifelse(mod.dat$dir.est.var > var.tol, mod.dat$dir.est.var, NA) - mod.dat$dir.est.prec <- 1 / mod.dat$dir.est.var + colnames(direct.est) <- c("domain", "mean", "var") + direct.est <- + data.frame(domain = direct.est$domain, + mean = direct.est$mean, + median = direct.est$mean, + var = direct.est$var, + lower = direct.est$mean + + qnorm((1-level)/2) * sqrt(direct.est$var), + upper = direct.est$mean + + qnorm(1 - (1-level)/2) * sqrt(direct.est$var), + method = paste0("Direct")) + out$direct.est <- direct.est + attr(out, "domain.names") <- sort(direct.est$domain) + attr(out, "method.names") <- c("direct.est") + + # prepare data for modeling, removing any areas with zero/low sampling variances + mod.dat <- direct.est + mod.dat$mean <- + ifelse(mod.dat$var > var.tol, mod.dat$mean, NA) + mod.dat$var <- + ifelse(mod.dat$var > var.tol, mod.dat$var, NA) + mod.dat$prec <- 1 / mod.dat$var mod.dat$domain.id <- 1:nrow(mod.dat) - if(!is.null(X.area)) { - mod.X.area <- X.area - mod.X.area$domain <- X.area[[domain.var]] - mod.X.area <- mod.X.area[, names(mod.X.area) != domain.var] - mod.dat <- merge(mod.dat, mod.X.area, by = "domain") + if(!is.null(X.domain)) { + mod.X.domain <- X.domain + mod.X.domain$domain <- X.domain[[domain.var]] + mod.X.domain <- mod.X.domain[, names(mod.X.domain) != domain.var] + mod.dat <- merge(mod.dat, mod.X.domain, by = "domain") } mod.dat <- mod.dat[match(1:nrow(mod.dat), mod.dat$domain.id), ] - mm.area <- stats::model.matrix(cov.frm, mod.dat) - h <- function(x) x - h.inv <- function(x) x - if (responseType == "binary") { + mm.domain <- model.matrix(cov.frm, mod.dat) + + transform <- match.arg(transform) + attr(out, "transform") <- transform + + # apply transformation to direct estimates + if (transform == "identity") { + h <- function(x) x + h.inv <- function(x) x + if (all(mod.dat$mean < 1 & mod.dat$mean > 0)) { + warning("Direct estimates appear to be proportions. You may want to consider using transform = 'logit'.") + } + } else if (transform == "logit") { h <- function(x) logit(x) h.inv <- function(x) expit(x) - mod.dat$dir.est.prec <- - (mod.dat$dir.est^2*(1-mod.dat$dir.est)^2) / mod.dat$dir.est.var - mod.dat$dir.est <- h(mod.dat$dir.est) - + mod.dat$prec <- + (mod.dat$mean^2*(1-mod.dat$mean)^2) / mod.dat$var + mod.dat$mean <- h(mod.dat$mean) + } else if (transform == "log") { + h <- function(x) log(x) + h.inv <- function(x) exp(x) + mod.dat$prec <- + mod.dat$mean^2 / mod.dat$var + mod.dat$mean <- h(mod.dat$mean) } - mod.dat$scale <- mod.dat$dir.est.prec + mod.dat$scale <- mod.dat$prec + # prepare formula for INLA s.dir.ftxt <- - paste("dir.est ~ 1") + paste("mean ~ 1") if (length(all.vars(cov.frm)) > 0) { s.dir.ftxt <- paste(s.dir.ftxt, paste(all.vars(cov.frm), collapse = " + "), sep = " + ") } - s.dir.iid.ftxt <- + iid.model.ftxt <- paste0(s.dir.ftxt, " + f(domain.id, model = 'iid', hyper = hyperpc.iid.int)") # set priors hyperpc.iid.int <- list( @@ -162,87 +206,344 @@ smoothArea <- function(formula, param = c(pc.u , pc.alpha)) ) # SMOOTHED DIRECT w/ IID RE - s.dir.iid.fit <- - INLA::inla(as.formula(s.dir.iid.ftxt), + iid.model.fit <- + INLA::inla(as.formula(iid.model.ftxt), family = "gaussian", data = mod.dat, - scale = mod.dat$dir.est.prec, + scale = mod.dat$prec , control.family = list(hyper = list(prec = list(initial= log(1), fixed= TRUE))), control.predictor = list(compute = TRUE), control.compute=list(config = TRUE)) sample.id <-c(list(domain.id = 1:nrow(mod.dat)), - as.list(setNames(rep(1, ncol(mm.area)), colnames(mm.area)))) - s.dir.iid.sample <- - INLA::inla.posterior.sample(n = n.sample, s.dir.iid.fit, sample.id) - fe.idx <- grep(colnames(mm.area)[1], rownames(s.dir.iid.sample[[1]]$latent)) - fe.idx <- fe.idx:(fe.idx + ncol(mm.area) - 1) - s.dir.iid.mat <- - do.call(cbind, lapply(s.dir.iid.sample, + as.list(setNames(rep(1, ncol(mm.domain)), colnames(mm.domain)))) + iid.model.sample <- + INLA::inla.posterior.sample(n = n.sample, iid.model.fit, sample.id) + fe.idx <- grep(colnames(mm.domain)[1], rownames(iid.model.sample[[1]]$latent)) + fe.idx <- fe.idx:(fe.idx + ncol(mm.domain) - 1) + + # sample domain means from posterior + iid.model.mat <- + do.call(cbind, lapply(iid.model.sample, function(x) x$latent[1:nrow(mod.dat)] + - mm.area %*% x$latent[fe.idx])) - - s.dir.iid.mat <- h.inv(s.dir.iid.mat) - out$s.dir.iid.fit <- s.dir.iid.fit - - out$s.dir.iid.est <- + mm.domain %*% x$latent[fe.idx])) + + iid.model.mat <- h.inv(iid.model.mat) + out$iid.model.fit <- iid.model.fit + + out$iid.model.est <- data.frame(domain = mod.dat$domain, - mean = rowMeans(s.dir.iid.mat), - median = apply(s.dir.iid.mat, 1, + mean = rowMeans(iid.model.mat), + median = apply(iid.model.mat, 1, function(x) median(x, na.rm = T)), - var = apply(s.dir.iid.mat, 1, var), - lower = apply(s.dir.iid.mat, 1, - function(x) quantile(x, (1-CI)/2, na.rm = T)), - upper = apply(s.dir.iid.mat, 1, - function(x) quantile(x, 1-(1-CI)/2, na.rm = T)), - method = paste0("Smoothed Direct: IID")) + var = apply(iid.model.mat, 1, var), + lower = apply(iid.model.mat, 1, + function(x) quantile(x, (1-level)/2, na.rm = T)), + upper = apply(iid.model.mat, 1, + function(x) quantile(x, 1-(1-level)/2, na.rm = T)), + method = paste0("Area level model: IID")) + attr(out, "method.names") <- c(attr(out, "method.names"), "iid.model.est") + attr(out, "inla.fitted") <- c(attr(out, "inla.fitted"), "iid.model") + + if (return.samples) { + out$iid.model.sample <- iid.model.mat + } else { + out$iid.model.sample <- NULL + } # SMOOTHED DIRECT w/ BYM2 RE - if (!is.null(Amat)) { - mod.dat$domain.id <- match(mod.dat$domain, rownames(Amat)) + if (!is.null(adj.mat)) { + mod.dat$domain.id <- match(mod.dat$domain, rownames(adj.mat)) mod.dat <- mod.dat[match(1:nrow(mod.dat), mod.dat$domain.id), ] - mm.area <- model.matrix(cov.frm, mod.dat) + mm.domain <- model.matrix(cov.frm, mod.dat) hyperpc.bym.int <- list( prec = list(prior = "pc.prec", param = c(pc.u , pc.alpha)), phi = list(prior = 'pc', param = c(pc.u.phi , pc.alpha.phi)) ) - s.dir.sp.ftxt <- + + # prepare formula for INLA + bym2.model.ftxt <- paste0(s.dir.ftxt, - "+ f(domain.id, model = 'bym2', graph = Amat,", + "+ f(domain.id, model = 'bym2', graph = adj.mat,", "hyper = hyperpc.bym.int, scale.model = TRUE)") - s.dir.sp.fit <- - INLA::inla(as.formula(s.dir.sp.ftxt), + bym2.model.fit <- + INLA::inla(as.formula(bym2.model.ftxt), family = "gaussian", data = mod.dat, - scale = mod.dat$dir.est.prec, + scale = mod.dat$prec , control.family = list(hyper = list(prec = list(initial= log(1), fixed= TRUE))), control.predictor = list(compute = TRUE), control.compute=list(config = TRUE)) - sample.id <-c(list(domain.id = 1:nrow(Amat)), - as.list(setNames(rep(1, ncol(mm.area)), colnames(mm.area)))) - s.dir.sp.sample <- - INLA::inla.posterior.sample(n = n.sample, s.dir.sp.fit, sample.id) - fe.idx <- grep(colnames(mm.area)[1], rownames(s.dir.sp.sample[[1]]$latent)) - fe.idx <- fe.idx:(fe.idx + ncol(mm.area) - 1) - s.dir.sp.mat <- - do.call(cbind, lapply(s.dir.sp.sample, - function(x) x$latent[1:nrow(Amat)] + - mm.area %*% x$latent[fe.idx])) - s.dir.sp.mat <- h.inv(s.dir.sp.mat) - out$s.dir.sp.fit <- s.dir.sp.fit - out$s.dir.sp.est <- + sample.id <-c(list(domain.id = 1:nrow(adj.mat)), + as.list(setNames(rep(1, ncol(mm.domain)), colnames(mm.domain)))) + bym2.model.sample <- + INLA::inla.posterior.sample(n = n.sample, bym2.model.fit, sample.id) + fe.idx <- grep(colnames(mm.domain)[1], rownames(bym2.model.sample[[1]]$latent)) + fe.idx <- fe.idx:(fe.idx + ncol(mm.domain) - 1) + + # sample domain means from posterior + bym2.model.mat <- + do.call(cbind, lapply(bym2.model.sample, + function(x) x$latent[1:nrow(adj.mat)] + + mm.domain %*% x$latent[fe.idx])) + bym2.model.mat <- h.inv(bym2.model.mat) + out$bym2.model.fit <- bym2.model.fit + out$bym2.model.est <- data.frame(domain = mod.dat$domain, - mean = rowMeans(s.dir.sp.mat), - median = apply(s.dir.sp.mat, 1, + mean = rowMeans(bym2.model.mat), + median = apply(bym2.model.mat, 1, function(x) median(x, na.rm = T)), - var = apply(s.dir.sp.mat, 1, var), - lower = apply(s.dir.sp.mat, 1, - function(x) quantile(x, (1-CI)/2, na.rm = T)), - upper = apply(s.dir.sp.mat, 1, - function(x) quantile(x, 1-(1-CI)/2, na.rm = T)), - method = paste0("Smoothed Direct: BYM2")) - out$s.dir.sp.est <- - out$s.dir.sp.est[match(out$direct.est$domain, out$s.dir.sp.est$domain),] - + var = apply(bym2.model.mat, 1, var), + lower = apply(bym2.model.mat, 1, + function(x) quantile(x, (1-level)/2, na.rm = T)), + upper = apply(bym2.model.mat, 1, + function(x) quantile(x, 1-(1-level)/2, na.rm = T)), + method = paste0("Area level model: BYM2")) + out$bym2.model.est <- + out$bym2.model.est[match(out$direct.est$domain, out$bym2.model.est$domain),] + if (return.samples) { + out$bym2.model.sample <- bym2.model.mat + } else { + out$bym2.model.sample <- NULL + } + attr(out, "method.names") <- c(attr(out, "method.names"), "bym2.model.est") + attr(out, "inla.fitted") <- c(attr(out, "inla.fitted"), "bym2.model") } + out$call <- match.call() + class(out) <- c("svysae") return(out) } + +#' @method print svysae +#' @export +print.svysae <- function(x, ...) { + x_att <- attributes(x) + + # print out call + cat("Call:\n") + print(x$call) + cat("\n") + + # print estimation methods + cat("Methods used: ") + cat(x_att$method.names, sep = ", ") + cat("\n\n") + + # print transform, if used + if (!is.null(x_att$transform)) { + cat("Transform: ") + cat(x_att$transform) + cat("\n\n") + } + + # print estimates + for (i in x_att$method.names) { + cat(i, "\n") + print(x[[i]]) + cat("\n") + } +} + +#' @method plot svysae +#' @export +plot.svysae <- function(x, return_list = F, plot.factor = NULL, ...) { + combined_est <- do.call(rbind, x[attr(x, "method.names")]) + + # pull the first method (should be "direct.est") + ref_method <- attr(x, "method.names")[1] + m <- nrow(x[[ref_method]]) + sorted_levels <- x[[ref_method]]$domain[order(x[[ref_method]]$mean)] + combined_est$domain <- factor(combined_est$domain, + levels = sorted_levels) + + # split across multiple plots for many estimates + if (m > 30 & is.null(plot.factor)) { + plot_breaks <- c(seq(0, m, by = 30), m) + plot_labels <- paste0( + "Areas ", + (seq_along(plot_breaks)[-1] - 2) * 30 + 1, + " to ", + pmin((seq_along(plot_breaks)[-1] - 1) * 30, m) + ) + combined_est$plot <- + cut(1:m, breaks = plot_breaks, labels = plot_labels) + } else if (is.null(plot.factor)) { + combined_est$plot <- factor(1, levels = 1) + } + plot_list <- lapply( + split(combined_est, combined_est$plot), + function(x) { + ggplot2::ggplot(x, ggplot2::aes(x = domain, y = mean, color = method)) + + ggplot2::geom_point(position = position_dodge(width = 0.5)) + + ggplot2::geom_linerange(ggplot2::aes(x = domain, ymin = lower, ymax = upper), + position = ggplot2::position_dodge(width = 0.5)) + + ggplot2::scale_color_discrete(name = "Method") + + ggplot2::ylab("Small Area Estimate") + ggplot2::xlab("Domain") + + ggplot2::theme_bw() + + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust=1)) + } + ) + if (!return_list) { + for (i in seq_along(plot_list)) { + print(plot_list[[i]]) + } + } else { + return(plot_list) + } +} + +#' Plot heatmap comparing pairwise posterior exceedence probabilities for all +#' areas +#' +#' @param x svysae object. Plots are created for all models in this object. +#' @param posterior.sample Matrix of posteriors samples of area level quantities with one row for each area and one column for each sample. This argument may be specified to only provide a heatmap for the desired samples. +#' +#' @return ggplot containing heat map of pairwise comparisons +#' +#' @export +#' +#' @examples +#' \dontrun{ +#' data(DemoData2) +#' data(DemoMap2) +#' des0 <- svydesign(ids = ~clustid+id, strata = ~strata, +#' weights = ~weights, data = DemoData2, nest = T) +#' Xmat <- aggregate(age~region, data = DemoData2, FUN = mean) +#' +#' cts.res <- smoothArea(tobacco.use ~ 1, +#' domain = ~region, +#' design = des0, +#' adj.mat = DemoMap2$Amat, +#' pc.u = 1, +#' pc.alpha = 0.01, +#' pc.u.phi = 0.5, +#' pc.alpha.phi = 2/3, +#' return.samples = T) +#' compareEstimates(cts.res) +#' } +compareEstimates <- function(x, + posterior.sample = NULL) { + + + x_att <- attributes(x) + if (is.null(posterior.sample)) { + sample_list <- x[paste0(x_att$inla.fitted, ".sample")] + } else { + sample_list <- list(posterior.sample) + } + for (i in seq_along(sample_list)) { + current_samples <- sample_list[[i]] + domain_medians <- apply(current_samples, 1, median) + median_order <- order(domain_medians) + + current_samples <- current_samples[median_order, ] + # get indices for all possible pairs of admin 1 areas + domain_comb <- combn(length(x_att$domain.names), 2) + + plot_dat <- data.frame( + Domain1 = x_att$domain.names[median_order][domain_comb[1,]], + Domain2 = x_att$domain.names[median_order][domain_comb[2,]] + ) + plot_dat$Domain1 <- factor(plot_dat$Domain1, + x_att$domain.names[median_order]) + plot_dat$Domain2 <- factor(plot_dat$Domain2, + x_att$domain.names[median_order]) + plot_dat$Prob = apply( + domain_comb, 2, + function(x) mean(current_samples[x[1],] > current_samples[x[2],]) + ) + plot_dat$est = NA + + median_dat <- data.frame( + Domain1 = x_att$domain.names[median_order], + Domain2 = "Median", + Prob = NA, + est = domain_medians + + ) + extra_cols <- c("", "Median", "Interval") + # combine into single tibble for plotting + plot_dat <- rbind(plot_dat, median_dat) + + plot_dat$Domain2 <- + factor(plot_dat$Domain2, levels = c(levels(plot_dat$Domain1), extra_cols)) + + g_heat <- ggplot2::ggplot(data = plot_dat, + ggplot2::aes(x = Domain2, y = Domain1, fill = Prob)) + + ggplot2::theme_minimal() + + ggplot2::theme(legend.position="bottom", + axis.text.x = ggplot2::element_text(angle = 45, vjust = 1, hjust=1, size = 10), + panel.grid.major = ggplot2::element_blank(), + panel.grid.minor = ggplot2::element_blank(), + text = ggplot2::element_text(size = 10)) + + ggplot2::geom_tile() + + ggplot2::geom_text( + ggplot2::aes( + label = ifelse(is.na(Prob), "", sprintf("%0.2f", round(Prob, digits = 2))), + color = Prob > .5 + ), + size = 5 + ) + + ggplot2::geom_text( + data = plot_dat, + ggplot2::aes( + label = ifelse(is.na(est), "", sprintf("%0.2f", round(est, digits = 2))) + ), + size = 5 + ) + + ggplot2::coord_equal() + + ggplot2::scale_fill_viridis_c(name = "Probability of Domain 1 > Domain 2", + na.value = "white") + + ggplot2::scale_color_manual(values = c("white", "black"), guide = "none") + + ggplot2::labs(title = x_att$inla.fitted[i], + x = "Domain 2", + y = "Domain 1") + suppressWarnings(print(g_heat)) + } +} +#' Title +#' +#' +#' @param x syvsae object +#' @param geo.data sf object containing polygon data for the small areas. One of the columns should be named domain and contain the domain labels. +#' @param variable The posterior summary variable to plot. May be one of "median", "mean", or "var". +#' @param viridis.option viridis color scheme +#' +#' @return ggplot containing map of small area posterior summary statistics +#' +#' @export +#' +#' @examples +#' \dontrun{ +#' data(DemoData2) +#' data(DemoMap2) +#' library(survey) +#' des0 <- svydesign(ids = ~clustid+id, strata = ~strata, +#' weights = ~weights, data = DemoData2, nest = T) +#' Xmat <- aggregate(age~region, data = DemoData2, FUN = mean) +#' geo.data <- sf::st_as_sf(DemoMap2$geo) +#' geo.data$domain <- geo.data$REGNAME +#' cts.res <- smoothArea(tobacco.use ~ 1, +#' domain = ~region, +#' design = des0, +#' adj.mat = DemoMap2$Amat, +#' pc.u = 1, +#' pc.alpha = 0.01, +#' pc.u.phi = 0.5, +#' pc.alpha.phi = 2/3, +#' return.samples = T) +#' mapEstimates(cts.res, geo.data = geo.data, variable = "median") +#' mapEstimates(cts.res, geo.data = geo.data, variable = "var") +#' } +mapEstimates <- function(x, geo.data, variable, viridis.option = "viridis") { + combined_est <- do.call(rbind, x[attr(x, "method.names")]) + + # join estimates and geo, by domain column + plot_dat <- merge(geo.data, combined_est, all.x = TRUE) + ggplot2::ggplot(plot_dat, ggplot2::aes(fill = .data[[variable]])) + + ggplot2::geom_sf() + + ggplot2::facet_wrap(~method) + + ggplot2::theme_bw() + + ggplot2::scale_fill_viridis_c(name = variable, option = viridis.option) +} + + diff --git a/R/smoothUnit.R b/R/smoothUnit.R index 84b1dab..5b6d244 100644 --- a/R/smoothUnit.R +++ b/R/smoothUnit.R @@ -1,34 +1,29 @@ -#' Smooth via unit level model +#' Smooth via basic unit level model #' -#' Generates small area estimates by smoothing direct estimates using a unit -#' level model +#' Generates small area estimates by smoothing direct estimates using a basic +#' unit level model #' -#' @param formula an object of class "formula" describing the model to be fitted. -#' @param domain formula specifying variable containing domain labels -#' @param design an object of class "svydesign" containing the data for the model +#' @param formula An object of class 'formula' describing the model to be fitted. +#' @param domain One-sided formula specifying factors containing domain labels +#' @param design An object of class "svydesign" containing the data for the model #' @param family of the response variable, currently supports 'binomial' (default with logit link function) or 'gaussian'. -#' @param Amat Adjacency matrix for the regions. If set to NULL, the IID spatial effect will be used. -#' @param X.pop unit-level covariates data frame. One of the column name needs to match the domain specified, in order to be linked to the data input. Currently only supporting time-invariant domain-level covariates. -#' @param domain.size Domain size data frame. One of the column name needs to match the domain specified, in order to be linked to the data input and there must be a size column containing domain sizes. -#' @param pc.u hyperparameter U for the PC prior on precisions. -#' @param pc.alpha hyperparameter alpha for the PC prior on precisions. -#' @param pc.u.phi hyperparameter U for the PC prior on the mixture probability phi in BYM2 model. -#' @param pc.alpha.phi hyperparameter alpha for the PC prior on the mixture probability phi in BYM2 model. -#' @param CI the desired posterior credible interval to calculate -#' @param n.sample number of draws from posterior used to compute summaries +#' @param X.pop Data frame of population unit-level covariates. One of the column name needs to match the domain specified, in order to be linked to the data input. Currently only supporting time-invariant covariates. +#' @param adj.mat Adjacency matrix with rownames matching the domain labels. If set to NULL, the IID spatial effect will be used. +#' @param domain.size Data frame of domain sizes. One of the column names needs to match the name of the domain variable, in order to be linked to the data input and there must be a column names 'size' containing domain sizes. The default option is no transformation, but logit and log are implemented. +#' @param pc.u Hyperparameter U for the PC prior on precisions. See the INLA documentation for more details on the parameterization. +#' @param pc.alpha Hyperparameter alpha for the PC prior on precisions. +#' @param pc.u.phi Hyperparameter U for the PC prior on the mixture probability phi in BYM2 model. +#' @param pc.alpha.phi Hyperparameter alpha for the PC prior on the mixture probability phi in BYM2 model. +#' @param level The specified level for the posterior credible intervals +#' @param n.sample Number of draws from posterior used to compute summaries +#' @param return.samples If TRUE, return matrix of posterior samples of area level quantities #' -#' @return A list with elements -#' \item{direct.est}{direct estimates} -#' \item{model.fit}{fitted INLA object for iid domain effects model} -#' \item{model.est}{smoothed estimates} -#' @importFrom stats model.frame -#' @importFrom stats model.matrix -#' @importFrom stats model.response +#' @return A svysae object +#' #' @export #' #' @examples #' \dontrun{ -#' library(survey) #' data(DemoData2) #' data(DemoMap2) #' des0 <- svydesign(ids = ~clustid+id, strata = ~strata, @@ -45,19 +40,19 @@ #' domain = ~region, #' design = des0, X.pop = DemoData2) #'} - smoothUnit <- function(formula, domain, design, family = c("gaussian", "binomial")[1], - Amat = NULL, X.pop = NULL, + adj.mat = NULL, domain.size = NULL, pc.u = 1, pc.alpha = 0.01, pc.u.phi = 0.5, pc.alpha.phi = 2/3, - CI = .95, n.sample = 250) { + level = .95, n.sample = 250, + return.samples = F) { if (design$has.strata) { warning("This model does not account for stratification yet.") @@ -70,32 +65,54 @@ smoothUnit <- function(formula, stop(paste0("family = ", family, " not supported")) } - # SETUP - domain.var <- all.vars(domain)[1] + # SETUP ---------------------------------------------------------------------- + + # set up return value + out <- list() + attr(out, "inla.fitted") <- c() + + # get domain variable + domain.var <- all.vars(domain) + if (length(domain.var) != 1) { + stop("Domain labels must be contained in a single column. Spatio-temporal not supported yet.") + } + + # set up formulas resp.frm <- as.formula(paste0("~", all.vars(formula)[1])) cov.frm <- update(formula, NULL ~ .) - out <- list() + + # DIRECT ESTIMATES ----------------------------------------------------------- + # compute direct estimates (Hajek estimates if domain size unknown) if (!is.null(domain.size)) { - dir.est <- survey::svyby(resp.frm, domain, design = design, survey::svytotal, na.rm = T) - dir.est$domain.size <- domain.size$size[match(dir.est[, 1], domain.size[[domain.var]])] - dir.est[, 2] = dir.est[, 2] / dir.est$domain.size - dir.est[, 3] = (dir.est[, 3] / dir.est$domain.size) ^ 2 - dir.est <- dir.est[, 1:3] + direct.est <- svyby(resp.frm, domain, design = design, svytotal, na.rm = T) + direct.est$domain.size <- + domain.size$size[match(direct.est[, 1], domain.size[[domain.var]])] + direct.est[, 2] = direct.est[, 2] / direct.est$domain.size + direct.est[, 3] = (direct.est[, 3] / direct.est$domain.size) ^ 2 + direct.est <- direct.est[, 1:3] } else { - dir.est <- survey::svyby(resp.frm, domain, design = design, survey::svymean, na.rm = T) - dir.est[, 3] <- dir.est[, 3] ^ 2 + direct.est <- svyby(resp.frm, domain, design = design, svymean, na.rm = T) + direct.est[, 3] <- direct.est[, 3] ^ 2 } - rownames(dir.est) <- NULL - colnames(dir.est) <- c("domain", "dir.est", "dir.est.var") - out$direct.est <- dir.est + rownames(direct.est) <- NULL + colnames(direct.est) <- c("domain", "mean", "var") + direct.est <- + data.frame(domain = direct.est$domain, + mean = direct.est$mean, + median = direct.est$mean, + var = direct.est$var, + lower = direct.est$mean + qnorm((1-level)/2) * sqrt(direct.est$var), + upper = direct.est$mean + qnorm(1 - (1-level)/2) * sqrt(direct.est$var), + method = paste0("Direct")) + out$direct.est <- direct.est + attr(out, "domain.names") <- sort(direct.est$domain) + attr(out, "method.names") <- c("direct.est") + - # MAKE DATA FRAME + # UNIT LEVEL MODEL ----------------------------------------------------------- mf <- model.frame(formula, design$variables) resp <- model.response(mf, "numeric") - - #Xvars <- delete.response(terms(formula)) - pop.dat <- data.frame( domain = X.pop[[domain.var]] ) @@ -106,83 +123,93 @@ smoothUnit <- function(formula, ) mod.dat <- cbind(mod.dat, model.matrix(cov.frm, design$variables)) + # if no adjacency matrix, take domain names from X.pop domain.table <- data.frame(domain = unique(as.character(pop.dat$domain))) - if (!is.null(Amat)) { - domain.table <- data.frame(domain = rownames(Amat)) + + # if adjacency matrix provided, take domain names from row names + if (!is.null(adj.mat)) { + domain.table <- data.frame(domain = rownames(adj.mat)) } + + # domain labels as indexes for use in INLA domain.table$domain.struct <- seq_len(nrow(domain.table)) mod.dat$domain.struct <- match(mod.dat$domain, domain.table$domain) pop.dat$domain.struct <- match(pop.dat$domain, domain.table$domain) - # MODEL FORMULA + + # model formula ftxt <- paste("resp ~ 1") if (length(all.vars(cov.frm)) > 0) { ftxt <- paste(ftxt, paste(all.vars(cov.frm), collapse = " + "), sep = " + ") } - if (is.null(Amat)) { + if (is.null(adj.mat)) { # set priors hyperpc.iid.int <- list( prec = list(prior = "pc.prec", param = c(pc.u , pc.alpha)) ) warning("No spatial information provided, using iid domain effects") + model.method <- "iid.model" ftxt <- paste0(ftxt, " + f(domain.struct, model = 'iid', hyper = hyperpc.iid.int)") } else { hyperpc.bym.int <- list( prec = list(prior = "pc.prec", param = c(pc.u , pc.alpha)), phi = list(prior = 'pc', param = c(pc.u.phi , pc.alpha.phi)) ) - ftxt <- paste0(ftxt, " + f(domain.struct, model = 'bym2', graph=Amat, hyper = hyperpc.bym.int)") + model.method <- "bym2.model" + ftxt <- paste0(ftxt, " + f(domain.struct, model = 'bym2', graph=adj.mat, hyper = hyperpc.bym.int)") } - - # ADD IID CLUSTER EFFECT - if(ncol(design$cluster) > 1) { - mod.dat$cluster <- design$cluster[, 1] - ftxt <- paste0(ftxt, " + f(cluster, model = 'iid', hyper = hyperpc.iid.int)") - } - # FIT MODEL + + # fit model mod.frm <- as.formula(ftxt) fit <- INLA::inla(mod.frm, family = family, data = mod.dat, control.compute = list(dic = T, mlik = T, cpo = T, config = TRUE), control.predictor = list(compute = TRUE), - lincomb = NULL, quantiles = c((1-CI)/2, 0.5, 1-(1-CI)/2)) + lincomb = NULL, quantiles = c((1-level)/2, 0.5, 1-(1-level)/2)) - # GENERATE DRAWS (IF BINOMIAL?) - sampAll <- INLA::inla.posterior.sample(n = n.sample, result = fit, intern = TRUE) - - proj <- cbind(domain.table$domain, - data.frame(mean=NA, var=NA, median=NA, lower=NA, upper=NA, - logit.mean=NA, logit.var=NA, - logit.median=NA, logit.lower=NA, logit.upper=NA)) - fe.idx <- grep(colnames(mm.pop)[1], rownames(sampAll[[1]]$latent)) + # generate draws + samp.all <- INLA::inla.posterior.sample(n = n.sample, result = fit, intern = TRUE) + + # identify indices of fixed effects and random effects + fe.idx <- grep(colnames(mm.pop)[1], rownames(samp.all[[1]]$latent)) fe.idx <- fe.idx:(fe.idx + length(all.vars(cov.frm))) - re.idx <- grep("domain.struct", x = rownames(sampAll[[1]]$latent)) + re.idx <- grep("domain.struct", x = rownames(samp.all[[1]]$latent)) + + # aggregate sample predictions summary.sample <- function(x) { pop.unit.ests <- x$latent[re.idx][pop.dat$domain.struct] + mm.pop %*% x$latent[fe.idx] - # pop.unit.ests <- pop.unit.ests + - # rnorm(nrow(mm.pop), sd = sqrt(1 / exp(x$hyperpar[1]))) if (family == "binomial") { pop.unit.ests <- expit(pop.unit.ests) } area.ests <- aggregate(pop.unit.ests, list(domain = pop.dat$domain.struct), mean) return(area.ests[match(1:length(re.idx), area.ests[, 1]), 2]) } - est.mat <- do.call(cbind, lapply(sampAll, summary.sample)) - out$model.fit <- fit - out$model.est <- + est.mat <- do.call(cbind, lapply(samp.all, summary.sample)) + out[[paste0(model.method, ".fit")]] <- fit + out[[paste0(model.method, ".est")]] <- data.frame(domain = unique(pop.dat$domain), mean = rowMeans(est.mat), median = apply(est.mat, 1, function(x) median(x, na.rm = T)), var = apply(est.mat, 1, var), lower = apply(est.mat, 1, - function(x) quantile(x, (1-CI)/2, na.rm = T)), + function(x) quantile(x, (1-level)/2, na.rm = T)), upper = apply(est.mat, 1, - function(x) quantile(x, 1-(1-CI)/2, na.rm = T))) + function(x) quantile(x, 1-(1-level)/2, na.rm = T)), + method = paste0("Unit level model: IID")) + if (return.samples) { + out[[paste0(model.method, ".sample")]] <- est.mat + } + attr(out, "method.names") <- c(attr(out, "method.names"), paste0(model.method, ".est")) + attr(out, "inla.fitted") <- c(attr(out, "inla.fitted"), model.method) + + # finish building return value + out$call <- match.call() + class(out) <- c("svysae") return(out) } diff --git a/man/compareEstimates.Rd b/man/compareEstimates.Rd new file mode 100644 index 0000000..6e22243 --- /dev/null +++ b/man/compareEstimates.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/smoothArea.R +\name{compareEstimates} +\alias{compareEstimates} +\title{Plot heatmap comparing pairwise posterior exceedence probabilities for all +areas} +\usage{ +compareEstimates(x, posterior.sample = NULL) +} +\arguments{ +\item{x}{svysae object. Plots are created for all models in this object.} + +\item{posterior.sample}{Matrix of posteriors samples of area level quantities with one row for each area and one column for each sample. This argument may be specified to only provide a heatmap for the desired samples.} +} +\value{ +ggplot containing heat map of pairwise comparisons +} +\description{ +Plot heatmap comparing pairwise posterior exceedence probabilities for all +areas +} +\examples{ +\dontrun{ +data(DemoData2) +data(DemoMap2) +des0 <- svydesign(ids = ~clustid+id, strata = ~strata, + weights = ~weights, data = DemoData2, nest = T) +Xmat <- aggregate(age~region, data = DemoData2, FUN = mean) + +cts.res <- smoothArea(tobacco.use ~ 1, + domain = ~region, + design = des0, + adj.mat = DemoMap2$Amat, + pc.u = 1, + pc.alpha = 0.01, + pc.u.phi = 0.5, + pc.alpha.phi = 2/3, + return.samples = T) +compareEstimates(cts.res) +} +} diff --git a/man/mapEstimates.Rd b/man/mapEstimates.Rd new file mode 100644 index 0000000..ea75d36 --- /dev/null +++ b/man/mapEstimates.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/smoothArea.R +\name{mapEstimates} +\alias{mapEstimates} +\title{Title} +\usage{ +mapEstimates(x, geo.data, variable, viridis.option = "viridis") +} +\arguments{ +\item{x}{syvsae object} + +\item{geo.data}{sf object containing polygon data for the small areas. One of the columns should be named domain and contain the domain labels.} + +\item{variable}{The posterior summary variable to plot. May be one of "median", "mean", or "var".} + +\item{viridis.option}{viridis color scheme} +} +\value{ +ggplot containing map of small area posterior summary statistics +} +\description{ +Title +} +\examples{ +\dontrun{ +data(DemoData2) +data(DemoMap2) +library(survey) +des0 <- svydesign(ids = ~clustid+id, strata = ~strata, + weights = ~weights, data = DemoData2, nest = T) +Xmat <- aggregate(age~region, data = DemoData2, FUN = mean) +geo.data <- sf::st_as_sf(DemoMap2$geo) +geo.data$domain <- geo.data$REGNAME +cts.res <- smoothArea(tobacco.use ~ 1, + domain = ~region, + design = des0, + adj.mat = DemoMap2$Amat, + pc.u = 1, + pc.alpha = 0.01, + pc.u.phi = 0.5, + pc.alpha.phi = 2/3, + return.samples = T) +mapEstimates(cts.res, geo.data = geo.data, variable = "median") +mapEstimates(cts.res, geo.data = geo.data, variable = "var") +} +} diff --git a/man/smoothArea.Rd b/man/smoothArea.Rd index f303cba..6f74509 100644 --- a/man/smoothArea.Rd +++ b/man/smoothArea.Rd @@ -2,73 +2,70 @@ % Please edit documentation in R/smoothArea.R \name{smoothArea} \alias{smoothArea} -\title{Smooth via area level model} +\title{Small area estimation via basic area level model} \usage{ smoothArea( formula, domain, design = NULL, - responseType = c("gaussian", "binary")[1], - Amat = NULL, + adj.mat = NULL, + X.domain = NULL, direct.est = NULL, - X.area = NULL, domain.size = NULL, + transform = c("identity", "logit", "log"), pc.u = 1, pc.alpha = 0.01, pc.u.phi = 0.5, pc.alpha.phi = 2/3, - CI = 0.95, + level = 0.95, n.sample = 250, - var.tol = 1e-10 + var.tol = 1e-10, + return.samples = F ) } \arguments{ -\item{formula}{an object of class 'formula' describing the model to be fitted. +\item{formula}{An object of class 'formula' describing the model to be fitted. If direct.est is specified, the right hand side of the formula is not necessary.} -\item{domain}{formula specifying variable containing domain labels} +\item{domain}{One-sided formula specifying factors containing domain labels} -\item{design}{an object of class "svydesign" containing the data for the model} +\item{design}{An object of class "svydesign" containing the data for the model} -\item{responseType}{of the response variable, currently supports 'binary' (default with logit link function) or 'gaussian'.} +\item{adj.mat}{Adjacency matrix with rownames matching the domain labels. If set to NULL, the IID spatial effect will be used.} -\item{Amat}{adjacency matrix for the regions. If set to NULL, the IID spatial effect will be used.} +\item{X.domain}{Data frame of areal covariates. One of the column names needs to match the name of the domain variable, in order to be linked to the data input. Currently only supporting time-invariant covariates.} -\item{direct.est}{data frame of direct estimates, with first column containing domain, second column containing direct estimate, and third column containing variance of direct estimate.} +\item{direct.est}{Data frame of direct estimates, with first column containing the domain variable, second column containing direct estimate, and third column containing the variance of direct estimate.} -\item{X.area}{areal covariates data frame. One of the column names needs to match 'domain', in order to be linked to the data input. Currently only supporting time-invariant domain-level covariates.} +\item{domain.size}{Data frame of domain sizes. One of the column names needs to match the name of the domain variable, in order to be linked to the data input and there must be a column names 'size' containing domain sizes.} -\item{domain.size}{domain size data frame. One of the column names needs to match 'domain' in order to be linked to the data input and there must be a size column containing domain sizes.} +\item{transform}{Optional transformation applied to the direct estimates before fitting area level model. The default option is no transformation, but logit and log are implemented.} -\item{pc.u}{hyperparameter U for the PC prior on precisions.} +\item{pc.u}{Hyperparameter U for the PC prior on precisions. See the INLA documentation for more details on the parameterization.} -\item{pc.alpha}{hyperparameter alpha for the PC prior on precisions.} +\item{pc.alpha}{Hyperparameter alpha for the PC prior on precisions.} -\item{pc.u.phi}{hyperparameter U for the PC prior on the mixture probability phi in BYM2 model.} +\item{pc.u.phi}{Hyperparameter U for the PC prior on the mixture probability phi in BYM2 model.} -\item{pc.alpha.phi}{hyperparameter alpha for the PC prior on the mixture probability phi in BYM2 model.} +\item{pc.alpha.phi}{Hyperparameter alpha for the PC prior on the mixture probability phi in BYM2 model.} -\item{CI}{the desired posterior credible interval to calculate} +\item{level}{The specified level for the posterior credible intervals} -\item{n.sample}{number of draws from posterior used to compute summaries} +\item{n.sample}{Number of draws from posterior used to compute summaries} -\item{var.tol}{tolerance parameter; if variance of an area's direct estimator is below this value, that direct estimator is dropped from model} +\item{var.tol}{Tolerance parameter; if variance of an area's direct estimator is below this value, that direct estimator is dropped from model} + +\item{return.samples}{If TRUE, return matrix of posterior samples of area level quantities} } \value{ -A list with elements -\item{direct.est}{direct estimates} -\item{s.dir.iid.fit}{fitted INLA object for iid domain effects model} -\item{s.dir.iid.est}{non-spatial smoothed estimates} -\item{s.dir.sp.fit}{fitted INLA object for spatial domain effects model} -\item{s.dir.sp.est}{spatially smoothed estimates (if adjacency matrix provided)} +A svysae object } \description{ -Generates small area estimates by smoothing direct estimates using an area -level model +Generates small area estimates by smoothing direct estimates using an +area level model } \examples{ \dontrun{ -library(survey) data(DemoData2) data(DemoMap2) des0 <- svydesign(ids = ~clustid+id, strata = ~strata, @@ -76,40 +73,47 @@ des0 <- svydesign(ids = ~clustid+id, strata = ~strata, Xmat <- aggregate(age~region, data = DemoData2, FUN = mean) EXAMPLE 1: Continuous response model -cts.res <- smoothArea(tobacco.use ~ 1, domain = ~region, - Amat = DemoMap2$Amat, design = des0, +cts.res <- smoothArea(tobacco.use ~ 1, + domain = ~region, + design = des0, + adj.mat = DemoMap2$Amat, pc.u = 1, pc.alpha = 0.01, pc.u.phi = 0.5, pc.alpha.phi = 2/3) - + EXAMPLE 2: Including area level covariates -cts.cov.res <- smoothArea(tobacco.use ~ age, domain = ~region, - Amat = DemoMap2$Amat, design = des0, - X.area = Xmat, +cts.cov.res <- smoothArea(tobacco.use ~ age, + domain = ~region, + design = des0, + adj.mat = DemoMap2$Amat, + X.domain = Xmat, pc.u = 1, pc.alpha = 0.01, pc.u.phi = 0.5, pc.alpha.phi = 2/3) - + EXAMPLE 3: Binary response model -bin.res <- smoothArea(tobacco.use ~ 1, domain = ~region, - responseType = "binary", - Amat = DemoMap2$Amat, design = des0, +bin.res <- smoothArea(tobacco.use ~ 1, + domain = ~region, + design = des0, + adj.mat = DemoMap2$Amat, + transform = "logit", pc.u = 1, pc.alpha = 0.01, pc.u.phi = 0.5, pc.alpha.phi = 2/3) - + EXAMPLE 4: Including area level covariates in binary response model -bin.cov.res <- smoothArea(tobacco.use ~ age, domain = ~region, - responseType = "binary", - Amat = DemoMap2$Amat, design = des0, - X.area = Xmat, +bin.cov.res <- smoothArea(tobacco.use ~ age, + domain = ~region, + design = des0, + adj.mat = DemoMap2$Amat, + transform = "logit", + X.domain = Xmat, pc.u = 1, pc.alpha = 0.01, pc.u.phi = 0.5, pc.alpha.phi = 2/3) } - } diff --git a/man/smoothUnit.Rd b/man/smoothUnit.Rd index 4cdc49d..d91b72b 100644 --- a/man/smoothUnit.Rd +++ b/man/smoothUnit.Rd @@ -2,64 +2,63 @@ % Please edit documentation in R/smoothUnit.R \name{smoothUnit} \alias{smoothUnit} -\title{Smooth via unit level model} +\title{Smooth via basic unit level model} \usage{ smoothUnit( formula, domain, design, family = c("gaussian", "binomial")[1], - Amat = NULL, X.pop = NULL, + adj.mat = NULL, domain.size = NULL, pc.u = 1, pc.alpha = 0.01, pc.u.phi = 0.5, pc.alpha.phi = 2/3, - CI = 0.95, - n.sample = 250 + level = 0.95, + n.sample = 250, + return.samples = F ) } \arguments{ -\item{formula}{an object of class "formula" describing the model to be fitted.} +\item{formula}{An object of class 'formula' describing the model to be fitted.} -\item{domain}{formula specifying variable containing domain labels} +\item{domain}{One-sided formula specifying factors containing domain labels} -\item{design}{an object of class "svydesign" containing the data for the model} +\item{design}{An object of class "svydesign" containing the data for the model} \item{family}{of the response variable, currently supports 'binomial' (default with logit link function) or 'gaussian'.} -\item{Amat}{Adjacency matrix for the regions. If set to NULL, the IID spatial effect will be used.} +\item{X.pop}{Data frame of population unit-level covariates. One of the column name needs to match the domain specified, in order to be linked to the data input. Currently only supporting time-invariant covariates.} -\item{X.pop}{unit-level covariates data frame. One of the column name needs to match the domain specified, in order to be linked to the data input. Currently only supporting time-invariant domain-level covariates.} +\item{adj.mat}{Adjacency matrix with rownames matching the domain labels. If set to NULL, the IID spatial effect will be used.} -\item{domain.size}{Domain size data frame. One of the column name needs to match the domain specified, in order to be linked to the data input and there must be a size column containing domain sizes.} +\item{domain.size}{Data frame of domain sizes. One of the column names needs to match the name of the domain variable, in order to be linked to the data input and there must be a column names 'size' containing domain sizes. The default option is no transformation, but logit and log are implemented.} -\item{pc.u}{hyperparameter U for the PC prior on precisions.} +\item{pc.u}{Hyperparameter U for the PC prior on precisions. See the INLA documentation for more details on the parameterization.} -\item{pc.alpha}{hyperparameter alpha for the PC prior on precisions.} +\item{pc.alpha}{Hyperparameter alpha for the PC prior on precisions.} -\item{pc.u.phi}{hyperparameter U for the PC prior on the mixture probability phi in BYM2 model.} +\item{pc.u.phi}{Hyperparameter U for the PC prior on the mixture probability phi in BYM2 model.} -\item{pc.alpha.phi}{hyperparameter alpha for the PC prior on the mixture probability phi in BYM2 model.} +\item{pc.alpha.phi}{Hyperparameter alpha for the PC prior on the mixture probability phi in BYM2 model.} -\item{CI}{the desired posterior credible interval to calculate} +\item{level}{The specified level for the posterior credible intervals} -\item{n.sample}{number of draws from posterior used to compute summaries} +\item{n.sample}{Number of draws from posterior used to compute summaries} + +\item{return.samples}{If TRUE, return matrix of posterior samples of area level quantities} } \value{ -A list with elements -\item{direct.est}{direct estimates} -\item{model.fit}{fitted INLA object for iid domain effects model} -\item{model.est}{smoothed estimates} +A svysae object } \description{ -Generates small area estimates by smoothing direct estimates using a unit -level model +Generates small area estimates by smoothing direct estimates using a basic +unit level model } \examples{ \dontrun{ -library(survey) data(DemoData2) data(DemoMap2) des0 <- svydesign(ids = ~clustid+id, strata = ~strata,