From 2320013212cb8758d1b237a6d858b750a6fa088f Mon Sep 17 00:00:00 2001 From: Peter Date: Tue, 7 Nov 2023 12:38:16 -0800 Subject: [PATCH] updates generic SAE functions --- .DS_Store | Bin 0 -> 6148 bytes R/smoothArea.R | 58 +++++++++++++++++++++++++++++++++++----------- R/smoothUnit.R | 51 ++++++++++++++++++++++++++++++---------- man/smoothUnit.Rd | 5 +++- 4 files changed, 87 insertions(+), 27 deletions(-) create mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..b6519d7f2751aecd5c7d74717e28c1df4c85454a GIT binary patch literal 6148 zcmeHK%}T>S5Z-O8O(;SR3OxqA7OX)i#Y>3#0!H+pQWFwtFlI}WHiuHkSzpK}@p+ut z-4seIcoMNQVfLGypDg=r*v&G=xHpSV8M7E;1}I{~gl3Ci9Cbq)+JngDJ7QMA6SF)l zH?qm`7a72{GufOaY|JuPxz+mRlQ0z_L_fkwn&d^l|H&K8*4}=*ZMHk@NB>!rei0P2 z+zY1n*t-!T4Qo3HZ46L z88|)r%o!|u_G0Mt!5%uxrD+@;pPXNhUeec0yeT?4e0fq1G}iDA#+I4Q{b`npYz)!5 zvaTQ`28aP-U{@H>`=8O-l?l^4i2-8Z#|+^9V1pvM8Y_iz>wpG-A2HrSL;)M`5{R}& zS7W6RA|PCq0;*E3uNYjFgWuM9uEt8CDra1d4DZn+bA3bMa&++9GM#Z(A+^K+F|f`+ zQ*|vo|F6Ek|F0KOj~E~Z{uKkfb?IMvFeQDqZcGl(S{w8U6b17tg{u-U var.tol, mod.dat$var, NA) mod.dat$prec <- 1 / mod.dat$var - mod.dat$domain.id <- 1:nrow(mod.dat) - if(!is.null(X.domain)) { + + # identify domains for estimation (including estimates for regions in X.domain or adj.mat) + if (!is.null(X.domain)) { + if (!is.null(adj.mat) & !setequal(X.domain[[domain.var]], rownames(adj.mat))) { + stop("Domains in X.domain do not match domains in adj.mat.") + } + if (any(is.na(match(X.domain[[domain.var]], mod.dat$domain)))) { + warning(cat("There are domains in X.domain not in design/direct estimates.", + "\nGenerating estimates for all domains in X.domain.")) + } mod.X.domain <- X.domain mod.X.domain$domain <- X.domain[[domain.var]] mod.X.domain <- mod.X.domain[, names(mod.X.domain) != domain.var] - mod.dat <- merge(mod.dat, mod.X.domain, by = "domain") + mod.dat <- merge(mod.dat, mod.X.domain, by = "domain", all.y = T) + direct.est <- + merge(direct.est, data.frame(domain = mod.X.domain$domain), + by = "domain", all.y = T) + direct.est$method = "Direct" + } else if(!is.null(adj.mat)) { + if (any(is.na(match(mod.dat$domain, rownames(adj.mat))))) { + stop("Domains in adj.mat do not match domains in design/direct estimates.") + } + if (any(is.na(match(rownames(adj.mat), mod.dat$domain)))) { + warning(cat("There are domains in adj.mat not in design/direct estimates.", + "\nGenerating estimates for all domains in adj.mat.")) + } + mod.dat <- + merge(mod.dat, data.frame(domain = rownames(adj.mat)), + by = "domain", all.y = T) + direct.est <- + merge(direct.est, data.frame(domain = rownames(adj.mat)), + by = "domain", all.y = T) + direct.est$method = "Direct" } - mod.dat <- mod.dat[match(1:nrow(mod.dat), mod.dat$domain.id), ] - + mod.dat$domain.id <- 1:nrow(mod.dat) mm.domain <- model.matrix(cov.frm, mod.dat) - + out$direct.est <- direct.est + attr(out, "domain.names") <- sort(direct.est$domain) + attr(out, "method.names") <- c("direct.est") + transform <- match.arg(transform) attr(out, "transform") <- transform @@ -241,17 +267,21 @@ smoothArea <- function(formula, upper = apply(iid.model.mat, 1, function(x) quantile(x, 1-(1-level)/2, na.rm = T)), method = paste0("Area level model: IID")) - attr(out, "method.names") <- c(attr(out, "method.names"), "iid.model.est") - attr(out, "inla.fitted") <- c(attr(out, "inla.fitted"), "iid.model") - + + out$iid.model.est <- + out$iid.model.est[match(out$direct.est$domain, out$iid.model.est$domain),] + rownames(out$iid.model.est) <- NULL if (return.samples) { out$iid.model.sample <- iid.model.mat } else { out$iid.model.sample <- NULL } + attr(out, "method.names") <- c(attr(out, "method.names"), "iid.model.est") + attr(out, "inla.fitted") <- c(attr(out, "inla.fitted"), "iid.model") # SMOOTHED DIRECT w/ BYM2 RE if (!is.null(adj.mat)) { + mod.dat$domain.id <- match(mod.dat$domain, rownames(adj.mat)) mod.dat <- mod.dat[match(1:nrow(mod.dat), mod.dat$domain.id), ] mm.domain <- model.matrix(cov.frm, mod.dat) @@ -259,7 +289,6 @@ smoothArea <- function(formula, prec = list(prior = "pc.prec", param = c(pc.u , pc.alpha)), phi = list(prior = 'pc', param = c(pc.u.phi , pc.alpha.phi)) ) - # prepare formula for INLA bym2.model.ftxt <- paste0(s.dir.ftxt, @@ -300,6 +329,7 @@ smoothArea <- function(formula, method = paste0("Area level model: BYM2")) out$bym2.model.est <- out$bym2.model.est[match(out$direct.est$domain, out$bym2.model.est$domain),] + rownames(out$bym2.model.est) <- NULL if (return.samples) { out$bym2.model.sample <- bym2.model.mat } else { diff --git a/R/smoothUnit.R b/R/smoothUnit.R index 4b27653..962292b 100644 --- a/R/smoothUnit.R +++ b/R/smoothUnit.R @@ -17,6 +17,7 @@ #' @param level The specified level for the posterior credible intervals #' @param n.sample Number of draws from posterior used to compute summaries #' @param return.samples If TRUE, return matrix of posterior samples of area level quantities +#' @param X.pop.weights Optional vector of weights to use when aggregating unit level predictions #' #' @return A svysae object #' @@ -53,7 +54,8 @@ smoothUnit <- function(formula, pc.u.phi = 0.5, pc.alpha.phi = 2/3, level = .95, n.sample = 250, - return.samples = F) { + return.samples = F, + X.pop.weights = NULL) { if (design$has.strata) { warning("This model does not account for stratification yet.") @@ -105,11 +107,31 @@ smoothUnit <- function(formula, lower = direct.est$mean + qnorm((1-level)/2) * sqrt(direct.est$var), upper = direct.est$mean + qnorm(1 - (1-level)/2) * sqrt(direct.est$var), method = paste0("Direct")) + + + if (!is.null(adj.mat) & !setequal(X.pop[[domain.var]], rownames(adj.mat))) { + stop("Domains in X.pop do not match domains in adj.mat.") + } + if (any(is.na(match(X.pop[[domain.var]], direct.est$domain)))) { + warning(cat("There are domains in X.pop not in design/direct estimates.", + "\nGenerating estimates for all domains in X.pop.")) + } + # if no adjacency matrix matches, take domain names from X.pop + domain.table <- data.frame(domain = unique(as.character(X.pop[[domain.var]]))) + # if adjacency matrix provided, take domain names from row names + if (!is.null(adj.mat)) { + domain.table <- data.frame(domain = rownames(adj.mat)) + } + + direct.est <- + merge(direct.est, data.frame(domain = domain.table$domain), + by = "domain", all.y = T) + direct.est$method = "Direct" + out$direct.est <- direct.est attr(out, "domain.names") <- sort(direct.est$domain) attr(out, "method.names") <- c("direct.est") - # UNIT LEVEL MODEL ----------------------------------------------------------- mf <- model.frame(formula, design$variables) resp <- model.response(mf, "numeric") @@ -124,13 +146,7 @@ smoothUnit <- function(formula, ) mod.dat <- cbind(mod.dat, model.matrix(cov.frm, design$variables)) - # if no adjacency matrix, take domain names from X.pop - domain.table <- data.frame(domain = unique(as.character(pop.dat$domain))) - # if adjacency matrix provided, take domain names from row names - if (!is.null(adj.mat)) { - domain.table <- data.frame(domain = rownames(adj.mat)) - } # domain labels as indexes for use in INLA domain.table$domain.struct <- seq_len(nrow(domain.table)) @@ -186,13 +202,20 @@ smoothUnit <- function(formula, if (family == "binomial") { pop.unit.ests <- expit(pop.unit.ests) } - area.ests <- aggregate(pop.unit.ests, list(domain = pop.dat$domain.struct), mean) - return(area.ests[match(1:length(re.idx), area.ests[, 1]), 2]) + if (!is.null(X.pop.weights)) { + area.ests <- + aggregate(pop.unit.ests * X.pop.weights, list(domain = pop.dat$domain.struct), sum) + } else { + area.ests <- + aggregate(pop.unit.ests, list(domain = pop.dat$domain.struct), mean) + } + + return(area.ests[match(1:nrow(domain.table), area.ests[, 1]), 2]) } est.mat <- do.call(cbind, lapply(samp.all, summary.sample)) out[[paste0(model.method, ".fit")]] <- fit out[[paste0(model.method, ".est")]] <- - data.frame(domain = unique(pop.dat$domain), + data.frame(domain = domain.table$domain, mean = rowMeans(est.mat), median = apply(est.mat, 1, function(x) median(x, na.rm = T)), @@ -201,10 +224,14 @@ smoothUnit <- function(formula, function(x) quantile(x, (1-level)/2, na.rm = T)), upper = apply(est.mat, 1, function(x) quantile(x, 1-(1-level)/2, na.rm = T)), - method = paste0("Unit level model: IID")) + method = paste0("Unit level model: ", + ifelse(is.null(adj.mat), "IID", "BYM2"))) if (return.samples) { out[[paste0(model.method, ".sample")]] <- est.mat } + out[[paste0(model.method, ".est")]] <- + out[[paste0(model.method, ".est")]][match(out$direct.est$domain, + out[[paste0(model.method, ".est")]]$domain),] attr(out, "method.names") <- c(attr(out, "method.names"), paste0(model.method, ".est")) attr(out, "inla.fitted") <- c(attr(out, "inla.fitted"), model.method) diff --git a/man/smoothUnit.Rd b/man/smoothUnit.Rd index fa47810..14dd0e3 100644 --- a/man/smoothUnit.Rd +++ b/man/smoothUnit.Rd @@ -18,7 +18,8 @@ smoothUnit( pc.alpha.phi = 2/3, level = 0.95, n.sample = 250, - return.samples = F + return.samples = F, + X.pop.weights = NULL ) } \arguments{ @@ -49,6 +50,8 @@ smoothUnit( \item{n.sample}{Number of draws from posterior used to compute summaries} \item{return.samples}{If TRUE, return matrix of posterior samples of area level quantities} + +\item{X.pop.weights}{Optional vector of weights to use when aggregating unit level predictions} } \value{ A svysae object