Merge branch 'pr/34' and clean up

# Conflicts: # DESCRIPTION # NAMESPACE # NEWS.md # R/fitINLA.R # R/fitINLA2.R # R/fitspace.R # R/kenyaPopulationData.R # R/plot_projINLA.R # R/popGrid.R # R/projINLA.R # R/projKenya.R # R/simPop.R # R/simSPDE.R # R/summary.R # README.md # data/kenyaPopulationData.rda # inst/doc/u5mr-vignette.html # inst/doc/u5mr-vignette.html.asis # man/aggPixelPreds.Rd # man/aggPop.Rd # man/getSmoothed.Rd # man/kenyaPopulationData.Rd # man/print.SUMMERmodel.Rd # man/print.SUMMERmodel.svy.Rd # man/print.SUMMERprojlist.Rd # man/projKenya.Rd # man/simPop.Rd # man/simPopInternal.Rd # man/simSPDE.Rd # man/smoothCluster.Rd # man/smoothDirect.Rd # man/smoothSurvey.Rd # man/summary.SUMMERmodel.Rd # man/summary.SUMMERmodel.svy.Rd
richardli · Oct 25, 2023 · 86c1a82 · 86c1a82
2 parents 64ea980 + a6cdbaf
commit 86c1a82
Show file tree

Hide file tree

Showing 28 changed files with 5,410 additions and 8 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: SUMMER
 Type: Package
 Title: Small-Area-Estimation Unit/Area Models and Methods for Estimation in R
-Version: 1.3.1
-Date: 2022-07-24
+Version: 1.4.0
+Date: 2023-10-24
 Authors@R: c(
  person(given = "Zehang R", family = "Li", email= "[email protected]", role = c("cre","aut")),
  person(given = "Bryan D", family = "Martin", email= "[email protected]", role = "aut"), 
@@ -15,17 +15,17 @@ Authors@R: c(
  person(given = "Geir-Arne", family = "Fuglstad", email = "[email protected]", role = "aut"),
  person(given = "Andrea", family = "Riebler", email = "[email protected]", role = "aut")
  )
-Description: Provides methods for spatial and spatio-temporal smoothing of demographic and health indicators using survey data, with particular focus on estimating and projecting under-five mortality rates, described in Mercer et al. (2015) <doi:10.1214/15-AOAS872>, Li et al. (2019) <doi:10.1371/journal.pone.0210645> and Li et al. (2020) <arXiv:2007.05117>. 
+Description: Provides methods for spatial and spatio-temporal smoothing of demographic and health indicators using survey data, with particular focus on estimating and projecting under-five mortality rates, described in Mercer et al. (2015) <doi:10.1214/15-AOAS872>, Li et al. (2019) <doi:10.1371/journal.pone.0210645>, Wu et al. (DHS Spatial Analysis Reports No. 21, 2021), and Li et al. (2023) <arXiv:2007.05117>. 
 URL: https://github.com/richardli/SUMMER
 BugReports: https://github.com/richardli/SUMMER/issues
 Depends: R (>= 3.5)
 License: GPL (>= 2)
-Imports: survey, stats, spdep, survival, ggplot2, scales, utils, Matrix, reshape2, viridis, sp, sf, shadowtext, ggridges, methods, data.table, RColorBrewer, grDevices
+Imports: survey, stats, spdep, survival, ggplot2, scales, utils, Matrix, reshape2, viridis, sp, sf, shadowtext, ggridges, methods, data.table, RColorBrewer, grDevices, fields, terra
 Encoding: UTF-8
 LazyData: true
 RoxygenNote: 7.2.3
 Additional_repositories: https://inla.r-inla-download.org/R/testing/
-Suggests: INLA, knitr, rmarkdown, readstata13, patchwork, rdhs, R.rsp, fields, sae, dplyr, tidyr
+Suggests: INLA, knitr, rmarkdown, readstata13, patchwork, rdhs, R.rsp, sae, dplyr, tidyr, raster
 VignetteBuilder: R.rsp, knitr
 NeedsCompilation: no
 Config/build/clean-inst-doc: FALSE
diff --git a/NAMESPACE b/NAMESPACE
@@ -9,26 +9,40 @@ S3method(summary,SUMMERmodel.svy)
 S3method(summary,SUMMERprojlist)
 export(Benchmark)
 export(ChangeRegion)
+export(adjustPopMat)
 export(aggregateSurvey)
+export(areaPopToArea)
+export(calibrateByRegion)
 export(expit)
 export(fitGeneric)
 export(fitINLA)
 export(fitINLA2)
 export(getAdjusted)
 export(getAmat)
+export(getAreaName)
 export(getBirths)
 export(getCounts)
 export(getDiag)
 export(getDirect)
 export(getDirectList)
+export(getPoppsub)
 export(getSmoothed)
 export(getSmoothedDensity)
 export(hatchPlot)
 export(logit)
+export(logitNormMean)
+export(makePopIntegrationTab)
 export(mapPlot)
 export(mapPoints)
+export(pixelPopToArea)
+export(poppRegionFromPopMat)
+export(projKenya)
 export(ridgePlot)
 export(rst)
+export(setThresholdsByRegion)
+export(simPopCustom)
+export(simPopSPDE)
+export(simSPDE)
 export(simhyper)
 export(smoothArea)
 export(smoothCluster)
@@ -40,6 +54,10 @@ import(Matrix)
 import(RColorBrewer)
 import(data.table)
 importFrom(Matrix,Diagonal)
+importFrom(fields,fields.rdist.near)
+importFrom(fields,in.poly)
+importFrom(fields,make.surface.grid)
+importFrom(fields,rdist)
 importFrom(ggplot2,aes)
 importFrom(ggplot2,geom_errorbar)
 importFrom(ggplot2,geom_line)
@@ -60,12 +78,19 @@ importFrom(sf,st_centroid)
 importFrom(sf,st_coordinates)
 importFrom(sf,st_crs)
 importFrom(shadowtext,geom_shadowtext)
+importFrom(sp,"proj4string<-")
+importFrom(sp,CRS)
+importFrom(sp,SpatialPoints)
+importFrom(sp,as.SpatialPolygons.PolygonsList)
+importFrom(sp,coordinates)
+importFrom(sp,over)
 importFrom(sp,plot)
 importFrom(sp,proj4string)
 importFrom(spdep,nb2mat)
 importFrom(spdep,poly2nb)
 importFrom(stats,aggregate)
 importFrom(stats,as.formula)
+importFrom(stats,cor)
 importFrom(stats,dgamma)
 importFrom(stats,dnorm)
 importFrom(stats,median)
@@ -79,4 +104,6 @@ importFrom(stats,runif)
 importFrom(stats,sd)
 importFrom(stats,setNames)
 importFrom(stats,var)
+importFrom(terra,extract)
+importFrom(terra,gdal)
 importFrom(viridis,viridis_pal)
diff --git a/NEWS.md b/NEWS.md
@@ -1,4 +1,16 @@
 # SUMMER - changes
+
+
+Version 1.4.0
+==========================
++ New major functions for population and prevalence simulation based on population frame and population density information, along with methods for aggregating pixel level prevalences and populations to the areal level. 
+
+
+Version 1.3.0
+==========================
++ New SAE functions and vignette.
+
+
 Version 1.2.1 
 ==========================
 + `smoothSurvey` now can return posterior draws.

diff --git a/R/.DS_Store b/R/.DS_Store
diff --git a/R/getAreaName.R b/R/getAreaName.R
@@ -0,0 +1,142 @@
+#' Determines which administrative areas contain the given points
+#' 
+#' For any points not in an area, they are assigned the nearest area using 
+#' fields::fields.rdist.near or fields::rdist depending on the number of points 
+#' and the maximum memory in bytes with a warning.
+#' 
+#' @param pts 2 column matrix of lon/lat coordinates
+#' @param shapefile A SpatialPolygonsDataFrame object
+#' @param areaNameVar The column name in \code{slot(shapefile, "data")} 
+#' corresponding to the area level of interest
+#' @param delta Argument passed to fields::fields.rdist.near in fields package
+#' @param mean.neighbor Argument passed to fields::fields.rdist.near in fields 
+#' package
+#' @param maxBytes Maximum allowed memory in bytes (default is 3Gb). Determines 
+#' whether to call fields::fields.rdist.near which saves memory bute requires 
+#' delta and mean.neighbor inputs to be specified or fields::rdist.
+#' 
+#' @details delta and mean.neighbor arguments only used when some points 
+#' are not in areas, perhaps due to inconsistencies in shapefiles.
+#' 
+#' @author John Paige
+#' 
+#' @seealso \code{\link{projKenya}}, \code{\link[fields]{fields.rdist.near}}
+#' 
+#' @examples 
+#' \dontrun{
+#' # download Kenya GADM shapefiles from SUMMERdata github repository
+#' githubURL <- "https://github.com/paigejo/SUMMERdata/blob/main/data/kenyaMaps.rda?raw=true"
+#' download.file(githubURL,"kenyaMaps.rda")
+#' 
+#' # load it in
+#' load("kenyaMaps.rda")
+#' 
+#' # use the shapefile data to see what Admin1 and 2 areas the 
+#' # points (0, 37) and (0.5, 38) are in
+#' # (these are longitude/latitude coordinates)
+#' pts = cbind(c(37, 38), c(0, .5))
+#' head(slot(adm1, "data"))
+#' admin1Areas = getAreaName(pts, adm1, "NAME_1")
+#' admin2Areas = getAreaName(pts, adm2, "NAME_2")
+#' }
+#' 
+#' @return A list of area IDs, area names, whether or not 
+#' points are in multiple areas, and whether or not points 
+#' are in no areas and assigned to the nearest one.
+#' 
+#' @importFrom fields fields.rdist.near
+#' @importFrom fields rdist
+#' @importFrom fields in.poly
+#' @export
+getAreaName = function(pts, shapefile, areaNameVar="NAME_1", 
+ delta=.05, mean.neighbor=50, maxBytes=3*2^30) {
+ # require(fields)
+
+ if(nrow(pts) == 1) {
+ # must work around the fact that fields::in.poly() doesn't work for matrix with 1 row
+ pts = rbind(pts, pts)
+ oneRow = TRUE
+ } else {
+ oneRow = FALSE
+ }
+
+ # data(kenyaMaps, envir = environment())
+ areaNames = shapefile@data[[areaNameVar]]
+
+ # get area map polygons and set helper function for testing if pts are in the constituencies
+ polys = shapefile@polygons
+ inRegion = function(i) {
+ countyPolys = polys[[i]]@Polygons
+ inside = sapply(1:length(countyPolys), function(x) {
+ fields::in.poly(pts, countyPolys[[x]]@coords, inflation=0)
+ })
+ insideAny = apply(inside, 1, any)
+
+ return(insideAny*i)
+ }
+ out = sapply(1:length(polys), inRegion)
+ if(oneRow) {
+ out = matrix(out[1,], nrow=1)
+ }
+ multipleRegs = apply(out, 1, function(vals) {sum(vals != 0) > 1})
+ areaID = apply(out, 1, function(vals) {match(1, vals != 0)})
+ areaNameVec = areaNames[areaID]
+
+ # for all points not in a area polygon, determine the nearest area
+ insideAny = apply(out, 1, function(x) {any(x != 0)})
+ if(any(!insideAny)) {
+ warning("Some points not inside any areas. Assigning them to nearest area")
+ problemPointsI = which(!insideAny)
+
+ bytesUsed = length(problemPointsI) * nrow(pts)
+ if(bytesUsed > maxBytes) {
+ # get nearby points (points within delta lon/lat units), remove self matches
+ nearbyPoints = fields::fields.rdist.near(matrix(pts[problemPointsI,], ncol=2), pts, 
+ delta=delta, mean.neighbor=mean.neighbor)
+ selfI = nearbyPoints$ra == 0
+ nearbyPoints$ind = nearbyPoints$ind[!selfI,]
+ nearbyPoints$ra = nearbyPoints$ra[!selfI]
+ nearbyI = lapply(sort(unique(nearbyPoints$ind[,1])), function(x) {
+ nearbyPoints$ind[nearbyPoints$ind[,1] == x,2]
+ })
+ } else {
+ # get all points, remove self matches
+ dists = fields::rdist(matrix(pts[problemPointsI,], ncol=2), pts)
+ dists[cbind(1:length(problemPointsI), problemPointsI)] = Inf
+ nearbyI = apply(dists, 1, which.min)
+ }
+
+ # get nearby constituencies, counties, and distances
+ nearbyAreas = lapply(nearbyI, function(x) {areaNameVec[x]})
+ nearbyLengths = sapply(nearbyI, function(x) {length(x)})
+
+ if(bytesUsed > maxBytes) {
+ nearbyDistances = c()
+ startI = 1
+ for(i in 1:length(nearbyI)) {
+ endI = startI + nearbyLengths[i] - 1
+ nearbyDistances = c(nearbyDistances, list(nearbyPoints$ra[startI:endI]))
+ startI = endI + 1
+ }
+ } else {
+ nearbyDistances = dists[cbind(1:nrow(dists), nearbyI)]
+ }
+
+
+ # sort nearby constituencies and indices by distance
+ for(i in 1:length(nearbyI)) {
+ thisDistances = nearbyDistances[[i]]
+ sortI = sort(thisDistances, index.return=TRUE)$ix
+ nearbyDistances[[i]] = nearbyDistances[[i]][sortI]
+ nearbyAreas[[i]] = nearbyAreas[[i]][sortI]
+ nearbyI[[i]] = nearbyI[[i]][sortI]
+ }
+
+ # get nearest non-NA area and assign it
+ closestArea = sapply(nearbyAreas, function(x) {x[match(TRUE, !is.na(x))]})
+ areaNameVec[problemPointsI] = closestArea
+ }
+
+ list(areaID=areaID, areaNames=areaNameVec, 
+ multipleRegs=multipleRegs, notInAnyAreas=!insideAny)
+}
diff --git a/R/kenyaPopulationData.R b/R/kenyaPopulationData.R
@@ -0,0 +1,27 @@
+#' Kenya 2009 Census Frame and Related Datasets
+#' 
+#' Datasets related to the 2009 census frame for Kenya based on the 2009 Kenya Population and Housing 
+#' Census. General population totals are estimated for 2014. Based on 2014 population density 
+#' estimates interpolated with exponential growth rate between 2010 and 2015 from WorldPop data.
+#'
+#' @format A number of data.frames with information about the 2009 Kenya Population and Housing Census and the population in Kenya at the time of the 2014 Demographic Health Survey. Some of the 
+#' data.frames have been adjusted to contain information about neonatals born from 2010-2014 rather than general population in 2014. 
+#' The dataset names are: easpaKenya, easpaKenyaNeonatal, 
+#' poppaKenya, and poppsubKenya.
+#' @references Kenya National Bureau of Statistics, Ministry of Health/Kenya, National AIDS Control Council/Kenya, Kenya Medical Research Institute, and National Council For Population And Development/Kenya, 2015. Kenya Demographic and Health Survey 2014. Rockville, Maryland, USA. URL: http://dhsprogram.com/pubs/pdf/FR308/FR308.pdf.
+#' @references Stevens, F.R., Gaughan, A.E., Linard, C., Tatem, A.J., 2015. Disaggregat- ing census data for population mapping using random forests with remotely-sensed and ancillary data. PloS One 10, e0107042.
+#' @references Tatem, A.J., 2017. WorldPop, open data for spatial demography. Scientific Data 4.
+#' @source <http://dhsprogram.com/pubs/pdf/FR308/FR308.pdf>
+#' @docType data
+#' @name kenyaPopulationData
+#' @usage data(kenyaPopulationData)
+"easpaKenya"
+
+#' @rdname kenyaPopulationData
+"easpaKenyaNeonatal"
+
+#' @rdname kenyaPopulationData
+"poppaKenya"
+
+#' @rdname kenyaPopulationData
+"poppsubKenya"
diff --git a/R/logitNormMean.R b/R/logitNormMean.R
@@ -0,0 +1,57 @@
+#' Calculate the mean of a distribution whose 
+#' logit is Gaussian
+#' 
+#' Adapted from logitnorm package. Calculates the mean of a distribution whose 
+#' logit is Gaussian. Each row of muSigmaMat is a mean and standard deviation 
+#' on the logit scale. 
+#' 
+#' @param muSigmaMat An n x 2 matrix where each row is \eqn{\mu}{mu} and \eqn{\sigma}{sigma} 
+#' on the logit scale for an independent random variable.
+#' @param logisticApprox Whether or not to use logistic approximation to speed 
+#' up computation. See details for more information.
+#' @param ... More arguments, passed to \code{integrate} function
+#' 
+#' @return A vector of expectations of the specified random variables
+#' 
+#' @author John Paige
+#' 
+#' @details If \eqn{\mbox{logit}(Y) \sim N(\mu, \sigma^2)}{logit(Y) ~ N(mu, sigma^2)}, 
+#' This function calculates \eqn{E[Y]}{E[Y]} via either numerical integration or by 
+#' assuming that Y follows a logistic distribution. Under this approximation, setting 
+#' \eqn{k = 16 \sqrt(3) / (15 \pi)}{k = 16 * sqrt(3) / (15 * pi)}, we approximate 
+#' the expectation as:
+#' \deqn{E[Y] = expit(\mu / \sqrt(1 + k^2 \sigma^2))}{E[Y] = expit(mu / sqrt(1 + k^2 * sigma^2))}.
+#' The above logistic approximation speeds up the computation, but also sacrifices 
+#' some accuracy.
+#' 
+#' @examples
+#' mus = c(-5, 0, 5)
+#' sigmas = rep(1, 3)
+#' logitNormMean(cbind(mus, sigmas))
+#' logitNormMean(cbind(mus, sigmas), TRUE)
+#' 
+#' @export
+logitNormMean = function(muSigmaMat, logisticApprox=FALSE, ...) {
+ if(length(muSigmaMat) > 2) {
+ apply(muSigmaMat, 1, logitNormMean, logisticApprox=logisticApprox, ...)
+ }
+ else {
+ mu = muSigmaMat[1]
+ sigma = muSigmaMat[2]
+ if(sigma == 0)
+ SUMMER::expit(mu)
+ else {
+ if(any(is.na(c(mu, sigma))))
+ NA
+ else if(!logisticApprox) {
+ # numerically calculate the mean
+ fExp <- function(x) exp(stats::plogis(x, log.p=TRUE) + stats::dnorm(x, mean = mu, sd = sigma, log=TRUE))
+ stats::integrate(fExp, mu-10*sigma, mu+10*sigma, abs.tol = 0, ...)$value
+ } else {
+ # use logistic approximation
+ k = 16 * sqrt(3) / (15 * pi)
+ SUMMER::expit(mu / sqrt(1 + k^2 * sigma^2))
+ }
+ }
+ }
+}