Skip to content

Commit

Permalink
Merge branch 'pr/34' and clean up
Browse files Browse the repository at this point in the history
# Conflicts:
#	DESCRIPTION
#	NAMESPACE
#	NEWS.md
#	R/fitINLA.R
#	R/fitINLA2.R
#	R/fitspace.R
#	R/kenyaPopulationData.R
#	R/plot_projINLA.R
#	R/popGrid.R
#	R/projINLA.R
#	R/projKenya.R
#	R/simPop.R
#	R/simSPDE.R
#	R/summary.R
#	README.md
#	data/kenyaPopulationData.rda
#	inst/doc/u5mr-vignette.html
#	inst/doc/u5mr-vignette.html.asis
#	man/aggPixelPreds.Rd
#	man/aggPop.Rd
#	man/getSmoothed.Rd
#	man/kenyaPopulationData.Rd
#	man/print.SUMMERmodel.Rd
#	man/print.SUMMERmodel.svy.Rd
#	man/print.SUMMERprojlist.Rd
#	man/projKenya.Rd
#	man/simPop.Rd
#	man/simPopInternal.Rd
#	man/simSPDE.Rd
#	man/smoothCluster.Rd
#	man/smoothDirect.Rd
#	man/smoothSurvey.Rd
#	man/summary.SUMMERmodel.Rd
#	man/summary.SUMMERmodel.svy.Rd
  • Loading branch information
richardli committed Oct 25, 2023
2 parents 64ea980 + a6cdbaf commit 86c1a82
Show file tree
Hide file tree
Showing 28 changed files with 5,410 additions and 8 deletions.
10 changes: 5 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: SUMMER
Type: Package
Title: Small-Area-Estimation Unit/Area Models and Methods for Estimation in R
Version: 1.3.1
Date: 2022-07-24
Version: 1.4.0
Date: 2023-10-24
Authors@R: c(
person(given = "Zehang R", family = "Li", email= "[email protected]", role = c("cre","aut")),
person(given = "Bryan D", family = "Martin", email= "[email protected]", role = "aut"),
Expand All @@ -15,17 +15,17 @@ Authors@R: c(
person(given = "Geir-Arne", family = "Fuglstad", email = "[email protected]", role = "aut"),
person(given = "Andrea", family = "Riebler", email = "[email protected]", role = "aut")
)
Description: Provides methods for spatial and spatio-temporal smoothing of demographic and health indicators using survey data, with particular focus on estimating and projecting under-five mortality rates, described in Mercer et al. (2015) <doi:10.1214/15-AOAS872>, Li et al. (2019) <doi:10.1371/journal.pone.0210645> and Li et al. (2020) <arXiv:2007.05117>.
Description: Provides methods for spatial and spatio-temporal smoothing of demographic and health indicators using survey data, with particular focus on estimating and projecting under-five mortality rates, described in Mercer et al. (2015) <doi:10.1214/15-AOAS872>, Li et al. (2019) <doi:10.1371/journal.pone.0210645>, Wu et al. (DHS Spatial Analysis Reports No. 21, 2021), and Li et al. (2023) <arXiv:2007.05117>.
URL: https://github.com/richardli/SUMMER
BugReports: https://github.com/richardli/SUMMER/issues
Depends: R (>= 3.5)
License: GPL (>= 2)
Imports: survey, stats, spdep, survival, ggplot2, scales, utils, Matrix, reshape2, viridis, sp, sf, shadowtext, ggridges, methods, data.table, RColorBrewer, grDevices
Imports: survey, stats, spdep, survival, ggplot2, scales, utils, Matrix, reshape2, viridis, sp, sf, shadowtext, ggridges, methods, data.table, RColorBrewer, grDevices, fields, terra
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.2.3
Additional_repositories: https://inla.r-inla-download.org/R/testing/
Suggests: INLA, knitr, rmarkdown, readstata13, patchwork, rdhs, R.rsp, fields, sae, dplyr, tidyr
Suggests: INLA, knitr, rmarkdown, readstata13, patchwork, rdhs, R.rsp, sae, dplyr, tidyr, raster
VignetteBuilder: R.rsp, knitr
NeedsCompilation: no
Config/build/clean-inst-doc: FALSE
27 changes: 27 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -9,26 +9,40 @@ S3method(summary,SUMMERmodel.svy)
S3method(summary,SUMMERprojlist)
export(Benchmark)
export(ChangeRegion)
export(adjustPopMat)
export(aggregateSurvey)
export(areaPopToArea)
export(calibrateByRegion)
export(expit)
export(fitGeneric)
export(fitINLA)
export(fitINLA2)
export(getAdjusted)
export(getAmat)
export(getAreaName)
export(getBirths)
export(getCounts)
export(getDiag)
export(getDirect)
export(getDirectList)
export(getPoppsub)
export(getSmoothed)
export(getSmoothedDensity)
export(hatchPlot)
export(logit)
export(logitNormMean)
export(makePopIntegrationTab)
export(mapPlot)
export(mapPoints)
export(pixelPopToArea)
export(poppRegionFromPopMat)
export(projKenya)
export(ridgePlot)
export(rst)
export(setThresholdsByRegion)
export(simPopCustom)
export(simPopSPDE)
export(simSPDE)
export(simhyper)
export(smoothArea)
export(smoothCluster)
Expand All @@ -40,6 +54,10 @@ import(Matrix)
import(RColorBrewer)
import(data.table)
importFrom(Matrix,Diagonal)
importFrom(fields,fields.rdist.near)
importFrom(fields,in.poly)
importFrom(fields,make.surface.grid)
importFrom(fields,rdist)
importFrom(ggplot2,aes)
importFrom(ggplot2,geom_errorbar)
importFrom(ggplot2,geom_line)
Expand All @@ -60,12 +78,19 @@ importFrom(sf,st_centroid)
importFrom(sf,st_coordinates)
importFrom(sf,st_crs)
importFrom(shadowtext,geom_shadowtext)
importFrom(sp,"proj4string<-")
importFrom(sp,CRS)
importFrom(sp,SpatialPoints)
importFrom(sp,as.SpatialPolygons.PolygonsList)
importFrom(sp,coordinates)
importFrom(sp,over)
importFrom(sp,plot)
importFrom(sp,proj4string)
importFrom(spdep,nb2mat)
importFrom(spdep,poly2nb)
importFrom(stats,aggregate)
importFrom(stats,as.formula)
importFrom(stats,cor)
importFrom(stats,dgamma)
importFrom(stats,dnorm)
importFrom(stats,median)
Expand All @@ -79,4 +104,6 @@ importFrom(stats,runif)
importFrom(stats,sd)
importFrom(stats,setNames)
importFrom(stats,var)
importFrom(terra,extract)
importFrom(terra,gdal)
importFrom(viridis,viridis_pal)
12 changes: 12 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@
# SUMMER - changes


Version 1.4.0
==========================
+ New major functions for population and prevalence simulation based on population frame and population density information, along with methods for aggregating pixel level prevalences and populations to the areal level.


Version 1.3.0
==========================
+ New SAE functions and vignette.


Version 1.2.1
==========================
+ `smoothSurvey` now can return posterior draws.
Expand Down
Binary file added R/.DS_Store
Binary file not shown.
142 changes: 142 additions & 0 deletions R/getAreaName.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
#' Determines which administrative areas contain the given points
#'
#' For any points not in an area, they are assigned the nearest area using
#' fields::fields.rdist.near or fields::rdist depending on the number of points
#' and the maximum memory in bytes with a warning.
#'
#' @param pts 2 column matrix of lon/lat coordinates
#' @param shapefile A SpatialPolygonsDataFrame object
#' @param areaNameVar The column name in \code{slot(shapefile, "data")}
#' corresponding to the area level of interest
#' @param delta Argument passed to fields::fields.rdist.near in fields package
#' @param mean.neighbor Argument passed to fields::fields.rdist.near in fields
#' package
#' @param maxBytes Maximum allowed memory in bytes (default is 3Gb). Determines
#' whether to call fields::fields.rdist.near which saves memory bute requires
#' delta and mean.neighbor inputs to be specified or fields::rdist.
#'
#' @details delta and mean.neighbor arguments only used when some points
#' are not in areas, perhaps due to inconsistencies in shapefiles.
#'
#' @author John Paige
#'
#' @seealso \code{\link{projKenya}}, \code{\link[fields]{fields.rdist.near}}
#'
#' @examples
#' \dontrun{
#' # download Kenya GADM shapefiles from SUMMERdata github repository
#' githubURL <- "https://github.com/paigejo/SUMMERdata/blob/main/data/kenyaMaps.rda?raw=true"
#' download.file(githubURL,"kenyaMaps.rda")
#'
#' # load it in
#' load("kenyaMaps.rda")
#'
#' # use the shapefile data to see what Admin1 and 2 areas the
#' # points (0, 37) and (0.5, 38) are in
#' # (these are longitude/latitude coordinates)
#' pts = cbind(c(37, 38), c(0, .5))
#' head(slot(adm1, "data"))
#' admin1Areas = getAreaName(pts, adm1, "NAME_1")
#' admin2Areas = getAreaName(pts, adm2, "NAME_2")
#' }
#'
#' @return A list of area IDs, area names, whether or not
#' points are in multiple areas, and whether or not points
#' are in no areas and assigned to the nearest one.
#'
#' @importFrom fields fields.rdist.near
#' @importFrom fields rdist
#' @importFrom fields in.poly
#' @export
getAreaName = function(pts, shapefile, areaNameVar="NAME_1",
delta=.05, mean.neighbor=50, maxBytes=3*2^30) {
# require(fields)

if(nrow(pts) == 1) {
# must work around the fact that fields::in.poly() doesn't work for matrix with 1 row
pts = rbind(pts, pts)
oneRow = TRUE
} else {
oneRow = FALSE
}

# data(kenyaMaps, envir = environment())
areaNames = shapefile@data[[areaNameVar]]

# get area map polygons and set helper function for testing if pts are in the constituencies
polys = shapefile@polygons
inRegion = function(i) {
countyPolys = polys[[i]]@Polygons
inside = sapply(1:length(countyPolys), function(x) {
fields::in.poly(pts, countyPolys[[x]]@coords, inflation=0)
})
insideAny = apply(inside, 1, any)

return(insideAny*i)
}
out = sapply(1:length(polys), inRegion)
if(oneRow) {
out = matrix(out[1,], nrow=1)
}
multipleRegs = apply(out, 1, function(vals) {sum(vals != 0) > 1})
areaID = apply(out, 1, function(vals) {match(1, vals != 0)})
areaNameVec = areaNames[areaID]

# for all points not in a area polygon, determine the nearest area
insideAny = apply(out, 1, function(x) {any(x != 0)})
if(any(!insideAny)) {
warning("Some points not inside any areas. Assigning them to nearest area")
problemPointsI = which(!insideAny)

bytesUsed = length(problemPointsI) * nrow(pts)
if(bytesUsed > maxBytes) {
# get nearby points (points within delta lon/lat units), remove self matches
nearbyPoints = fields::fields.rdist.near(matrix(pts[problemPointsI,], ncol=2), pts,
delta=delta, mean.neighbor=mean.neighbor)
selfI = nearbyPoints$ra == 0
nearbyPoints$ind = nearbyPoints$ind[!selfI,]
nearbyPoints$ra = nearbyPoints$ra[!selfI]
nearbyI = lapply(sort(unique(nearbyPoints$ind[,1])), function(x) {
nearbyPoints$ind[nearbyPoints$ind[,1] == x,2]
})
} else {
# get all points, remove self matches
dists = fields::rdist(matrix(pts[problemPointsI,], ncol=2), pts)
dists[cbind(1:length(problemPointsI), problemPointsI)] = Inf
nearbyI = apply(dists, 1, which.min)
}

# get nearby constituencies, counties, and distances
nearbyAreas = lapply(nearbyI, function(x) {areaNameVec[x]})
nearbyLengths = sapply(nearbyI, function(x) {length(x)})

if(bytesUsed > maxBytes) {
nearbyDistances = c()
startI = 1
for(i in 1:length(nearbyI)) {
endI = startI + nearbyLengths[i] - 1
nearbyDistances = c(nearbyDistances, list(nearbyPoints$ra[startI:endI]))
startI = endI + 1
}
} else {
nearbyDistances = dists[cbind(1:nrow(dists), nearbyI)]
}


# sort nearby constituencies and indices by distance
for(i in 1:length(nearbyI)) {
thisDistances = nearbyDistances[[i]]
sortI = sort(thisDistances, index.return=TRUE)$ix
nearbyDistances[[i]] = nearbyDistances[[i]][sortI]
nearbyAreas[[i]] = nearbyAreas[[i]][sortI]
nearbyI[[i]] = nearbyI[[i]][sortI]
}

# get nearest non-NA area and assign it
closestArea = sapply(nearbyAreas, function(x) {x[match(TRUE, !is.na(x))]})
areaNameVec[problemPointsI] = closestArea
}

list(areaID=areaID, areaNames=areaNameVec,
multipleRegs=multipleRegs, notInAnyAreas=!insideAny)
}
27 changes: 27 additions & 0 deletions R/kenyaPopulationData.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#' Kenya 2009 Census Frame and Related Datasets
#'
#' Datasets related to the 2009 census frame for Kenya based on the 2009 Kenya Population and Housing
#' Census. General population totals are estimated for 2014. Based on 2014 population density
#' estimates interpolated with exponential growth rate between 2010 and 2015 from WorldPop data.
#'
#' @format A number of data.frames with information about the 2009 Kenya Population and Housing Census and the population in Kenya at the time of the 2014 Demographic Health Survey. Some of the
#' data.frames have been adjusted to contain information about neonatals born from 2010-2014 rather than general population in 2014.
#' The dataset names are: easpaKenya, easpaKenyaNeonatal,
#' poppaKenya, and poppsubKenya.
#' @references Kenya National Bureau of Statistics, Ministry of Health/Kenya, National AIDS Control Council/Kenya, Kenya Medical Research Institute, and National Council For Population And Development/Kenya, 2015. Kenya Demographic and Health Survey 2014. Rockville, Maryland, USA. URL: http://dhsprogram.com/pubs/pdf/FR308/FR308.pdf.
#' @references Stevens, F.R., Gaughan, A.E., Linard, C., Tatem, A.J., 2015. Disaggregat- ing census data for population mapping using random forests with remotely-sensed and ancillary data. PloS One 10, e0107042.
#' @references Tatem, A.J., 2017. WorldPop, open data for spatial demography. Scientific Data 4.
#' @source <http://dhsprogram.com/pubs/pdf/FR308/FR308.pdf>
#' @docType data
#' @name kenyaPopulationData
#' @usage data(kenyaPopulationData)
"easpaKenya"

#' @rdname kenyaPopulationData
"easpaKenyaNeonatal"

#' @rdname kenyaPopulationData
"poppaKenya"

#' @rdname kenyaPopulationData
"poppsubKenya"
57 changes: 57 additions & 0 deletions R/logitNormMean.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#' Calculate the mean of a distribution whose
#' logit is Gaussian
#'
#' Adapted from logitnorm package. Calculates the mean of a distribution whose
#' logit is Gaussian. Each row of muSigmaMat is a mean and standard deviation
#' on the logit scale.
#'
#' @param muSigmaMat An n x 2 matrix where each row is \eqn{\mu}{mu} and \eqn{\sigma}{sigma}
#' on the logit scale for an independent random variable.
#' @param logisticApprox Whether or not to use logistic approximation to speed
#' up computation. See details for more information.
#' @param ... More arguments, passed to \code{integrate} function
#'
#' @return A vector of expectations of the specified random variables
#'
#' @author John Paige
#'
#' @details If \eqn{\mbox{logit}(Y) \sim N(\mu, \sigma^2)}{logit(Y) ~ N(mu, sigma^2)},
#' This function calculates \eqn{E[Y]}{E[Y]} via either numerical integration or by
#' assuming that Y follows a logistic distribution. Under this approximation, setting
#' \eqn{k = 16 \sqrt(3) / (15 \pi)}{k = 16 * sqrt(3) / (15 * pi)}, we approximate
#' the expectation as:
#' \deqn{E[Y] = expit(\mu / \sqrt(1 + k^2 \sigma^2))}{E[Y] = expit(mu / sqrt(1 + k^2 * sigma^2))}.
#' The above logistic approximation speeds up the computation, but also sacrifices
#' some accuracy.
#'
#' @examples
#' mus = c(-5, 0, 5)
#' sigmas = rep(1, 3)
#' logitNormMean(cbind(mus, sigmas))
#' logitNormMean(cbind(mus, sigmas), TRUE)
#'
#' @export
logitNormMean = function(muSigmaMat, logisticApprox=FALSE, ...) {
if(length(muSigmaMat) > 2) {
apply(muSigmaMat, 1, logitNormMean, logisticApprox=logisticApprox, ...)
}
else {
mu = muSigmaMat[1]
sigma = muSigmaMat[2]
if(sigma == 0)
SUMMER::expit(mu)
else {
if(any(is.na(c(mu, sigma))))
NA
else if(!logisticApprox) {
# numerically calculate the mean
fExp <- function(x) exp(stats::plogis(x, log.p=TRUE) + stats::dnorm(x, mean = mu, sd = sigma, log=TRUE))
stats::integrate(fExp, mu-10*sigma, mu+10*sigma, abs.tol = 0, ...)$value
} else {
# use logistic approximation
k = 16 * sqrt(3) / (15 * pi)
SUMMER::expit(mu / sqrt(1 + k^2 * sigma^2))
}
}
}
}
Loading

0 comments on commit 86c1a82

Please sign in to comment.