diff --git a/DESCRIPTION b/DESCRIPTION
index d861ec7..7b04812 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: SUMMER
 Type: Package
 Title: Small-Area-Estimation Unit/Area Models and Methods for Estimation in R
-Version: 1.3.1
-Date: 2022-07-24
+Version: 1.4.0
+Date: 2023-10-24
 Authors@R: c(
     person(given = "Zehang R", family = "Li", email= "lizehang@gmail.com", role = c("cre","aut")),
     person(given = "Bryan D", family = "Martin", email= "bmartin6@uw.edu", role = "aut"), 
@@ -15,17 +15,17 @@ Authors@R: c(
     person(given = "Geir-Arne", family = "Fuglstad", email = "geir-arne.fuglstad@ntnu.no", role = "aut"),
     person(given = "Andrea", family = "Riebler", email = "andrea.riebler@ntnu.no", role = "aut")
   )
-Description: Provides methods for spatial and spatio-temporal smoothing of demographic and health indicators using survey data, with particular focus on estimating and projecting under-five mortality rates, described in Mercer et al. (2015) <doi:10.1214/15-AOAS872>, Li et al. (2019) <doi:10.1371/journal.pone.0210645> and Li et al. (2020) <arXiv:2007.05117>. 
+Description: Provides methods for spatial and spatio-temporal smoothing of demographic and health indicators using survey data, with particular focus on estimating and projecting under-five mortality rates, described in Mercer et al. (2015) <doi:10.1214/15-AOAS872>, Li et al. (2019) <doi:10.1371/journal.pone.0210645>, Wu et al. (DHS Spatial Analysis Reports No. 21, 2021), and Li et al. (2023) <arXiv:2007.05117>. 
 URL: https://github.com/richardli/SUMMER
 BugReports: https://github.com/richardli/SUMMER/issues
 Depends: R (>= 3.5)
 License: GPL (>= 2)
-Imports: survey, stats, spdep, survival, ggplot2, scales, utils, Matrix, reshape2, viridis, sp, sf, shadowtext, ggridges, methods, data.table, RColorBrewer, grDevices
+Imports: survey, stats, spdep, survival, ggplot2, scales, utils, Matrix, reshape2, viridis, sp, sf, shadowtext, ggridges, methods, data.table, RColorBrewer, grDevices, fields, terra
 Encoding: UTF-8
 LazyData: true
 RoxygenNote: 7.2.3
 Additional_repositories: https://inla.r-inla-download.org/R/testing/
-Suggests: INLA, knitr, rmarkdown, readstata13, patchwork, rdhs, R.rsp, fields, sae, dplyr, tidyr
+Suggests: INLA, knitr, rmarkdown, readstata13, patchwork, rdhs, R.rsp, sae, dplyr, tidyr, raster
 VignetteBuilder: R.rsp, knitr
 NeedsCompilation: no
 Config/build/clean-inst-doc: FALSE
diff --git a/NAMESPACE b/NAMESPACE
index cb41b25..44d5b6a 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -9,26 +9,40 @@ S3method(summary,SUMMERmodel.svy)
 S3method(summary,SUMMERprojlist)
 export(Benchmark)
 export(ChangeRegion)
+export(adjustPopMat)
 export(aggregateSurvey)
+export(areaPopToArea)
+export(calibrateByRegion)
 export(expit)
 export(fitGeneric)
 export(fitINLA)
 export(fitINLA2)
 export(getAdjusted)
 export(getAmat)
+export(getAreaName)
 export(getBirths)
 export(getCounts)
 export(getDiag)
 export(getDirect)
 export(getDirectList)
+export(getPoppsub)
 export(getSmoothed)
 export(getSmoothedDensity)
 export(hatchPlot)
 export(logit)
+export(logitNormMean)
+export(makePopIntegrationTab)
 export(mapPlot)
 export(mapPoints)
+export(pixelPopToArea)
+export(poppRegionFromPopMat)
+export(projKenya)
 export(ridgePlot)
 export(rst)
+export(setThresholdsByRegion)
+export(simPopCustom)
+export(simPopSPDE)
+export(simSPDE)
 export(simhyper)
 export(smoothArea)
 export(smoothCluster)
@@ -40,6 +54,10 @@ import(Matrix)
 import(RColorBrewer)
 import(data.table)
 importFrom(Matrix,Diagonal)
+importFrom(fields,fields.rdist.near)
+importFrom(fields,in.poly)
+importFrom(fields,make.surface.grid)
+importFrom(fields,rdist)
 importFrom(ggplot2,aes)
 importFrom(ggplot2,geom_errorbar)
 importFrom(ggplot2,geom_line)
@@ -60,12 +78,19 @@ importFrom(sf,st_centroid)
 importFrom(sf,st_coordinates)
 importFrom(sf,st_crs)
 importFrom(shadowtext,geom_shadowtext)
+importFrom(sp,"proj4string<-")
+importFrom(sp,CRS)
+importFrom(sp,SpatialPoints)
+importFrom(sp,as.SpatialPolygons.PolygonsList)
+importFrom(sp,coordinates)
+importFrom(sp,over)
 importFrom(sp,plot)
 importFrom(sp,proj4string)
 importFrom(spdep,nb2mat)
 importFrom(spdep,poly2nb)
 importFrom(stats,aggregate)
 importFrom(stats,as.formula)
+importFrom(stats,cor)
 importFrom(stats,dgamma)
 importFrom(stats,dnorm)
 importFrom(stats,median)
@@ -79,4 +104,6 @@ importFrom(stats,runif)
 importFrom(stats,sd)
 importFrom(stats,setNames)
 importFrom(stats,var)
+importFrom(terra,extract)
+importFrom(terra,gdal)
 importFrom(viridis,viridis_pal)
diff --git a/NEWS.md b/NEWS.md
index b20d75e..f910617 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,4 +1,16 @@
 # SUMMER - changes
+
+
+Version 1.4.0
+==========================
++ New major functions for  population and prevalence simulation based on population frame and population density information, along with methods for aggregating pixel level prevalences and populations to the areal level. 
+
+
+Version 1.3.0
+==========================
++ New SAE functions and vignette.
+
+
 Version 1.2.1  
 ==========================
 + `smoothSurvey` now can return posterior draws.
diff --git a/R/.DS_Store b/R/.DS_Store
new file mode 100755
index 0000000..5008ddf
Binary files /dev/null and b/R/.DS_Store differ
diff --git a/R/getAreaName.R b/R/getAreaName.R
new file mode 100755
index 0000000..5dbf01b
--- /dev/null
+++ b/R/getAreaName.R
@@ -0,0 +1,142 @@
+#' Determines which administrative areas contain the given points
+#' 
+#' For any points not in an area, they are assigned the nearest area using 
+#' fields::fields.rdist.near or fields::rdist depending on the number of points 
+#' and the maximum memory in bytes with a warning.
+#' 
+#' @param pts 2 column matrix of lon/lat coordinates
+#' @param shapefile A SpatialPolygonsDataFrame object
+#' @param areaNameVar The column name in \code{slot(shapefile, "data")} 
+#' corresponding to the area level of interest
+#' @param delta Argument passed to fields::fields.rdist.near in fields package
+#' @param mean.neighbor Argument passed to fields::fields.rdist.near in fields 
+#' package
+#' @param maxBytes Maximum allowed memory in bytes (default is 3Gb). Determines 
+#' whether to call fields::fields.rdist.near which saves memory bute requires 
+#' delta and mean.neighbor inputs to be specified or fields::rdist.
+#' 
+#' @details delta and mean.neighbor arguments only used when some points 
+#' are not in areas, perhaps due to inconsistencies in shapefiles.
+#' 
+#' @author John Paige
+#' 
+#' @seealso \code{\link{projKenya}}, \code{\link[fields]{fields.rdist.near}}
+#' 
+#' @examples 
+#' \dontrun{
+#' # download Kenya GADM shapefiles from SUMMERdata github repository
+#' githubURL <- "https://github.com/paigejo/SUMMERdata/blob/main/data/kenyaMaps.rda?raw=true"
+#' download.file(githubURL,"kenyaMaps.rda")
+#' 
+#' # load it in
+#' load("kenyaMaps.rda")
+#' 
+#' # use the shapefile data to see what Admin1 and 2 areas the 
+#' # points (0, 37) and (0.5, 38) are in
+#' # (these are longitude/latitude coordinates)
+#' pts = cbind(c(37, 38), c(0, .5))
+#' head(slot(adm1, "data"))
+#' admin1Areas = getAreaName(pts, adm1, "NAME_1")
+#' admin2Areas = getAreaName(pts, adm2, "NAME_2")
+#' }
+#' 
+#' @return A list of area IDs, area names, whether or not 
+#' points are in multiple areas, and whether or not points 
+#' are in no areas and assigned to the nearest one.
+#' 
+#' @importFrom fields fields.rdist.near
+#' @importFrom fields rdist
+#' @importFrom fields in.poly
+#' @export
+getAreaName = function(pts, shapefile, areaNameVar="NAME_1", 
+                       delta=.05, mean.neighbor=50, maxBytes=3*2^30) {
+  # require(fields)
+  
+  if(nrow(pts) == 1) {
+    # must work around the fact that fields::in.poly() doesn't work for matrix with 1 row
+    pts = rbind(pts, pts)
+    oneRow = TRUE
+  } else {
+    oneRow = FALSE
+  }
+  
+  # data(kenyaMaps, envir = environment())
+  areaNames = shapefile@data[[areaNameVar]]
+  
+  # get area map polygons and set helper function for testing if pts are in the constituencies
+  polys = shapefile@polygons
+  inRegion = function(i) {
+    countyPolys = polys[[i]]@Polygons
+    inside = sapply(1:length(countyPolys), function(x) {
+      fields::in.poly(pts, countyPolys[[x]]@coords, inflation=0)
+      })
+    insideAny = apply(inside, 1, any)
+    
+    return(insideAny*i)
+  }
+  out = sapply(1:length(polys), inRegion)
+  if(oneRow) {
+    out = matrix(out[1,], nrow=1)
+  }
+  multipleRegs = apply(out, 1, function(vals) {sum(vals != 0) > 1})
+  areaID = apply(out, 1, function(vals) {match(1, vals != 0)})
+  areaNameVec = areaNames[areaID]
+  
+  # for all points not in a area polygon, determine the nearest area
+  insideAny = apply(out, 1, function(x) {any(x != 0)})
+  if(any(!insideAny)) {
+    warning("Some points not inside any areas. Assigning them to nearest area")
+    problemPointsI = which(!insideAny)
+    
+    bytesUsed = length(problemPointsI) * nrow(pts)
+    if(bytesUsed > maxBytes) {
+      # get nearby points (points within delta lon/lat units), remove self matches
+      nearbyPoints = fields::fields.rdist.near(matrix(pts[problemPointsI,], ncol=2), pts, 
+                                               delta=delta, mean.neighbor=mean.neighbor)
+      selfI = nearbyPoints$ra == 0
+      nearbyPoints$ind = nearbyPoints$ind[!selfI,]
+      nearbyPoints$ra = nearbyPoints$ra[!selfI]
+      nearbyI = lapply(sort(unique(nearbyPoints$ind[,1])), function(x) {
+        nearbyPoints$ind[nearbyPoints$ind[,1] == x,2]
+        })
+    } else {
+      # get all points, remove self matches
+      dists = fields::rdist(matrix(pts[problemPointsI,], ncol=2), pts)
+      dists[cbind(1:length(problemPointsI), problemPointsI)] = Inf
+      nearbyI = apply(dists, 1, which.min)
+    }
+    
+    # get nearby constituencies, counties, and distances
+    nearbyAreas = lapply(nearbyI, function(x) {areaNameVec[x]})
+    nearbyLengths = sapply(nearbyI, function(x) {length(x)})
+    
+    if(bytesUsed > maxBytes) {
+      nearbyDistances = c()
+      startI = 1
+      for(i in 1:length(nearbyI)) {
+        endI = startI + nearbyLengths[i] - 1
+        nearbyDistances = c(nearbyDistances, list(nearbyPoints$ra[startI:endI]))
+        startI = endI + 1
+      }
+    } else {
+      nearbyDistances = dists[cbind(1:nrow(dists), nearbyI)]
+    }
+    
+    
+    # sort nearby constituencies and indices by distance
+    for(i in 1:length(nearbyI)) {
+      thisDistances = nearbyDistances[[i]]
+      sortI = sort(thisDistances, index.return=TRUE)$ix
+      nearbyDistances[[i]] = nearbyDistances[[i]][sortI]
+      nearbyAreas[[i]] = nearbyAreas[[i]][sortI]
+      nearbyI[[i]] = nearbyI[[i]][sortI]
+    }
+    
+    # get nearest non-NA area and assign it
+    closestArea = sapply(nearbyAreas, function(x) {x[match(TRUE, !is.na(x))]})
+    areaNameVec[problemPointsI] = closestArea
+  }
+  
+  list(areaID=areaID, areaNames=areaNameVec, 
+       multipleRegs=multipleRegs, notInAnyAreas=!insideAny)
+}
\ No newline at end of file
diff --git a/R/kenyaPopulationData.R b/R/kenyaPopulationData.R
new file mode 100755
index 0000000..2da2a9d
--- /dev/null
+++ b/R/kenyaPopulationData.R
@@ -0,0 +1,27 @@
+#' Kenya 2009 Census Frame and Related Datasets
+#' 
+#' Datasets related to the 2009 census frame for Kenya based on the 2009 Kenya Population and Housing 
+#' Census. General population totals are estimated for 2014. Based on 2014 population density 
+#' estimates interpolated with exponential growth rate between 2010 and 2015 from WorldPop data.
+#'
+#' @format A number of data.frames with information about the 2009 Kenya Population and Housing Census and the population in Kenya at the time of the 2014 Demographic Health Survey. Some of the 
+#' data.frames have been adjusted to contain information about neonatals born from 2010-2014 rather than general population in 2014. 
+#' The dataset names are: easpaKenya, easpaKenyaNeonatal, 
+#' poppaKenya, and poppsubKenya.
+#' @references Kenya National Bureau of Statistics, Ministry of Health/Kenya, National AIDS Control Council/Kenya, Kenya Medical Research Institute, and National Council For Population And Development/Kenya, 2015. Kenya Demographic and Health Survey 2014. Rockville, Maryland, USA. URL: http://dhsprogram.com/pubs/pdf/FR308/FR308.pdf.
+#' @references Stevens, F.R., Gaughan, A.E., Linard, C., Tatem, A.J., 2015. Disaggregat- ing census data for population mapping using random forests with remotely-sensed and ancillary data. PloS One 10, e0107042.
+#' @references Tatem, A.J., 2017. WorldPop, open data for spatial demography. Scientific Data 4.
+#' @source <http://dhsprogram.com/pubs/pdf/FR308/FR308.pdf>
+#' @docType data
+#' @name kenyaPopulationData
+#' @usage data(kenyaPopulationData)
+"easpaKenya"
+
+#' @rdname kenyaPopulationData
+"easpaKenyaNeonatal"
+
+#' @rdname kenyaPopulationData
+"poppaKenya"
+
+#' @rdname kenyaPopulationData
+"poppsubKenya"
\ No newline at end of file
diff --git a/R/logitNormMean.R b/R/logitNormMean.R
new file mode 100755
index 0000000..d380123
--- /dev/null
+++ b/R/logitNormMean.R
@@ -0,0 +1,57 @@
+#' Calculate the mean of a distribution whose 
+#' logit is Gaussian
+#' 
+#' Adapted from logitnorm package.  Calculates the mean of a distribution whose 
+#' logit is Gaussian. Each row of muSigmaMat is a mean and standard deviation 
+#' on the logit scale. 
+#' 
+#' @param muSigmaMat An n x 2 matrix where each row is \eqn{\mu}{mu} and \eqn{\sigma}{sigma} 
+#' on the logit scale for an independent random variable.
+#' @param logisticApprox Whether or not to use logistic approximation to speed 
+#' up computation. See details for more information.
+#' @param ... More arguments, passed to \code{integrate} function
+#' 
+#' @return A vector of expectations of the specified random variables
+#' 
+#' @author John Paige
+#' 
+#' @details If \eqn{\mbox{logit}(Y) \sim N(\mu, \sigma^2)}{logit(Y) ~ N(mu, sigma^2)}, 
+#' This function calculates \eqn{E[Y]}{E[Y]} via either numerical integration or by 
+#' assuming that Y follows a logistic distribution. Under this approximation, setting 
+#' \eqn{k = 16 \sqrt(3) / (15 \pi)}{k = 16 * sqrt(3) / (15 * pi)}, we approximate 
+#' the expectation as:
+#' \deqn{E[Y] = expit(\mu / \sqrt(1 + k^2 \sigma^2))}{E[Y] = expit(mu / sqrt(1 + k^2 * sigma^2))}.
+#' The above logistic approximation speeds up the computation, but also sacrifices 
+#' some accuracy.
+#' 
+#' @examples
+#' mus = c(-5, 0, 5)
+#' sigmas = rep(1, 3)
+#' logitNormMean(cbind(mus, sigmas))
+#' logitNormMean(cbind(mus, sigmas), TRUE)
+#' 
+#' @export
+logitNormMean = function(muSigmaMat, logisticApprox=FALSE, ...) {
+  if(length(muSigmaMat) > 2) {
+    apply(muSigmaMat, 1, logitNormMean, logisticApprox=logisticApprox, ...)
+  }
+  else {
+    mu = muSigmaMat[1]
+    sigma = muSigmaMat[2]
+    if(sigma == 0)
+      SUMMER::expit(mu)
+    else {
+      if(any(is.na(c(mu, sigma))))
+        NA
+      else if(!logisticApprox) {
+        # numerically calculate the mean
+        fExp <- function(x) exp(stats::plogis(x, log.p=TRUE) + stats::dnorm(x, mean = mu, sd = sigma, log=TRUE))
+        stats::integrate(fExp, mu-10*sigma, mu+10*sigma, abs.tol = 0, ...)$value
+      } else {
+        # use logistic approximation
+        k = 16 * sqrt(3) / (15 * pi)
+        SUMMER::expit(mu / sqrt(1 + k^2 * sigma^2))
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/R/popGrid.R b/R/popGrid.R
new file mode 100755
index 0000000..5c042ea
--- /dev/null
+++ b/R/popGrid.R
@@ -0,0 +1,1276 @@
+#' Generating pixellated populations, and population frames
+#' 
+#' Functions for generating pixellated population information and 
+#' population frames at the `area` and `subarea` levels.  
+#' The `area` and `subarea` levels can be thought of as big 
+#' regions and little regions, where areas can be partitioned into 
+#' unique sets of subareas. For example, Admin-1 and Admin-2 
+#' areas might be areas and subareas respectively. The population totals are either 
+#' tabulated at the area x urban/rural level, the subarea x urban/rural 
+#' level, or at the pixel level of a specified resolution. Totals 
+#' are calculated using population density information, shapefiles, 
+#' and, possibly, preexisting population frames at different 
+#' areal levels. Note that area names should each be unique, and similarly for 
+#' subarea names.
+#' 
+#' @describeIn makePopIntegrationTab Generate pixellated `grid` of coordinates (both longitude/latitude and east/north) 
+#' over spatial domain of the given resolution with associated population totals, areas, subareas, 
+#' and urban/rural levels. For very small areas that might not 
+#' otherwise have a grid point in them, a custom integration point is added at their 
+#' centroid. Sets urbanicity classifications by thresholding input population density raster 
+#' using area and subarea population tables, and generates area and subarea population 
+#' tables from population density information if not already given. Can be used for integrating 
+#' predictions from the given coordinates to area and subarea levels using population weights.
+#' 
+#' @param popMat Pixellated grid data frame with variables `area` and `pop` such as that 
+#' generated by \code{\link{makePopIntegrationTab}}
+#' @param poppa data.frame of population per area separated by urban/rural. If `poppsub` 
+#'   is not included, this is used for normalization of populations associated with 
+#'   population integration points. Contains variables:
+#' \describe{
+#'   \item{area}{name of area}
+#'   \item{popUrb}{total urban (general) population of area}
+#'   \item{popRur}{total rural (general) population of area}
+#'   \item{popTotal}{total (general) population of area}
+#'   \item{pctUrb}{percentage of population in the area that is urban (between 0 and 100)}
+#' }
+#' @param poppsub data.frame of population per subarea separated by 
+#' urban/rural using for population normalization or urbanicity 
+#' classification. Often based on extra fine scale population density grid. 
+#' Has variables:
+#' \describe{
+#'   \item{subarea}{name of subarea}
+#'   \item{area}{name of area}
+#'   \item{popUrb}{total urban (general) population of subarea}
+#'   \item{popRur}{total rural (general) population of subarea}
+#'   \item{popTotal}{total (general) population of subarea}
+#'   \item{pctUrb}{percentage of population in the subarea that is urban (between 0 and 100)}
+#' }
+#' @param areapa A list with variables:
+#' \describe{
+#'   \item{area}{name of area}
+#'   \item{spatialArea}{spatial area of the subarea (e.g. in km^2)}
+#' }
+#' @param areapsub A list with variables:
+#' \describe{
+#'   \item{subarea}{name of subarea}
+#'   \item{spatialArea}{spatial area of the subarea (e.g. in km^2)}
+#' }
+#' @param kmRes The resolution of the pixelated grid in km
+#' @param domainMapDat A shapefile representing the full spatial domain (e.g. country)
+#' @param eastLim Range in km easting over the spatial domain under the input projection
+#' @param northLim Range in km northing over the spatial domain under the input projection
+#' @param mapProjection A projection function taking longitude and latitude and returning easting and 
+#'                northing in km. Or the inverse if inverse is set to TRUE. For example, 
+#'                \code{\link{projKenya}}. Check https://epsg.io/ for example for best projection EPSG codes 
+#'                for specific countries
+#' @param pop Population density raster
+#' @param areaMapDat SpatialPolygonsDataFrame object with area level map information
+#' @param subareaMapDat SpatialPolygonsDataFrame object with subarea level map information
+#' @param areaNameVar The name of the area variable associated with \code{areaMapDat@data} 
+#'            and \code{subareaMapDat@data}
+#' @param subareaNameVar The name of the subarea variable associated with \code{subareaMapDat@data}
+#' @param stratifyByUrban Whether to stratify the pixellated grid by urban/rural. If TRUE, 
+#'   renormalizes population densities within areas or subareas crossed with urban/rural
+#' @param poppaTarget Target population per area stratified by urban rural. Same format as poppa
+#' @param adjustBy Whether to adjust population density by the `area` or `subarea` level
+#' @param areaPolygonSubsetI Index in areaMapDat for a specific area to subset the grid over. This 
+#'            option can help reduce computation time relative to constructing the whole grid 
+#'            and subsetting afterwards
+#' @param subareaPolygonSubsetI FOR EXPERIMENTAL PURPOSES ONLY. Index in subareaMapDat for a 
+#'            specific area to subset the grid over. This 
+#'            option can help reduce computation time relative to constructing the whole grid 
+#'            and subsetting afterwards
+#' @param customSubsetPolygons 'SpatialPolygonsDataFrame' or 'SpatialPolygons' object to subset 
+#'            the grid over. This option can help reduce computation time relative to 
+#'            constructing the whole grid and subsetting afterwards. `areaPolygonSubsetI` or 
+#'            `subareaPolygonSubsetI` can be used when subsetting by areas or subareas in 
+#'            `areaMapDat` or `subareaMapDat`. Must be in latitude/longitude projection "EPSG:4326"
+#' @param returnPoppTables If TRUE, poppa and poppsub will be calculated based on the generated 
+#'                          population integration matrix and input area/subarea map data
+#' @param mean.neighbor For determining what area or subarea points are nearest to if they do not 
+#' directly fall into an area. See \code{\link[fields]{fields.rdist.near}} for details.
+#' @param delta For determining what area or subarea points are nearest to if they do not 
+#' directly fall into an area. See \code{\link[fields]{fields.rdist.near}} for details.
+#' @param setNAsToZero If TRUE, sets NA populations to 0.
+#' @param fixZeroPopDensitySubareas If TRUE, if population density in a subarea is estimated to be 
+#' zero, but the total population in the subarea is nonzero, population is filled into the 
+#' area uniformly
+#' @param extractMethod Either 'bilinear' or 'simple'. see `method` from 
+#' \code{\link[terra]{extract}}
+#' 
+#' @author John Paige
+#' @seealso \code{\link{setThresholdsByRegion}}, \code{\link{poppRegionFromPopMat}}, \code{\link{simPopSPDE}}, \code{\link{simPopCustom}}
+#' @examples 
+#' \dontrun{
+#' # download Kenya GADM shapefiles from SUMMERdata github repository
+#' githubURL <- paste0("https://github.com/paigejo/SUMMERdata/blob/main/data/", 
+#'                     "kenyaMaps.rda?raw=true")
+#' tempDirectory = "~/"
+#' mapsFilename = paste0(tempDirectory, "/kenyaMaps.rda")
+#' if(!file.exists(mapsFilename)) {
+#'   download.file(githubURL,mapsFilename)
+#' }
+#' 
+#' # load it in
+#' out = load(mapsFilename)
+#' out
+#' adm1@data$NAME_1 = as.character(adm1@data$NAME_1)
+#' adm1@data$NAME_1[adm1@data$NAME_1 == "Trans Nzoia"] = "Trans-Nzoia"
+#' adm1@data$NAME_1[adm1@data$NAME_1 == "Elgeyo-Marakwet"] = "Elgeyo Marakwet"
+#' adm2@data$NAME_1 = as.character(adm2@data$NAME_1)
+#' adm2@data$NAME_1[adm2@data$NAME_1 == "Trans Nzoia"] = "Trans-Nzoia"
+#' adm2@data$NAME_1[adm2@data$NAME_1 == "Elgeyo-Marakwet"] = "Elgeyo Marakwet"
+#' 
+#' # some Admin-2 areas have the same name
+#' adm2@data$NAME_2 = as.character(adm2@data$NAME_2)
+#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Bungoma") & 
+#'                    (adm2@data$NAME_2 == "Lugari")] = "Lugari, Bungoma"
+#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Kakamega") & 
+#'                    (adm2@data$NAME_2 == "Lugari")] = "Lugari, Kakamega"
+#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Meru") & 
+#'                    (adm2@data$NAME_2 == "Igembe South")] = "Igembe South, Meru"
+#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Tharaka-Nithi") & 
+#'                    (adm2@data$NAME_2 == "Igembe South")] = "Igembe South, Tharaka-Nithi"
+#' 
+#' # The spatial area of unknown 8 is so small, it causes problems unless its removed or 
+#' # unioned with another subarea. Union it with neighboring Kakeguria:
+#' newadm2 = adm2
+#' unknown8I = which(newadm2$NAME_2 == "unknown 8")
+#' newadm2$NAME_2[newadm2$NAME_2 %in% c("unknown 8", "Kapenguria")] <- 
+#'   "Kapenguria + unknown 8"
+#' admin2.IDs <- newadm2$NAME_2
+#' 
+#' library(maptools)
+#' temp <- unionSpatialPolygons(newadm2, admin2.IDs)
+#' tempData = newadm2@data[-unknown8I,]
+#' tempData = tempData[order(tempData$NAME_2),]
+#' newadm2 <- SpatialPolygonsDataFrame(temp, tempData, match.ID = F)
+#' adm2 = newadm2
+#' 
+#' # download 2014 Kenya population density and associated TIF file
+#' githubURL <- paste0("https://github.com/paigejo/SUMMERdata/blob/main/data/", 
+#'                     "Kenya2014Pop/pop.rda?raw=true")
+#' popFilename = paste0(tempDirectory, "/pop.rda")
+#' if(!file.exists(popFilename)) {
+#'   download.file(githubURL,popFilename)
+#' }
+#' 
+#' githubURL <- paste0("https://github.com/paigejo/SUMMERdata/blob/main/data/", 
+#'                     "Kenya2014Pop/worldpop_total_1y_2014_00_00.tif?raw=true")
+#' popTIFFilename = paste0(tempDirectory, "/worldpop_total_1y_2014_00_00.tif")
+#' if(!file.exists(popTIFFilename)) {
+#'   download.file(githubURL,popTIFFilename)
+#' }
+#' 
+#' # load it in
+#' require(terra)
+#' out = load(popFilename)
+#' out
+#' 
+#' # make sure this is correct for re-projections
+#' pop@file@name = paste0(tempDirectory, "/worldpop_total_1y_2014_00_00.tif")
+#' 
+#' eastLim = c(-110.6405, 832.4544)
+#' northLim = c(-555.1739, 608.7130)
+#' 
+#' ## Construct poppsubKenya, a table of urban/rural general population totals 
+#' ## in each subarea. Technically, this is not necessary since we can load in 
+#' ## poppsubKenya via data(kenyaPopulationData). First, we will need to calculate 
+#' ## the areas in km^2 of the areas and subareas
+#' 
+#' library(rgdal)
+#' library(sp)
+#' 
+#' # use Lambert equal area projection of areas (Admin-1) and subareas (Admin-2)
+#' midLon = mean(adm1@bbox[1,])
+#' midLat = mean(adm1@bbox[2,])
+#' p4s = paste0("+proj=laea +x_0=0 +y_0=0 +lon_0=", midLon, 
+#'              " +lat_0=", midLat, " +units=km")
+#' 
+#' library(rgdal)
+#' 
+#' adm1proj <- spTransform(adm1, CRS(p4s))
+#' adm2proj <- spTransform(adm2, CRS(p4s))
+#' 
+#' # now calculate spatial area in km^2
+#' library(rgeos)
+#' admin1Areas = gArea(adm1proj, TRUE)
+#' admin2Areas = gArea(adm2proj, TRUE)
+#' areapaKenya = data.frame(area=adm1proj@data$NAME_1, spatialArea=admin1Areas)
+#' areapsubKenya = data.frame(area=adm2proj@data$NAME_1, subarea=adm2proj@data$NAME_2, 
+#'                            spatialArea=admin2Areas)
+#' 
+#' # Calculate general population totals at the subarea (Admin-2) x urban/rural 
+#' # level and using 1km resolution population grid. Assign urbanicity by 
+#' # thresholding population density based on estimated proportion population 
+#' # urban/rural, making sure total area (Admin-1) urban/rural populations in 
+#' # each area matches poppaKenya.
+#' require(fields)
+#' # NOTE: the following function will typically take ~20 minutes. Can speed up 
+#' #       by setting kmRes to be higher, but we recommend fine resolution for 
+#' #       this step, since it only needs to be done once. Instead of running this, 
+#' #       you can simply run data(kenyaPopulationData)
+#' system.time(poppsubKenya <- getPoppsub(
+#'   kmRes=1, pop=pop, domainMapDat=adm0,
+#'   eastLim=eastLim, northLim=northLim, mapProjection=projKenya,
+#'   poppa = poppaKenya, areapa=areapaKenya, areapsub=areapsubKenya, 
+#'   areaMapDat=adm1, subareaMapDat=adm2, 
+#'   areaNameVar = "NAME_1", subareaNameVar="NAME_2"))
+#' 
+#' # Now generate a general population integration table at 5km resolution, 
+#' # based on subarea (Admin-2) x urban/rural population totals. This takes 
+#' # ~1 minute
+#' system.time(popMatKenya <- makePopIntegrationTab(
+#'   kmRes=5, pop=pop, domainMapDat=adm0,
+#'   eastLim=eastLim, northLim=northLim, mapProjection=projKenya,
+#'   poppa = poppaKenya, poppsub=poppsubKenya, 
+#'   areaMapDat = adm1, subareaMapDat = adm2,
+#'   areaNameVar = "NAME_1", subareaNameVar="NAME_2"))
+#' 
+#' ## Adjust popMat to be target (neonatal) rather than general population density. First
+#' ## creat the target population frame
+#' ## (these numbers are based on IPUMS microcensus data)
+#' mothersPerHouseholdUrb = 0.3497151
+#' childrenPerMotherUrb = 1.295917
+#' mothersPerHouseholdRur = 0.4787696
+#' childrenPerMotherRur = 1.455222
+#' targetPopPerStratumUrban = easpaKenya$HHUrb * mothersPerHouseholdUrb * childrenPerMotherUrb
+#' targetPopPerStratumRural = easpaKenya$HHRur * mothersPerHouseholdRur * childrenPerMotherRur
+#' easpaKenyaNeonatal = easpaKenya
+#' easpaKenyaNeonatal$popUrb = targetPopPerStratumUrban
+#' easpaKenyaNeonatal$popRur = targetPopPerStratumRural
+#' easpaKenyaNeonatal$popTotal = easpaKenyaNeonatal$popUrb + easpaKenyaNeonatal$popRur
+#' easpaKenyaNeonatal$pctUrb = 100 * easpaKenyaNeonatal$popUrb / easpaKenyaNeonatal$popTotal
+#' easpaKenyaNeonatal$pctTotal = 
+#'   100 * easpaKenyaNeonatal$popTotal / sum(easpaKenyaNeonatal$popTotal)
+#' 
+#' # Generate the target population density by scaling the current population density grid 
+#' # at the Admin1 x urban/rural level
+#' popMatKenyaNeonatal = adjustPopMat(popMatKenya, easpaKenyaNeonatal)
+#' 
+#' # Generate neonatal population table from the neonatal population integration matrix.
+#' # This is technically not necessary for population simulation purposes, but is here 
+#' # for illustrative purposes
+#' poppsubKenyaNeonatal = poppRegionFromPopMat(popMatKenyaNeonatal, popMatKenyaNeonatal$subarea)
+#' poppsubKenyaNeonatal = cbind(subarea=poppsubKenyaNeonatal$region, 
+#'                              area=adm2@data$NAME_1[match(poppsubKenyaNeonatal$region, 
+#'                                adm2@data$NAME_2)], 
+#'                              poppsubKenyaNeonatal[,-1])
+#' print(head(poppsubKenyaNeonatal))
+#' }
+#' @importFrom terra extract
+#' @importFrom terra gdal
+#' @importFrom sp SpatialPoints
+#' @importFrom sp CRS
+#' @importFrom sp over
+#' @importFrom sp coordinates
+#' @importFrom sp as.SpatialPolygons.PolygonsList
+#' @importFrom sp proj4string<-
+#' @importFrom fields make.surface.grid
+#' @export
+makePopIntegrationTab = function(kmRes=5, pop, domainMapDat, eastLim, northLim, mapProjection, 
+                                 areaMapDat, subareaMapDat, 
+                                 areaNameVar="NAME_1", subareaNameVar="NAME_2", 
+                                 poppa=NULL, poppsub=NULL, stratifyByUrban=TRUE, 
+                                 areapa=NULL, areapsub=NULL, customSubsetPolygons=NULL, 
+                                 areaPolygonSubsetI=NULL, subareaPolygonSubsetI=NULL, 
+                                 mean.neighbor=50, delta=.1, returnPoppTables=FALSE, 
+                                 setNAsToZero=TRUE, fixZeroPopDensitySubareas=FALSE, 
+                                 extractMethod="bilinear") {
+  thresholdUrbanBy = ifelse(is.null(poppsub), "area", "subarea")
+  
+  # some basic checks to make sure inputs are correct
+  
+  # make sure area and subarea names are unique
+  nameCounts = aggregate(areaMapDat@data[[areaNameVar]], by=list(area=areaMapDat@data[[areaNameVar]]), FUN=length)
+  if(any(nameCounts$x > 1)) {
+    stop("area names are not unique in areaMapDat")
+  }
+  if(!is.null(subareaMapDat)) {
+    nameCounts = aggregate(subareaMapDat@data[[subareaNameVar]], by=list(area=subareaMapDat@data[[subareaNameVar]]), FUN=length)
+    if(any(nameCounts$x > 1)) {
+      stop("subarea names are not unique in subareaMapDat")
+    }
+  }
+  if(!is.null(poppa)) {
+    nameCounts = aggregate(poppa$area, by=list(area=poppa$area), FUN=length)
+    if(any(nameCounts$x > 1)) {
+      stop("area names are not unique in poppa")
+    }
+    
+    # make sure area names match up
+    if(!all.equal(sort(poppa$area), sort(areaMapDat@data[[areaNameVar]]))) {
+      stop("area names in poppa do not match those in areaMapDat@data")
+    }
+  }
+  if(!is.null(poppsub)) {
+    nameCounts = aggregate(poppsub$subarea, by=list(area=poppsub$subarea), FUN=length)
+    if(any(nameCounts$x > 1)) {
+      stop("subarea names are not unique in poppsub")
+    }
+    
+    # make sure subarea names match up
+    if(!is.null(subareaMapDat)) {
+      if(!all.equal(sort(poppsub$subarea), sort(subareaMapDat@data[[subareaNameVar]]))) {
+        stop("subarea names in poppsub do not match those in subareaMapDat@data")
+      }
+    }
+  }
+  
+  # get a rectangular grid
+  eastGrid = seq(eastLim[1], eastLim[2], by=kmRes)
+  northGrid = seq(northLim[1], northLim[2], by=kmRes)
+  
+  if(!is.null(areaPolygonSubsetI)) {
+    # get range of the grid that we actually need
+    temp = areaMapDat@polygons[[areaPolygonSubsetI]]
+    allPolygons = temp@Polygons
+    eastNorthCoords = do.call("rbind", lapply(1:length(allPolygons), function(i) {mapProjection(allPolygons[[i]]@coords)}))
+    eastSubRange = range(eastNorthCoords[,1])
+    northSubRange = range(eastNorthCoords[,2])
+    
+    # subset grid to the range we need
+    eastGrid = eastGrid[eastGrid >= eastSubRange[1]]
+    eastGrid = eastGrid[eastGrid <= eastSubRange[2]]
+    northGrid = northGrid[northGrid >= northSubRange[1]]
+    northGrid = northGrid[northGrid <= northSubRange[2]]
+  }
+  
+  if(!is.null(subareaPolygonSubsetI)) {
+    # get range of the grid that we actually need
+    temp = subareaMapDat@polygons[[subareaPolygonSubsetI]]
+    allPolygons = temp@Polygons
+    eastNorthCoords = do.call("rbind", lapply(1:length(allPolygons), function(i) {mapProjection(allPolygons[[i]]@coords)}))
+    eastSubRange = range(eastNorthCoords[,1])
+    northSubRange = range(eastNorthCoords[,2])
+    
+    # subset grid to the range we need
+    eastGrid = eastGrid[eastGrid >= eastSubRange[1]]
+    eastGrid = eastGrid[eastGrid <= eastSubRange[2]]
+    northGrid = northGrid[northGrid >= northSubRange[1]]
+    northGrid = northGrid[northGrid <= northSubRange[2]]
+  }
+  
+  if(!is.null(customSubsetPolygons)) {
+    if(("SpatialPolygons" %in% class(customSubsetPolygons)) || ("SpatialPolygonsDataFrame" %in% class(customSubsetPolygons))) {
+      
+      # calculate the east/north range of each polygon
+      getPolygonENrange = function(pol) {
+        # get range of the grid that we actually need
+        temp = pol
+        allPolygons = temp@Polygons
+        eastNorthCoords = do.call("rbind", lapply(1:length(allPolygons), function(i) {mapProjection(allPolygons[[i]]@coords)}))
+        eastSubRange = range(eastNorthCoords[,1])
+        northSubRange = range(eastNorthCoords[,2])
+        
+        # subset grid to the range we need
+        rbind(eastSubRange, 
+              northSubRange)
+      }
+      allRanges = lapply(getPolygonENrange, customSubsetPolygons@polygons)
+      
+      # calculate the full range based on each individual polygon range
+      eastings = sapply(allRanges, function(x) {x[1,]})
+      northings = sapply(allRanges, function(x) {x[2,]})
+      eastSubRange = range(eastings)
+      northSubRange = range(northings)
+      
+      # subset grid to the range we need
+      eastGrid = eastGrid[eastGrid >= eastSubRange[1]]
+      eastGrid = eastGrid[eastGrid <= eastSubRange[2]]
+      northGrid = northGrid[northGrid >= northSubRange[1]]
+      northGrid = northGrid[northGrid <= northSubRange[2]]
+    } else {
+      stop("customSubsetPolygons must be of class 'SpatialPolygons' or 'SpatialPolygonsDataFrame'")
+    }
+  }
+  
+  utmGrid = matrix(fields::make.surface.grid(list(east=eastGrid, north=northGrid)), ncol=2)
+  
+  # project coordinates into lat/lon
+  if(length(utmGrid) > 0) {
+    lonLatGrid = matrix(mapProjection(utmGrid, inverse=TRUE), ncol=2)
+  } else {
+    warning(paste0("no grid cell centroid are in the areas of interest. ", 
+                   "Integration grid will be composed entirely of custom ", 
+                   "integration points at the centroids of the areas of interest"))
+    lonLatGrid = utmGrid
+  }
+  
+  # subset grid so it's in the domain
+  # inDomain = in.poly(lonLatGrid, domainPoly)
+  # determine version of PROJ
+  ver = terra::gdal(lib="proj")
+  PROJ6 <- as.numeric(substr(ver, 1, 1)) >= 6
+  
+  # from lon/lat coords to easting/northing
+  if(!PROJ6) {
+    lonLatCoords = sp::SpatialPoints(lonLatGrid, proj4string=sp::CRS("+proj=longlat"))
+  } else {
+    lonLatCoords = sp::SpatialPoints(lonLatGrid, proj4string=sp::CRS(SRS_string="EPSG:4326"))
+  }
+  inDomain = sp::over(lonLatCoords, domainMapDat)
+  inDomain = !is.na(inDomain[,1])
+  utmGrid = matrix(utmGrid[inDomain,], ncol=2)
+  lonLatGrid = matrix(lonLatGrid[inDomain,], ncol=2)
+  
+  # compute areas associated with locations
+  if(length(lonLatGrid) > 0) {
+    if(!is.null(subareaMapDat)) {
+      subareas = SUMMER::getAreaName(lonLatGrid, subareaMapDat, areaNameVar=subareaNameVar, mean.neighbor=mean.neighbor, delta=delta)$areaNames
+    } else {
+      subareas = NULL
+    }
+    
+    if(!is.null(areaMapDat)) {
+      areas = SUMMER::getAreaName(lonLatGrid, areaMapDat, areaNameVar=areaNameVar, mean.neighbor=mean.neighbor, delta=delta)$areaNames
+    } else {
+      areas = NULL
+    }
+    
+  } else {
+    subareas = character(0)
+    areas = character(0)
+  }
+  
+  if(!is.null(areaPolygonSubsetI)) {
+    areaSubsetName = areaMapDat@data[areaPolygonSubsetI,areaNameVar]
+    insideArea = areas == areaSubsetName
+    
+    # subset grid and area/subarea names so they're in the area of interest
+    utmGrid = matrix(utmGrid[insideArea,], ncol=2)
+    lonLatGrid = matrix(lonLatGrid[insideArea,], ncol=2)
+    areas = areas[insideArea]
+    subareas = subareas[insideArea]
+  }
+  
+  if(!is.null(subareaPolygonSubsetI)) {
+    subareaSubsetName = subareaMapDat@data[subareaPolygonSubsetI,subareaNameVar]
+    insideSubarea = subareas == subareaSubsetName
+    
+    # subset grid and area/subarea names so they're in the area of interest
+    utmGrid = matrix(utmGrid[insideSubarea,], ncol=2)
+    lonLatGrid = matrix(lonLatGrid[insideSubarea,], ncol=2)
+    areas = areas[insideSubarea]
+    subareas = subareas[insideSubarea]
+  }
+  
+  if(!is.null(customSubsetPolygons)) {
+    if(!grepl("+proj=longlat", customSubsetPolygons@proj4string@projargs)) {
+      warning(paste0("customSubsetPolygons does not use longitude/latitude ", 
+                     "coordinate system? Using makePopIntegrationTab outside ", 
+                     "its use case, which may result in errors"))
+    }
+    
+    if(!PROJ6) {
+      lonLatCoords = sp::SpatialPoints(lonLatGrid, proj4string=sp::CRS("+proj=longlat"))
+    } else {
+      lonLatCoords = sp::SpatialPoints(lonLatGrid, proj4string=sp::CRS(SRS_string="EPSG:4326"))
+    }
+    insideCustomSubset = sp::over(lonLatCoords, customSubsetPolygons)
+    
+    # subset grid and area/subarea names so they're in the area of interest
+    utmGrid = matrix(utmGrid[insideCustomSubset,], ncol=2)
+    lonLatGrid = matrix(lonLatGrid[insideCustomSubset,], ncol=2)
+    areas = areas[insideCustomSubset]
+    
+    if(!is.null(subareaMapDat)) {
+      subareas = subareas[insideCustomSubset]
+    }
+  }
+  
+  # determine what subareas we want our grid to contain
+  if(!is.null(subareaMapDat)) {
+    allSubareas = sort(unique(subareaMapDat@data[[subareaNameVar]]))
+    if(!is.null(areaPolygonSubsetI)) {
+      allSubareas = sort(unique(subareaMapDat@data[subareaMapDat@data[[areaNameVar]] == areaSubsetName,][[subareaNameVar]]))
+    }
+    if(!is.null(subareaPolygonSubsetI)) {
+      allSubareas = subareaSubsetName
+    }
+    if(!is.null(customSubsetPolygons)) {
+      allSubareas = sort(unique(subareas))
+    }
+  }
+  
+  # filter out parts of poppsub that are irrelevant for the subareas of 
+  # interest
+  if(!is.null(poppsub)) {
+    poppsub = poppsub[poppsub$subarea %in% allSubareas,]
+  }
+  
+  if(!is.null(subareaMapDat)) {
+    # check to make sure every subarea has at least 2 pixels
+    subareasFactor = factor(subareas, levels=allSubareas)
+    if(length(lonLatGrid) > 0) {
+      out = aggregate(subareas, by=list(subarea=subareasFactor), FUN=length, drop=FALSE)
+    } else {
+      out = data.frame(subarea=sort(unique(subareaMapDat@data[[subareaNameVar]])), 
+                       x=NA)
+    }
+    noPixels = is.na(out$x)
+    onePixel = out$x == 1
+    onePixel[is.na(onePixel)] = FALSE
+    onePixelNames = out$subarea[onePixel]
+    badSubareas = noPixels | onePixel
+    badSubareaNames = as.character(out$subarea[badSubareas])
+    
+    if(any(badSubareas)) {
+      badSubareaString = paste(badSubareaNames, collapse=", ")
+      warning(paste0("The following subareas have < 2 regular grid points ", 
+                     "at the given resolution: ", badSubareaString, ", so ", 
+                     "they will be given custom integration points"))
+      
+      # get centroids of the subareas (or it's single pixel coordinates)
+      thisSpatialPolyList = sp::as.SpatialPolygons.PolygonsList(subareaMapDat@polygons)
+      centroidsLonLat = matrix(ncol=2, nrow=length(allSubareas))
+      
+      for(i in 1:length(allSubareas)) {
+        thisSubarea = allSubareas[i]
+        if(thisSubarea %in% onePixelNames) {
+          thisCentroid = lonLatGrid[subareas == thisSubarea,]
+        } else {
+          subareaI = match(as.character(thisSubarea), as.character(subareaMapDat@data[[subareaNameVar]]))
+          thisCentroid = sp::coordinates(thisSpatialPolyList[subareaI])
+        }
+        
+        centroidsLonLat[i,] = thisCentroid
+      }
+      
+      # sort to match results of aggregate (alphabetical order)
+      # sortI = sort(as.character(subareaMapDat@data[[subareaNameVar]]), index.return=TRUE)$ix
+      sortI = sort(as.character(allSubareas), index.return=TRUE)$ix
+      centroidsLonLat = centroidsLonLat[sortI,]
+      
+      # remove the one pixel for subareas with only one pixel 
+      # (we will add it in again later, twice if stratified: both urban and rural)
+      onePixel = which(subareas %in% onePixelNames)
+      if(length(onePixel) > 0) {
+        lonLatGrid = lonLatGrid[-onePixel,]
+        utmGrid = utmGrid[-onePixel,]
+        subareas = subareas[-onePixel]
+        areas = areas[-onePixel]
+      }
+      
+      # add centroids of only the bad subareas
+      centroidsLonLat = matrix(centroidsLonLat[badSubareas,], ncol=2)
+      
+      # convert to east/north
+      centroidsEastNorth = projKenya(centroidsLonLat[,1], centroidsLonLat[,2])
+      
+      # only add centroid in stratum if bad subareas have any population in the stratum. 
+      # If poppsub not included and resolution not high enough to have multiple points 
+      # in a subarea, treat it as entirely urban or rural
+      if(is.null(poppsub)) {
+        hasUrbanPop = rep(TRUE, sum(badSubareas))
+        hasRuralPop = rep(FALSE, sum(badSubareas))
+      } else {
+        hasUrbanPop = (poppsub$popUrb > 0)[badSubareas]
+        hasRuralPop = (poppsub$popRur > 0)[badSubareas]
+      }
+      
+      # add centroids to the matrices of pixellated grid coordinates. 
+      # Add them twice: once for urban, once for rural
+      lonLatGrid = rbind(lonLatGrid, centroidsLonLat[hasUrbanPop,], centroidsLonLat[hasRuralPop,])
+      utmGrid = rbind(utmGrid, centroidsEastNorth[hasUrbanPop,], centroidsEastNorth[hasRuralPop,])
+      
+      # add associated consituencies, areas, provinces to respective vectors
+      # Normally, we could just caluclate what subarea/area the points are in. 
+      # However, since centroids might not be in the associated subarea, we 
+      # instead just assign the known subareas directly
+      # newSubareas = getAreaName(rbind(centroidsLonLat[hasUrbanPop,], 
+      #                                 centroidsLonLat[hasRuralPop,]), 
+      #                           subareaMapDat, subareaNameVar, delta, mean.neighbor)$areaNames
+      # newAreas = getAreaName(rbind(centroidsLonLat[hasUrbanPop,], 
+      #                              centroidsLonLat[hasRuralPop,]), 
+      #                        areaMapDat, areaNameVar, delta, mean.neighbor)$areaNames
+      newSubareas = c(badSubareaNames[hasUrbanPop], badSubareaNames[hasRuralPop])
+      newAreas = subareaMapDat@data[[areaNameVar]][match(newSubareas, subareaMapDat@data[[subareaNameVar]])]
+      
+      subareas = c(as.character(subareas), as.character(newSubareas))
+      areas = c(as.character(areas), as.character(newAreas))
+    }
+  } else {
+    badSubareas = FALSE
+  }
+  
+  # get population density at those coordinates
+  if(!PROJ6) {
+    # extract the raster values for each chunk of points
+    interpPopVals <- tryCatch(
+      {
+        terra::extract(pop, sp::SpatialPoints(lonLatGrid),method=extractMethod)
+      },
+      error=function(cond) {
+        message(cond)
+        stop(paste0("Error extracting raster values. In case of memory limitations, see ", 
+                    "terra::terraOptions()"))
+        # Choose a return value in case of error
+        return(NA)
+      }
+    )
+  } else {
+    sp::proj4string(pop) = sp::CRS(SRS_string="EPSG:4326")
+    interpPopVals <- tryCatch(
+      {
+        terra::extract(pop, sp::SpatialPoints(lonLatGrid, proj4string=sp::CRS(SRS_string="EPSG:4326")), method="bilinear")
+      },
+      error=function(cond) {
+        message(cond)
+        stop(paste0("Error extracting raster values. In case of memory limitations, see ", 
+                    "terra::aggregate, terra::resample, terra::projectRaster, and terra::terraOptions"))
+        # Choose a return value in case of error
+        return(NA)
+      }
+    )
+  }
+  
+  if(setNAsToZero) {
+    interpPopVals[is.na(interpPopVals)] = 0
+  }
+  
+  # if requested, fix subareas with entirely zero population density
+  if(fixZeroPopDensitySubareas && !is.null(poppsub)) {
+    newPop = data.frame(list(lon=lonLatGrid[,1], lat=lonLatGrid[,2], pop=interpPopVals, area=areas, subarea=subareas, urban=NA))
+    poppsubCurrent = poppRegionFromPopMat(newPop, newPop$subarea)
+    
+    zeroPopSubareas = poppsubCurrent$region[poppsubCurrent$popTotal == 0]
+    if(length(zeroPopSubareas) > 0) {
+      warning(paste0("The following subareas have entirely zero population density ", 
+                     "but nonzero total population, and their population will be filled ", 
+                     "in uniformly: ", paste(zeroPopSubareas, collapse=", ")))
+      # fill in population density uniformly in these subareas
+      for(i in 1:length(zeroPopSubareas)) {
+        thisSub = zeroPopSubareas[i]
+        thisSubI = newPop$subarea == thisSub
+        thisSubPop = poppsubCurrent$popTotal[poppsubCurrent$region == thisSub]
+        newPop$pop[thisSubI] = thisSubPop/sum(thisSubI)
+      }
+    }
+    
+    interpPop = newPop$pop
+  }
+  
+  if(any(badSubareas)) {
+    # make sure population densities in the bad subareas 
+    # are slightly different so one will be classified as urban and 
+    # the other as rural. They will be renormalized later based on poppsub, 
+    # or, if poppsub doesn't exist, only new/custom "urban" points will be kept, 
+    # so no densities are modified here
+    nUnits = length(interpPopVals)
+    nNewRural = sum(hasRuralPop)
+    if(nNewRural >= 1) {
+      interpPopVals[(nUnits-nNewRural + 1):nUnits] = interpPopVals[(nUnits-nNewRural + 1):nUnits] / 2
+    }
+  }
+  
+  # determine which points are urban via population density thresholding
+  newPop = data.frame(list(lon=lonLatGrid[,1], lat=lonLatGrid[,2], pop=interpPopVals, area=areas, subarea=subareas, urban=NA))
+  if(thresholdUrbanBy == "area") {
+    threshes = SUMMER::setThresholdsByRegion(newPop, poppa, regionType="area")
+    popThreshes = sapply(1:nrow(newPop), function(i) {threshes$threshes[as.character(threshes$regions) == as.character(newPop$area[i])]})
+    urban = newPop$pop >= unlist(popThreshes)
+    newPop$urban = urban
+  } else {
+    tempSubarea = poppsub
+    tempPop = newPop
+    allSubareas = sort(unique(tempPop$subarea))
+    tempSubarea = tempSubarea[tempSubarea$subarea %in% allSubareas,]
+    threshes = SUMMER::setThresholdsByRegion(tempPop, poppr = tempSubarea, regionType="subarea")
+    popThreshes = sapply(1:nrow(newPop), function(i) {threshes$threshes[as.character(threshes$regions) == as.character(newPop$subarea[i])]})
+    urban = newPop$pop >= unlist(popThreshes)
+    newPop$urban = urban
+  }
+  
+  newPop$east = utmGrid[,1]
+  newPop$north = utmGrid[,2]
+  
+  pointAreaNames = as.character(newPop$area)
+  pointAreaNamesU = as.character(newPop$area)
+  pointAreaNamesU[!newPop$urban] = "DoNotUseThis"
+  pointAreaNamesR = as.character(newPop$area)
+  pointAreaNamesR[newPop$urban] = "DoNotUseThis"
+  
+  if(!is.null(subareaMapDat)) {
+    pointSubareaNames = as.character(newPop$subarea)
+    pointSubareaNamesU = as.character(newPop$subarea)
+    pointSubareaNamesU[!newPop$urban] = "DoNotUseThis"
+    pointSubareaNamesR = as.character(newPop$subarea)
+    pointSubareaNamesR[newPop$urban] = "DoNotUseThis"
+  }
+  
+  # if necessary, renormalize population values within subareas or areas 
+  # crossed with urban/rural to be the correct value
+  if(!is.null(poppsub)) {
+    # In this case, areapsub and areapa are not necessary
+    
+    if(stratifyByUrban) {
+      pointPopUrb = calibrateByRegion(newPop$pop, pointSubareaNamesU, poppsub$subarea, poppsub$popUrb)
+      pointPopRur = calibrateByRegion(newPop$pop, pointSubareaNamesR, poppsub$subarea, poppsub$popRur)
+      newPop$pop = (pointPopUrb + pointPopRur)
+    } else {
+      newPop$pop = calibrateByRegion(newPop$pop, pointSubareaNames, poppsub$subarea, poppsub$popTotal)
+    }
+  } else if(!is.null(poppa)) {
+    # i.e. we don't have poppsub, so now we need to be careful about the spatial areas 
+    # (e.g. area in km^2) associated with each point
+    
+    # determine how to use spatial areas (e.g. in km^2) of areas/subareas:
+    # Case 1: We don't have subareaMapDat, but do have areapa
+    # Case 2: We don't have subareaMapDat, and don't have areapa
+    # Case 3: We have subareaMapDat and areapsub
+    # Case 4: We have subareaMapDat but don't have areapsub (this is sketchy if 
+    #         there's any custom integration points)
+    
+    # set the areas of each point to be equal to start out. This must be 
+    # proportional to, but not necessarily equal to, the actual area 
+    # associated with each point
+    pointAreas = rep(1, nrow(newPop))
+    if(is.null(subareaMapDat) && !is.null(areapa)) {
+      pointAreas = calibrateByRegion(pointAreas, pointAreaNames, areapa$area, areapa$spatialArea)
+    } else if(is.null(subareaMapDat) && is.null(areapa)) {
+      # do nothing, since pointAreas = rep(1, nrow(newPop)) is fine
+    } else if(!is.null(subareaMapDat) && !is.null(areapsub)) {
+      pointAreas = calibrateByRegion(pointAreas, pointSubareaNames, areapsub$subarea, areapsub$spatialArea)
+    } else if(!is.null(subareaMapDat) && is.null(areapsub)) {
+      if(any(badSubareas)) {
+        stop("poppsub and areapsub are NULL, but subareaMapDat is included")
+      }
+      # could continue here, since pointAreas = rep(1, nrow(newPop)) might not be disastrous, 
+      # but it would be wrong
+    }
+    
+    # use the population per stratum to renormalize population densities
+    if(stratifyByUrban) {
+      popUrb = calibrateByRegion(newPop$pop*pointAreas, pointAreaNamesU, poppa$area, poppa$popUrb)
+      popRur = calibrateByRegion(newPop$pop*pointAreas, pointAreaNamesR, poppa$area, poppa$popRur)
+      
+      newPop$pop = popUrb + popRur
+    } else {
+      newPop$pop = calibrateByRegion(newPop$pop*pointAreas, pointAreaNames, poppa$area, poppa$popTotal)
+    }
+  } else {
+    stop("must either include poppsub, or poppa")
+  }
+  
+  # if requested by user, return the population integration matrix as well 
+  # as the associate population per area and subarea tables
+  if(returnPoppTables) {
+    if(!is.null(areaMapDat)) {
+      areas = getAreaName(cbind(newPop$lon, newPop$lat), shapefile=areaMapDat, areaNameVar=areaNameVar)$areaNames
+      newpoppa = poppRegionFromPopMat(newPop, areas)
+      names(newpoppa)[1] = "area"
+      newpoppa$pctTotal = 100 * newpoppa$popTotal/sum(newpoppa$popTotal)
+      newpoppa$pctUrb = 100 * newpoppa$popUrb/newpoppa$popTotal
+    } else {
+      if(is.null(poppa)) {
+        warning(paste0("returnPoppTables set to TRUE, but areaMapDat is NULL, ", 
+                       "so no area level population table will be calculated"))
+      }
+      newpoppa = NULL
+    }
+    if(!is.null(subareaMapDat)) {
+      subareas = getAreaName(cbind(newPop$lon, newPop$lat), shapefile=subareaMapDat, areaNameVar=subareaNameVar)$areaNames
+      newpoppsub = poppRegionFromPopMat(newPop, subareas)
+      names(newpoppsub)[1] = "subarea"
+      
+      # get area associated with subareas
+      associatedAreas = sapply(newpoppsub$subarea, function(subareaName) {newPop$area[match(subareaName, newPop$subarea)]})
+      newpoppsub$area = associatedAreas
+      
+      newpoppsub = newpoppsub[,c("subarea", "area", "popUrb", "popRur", "popTotal")]
+      
+      # calculate percent urban and percent of total population
+      newpoppsub$pctUrb = 100 * newpoppsub$popUrb / newpoppsub$popTotal
+      newpoppsub$pctTotal = 100 * newpoppsub$popTotal / sum(newpoppsub$popTotal)
+    } else {
+      if(is.null(poppsub)) {
+        warning(paste0("returnPoppTables set to TRUE, but subareaMapDat is NULL, ", 
+                       "so no subarea level population table will be calculated"))
+      }
+      newpoppsub = NULL
+    }
+    
+    return(list(popMat=newPop, poppa=newpoppa, poppsub=newpoppsub))
+  }
+  
+  newPop
+}
+
+#' @describeIn makePopIntegrationTab Generate table of estimates of population
+#'   totals per subarea x urban/rural combination based on population density
+#'   raster at `kmres` resolution "grid", including custom integration points
+#'   for any subarea too small to include grid points at their centroids.
+#' @export
+getPoppsub = function(kmRes=1, pop, domainMapDat, eastLim, northLim, mapProjection, 
+                      poppa, areapa=NULL, areapsub, subareaMapDat, subareaNameVar="NAME_2", 
+                      stratifyByUrban=TRUE, areaMapDat=NULL, areaNameVar="NAME_1", 
+                      areaPolygonSubsetI=NULL, subareaPolygonSubsetI=NULL, 
+                      customSubsetPolygons=NULL, 
+                      mean.neighbor=50, delta=.1, setNAsToZero=TRUE, fixZeroPopDensitySubareas=FALSE) {
+  
+  out = SUMMER::makePopIntegrationTab(kmRes=kmRes, pop=pop, domainMapDat=domainMapDat, 
+                              areapa=areapa, areapsub=areapsub, 
+                              eastLim=eastLim, northLim=northLim, mapProjection=mapProjection, 
+                              subareaMapDat=subareaMapDat, areaMapDat=areaMapDat, 
+                              areaNameVar=areaNameVar, subareaNameVar=subareaNameVar, 
+                              poppa=poppa, stratifyByUrban=stratifyByUrban, 
+                              areaPolygonSubsetI=areaPolygonSubsetI, 
+                              subareaPolygonSubsetI=subareaPolygonSubsetI, 
+                              customSubsetPolygons=customSubsetPolygons, 
+                              mean.neighbor=mean.neighbor, delta=delta, 
+                              setNAsToZero=setNAsToZero, 
+                              fixZeroPopDensitySubareas=fixZeroPopDensitySubareas, 
+                              returnPoppTables=TRUE)
+  out$poppsub
+}
+
+#' @describeIn makePopIntegrationTab Adjust population densities in grid based on a population frame.
+#' @export
+adjustPopMat = function(popMat, poppaTarget=NULL, adjustBy=c("area", "subarea"), stratifyByUrban=TRUE) {
+  adjustBy = match.arg(adjustBy)
+  
+  # sort get population per stratum from poppaTarget
+  if(adjustBy == "area") {
+    areas=sort(unique(poppaTarget$area))
+  } else {
+    areas=sort(unique(poppaTarget$subarea))
+  }
+  
+  if(stratifyByUrban) {
+    targetPopPerStratumUrban = poppaTarget$popUrb
+    targetPopPerStratumRural = poppaTarget$popRur
+    
+    # generate 2 nArea x nPixels matrices for urban and rural strata integrating pixels with respect to population density to get area estimates
+    getAreaStratumIntegrationMatrix = function(getUrban=TRUE) {
+      areas = as.character(areas)
+      
+      if(adjustBy == "area") {
+        mat = t(sapply(areas, function(area) {
+          popMat$area == area
+        }))
+      } else {
+        mat = t(sapply(areas, function(area) {
+          popMat$subarea == area
+        }))
+      }
+      
+      mat = sweep(mat, 2, popMat$pop, "*")
+      
+      sweep(mat, 2, popMat$urban == getUrban, "*")
+    }
+    urbanIntegrationMat = getAreaStratumIntegrationMatrix()
+    ruralIntegrationMat = getAreaStratumIntegrationMatrix(FALSE)
+    
+    # calculate number of people per stratum by integrating the population density surface
+    urbanPopulations = rowSums(urbanIntegrationMat)
+    ruralPopulations = rowSums(ruralIntegrationMat)
+    
+    # adjust each row of the integration matrices to get the correct expected number of children per stratum
+    urbanIntegrationMat = sweep(urbanIntegrationMat, 1, targetPopPerStratumUrban / urbanPopulations, "*")
+    urbanIntegrationMat[urbanPopulations == 0,] = 0
+    ruralIntegrationMat = sweep(ruralIntegrationMat, 1, targetPopPerStratumRural / ruralPopulations, "*")
+    ruralIntegrationMat[ruralPopulations == 0,] = 0
+    
+    # the column sums of the matrices give the correct modified population densities
+    popMat$pop = colSums(urbanIntegrationMat) + colSums(ruralIntegrationMat)
+  } else {
+    targetPopPerArea = poppaTarget$popTotal
+    
+    # generate 2 nArea x nPixels matrices for urban and rural strata integrating pixels with respect to population density to get area estimates
+    getAreaIntegrationMatrix = function() {
+      areas = as.character(areas)
+      
+      if(adjustBy == "area") {
+        mat = t(sapply(areas, function(area) {
+          popMat$area == area
+        }))
+      } else {
+        mat = t(sapply(areas, function(area) {
+          popMat$subarea == area
+        }))
+      }
+      
+      mat = sweep(mat, 2, popMat$pop, "*")
+      mat
+    }
+    integrationMat = getAreaIntegrationMatrix()
+    
+    # calculate number of people per stratum by integrating the population density surface
+    totalPopulations = rowSums(integrationMat)
+    
+    # adjust each row of the integration matrices to get the correct expected number of children per stratum
+    urbanIntegrationMat = sweep(integrationMat, 1, targetPopPerArea / totalPopulations, "*")
+    urbanIntegrationMat[totalPopulations == 0,] = 0
+    
+    # the column sums of the matrices give the correct modified population densities
+    popMat$pop = colSums(integrationMat)
+  }
+  
+  
+  popMat
+}
+
+#' Calibrate the point level totals so their sum matches the regional totals
+#' 
+#' Calibrate/normalize the point level totals so their sum matches the 
+#' regional totals. Technically, the totals can be at any level smaller  
+#' than the region level specified.
+#' 
+#' @param pointTotals Vector of point level totals that will be calibrated/normalized
+#' @param pointRegions Vector of regions associated with each point
+#' @param regions Vector of region names
+#' @param regionTotals Vector of desired region level totals associated with `regions`
+#' 
+#' @return A vector of same length as pointTotals and pointRegions containing 
+#' the calibrated/normalized point totals that sum to the correct regional totals
+#' 
+#' @author John Paige
+#' 
+#' @examples
+#' pointTotals = c(1, 1, 1, 2)
+#' pointRegions = c("a", "a", "b", "b")
+#' regionTotals = c(10, 20)
+#' regions = c("a", "b")
+#' calibrateByRegion(pointTotals, pointRegions, regions, regionTotals)
+#' 
+#' @return Vector of updated point level totals, calibrated to match region totals
+#' 
+#' @export
+calibrateByRegion = function(pointTotals, pointRegions, regions, regionTotals) {
+  regions = as.character(regions)
+  pointRegions = as.character(pointRegions)
+  
+  # generate nRegions x nPoints matrix that separates points totals to each region
+  getRegionIntegrationMatrix = function() {
+    
+    mat = t(sapply(regions, function(regionName) {pointRegions == regionName}))
+    mat = sweep(mat, 2, pointTotals, "*")
+    mat
+  }
+  integrationMat = getRegionIntegrationMatrix()
+  
+  # calculate total per region before calibration
+  tempRegionTotals = rowSums(integrationMat)
+  
+  # adjust each row of the integration matrices to get the correct totals per region
+  integrationMat = sweep(integrationMat, 1, regionTotals / tempRegionTotals, "*")
+  integrationMat[tempRegionTotals == 0,] = 0
+  
+  # check to make sure all zero population totals are in zero population regions. If 
+  # any extra, spread the region population in the associated pointTotals
+  badRegions = (tempRegionTotals == 0) & (regionTotals != 0)
+  if(any(badRegions)) {
+    badRegionIs = which(badRegions)
+    for(i in 1:length(badRegionIs)) {
+      thisRegionI = badRegionIs[i]
+      thisRegion = pointRegions == regions[thisRegionI]
+      integrationMat[thisRegionI,thisRegion] = regionTotals[thisRegionI] * (1/sum(thisRegion))
+    }
+  }
+  
+  # the column sums of the matrix give the correct calibrated regional totals
+  colSums(integrationMat)
+}
+
+#' Generate a population frame of a similar format to poppa argument of \code{\link{simPopCustom}} with a custom set of regions
+#' 
+#' @param popMat Pixellated grid data frame with variables `area` and `pop`. Assumed to be stratified by urban/rural
+#' @param regions character vector of length nPixels giving a custom set of regions for which to generate 
+#'  a population frame using population density
+#' 
+#' @details Urbanicity thresholds are set based on that region's percent population 
+#' urban. Intended as a helper function of \code{\link{getPoppsub}}, but 
+#' can also be used for custom sets of regions (i.e. more than just 2 
+#' areal levels: area and subarea). 
+#' 
+#' @author John Paige
+#' 
+#' @seealso \code{\link{getPoppsub}}
+#' 
+#' @examples
+#' \dontrun{
+#' data(kenyaPopulationData)
+#' 
+#' #' # download Kenya GADM shapefiles from SUMMERdata github repository
+#' githubURL <- "https://github.com/paigejo/SUMMERdata/blob/main/data/kenyaMaps.rda?raw=true"
+#' tempDirectory = "~/"
+#' mapsFilename = paste0(tempDirectory, "/kenyaMaps.rda")
+#' if(!file.exists(mapsFilename)) {
+#'   download.file(githubURL,mapsFilename)
+#' }
+#' 
+#' # load it in
+#' out = load(mapsFilename)
+#' out
+#' adm1@data$NAME_1 = as.character(adm1@data$NAME_1)
+#' adm1@data$NAME_1[adm1@data$NAME_1 == "Trans Nzoia"] = "Trans-Nzoia"
+#' adm1@data$NAME_1[adm1@data$NAME_1 == "Elgeyo-Marakwet"] = "Elgeyo Marakwet"
+#' adm2@data$NAME_1 = as.character(adm2@data$NAME_1)
+#' adm2@data$NAME_1[adm2@data$NAME_1 == "Trans Nzoia"] = "Trans-Nzoia"
+#' adm2@data$NAME_1[adm2@data$NAME_1 == "Elgeyo-Marakwet"] = "Elgeyo Marakwet"
+#' 
+#' # some Admin-2 areas have the same name
+#' adm2@data$NAME_2 = as.character(adm2@data$NAME_2)
+#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Bungoma") & 
+#'   (adm2@data$NAME_2 == "Lugari")] = "Lugari, Bungoma"
+#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Kakamega") & 
+#'   (adm2@data$NAME_2 == "Lugari")] = "Lugari, Kakamega"
+#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Meru") & 
+#'   (adm2@data$NAME_2 == "Igembe South")] = "Igembe South, Meru"
+#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Tharaka-Nithi") & 
+#'   (adm2@data$NAME_2 == "Igembe South")] = "Igembe South, Tharaka-Nithi"
+#' 
+#' # The spatial area of unknown 8 is so small, it causes problems unless 
+#' # its removed or unioned with another subarea. Union it with neighboring 
+#' # Kakeguria:
+#' newadm2 = adm2
+#' unknown8I = which(newadm2$NAME_2 == "unknown 8")
+#' newadm2$NAME_2[newadm2$NAME_2 %in% c("unknown 8", "Kapenguria")] <- "Kapenguria + unknown 8"
+#' admin2.IDs <- newadm2$NAME_2
+#' 
+#' library(maptools)
+#' temp <- unionSpatialPolygons(newadm2, admin2.IDs)
+#' tempData = newadm2@data[-unknown8I,]
+#' tempData = tempData[order(tempData$NAME_2),]
+#' newadm2 <- SpatialPolygonsDataFrame(temp, tempData, match.ID = F)
+#' adm2 = newadm2
+#' 
+#' # download 2014 Kenya population density and associated TIF file
+#' githubURL <- 
+#'   "https://github.com/paigejo/SUMMERdata/blob/main/data/Kenya2014Pop/pop.rda?raw=true"
+#' popFilename = paste0(tempDirectory, "/pop.rda")
+#' if(!file.exists(popFilename)) {
+#'   download.file(githubURL,popFilename)
+#' }
+#' 
+#' githubURL <- 
+#'   paste0("https://github.com/paigejo/SUMMERdata/blob/main/data/Kenya2014Pop/", 
+#'     "worldpop_total_1y_2014_00_00.tif?raw=true")
+#' popTIFFilename = paste0(tempDirectory, "/worldpop_total_1y_2014_00_00.tif")
+#' if(!file.exists(popTIFFilename)) {
+#'   download.file(githubURL,popTIFFilename)
+#' }
+#' 
+#' # load it in
+#' require(terra)
+#' out = load(popFilename)
+#' out
+#' 
+#' # make sure this is correct for re-projections
+#' pop@file@name = paste0(tempDirectory, "/worldpop_total_1y_2014_00_00.tif")
+#' 
+#' eastLim = c(-110.6405, 832.4544)
+#' northLim = c(-555.1739, 608.7130)
+#' 
+#' require(fields)
+#' 
+#' # Now generate a general population integration table at 5km resolution, 
+#' # based on subarea (Admin-2) x urban/rural population totals. This takes 
+#' # ~1 minute
+#' system.time(popMatKenya <- makePopIntegrationTab(
+#'   kmRes=5, pop=pop, domainMapDat=adm0,
+#'   eastLim=eastLim, northLim=northLim, mapProjection=projKenya,
+#'   poppa = poppaKenya, poppsub=poppsubKenya, 
+#'   areaMapDat = adm1, subareaMapDat = adm2,
+#'   areaNameVar = "NAME_1", subareaNameVar="NAME_2"))
+#'   
+#' out = poppRegionFromPopMat(popMatKenya, popMatKenya$area)
+#' out
+#' poppaKenya
+#' 
+#' out = poppRegionFromPopMat(popMatKenya, popMatKenya$subarea)
+#' out
+#' poppsubKenya
+#' 
+#' popMatKenyaUnstratified = popMatKenya
+#' popMatKenyaUnstratified$urban = NULL
+#' out = poppRegionFromPopMat(popMatKenyaUnstratified, popMatKenyaUnstratified$area)
+#' out
+#' poppaKenya
+#' }
+#' @return A table of population totals by region
+#' 
+#' @export
+poppRegionFromPopMat = function(popMat, regions) {
+  stratifyByUrban = ("urban" %in% names(popMat)) && !all(is.na(popMat$urban))
+  
+  if(stratifyByUrban) {
+    # in this case, generate urban, rural, and overall totals within each region
+    out = aggregate(popMat$pop, by=list(region=as.character(regions), urban=popMat$urban), FUN=sum, drop=FALSE)
+    regions = sort(unique(out$region))
+    poppr = data.frame(region=regions, popUrb=out[(length(regions) + 1):(2*length(regions)), 3], 
+                       popRur=out[1:length(regions), 3])
+    poppr$popUrb[is.na(poppr$popUrb)] = 0
+    poppr$popRur[is.na(poppr$popRur)] = 0
+    poppr$popTotal = poppr$popUrb + poppr$popRur
+  } else {
+    # in this case, only generate overall totals within each region
+    out = aggregate(popMat$pop, by=list(region=as.character(regions)), FUN=sum, drop=FALSE)
+    regions = sort(unique(out$region))
+    poppr = data.frame(region=regions, popTotal=out[1:length(regions), 2])
+    poppr$popTotal[is.na(poppr$popTotal)] = 0
+  }
+  
+  poppr
+}
+
+#' Set thresholds of population density for urbanicity classifications 
+#' within each region of the given type 
+#' 
+#' @param popMat pixellated population density data frame with variables 
+#' regionType and `pop`
+#' @param poppr A table with population totals by region of the given type 
+#' (e.g. poppa or poppsub from \code{\link{makePopIntegrationTab}})
+#' @param regionType The variable name from poppr giving the region names. 
+#' Defaults to "area"
+#' 
+#' @details Thresholds are set based on that region's percent population 
+#' urban. Intended as a helper function of \code{\link{makePopIntegrationTab}}. 
+#' 
+#' @author John Paige
+#' 
+#' @seealso \code{\link{makePopIntegrationTab}}
+#' 
+#' @examples
+#' \dontrun{
+#' data(kenyaPopulationData)
+#' 
+#' #' # download Kenya GADM shapefiles from SUMMERdata github repository
+#' githubURL <- "https://github.com/paigejo/SUMMERdata/blob/main/data/kenyaMaps.rda?raw=true"
+#' tempDirectory = "~/"
+#' mapsFilename = paste0(tempDirectory, "/kenyaMaps.rda")
+#' if(!file.exists(mapsFilename)) {
+#'   download.file(githubURL,mapsFilename)
+#' }
+#' 
+#' # load it in
+#' out = load(mapsFilename)
+#' out
+#' adm1@data$NAME_1 = as.character(adm1@data$NAME_1)
+#' adm1@data$NAME_1[adm1@data$NAME_1 == "Trans Nzoia"] = "Trans-Nzoia"
+#' adm1@data$NAME_1[adm1@data$NAME_1 == "Elgeyo-Marakwet"] = "Elgeyo Marakwet"
+#' adm2@data$NAME_1 = as.character(adm2@data$NAME_1)
+#' adm2@data$NAME_1[adm2@data$NAME_1 == "Trans Nzoia"] = "Trans-Nzoia"
+#' adm2@data$NAME_1[adm2@data$NAME_1 == "Elgeyo-Marakwet"] = "Elgeyo Marakwet"
+#' 
+#' # some Admin-2 areas have the same name
+#' adm2@data$NAME_2 = as.character(adm2@data$NAME_2)
+#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Bungoma") & 
+#'   (adm2@data$NAME_2 == "Lugari")] = "Lugari, Bungoma"
+#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Kakamega") & 
+#'   (adm2@data$NAME_2 == "Lugari")] = "Lugari, Kakamega"
+#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Meru") & 
+#'   (adm2@data$NAME_2 == "Igembe South")] = "Igembe South, Meru"
+#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Tharaka-Nithi") & 
+#'   (adm2@data$NAME_2 == "Igembe South")] = "Igembe South, Tharaka-Nithi"
+#' 
+#' # The spatial area of unknown 8 is so small, it causes problems unless 
+#' # its removed or unioned with another subarea. Union it with neighboring 
+#' # Kakeguria:
+#' newadm2 = adm2
+#' unknown8I = which(newadm2$NAME_2 == "unknown 8")
+#' newadm2$NAME_2[newadm2$NAME_2 %in% c("unknown 8", "Kapenguria")] <- "Kapenguria + unknown 8"
+#' admin2.IDs <- newadm2$NAME_2
+#' 
+#' library(maptools)
+#' temp <- unionSpatialPolygons(newadm2, admin2.IDs)
+#' tempData = newadm2@data[-unknown8I,]
+#' tempData = tempData[order(tempData$NAME_2),]
+#' newadm2 <- SpatialPolygonsDataFrame(temp, tempData, match.ID = F)
+#' adm2 = newadm2
+#' 
+#' # download 2014 Kenya population density and associated TIF file
+#' githubURL <- 
+#'   "https://github.com/paigejo/SUMMERdata/blob/main/data/Kenya2014Pop/pop.rda?raw=true"
+#' popFilename = paste0(tempDirectory, "/pop.rda")
+#' if(!file.exists(popFilename)) {
+#'   download.file(githubURL,popFilename)
+#' }
+#' 
+#' githubURL <- 
+#'   paste0("https://github.com/paigejo/SUMMERdata/blob/main/data/Kenya2014Pop/", 
+#'     "worldpop_total_1y_2014_00_00.tif?raw=true")
+#' popTIFFilename = paste0(tempDirectory, "/worldpop_total_1y_2014_00_00.tif")
+#' if(!file.exists(popTIFFilename)) {
+#'   download.file(githubURL,popTIFFilename)
+#' }
+#' 
+#' # load it in
+#' require(terra)
+#' out = load(popFilename)
+#' out
+#' 
+#' # make sure this is correct for re-projections
+#' pop@file@name = paste0(tempDirectory, "/worldpop_total_1y_2014_00_00.tif")
+#' 
+#' eastLim = c(-110.6405, 832.4544)
+#' northLim = c(-555.1739, 608.7130)
+#' 
+#' require(fields)
+#' 
+#' # Now generate a general population integration table at 5km resolution, 
+#' # based on subarea (Admin-2) x urban/rural population totals. This takes 
+#' # ~1 minute
+#' system.time(popMatKenya <- makePopIntegrationTab(
+#'   kmRes=5, pop=pop, domainMapDat=adm0,
+#'   eastLim=eastLim, northLim=northLim, mapProjection=projKenya,
+#'   poppa = poppaKenya, poppsub=poppsubKenya, 
+#'   areaMapDat = adm1, subareaMapDat = adm2,
+#'   areaNameVar = "NAME_1", subareaNameVar="NAME_2"))
+#'   
+#' out = setThresholdsByRegion(popMatKenya, poppaKenya)
+#' out
+#' 
+#' out = setThresholdsByRegion(popMatKenya, poppsubKenya, regionType="subarea")
+#' out
+#' }
+#' 
+#' @return A list of region names and their urbanicity thresholds in population density
+#' 
+#' @export
+setThresholdsByRegion = function(popMat, poppr, regionType="area") {
+  
+  getRegionThresh = function(regionName) {
+    # do the setup
+    thisRegion = as.character(popMat[[regionType]]) == regionName
+    thisPop = popMat$pop[thisRegion]
+    thisTot = sum(thisPop, na.rm=TRUE)
+    pctUrb = poppr$pctUrb[poppr[[regionType]] == regionName]/100
+    pctRural = 1 - pctUrb
+    
+    # must round to avoid numerical inaccuracies
+    if(round(pctUrb, 10) == 1) {
+      return(-Inf)
+    } else if(round(pctUrb, 10) == 0) {
+      return(Inf)
+    }
+    
+    # calculate threshold by integrating ecdf via sorted value cumulative sum
+    sortedPop = sort(thisPop)
+    cumsumPop = cumsum(sortedPop)
+    threshI = match(1, cumsumPop >= thisTot*pctRural)
+    if((threshI != 1) && (threshI != length(thisPop))) {
+      thresh = sortedPop[threshI]
+    } else {
+      # make sure not all pixels are urban or all are rural
+      if(threshI == 1) {
+        thresh = mean(c(sortedPop[1], sortedPop[2]), na.rm=TRUE)
+      } else {
+        thresh = mean(c(sortedPop[length(thisPop)], sortedPop[length(thisPop)-1]), na.rm=TRUE)
+      }
+    }
+    
+    thresh
+  }
+  
+  # compute threshold for each area
+  regions = poppr[[regionType]]
+  threshes = sapply(regions, getRegionThresh)
+  
+  list(regions=regions, threshes=threshes)
+}
+
+
+
diff --git a/R/projKenya.R b/R/projKenya.R
new file mode 100755
index 0000000..fe29e1a
--- /dev/null
+++ b/R/projKenya.R
@@ -0,0 +1,80 @@
+#' Map projection for Kenya
+#' 
+#' Projection specifically chosen for Kenya. Project from lat/lon to northing/easting 
+#' in kilometers.  Uses epsg=21097 with km units. May not work on all systems due to 
+#' differences in the behavior between different PROJ and GDAL versions.
+#' 
+#' @param lon either longitude or, if inverse == TRUE, easting in km
+#' @param lat either latitude or, if inverse == TRUE, northing in km
+#' @param inverse if FALSE, projects from lon/lat to easting/northing.  Else from easting/northing to lon/lat
+#' @return A 2 column matrix of easting/northing coordinates in km if inverse == FALSE. Otherwise, a 2 column matrix of longitude/latitude coordinates.
+#' @author John Paige
+#' @examples
+#' eastLim = c(-110.6405, 832.4544)
+#' northLim = c(-555.1739, 608.7130)
+#' coordMatrixEN = cbind(eastLim, northLim)
+#' coordMatrixLL = projKenya(coordMatrixEN, inverse=TRUE)
+#' 
+#' coordMatrixLL
+#' # if the coordMatrixLL isn't the following, projKenya may not support 
+#' # your installation of GDAL and/or PROJ:
+#' #      east north
+#' # [1,] 33.5  -5.0
+#' # [2,] 42.0   5.5
+#' 
+#' projKenya(coordMatrixLL, inverse=FALSE)
+#' # regardless of your PROJ/GDAL installations, the result of the 
+#' # above line of could should be:
+#' #            lon       lat
+#' # [1,] -110.6405 -555.1739
+#' # [2,]  832.4544  608.7130
+#' 
+#' @importFrom terra gdal
+#' @importFrom sp SpatialPoints
+#' @importFrom sp CRS
+#' @export
+projKenya = function(lon, lat=NULL, inverse=FALSE) {
+  if(is.null(lat)) {
+    lat = lon[,2]
+    lon = lon[,1]
+  }
+  
+  # determine version of PROJ
+  ver = terra::gdal(lib="proj")
+  PROJ6 <- as.numeric(substr(ver, 1, 1)) >= 6
+  
+  if(!inverse) {
+    # from lon/lat coords to easting/northing
+    if(!PROJ6) {
+      lonLatCoords = sp::SpatialPoints(cbind(lon, lat), proj4string=sp::CRS("+proj=longlat"))
+    } else {
+      lonLatCoords = sp::SpatialPoints(cbind(lon, lat), proj4string=sp::CRS(SRS_string="EPSG:4326"))
+    }
+    coordsEN = sp::spTransform(lonLatCoords, sp::CRS("+init=epsg:21097 +units=m"))
+    
+    out = attr(coordsEN, "coords")
+    colnames(out) = c("east", "north")
+    
+    # convert coordinates from m to km
+    out = out/1000
+  }
+  else {
+    # from easting/northing coords to lon/lat
+    
+    # first convert from km to m
+    east = lon*1000
+    north = lat*1000
+    
+    coordsEN = sp::SpatialPoints(cbind(east, north), proj4string=sp::CRS("+init=epsg:21097 +units=m"))
+    if(!PROJ6) {
+      lonLatCoords = sp::spTransform(coordsEN, sp::CRS("+proj=longlat"))
+    } else {
+      lonLatCoords = sp::spTransform(coordsEN, sp::CRS(SRS_string="EPSG:4326"))
+    }
+    
+    out = attr(lonLatCoords, "coords")
+    colnames(out) = c("lon", "lat")
+  }
+  
+  out
+}
\ No newline at end of file
diff --git a/R/simPop.R b/R/simPop.R
new file mode 100755
index 0000000..e6a6ea8
--- /dev/null
+++ b/R/simPop.R
@@ -0,0 +1,1926 @@
+# Functions for simulating populations
+
+#' Simulate populations and areal prevalences
+#' 
+#' Given a spatial risk model, simulate populations and population prevalences at the 
+#' enumeration area level (represented as points), and aggregate to the pixel and 
+#' administrative areal level.
+#' 
+#' @param nsim Number of simulations
+#' @param easpa data.frame of enumeration area, households, and target population per area stratified by urban/rural with variables:
+#' \describe{
+#'   \item{area}{name of area}
+#'   \item{EAUrb}{number of urban enumeration areas in the area}
+#'   \item{EARur}{number of rural enumeration areas in the area}
+#'   \item{EATotal}{total number of enumeration areas in the area}
+#'   \item{HHUrb}{number of urban households in the area}
+#'   \item{HHRur}{number of rural households in the area}
+#'   \item{HHTotal}{total number of households in the area}
+#'   \item{popUrb}{total urban (target) population of area}
+#'   \item{popRur}{total rural (target) population of area}
+#'   \item{popTotal}{total (general) population of area}
+#' }
+#' @param popMat Pixellated grid data frame with variables `lon`, `lat`, `pop`, `area`, `subareas` (if subareaLevel is TRUE), `urban` (if stratifyByUrban is TRUE), `east`, and `north`
+#' @param targetPopMat Same as popMat, but `pop` variable gives target rather than general population
+#' @param poppsub data.frame of population per subarea separated by 
+#' urban/rural using for population density grid normalization or urbanicity 
+#' classification. Often based on extra fine scale population density grid. Has variables:
+#' @param spdeMesh Triangular mesh for the SPDE
+#' @param margVar Marginal variance of the spatial process, excluding cluster effects. 
+#'          If 0, no spatial component is included
+#' @param effRange Effective spatial range for the SPDE model
+#' @param beta0 Intercept of logit model for risk
+#' @param gamma Effect of urban on logit scale for logit model for risk
+#' @param sigmaEpsilon Standard deviation on the logit scale for iid Gaussian EA level random effects in the risk model
+#' @param seed Random number generator seed
+#' @param inla.seed Seed input to inla.qsample. 0L sets seed intelligently, 
+#'            > 0 sets a specific seed, < 0 keeps existing RNG
+#' @param nHHSampled Number of households sampled per enumeration area. Default is 25 to match DHS surveys
+#' @param stratifyByUrban Whether or not to stratify simulations by urban/rural classification
+#' @param subareaLevel Whether or not to aggregate the population by subarea
+#' @param doFineScaleRisk Whether or not to calculate the fine scale risk at each aggregation level in addition to the prevalence
+#' @param doSmoothRisk Whether or not to calculate the smooth risk at each aggregation level in addition to the prevalence
+#' @param doSmoothRiskLogisticApprox Whether to use logistic approximation when calculating smooth risk. See 
+#' \code{\link{logitNormMean}} for details.
+#' @param min1PerSubarea If TRUE, ensures there is at least 1 EA per subarea. If subareas are particularly unlikely to 
+#' have enumeration areas since they have a very low proportion of the population in an area, then setting this to TRUE may be 
+#' computationally intensive.
+#' @param logitRiskDraws nIntegrationPoints x nsim dimension matrix of draws from the pixel leve risk field on logit scale, leaving out 
+#' potential nugget/cluster/EA level effects.
+#' @param sigmaEpsilonDraws nsim length vector of draws of cluster effect logit scale SD (joint draws with logitRiskDraws)
+#' @param validationPixelI CURRENTLY FOR TESTING PURPOSES ONLY a set of indices of pixels for which we want to simulate populations (used for pixel level validation)
+#' @param validationClusterI CURRENTLY FOR TESTING PURPOSES ONLY a set of indices of cluster for which we want to simulate populations (used for cluster level validation)
+#' @param clustersPerPixel CURRENTLY FOR TESTING PURPOSES ONLY Used for pixel level validation. Fixes the number of EAs per pixel.
+#' @param returnEAinfo If TRUE, returns information on every individual EA (BAU) for each simulated population
+#' @param epsc nEAs x nsim matrix of simulated EA (BAU) level iid effects representing fine scale variation in 
+#'       risk. If NULL, they are simulated as iid Gaussian on a logit scale with 
+#'       SD given by sigmaEpsilonDraws
+#' list(pixelPop=outPixelLevel, subareaPop=outSubareaLevel, areaPop=outAreaLevel, logitRiskDraws=logitRiskDraws)
+#' @return The simulated population aggregated to the enumeration area, 
+#' pixel, subarea (generally Admin2), and area (generally Admin1) levels. Output includes:
+#' \item{pixelPop}{A list of pixel level population aggregates}
+#' \item{subareaPop}{A list of `subarea` level population aggregates}
+#' \item{areaPop}{A list of `area` level population aggregates}
+#' Each of these contains population numerator and denominator as well as prevalence and risk 
+#' information aggregated to the appropriate level.
+#' 
+#' @details For population simulation and aggregation, we consider three models: smooth  
+#' risk, fine scale risk, and the fine scale prevalence. All will be described in detail 
+#' in a paper in preparation. In the smooth risk model, pixel level risks are integrated 
+#' with respect to target population density when producing areal estimates on a prespecified 
+#' set of integration points. The target population may be, for example, neonatals rather 
+#' than the general population. In the fine scale models, enumeration areas (EAs) are simulated as 
+#' point locations and iid random effects in the EA level risk are allowed. EAs and populations are dispersed conditional on the (possibly 
+#' approximately) known number of EAs, households, and target population at a particular 
+#' areal level (these we call `areas`) using multilevel multinomial sampling, first 
+#' sampling the EAs, then distributing households among the EAs, then the target population 
+#' among the households. Any areal level below the `areas` we call `subareas`. For instance, 
+#' the `areas` might be Admin-1 if that is the smallest level at which the number of EAs, 
+#' households, and people is known, and the `subareas` might be Admin-2. The multilevel 
+#' multinomial sampling may be stratified by urban/rural within the areas if the number of 
+#' EAs, households, and people is also approximately known at that level.
+#' 
+#' Within each EA we assume a fixed probability of an event occurring, which is the fine scale `risk`. 
+#' The fine scale `prevalence` is the empirical proportion of events within that EA. We assume EA 
+#' level logit scale iid N(0, sigmaEpsilon^2) random effects in the risk model. When averaged 
+#' with equal weights over all EAs in an areal unit, this forms the fine scale risk. When 
+#' instead the population numerators and denominators are aggregated, and are used to 
+#' calculate the empirical proportion of events occurring in an areal unit, the resulting 
+#' quantity is the fine scale prevalence in that areal unit.
+#' 
+#' Note that these functions can be used for either simulating populations for simulation 
+#' studies, or for generating predictions accounting for uncertainty in EA locations 
+#' and fine scale variation occuring at the EA level due to EA level iid random effects. 
+#' Required, however, is a seperately fit EA level spatial risk model 
+#' and information on the spatial population density and the population frame.
+#' 
+#' @author John Paige
+#' @references In preparation
+#' @seealso \code{\link{simPopCustom}}, \code{\link{makePopIntegrationTab}}, \code{\link{adjustPopMat}}, \code{\link{simSPDE}}.
+#' @examples 
+#' \dontrun{
+#' ## In this script we will create 5km resolution pixellated grid over Kenya, 
+#' ## and generate tables of estimated (both target and general) population 
+#' ## totals at the area (e.g. Admin-1) and subarea (e.g. Admin-2) levels. Then 
+#' ## we will use that to simulate populations of 
+#' 
+#' # download Kenya GADM shapefiles from SUMMERdata github repository
+#' githubURL <- paste0("https://github.com/paigejo/SUMMERdata/blob/main/data/", 
+#'                     "kenyaMaps.rda?raw=true")
+#' tempDirectory = "~/"
+#' mapsFilename = paste0(tempDirectory, "/kenyaMaps.rda")
+#' if(!file.exists(mapsFilename)) {
+#'   download.file(githubURL,mapsFilename)
+#' }
+#' 
+#' # load it in
+#' out = load(mapsFilename)
+#' out
+#' adm1@data$NAME_1 = as.character(adm1@data$NAME_1)
+#' adm1@data$NAME_1[adm1@data$NAME_1 == "Trans Nzoia"] = "Trans-Nzoia"
+#' adm1@data$NAME_1[adm1@data$NAME_1 == "Elgeyo-Marakwet"] = "Elgeyo Marakwet"
+#' adm2@data$NAME_1 = as.character(adm2@data$NAME_1)
+#' adm2@data$NAME_1[adm2@data$NAME_1 == "Trans Nzoia"] = "Trans-Nzoia"
+#' adm2@data$NAME_1[adm2@data$NAME_1 == "Elgeyo-Marakwet"] = "Elgeyo Marakwet"
+#' 
+#' # some Admin-2 areas have the same name
+#' adm2@data$NAME_2 = as.character(adm2@data$NAME_2)
+#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Bungoma") & 
+#'                    (adm2@data$NAME_2 == "Lugari")] = "Lugari, Bungoma"
+#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Kakamega") & 
+#'                    (adm2@data$NAME_2 == "Lugari")] = "Lugari, Kakamega"
+#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Meru") & 
+#'                    (adm2@data$NAME_2 == "Igembe South")] = "Igembe South, Meru"
+#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Tharaka-Nithi") & 
+#'                    (adm2@data$NAME_2 == "Igembe South")] = "Igembe South, Tharaka-Nithi"
+#' 
+#' # The spatial area of unknown 8 is so small, it causes problems unless its removed or 
+#' # unioned with another subarea. Union it with neighboring Kakeguria:
+#' newadm2 = adm2
+#' unknown8I = which(newadm2$NAME_2 == "unknown 8")
+#' newadm2$NAME_2[newadm2$NAME_2 %in% c("unknown 8", "Kapenguria")] <- 
+#'   "Kapenguria + unknown 8"
+#' admin2.IDs <- newadm2$NAME_2
+#' 
+#' library(maptools)
+#' temp <- unionSpatialPolygons(newadm2, admin2.IDs)
+#' tempData = newadm2@data[-unknown8I,]
+#' tempData = tempData[order(tempData$NAME_2),]
+#' newadm2 <- SpatialPolygonsDataFrame(temp, tempData, match.ID = F)
+#' adm2 = newadm2
+#' 
+#' # download 2014 Kenya population density and associated TIF file
+#' githubURL <- paste0("https://github.com/paigejo/SUMMERdata/blob/main/data/", 
+#'                     "Kenya2014Pop/pop.rda?raw=true")
+#' popFilename = paste0(tempDirectory, "/pop.rda")
+#' if(!file.exists(popFilename)) {
+#'   download.file(githubURL,popFilename)
+#' }
+#' 
+#' githubURL <- paste0("https://github.com/paigejo/SUMMERdata/blob/main/data/", 
+#'                     "Kenya2014Pop/worldpop_total_1y_2014_00_00.tif?raw=true")
+#' popTIFFilename = paste0(tempDirectory, "/worldpop_total_1y_2014_00_00.tif")
+#' if(!file.exists(popTIFFilename)) {
+#'   download.file(githubURL,popTIFFilename)
+#' }
+#' 
+#' # load it in
+#' require(raster)
+#' out = load(popFilename)
+#' out
+#' 
+#' # make sure this is correct for re-projections
+#' pop@file@name = paste0(tempDirectory, "/worldpop_total_1y_2014_00_00.tif")
+#' 
+#' eastLim = c(-110.6405, 832.4544)
+#' northLim = c(-555.1739, 608.7130)
+#' 
+#' ## Construct poppsubKenya, a table of urban/rural general population totals 
+#' ## in each subarea. Technically, this is not necessary since we can load in 
+#' ## poppsubKenya via data(kenyaPopulationData). First, we will need to calculate 
+#' ## the areas in km^2 of the areas and subareas
+#' 
+#' library(rgdal)
+#' library(sp)
+#' 
+#' # use Lambert equal area projection of areas (Admin-1) and subareas (Admin-2)
+#' midLon = mean(adm1@bbox[1,])
+#' midLat = mean(adm1@bbox[2,])
+#' p4s = paste0("+proj=laea +x_0=0 +y_0=0 +lon_0=", midLon, 
+#'              " +lat_0=", midLat, " +units=km")
+#' 
+#' library(rgdal)
+#' 
+#' adm1proj <- spTransform(adm1, CRS(p4s))
+#' adm2proj <- spTransform(adm2, CRS(p4s))
+#' 
+#' # now calculate spatial area in km^2
+#' library(rgeos)
+#' admin1Areas = gArea(adm1proj, TRUE)
+#' admin2Areas = gArea(adm2proj, TRUE)
+#' areapaKenya = data.frame(area=adm1proj@data$NAME_1, spatialArea=admin1Areas)
+#' areapsubKenya = data.frame(area=adm2proj@data$NAME_1, subarea=adm2proj@data$NAME_2, 
+#'                            spatialArea=admin2Areas)
+#' 
+#' # Calculate general population totals at the subarea (Admin-2) x urban/rural 
+#' # level and using 1km resolution population grid. Assign urbanicity by 
+#' # thresholding population density based on estimated proportion population 
+#' # urban/rural, making sure total area (Admin-1) urban/rural populations in 
+#' # each area matches poppaKenya.
+#' require(fields)
+#' # NOTE: the following function will typically take ~20 minutes. Can speed up 
+#' #       by setting kmRes to be higher, but we recommend fine resolution for 
+#' #       this step, since it only needs to be done once.
+#' system.time(poppsubKenya <- getPoppsub(
+#'   kmRes=1, pop=pop, domainPoly=kenyaPoly,
+#'   eastLim=eastLim, northLim=northLim, mapProjection=projKenya,
+#'   poppa = poppaKenya, areapa=areapaKenya, areapsub=areapsubKenya, 
+#'   areaMapDat=adm1, subareaMapDat=adm2, 
+#'   areaNameVar = "NAME_1", subareaNameVar="NAME_2"))
+#' 
+#' # Now generate a general population integration table at 5km resolution, 
+#' # based on subarea (Admin-2) x urban/rural population totals. This takes 
+#' # ~1 minute
+#' system.time(popMatKenya <- makePopIntegrationTab(
+#'   kmRes=5, pop=pop, domainPoly=kenyaPoly,
+#'   eastLim=eastLim, northLim=northLim, mapProjection=projKenya,
+#'   poppa = poppaKenya, poppsub=poppsubKenya, 
+#'   areaMapDat = adm1, subareaMapDat = adm2,
+#'   areaNameVar = "NAME_1", subareaNameVar="NAME_2"))
+#' 
+#' ## Adjust popMat to be target (neonatal) rather than general population 
+#' ## density. First create the target population frame
+#' ## (these numbers are based on IPUMS microcensus data)
+#' mothersPerHouseholdUrb = 0.3497151
+#' childrenPerMotherUrb = 1.295917
+#' mothersPerHouseholdRur = 0.4787696
+#' childrenPerMotherRur = 1.455222
+#' targetPopPerStratumUrban = easpaKenya$HHUrb * mothersPerHouseholdUrb * 
+#'   childrenPerMotherUrb
+#' targetPopPerStratumRural = easpaKenya$HHRur * mothersPerHouseholdRur * 
+#'   childrenPerMotherRur
+#' easpaKenyaNeonatal = easpaKenya
+#' easpaKenyaNeonatal$popUrb = targetPopPerStratumUrban
+#' easpaKenyaNeonatal$popRur = targetPopPerStratumRural
+#' easpaKenyaNeonatal$popTotal = easpaKenyaNeonatal$popUrb + 
+#'   easpaKenyaNeonatal$popRur
+#' easpaKenyaNeonatal$pctUrb = 100 * easpaKenyaNeonatal$popUrb / 
+#'   easpaKenyaNeonatal$popTotal
+#' easpaKenyaNeonatal$pctTotal = 
+#'   100 * easpaKenyaNeonatal$popTotal / sum(easpaKenyaNeonatal$popTotal)
+#' 
+#' # Generate the target population density by scaling the current 
+#' # population density grid at the Admin1 x urban/rural level
+#' popMatKenyaNeonatal = adjustPopMat(popMatKenya, easpaKenyaNeonatal)
+#' 
+#' # Generate neonatal population table from the neonatal population integration 
+#' # matrix. This is technically not necessary for population simulation purposes, 
+#' # but is here for illustrative purposes
+#' poppsubKenyaNeonatal = poppRegionFromPopMat(popMatKenyaNeonatal, 
+#'                                             popMatKenyaNeonatal$subarea)
+#' poppsubKenyaNeonatal = 
+#'   cbind(subarea=poppsubKenyaNeonatal$region, 
+#'         area=adm2@data$NAME_1[match(poppsubKenyaNeonatal$region, adm2@data$NAME_2)], 
+#'         poppsubKenyaNeonatal[,-1])
+#' print(head(poppsubKenyaNeonatal))
+#' 
+#' ## Now we're ready to simulate neonatal populations along with neonatal 
+#' ## mortality risks and prevalences
+#' 
+#' # use the following model to simulate the neonatal population based roughly 
+#' # on Paige et al. (2020) neonatal mortality modeling for Kenya.
+#' beta0=-2.9 # intercept
+#' gamma=-1 # urban effect
+#' rho=(1/3)^2 # spatial variance
+#' effRange = 400 # effective spatial range in km
+#' sigmaEpsilon=sqrt(1/2.5) # cluster (nugget) effect standard deviation
+#' 
+#' # Run a simulation! This produces multiple dense nEA x nsim and nPixel x nsim 
+#' # matrices. In the future sparse matrices and chunk by chunk computations 
+#' # may be incorporated.
+#' simPop = simPopSPDE(nsim=1, easpa=easpaKenyaNeonatal, 
+#'                     popMat=popMatKenya, targetPopMat=popMatKenyaNeonatal, 
+#'                     poppsub=poppsubKenya, spdeMesh=kenyaMesh, 
+#'                     margVar=rho, sigmaEpsilon=sigmaEpsilon, 
+#'                     gamma=gamma, effRange=effRange, beta0=beta0, 
+#'                     seed=12, inla.seed=12, nHHSampled=25, 
+#'                     stratifyByUrban=TRUE, subareaLevel=TRUE, 
+#'                     doFineScaleRisk=TRUE, doSmoothRisk=TRUE, 
+#'                     min1PerSubarea=TRUE)
+#' 
+#' # get average absolute percent error relative to fine scale prevalence at Admin-2 level
+#' tempDat = simPop$subareaPop$aggregationResults[,c("region", "pFineScalePrevalence", 
+#'                                                   "pFineScaleRisk", "pSmoothRisk")]
+#' 100*mean(abs(tempDat$pFineScalePrevalence - tempDat$pFineScaleRisk) / 
+#'            tempDat$pFineScalePrevalence)
+#' 100*mean(abs(tempDat$pFineScalePrevalence - tempDat$pSmoothRisk) / 
+#'            tempDat$pFineScalePrevalence)
+#' 100*mean(abs(tempDat$pFineScaleRisk - tempDat$pSmoothRisk) / 
+#'            tempDat$pFineScalePrevalence)
+#' 
+#' # verify number of EAs per area and subarea
+#' cbind(aggregate(simPop$eaPop$eaSamples[,1], by=list(area=popMatKenya$area), FUN=sum), 
+#'       trueNumEAs=easpaKenya$EATotal[order(easpaKenya$area)])
+#' aggregate(simPop$eaPop$eaSamples[,1], by=list(area=popMatKenya$subarea), FUN=sum)
+#' 
+#' ## plot simulated population
+#' # directory for plotting 
+#' # (mapPlot takes longer when it doesn't save to a file)
+#' tempDirectory = "~/"
+#' 
+#' # pixel level
+#' zlim = c(0, quantile(probs=.995, c(simPop$pixelPop$pFineScalePrevalence, 
+#'                                    simPop$pixelPop$pFineScaleRisk, 
+#'                                    simPop$pixelPop$pSmoothRisk), na.rm=TRUE))
+#' pdf(file=paste0(tempDirectory, "simPopSPDEPixel.pdf"), width=8, height=8)
+#' par(mfrow=c(2,2))
+#' plot(adm2, asp=1)
+#' points(simPop$eaPop$eaDatList[[1]]$lon, simPop$eaPop$eaDatList[[1]]$lat, pch=".", col="blue")
+#' plot(adm2, asp=1)
+#' quilt.plot(popMatKenya$lon, popMatKenya$lat, simPop$pixelPop$pFineScalePrevalence, 
+#'            zlim=zlim, add=TRUE, FUN=function(x) {mean(x, na.rm=TRUE)})
+#' plot(adm2, asp=1)
+#' quilt.plot(popMatKenya$lon, popMatKenya$lat, simPop$pixelPop$pFineScaleRisk, 
+#'            zlim=zlim, add=TRUE, FUN=function(x) {mean(x, na.rm=TRUE)})
+#' quilt.plot(popMatKenya$lon, popMatKenya$lat, simPop$pixelPop$pSmoothRisk, 
+#'            zlim=zlim, FUN=function(x) {mean(x, na.rm=TRUE)}, asp=1)
+#' plot(adm2, add=TRUE)
+#' dev.off()
+#' 
+#' range(simPop$eaPop$eaDatList[[1]]$N)
+#' 
+#' # areal (Admin-1) level: these results should look essentially identical
+#' 
+#' tempDat = simPop$areaPop$aggregationResults[,c("region", "pFineScalePrevalence", 
+#'                                                "pFineScaleRisk", "pSmoothRisk")]
+#' pdf(file=paste0(tempDirectory, "simPopSPDEAdmin-1.pdf"), width=7, height=7)
+#' mapPlot(tempDat, 
+#'         variables=c("pFineScalePrevalence", "pFineScaleRisk", "pSmoothRisk"), 
+#'         geo=adm1, by.geo="NAME_1", by.data="region", is.long=FALSE)
+#' dev.off()
+#' 
+#' # subareal (Admin-2) level: these results should look subtley different 
+#' # depending on the type of prevalence/risk considered
+#' tempDat = simPop$subareaPop$aggregationResults[,c("region", "pFineScalePrevalence", 
+#'                                                   "pFineScaleRisk", "pSmoothRisk")]
+#' pdf(file=paste0(tempDirectory, "simPopSPDEAdmin-2.pdf"), width=7, height=7)
+#' mapPlot(tempDat, 
+#'         variables=c("pFineScalePrevalence", "pFineScaleRisk", "pSmoothRisk"), 
+#'         geo=adm2, by.geo="NAME_2", by.data="region", is.long=FALSE)
+#' dev.off()
+#' }
+#' @name simPop
+NULL
+#' @importFrom stats cor
+#' @describeIn simPop
+#' Simulate populations and population prevalences given census frame and population density 
+#' information. Uses SPDE model for generating spatial risk and can include iid cluster 
+#' level effect.
+#' 
+#' @export
+simPopSPDE = function(nsim=1, easpa, popMat, targetPopMat, poppsub, spdeMesh, 
+                      margVar=0.243, sigmaEpsilon=sqrt(0.463), 
+                      gamma=0.009, effRange=406.51, beta0=-3.922, 
+                      seed=NULL, inla.seed=-1L, nHHSampled=25, 
+                      stratifyByUrban=TRUE, subareaLevel=TRUE, 
+                      doFineScaleRisk=FALSE, doSmoothRisk=FALSE, 
+                      doSmoothRiskLogisticApprox=FALSE, 
+                      min1PerSubarea=TRUE) {
+  if(!is.null(seed))  {
+    set.seed(seed)
+    
+    if(inla.seed < 0) {
+      stop("seed specified, but not inla.seed. Set inla.seed to a positive integer to ensure reproducibility")
+    }
+  }
+  
+  if(nsim > 1) {
+    warning("nsim > 1. eaDat will only be generated for first simulation")
+  }
+  
+  totalEAs = sum(easpa$EATotal)
+  totalHouseholds = sum(easpa$HHTotal)
+  
+  ### generate Binomial probabilities from transformed logit scale GP
+  # generate SPDE simulations
+  pixelCoords = cbind(popMat$east, popMat$north)
+  
+  if(margVar != 0) {
+    SPDEArgs = list(coords=pixelCoords, nsim=nsim, margVar=margVar, effRange=effRange, 
+                    mesh=spdeMesh, inla.seed=inla.seed)
+    simVals = do.call("simSPDE", SPDEArgs)
+  } else {
+    simVals = matrix(rep(0, nrow(pixelCoords)), ncol=1)
+  }
+  
+  # add in intercept
+  simVals = simVals + beta0
+  
+  # add in urban effect
+  simVals = sweep(simVals, 1, gamma*popMat$urban, "+")
+  
+  # simulate nugget/cluster effect
+  epsc = matrix(stats::rnorm(totalEAs*nsim, sd=sigmaEpsilon), ncol=nsim)
+  
+  # transform back to original scale for the pixel level probabilities
+  probsNoNug = expit(simVals)
+  
+  # simulate the enumeration areas
+  logitRiskDraws = simVals
+  sigmaEpsilonDraws = rep(sigmaEpsilon, nsim)
+  
+  print("Using SPDE model to simulate EA and pixel level populations")
+  outPixelLevel = simPopCustom(logitRiskDraws=logitRiskDraws, sigmaEpsilonDraws=sigmaEpsilonDraws, easpa=easpa, 
+                              popMat=popMat, targetPopMat=targetPopMat, 
+                              stratifyByUrban=stratifyByUrban, doSmoothRisk=doSmoothRisk, 
+                              doSmoothRiskLogisticApprox=doSmoothRiskLogisticApprox, 
+                              doFineScaleRisk=doFineScaleRisk, poppsub=poppsub, 
+                              subareaLevel=subareaLevel, min1PerSubarea=min1PerSubarea, 
+                              returnEAinfo=TRUE, epsc=epsc)
+  eaPop = list(eaDatList=outPixelLevel$eaDatList, eaSamples=outPixelLevel$eaSamples)
+  outPixelLevel$eaDatList = NULL
+  outPixelLevel$eaSamples = NULL
+  
+  if(subareaLevel) {
+    print("aggregating from pixel level to subarea level")
+    outSubareaLevel = pixelPopToArea(pixelLevelPop=outPixelLevel, eaSamples=eaPop$eaSamples, 
+                                     areas=popMat$subarea, stratifyByUrban=stratifyByUrban, 
+                                     targetPopMat=targetPopMat, doFineScaleRisk=doFineScaleRisk, 
+                                     doSmoothRisk=doSmoothRisk)
+    
+    print("aggregating from subarea level to area level")
+    
+    # get areas associated with each subarea for aggregation
+    tempAreasFrom = popMat$subarea
+    tempAreasTo = popMat$area
+    areasFrom = sort(unique(tempAreasFrom))
+    areasToI = match(areasFrom, tempAreasFrom)
+    areasTo = tempAreasTo[areasToI]
+    
+    # do the aggregation from subareas to areas
+    outAreaLevel = areaPopToArea(areaLevelPop=outSubareaLevel, areasFrom=areasFrom, areasTo=areasTo, 
+                                 stratifyByUrban=stratifyByUrban, doFineScaleRisk=doFineScaleRisk, 
+                                 doSmoothRisk=doSmoothRisk)
+  } else {
+    outSubareaLevel = NULL
+    
+    print("aggregating from pixel level to area level")
+    outAreaLevel = pixelPopToArea(pixelLevelPop=outPixelLevel, eaSamples=eaPop$eaSamples, 
+                                  areas=popMat$area, stratifyByUrban=stratifyByUrban, 
+                                  doFineScaleRisk=doFineScaleRisk, doSmoothRisk=doSmoothRisk)
+  }
+  
+  list(eaPop=eaPop, pixelPop=outPixelLevel, subareaPop=outSubareaLevel, areaPop=outAreaLevel, logitRiskDraws=logitRiskDraws)
+}
+
+#' Aggregate populations to the specified areal level
+#' 
+#' Takes simulated populations and aggregates 
+#' them to the specified areal level. Also calculates the aggregated risk and prevalence.
+#' 
+#' @param pixelLevelPop pixel level population information that we want aggregate. In the same format as output from \code{\link{simPopCustom}}
+#' @param eaSamples nIntegrationPoint x nsim matrix of the number of enumeration areas per pixel sampled in the input pixel level population
+#' @param areas character vector of length nIntegrationPoints of area names over which we 
+#'        want to aggregate. Can also be subareas
+#' @param stratifyByUrban whether or not to stratify simulations by urban/rural classification
+#' @param targetPopMat pixellated grid data frame with variables `lon`, `lat`, `pop` (target population), `area`, `subareas` (if subareaLevel is TRUE), `urban` (if stratifyByUrban is TRUE), `east`, and `north`
+#' @param doFineScaleRisk whether or not to calculate the fine scale risk in addition to the prevalence. See details
+#' @param doSmoothRisk Whether or not to calculate the smooth risk in addition to the prevalence. See details
+#' @param areaLevelPop output of \code{\link{simPopCustom}} containing pixel level information 
+#'               about the population of interest
+#' @param areasFrom character vector of length equal to the number of areas from which 
+#'            we would like to aggregate containing the unique names of the areas. 
+#'            Can also be subareas, but these are smaller than the "to areas", and 
+#'            each "from area" must be entirely contained in a single "to area"
+#' @param areasTo character vector of length equal to the number of areas from which 
+#'          we would like to aggregate containing the names of the areas containing 
+#'          with each respective `from' area. Can also be a set of subareas, 
+#'          but these are larger than the "from areas".
+#' 
+#' @author John Paige
+#' @references In Preparation
+#' @return A list containing elements `fineScalePrevalence` and `fineScaleRisk`. Each 
+#' of these are in turn lists with aggregated prevalence and risk for the area of 
+#' interest, containg the following elements, were paranethesis indicate the elements 
+#' for the fineScaleRisk model rather than fineScalePrevalence:
+#' \item{p}{Aggregated prevalence (risk), calculated as aggregate of Z divided by 
+#' aggregate of N}
+#' \item{Z}{Aggregated (expected) population numerator}
+#' \item{N}{Aggregated (expected) population denominator}
+#' \item{pUrban}{Aggregated prevalence (risk) in urban part of the area, calculated 
+#' as aggregate of Z divided by aggregate of N}
+#' \item{ZUrban}{Aggregated (expected) population numerator in urban part of the area}
+#' \item{NUrban}{Aggregated (expected) population denominator in urban part of the area}
+#' \item{pRural}{Aggregated prevalence (risk) in rural part of the area, calculated 
+#' as aggregate of Z divided by aggregate of N}
+#' \item{ZRural}{Aggregated (expected) population numerator in rural part of the area}
+#' \item{NRural}{Aggregated (expected) population denominator in rural part of the area}
+#' \item{A}{Aggregation matrix used to aggregate from pixel level to areal level}
+#' \item{AUrban}{Aggregation matrix used to aggregate from pixel level to urban part of the areal level}
+#' \item{ARural}{Aggregation matrix used to aggregate from pixel level to rural part of the areal level}
+#' @seealso \code{\link{areaPopToArea}}
+#' @name aggPop
+#' @examples
+#' \dontrun{
+#' ##### Now we make a model for the risk. We will use an SPDE model with these 
+#' ##### parameters for the linear predictor on the logist scale, which are chosen 
+#' ##### to be of practical interest:
+#' beta0=-2.9 # intercept
+#' gamma=-1 # urban effect
+#' rho=(1/3)^2 # spatial variance
+#' effRange = 400 # effective spatial range in km
+#' sigmaEpsilon=sqrt(1/2.5) # cluster (nugget) effect standard deviation
+#' 
+#' # simulate the population! Note that this produces multiple dense 
+#' # nEA x nsim and nIntegrationPoint x nsim matrices. In the future 
+#' # sparse matrices will and chunk by chunk computations may be incorporated.
+#' simPop = simPopSPDE(nsim=1, easpa=easpaKenyaNeonatal, 
+#'                     popMat=popMatKenya, targetPopMat=popMatKenyaNeonatal, 
+#'                     poppsub=poppsubKenya, spdeMesh=kenyaMesh, 
+#'                     margVar=rho, sigmaEpsilonSq=sigmaEpsilon^2, 
+#'                     gamma=gamma, effRange=effRange, beta0=beta0, 
+#'                     seed=123, inla.seed=12, nHHSampled=25, 
+#'                     stratifyByUrban=TRUE, subareaLevel=TRUE, 
+#'                     doFineScaleRisk=TRUE, 
+#'                     min1PerSubarea=TRUE)
+#' 
+#' pixelPop = simPop$pixelPop
+#' subareaPop = pixelPopToArea(pixelLevelPop=pixelPop, eaSamples=pixelPop$eaSamples, 
+#'   areas=popMatKenya$subarea, stratifyByUrban=TRUE, 
+#'   targetPopMat=popMatKenyaNeonatal, doFineScaleRisk=TRUE)
+#' 
+#' # get areas associated with each subarea for aggregation
+#' tempAreasFrom = popMatKenya$subarea
+#' tempAreasTo = popMatKenya$area
+#' areasFrom = sort(unique(tempAreasFrom))
+#' areasToI = match(areasFrom, tempAreasFrom)
+#' areasTo = tempAreasTo[areasToI]
+#' 
+#' # do the aggregation from subareas to areas
+#' outAreaLevel = areaPopToArea(areaLevelPop=subareaPop, 
+#'   areasFrom=areasFrom, areasTo=areasTo, 
+#'   stratifyByUrban=TRUE, doFineScaleRisk=TRUE)
+#' }
+NULL
+
+#' @describeIn aggPop Aggregate from pixel to areal level
+#' @export
+pixelPopToArea = function(pixelLevelPop, eaSamples, areas, stratifyByUrban=TRUE, targetPopMat=NULL, 
+                          doFineScaleRisk=!is.null(pixelLevelPop$fineScaleRisk$p), 
+                          doSmoothRisk=!is.null(pixelLevelPop$smoothRisk$p)) {
+  
+  # fine scale prevalence aggregation model
+  nSamples = pixelLevelPop$NFineScalePrevalence
+  zSamples = pixelLevelPop$ZFineScalePrevalence
+  zSamples[is.na(zSamples)] = 0 # must set to zero temporarily so matrix multiplication works
+  out = aggPixelPreds(Zg=zSamples, Ng=nSamples, areas=areas, targetPopMat=targetPopMat, 
+                      useDensity=FALSE, stratifyByUrban=stratifyByUrban, normalize=FALSE)
+  aggregationResults = out$aggregationResults
+  aggregationMatrices = out$aggregationMatrices
+  names(aggregationResults)[-1] = paste(names(aggregationResults)[-1], "FineScalePrevalence", sep="")
+  names(aggregationMatrices) = paste(names(aggregationMatrices), "FineScalePrevalence", sep="")
+  
+  # fine scale risk aggregation model
+  if(doFineScaleRisk) {
+    nSamplesFineScaleRisk = pixelLevelPop$NFineScaleRisk
+    zSamplesFineScaleRisk = pixelLevelPop$ZFineScaleRisk
+    zSamplesFineScaleRisk[is.na(zSamplesFineScaleRisk)] = 0 # must set to zero temporarily so matrix multiplication works out
+    out = aggPixelPreds(Zg=zSamplesFineScaleRisk, Ng=nSamplesFineScaleRisk, areas=areas, targetPopMat=targetPopMat, 
+                        useDensity=FALSE, stratifyByUrban=stratifyByUrban, normalize=FALSE)
+    aggregationResultsFineScaleRisk = out$aggregationResults
+    aggregationMatricesFineScaleRisk = out$aggregationMatrices
+    names(aggregationResultsFineScaleRisk)[-1] = paste(names(aggregationResultsFineScaleRisk)[-1], "FineScaleRisk", sep="")
+    names(aggregationMatricesFineScaleRisk) = paste(names(aggregationMatricesFineScaleRisk), "FineScaleRisk", sep="")
+    
+    # aggregationResults = merge(aggregationResults, aggregationResultsFineScaleRisk, by="region")
+    aggregationResults = c(aggregationResults, aggregationResultsFineScaleRisk[-1])
+    aggregationMatrices = c(aggregationMatrices, aggregationMatricesFineScaleRisk)
+  }
+  
+  if(doSmoothRisk) {
+    # NOTE: although useDensity is set to FALSE, that's only because the density is already 
+    # directly incorporated into nSamplesSmoothRisk
+    nSamplesSmoothRisk = pixelLevelPop$NSmoothRisk
+    zSamplesSmoothRisk = pixelLevelPop$ZSmoothRisk
+    zSamplesSmoothRisk[is.na(zSamplesSmoothRisk)] = 0 # must set to zero temporarily so matrix multiplication works out
+    out = aggPixelPreds(Zg=zSamplesSmoothRisk, Ng=nSamplesSmoothRisk, areas=areas, targetPopMat=targetPopMat, 
+                        useDensity=FALSE, stratifyByUrban=stratifyByUrban, normalize=FALSE)
+    aggregationResultsSmoothRisk = out$aggregationResults
+    aggregationMatricesSmoothRisk = out$aggregationMatrices
+    names(aggregationResultsSmoothRisk)[-1] = paste(names(aggregationResultsSmoothRisk)[-1], "SmoothRisk", sep="")
+    names(aggregationMatricesSmoothRisk) = paste(names(aggregationMatricesSmoothRisk), "SmoothRisk", sep="")
+    
+    # aggregationResults = merge(aggregationResults, aggregationResultsSmoothRisk, by="region")
+    aggregationResults = c(aggregationResults, aggregationResultsSmoothRisk[-1])
+    aggregationMatrices = c(aggregationMatrices, aggregationMatricesSmoothRisk)
+  }
+  
+  list(aggregationResults=aggregationResults, aggregationMatrices=aggregationMatrices)
+}
+
+#' Helper function of \code{\link{pixelPopToArea}}
+#' 
+#' Aggregates population from the 
+#' pixel level to the level of the area of interest.
+#' 
+#' @param Zg nIntegrationPoint x nsim matrix of simulated response (population numerators) for each pixel and sample
+#' @param Ng nIntegrationPoint x nsim matrix of simulated counts (population denominators) for each pixel and sample
+#' @param areas nIntegrationPoint length character vector of areas (or subareas) 
+#' @param urban nIntegrationPoint length vector of indicators specifying whether or not pixels are urban or rural
+#' @param targetPopMat same as in \code{\link{simPopCustom}}
+#' @param useDensity whether to use population density as aggregation weights. 
+#' @param stratifyByUrban whether or not to stratify simulations by urban/rural classification
+#' @param normalize if TRUE, pixel level aggregation weights within specified area are normalized to sum to 1. This produces an 
+#' average of the values in Zg rather than a sum. In general, should only be set to TRUE for smooth integrals of risk.
+aggPixelPreds = function(Zg, Ng, areas, urban=targetPopMat$urban, targetPopMat=NULL, useDensity=FALSE, 
+                         stratifyByUrban=TRUE, normalize=useDensity) {
+  
+  if(useDensity && !normalize) {
+    stop("if useDensity is set to TRUE, normalize must be set to TRUE as well")
+  }
+  predsUrban = urban
+  predsArea = areas
+  
+  # set NAs and pixels without any sample size to 0
+  Ng[is.na(Ng)] = 0
+  if(!useDensity) {
+    # is useDensity is true, then Zg is really a set of probabilities, so no need to set to 0
+    Zg[Ng == 0] = 0
+  }
+  
+  # function to aggregate predictions over the 
+  # population density grid. Use the following function to get numerical 
+  # integration matrix for a given level of areal aggregation. returned 
+  # matrices have dimension length(unique(areaNames)) x length(areaNames)
+  # areaNames: 
+  # urbanProportions: DEPRACATED vector giving proportion of population urban for each unique area in areaNames. 
+  #                   If specified, ensure that urban and rural parts of the full integration 
+  #                   matrix have the appropriate relative weights for each area. Used for population 
+  #                   density based integration
+  # normalize: whether or not to normalize the rows of the matrices to sum to 1 or to instead 
+  #            contain only binary values (or non-binary values based on the binary values if 
+  #            urbanProportions is not NULL)
+  getIntegrationMatrix = function(areaNames, urbanProportions=NULL, normalize=FALSE) {
+    
+    if(useDensity) {
+      popDensities = targetPopMat$pop
+      densities = popDensities
+    } else {
+      equalDensities = rep(1, nrow(Zg))
+      densities = equalDensities
+    }
+    
+    uniqueNames = sort(unique(areaNames))
+    getMatrixHelper = function(i, thisUrban=NULL, thisUseDensity=useDensity, thisNormalize=normalize) {
+      areaI = areaNames == uniqueNames[i]
+      
+      if(thisUseDensity) {
+        theseDensities = popDensities
+      } else {
+        theseDensities = equalDensities
+      }
+      
+      # make sure we only include pixels in the given area and, if necessary, with the given urbanicity
+      theseDensities[!areaI] = 0
+      if(!is.null(thisUrban))
+        theseDensities[predsUrban != thisUrban] = 0
+      thisSum = sum(theseDensities)
+      if(thisSum != 0 && thisNormalize)
+        theseDensities * (1/thisSum)
+      else if(thisSum == 0)
+        rep(0, length(theseDensities))
+      else
+        theseDensities
+    }
+    
+    if(!stratifyByUrban) {
+      integrationMatrix = t(matrix(sapply(1:length(uniqueNames), getMatrixHelper), ncol=length(uniqueNames)))
+      
+      integrationMatrix
+    } else {
+      integrationMatrixUrban = t(matrix(sapply(1:length(uniqueNames), getMatrixHelper, thisUrban=TRUE), ncol=length(uniqueNames)))
+      integrationMatrixRural = t(matrix(sapply(1:length(uniqueNames), getMatrixHelper, thisUrban=FALSE), ncol=length(uniqueNames)))
+      if(!is.null(urbanProportions)) {
+        integrationMatrix = sweep(integrationMatrixUrban, 1, urbanProportions, "*") + sweep(integrationMatrixRural, 1, 1-urbanProportions, "*")
+      } else {
+        integrationMatrix = t(matrix(sapply(1:length(uniqueNames), getMatrixHelper), ncol=length(uniqueNames)))
+      }
+      
+      rownames(integrationMatrix) = uniqueNames
+      rownames(integrationMatrixUrban) = uniqueNames
+      rownames(integrationMatrixRural) = uniqueNames
+      
+      list(integrationMatrix=integrationMatrix, 
+           integrationMatrixUrban=integrationMatrixUrban, 
+           integrationMatrixRural=integrationMatrixRural)
+    }
+  }
+  
+  # Use the following function to perform the
+  # aggregations
+  getIntegratedPredictions = function(areaNames) {
+    # get numerical integration matrix
+    A = getIntegrationMatrix(areaNames, normalize=normalize)
+    
+    # aggregate the prediction and denominator matrices (for whole areas and also urban/rural strata if necessary)
+    if(!stratifyByUrban) {
+      ZAggregated = A %*% Zg
+      NAggregated = A %*% Ng
+      pAggregated = ZAggregated / NAggregated
+      pAggregated[NAggregated == 0] = NA
+      
+      aggregationResults = list(p=pAggregated, Z=ZAggregated, N=NAggregated)
+      aggregationMatrices = list(A=A, AUrban=NULL, ARural=NULL)
+    } else {
+      AUrban = A$integrationMatrixUrban
+      ARural = A$integrationMatrixRural
+      A = A$integrationMatrix
+      
+      # first aggregate the numerator. The denominator will depend on the aggregation method
+      ZAggregated = A %*% Zg
+      ZAggregatedUrban = AUrban %*% Zg
+      ZAggregatedRural = ARural %*% Zg
+      
+      if(useDensity) {
+        # for population density aggregation, we integrate probabilities rather than aggregate 
+        # empirical proportions
+        NAggregated = NULL
+        pAggregated = ZAggregated
+        ZAggregated = NULL
+        
+        NAggregatedUrban = NULL
+        pAggregatedUrban = ZAggregatedUrban
+        ZAggregatedUrban = NULL
+        
+        NAggregatedRural = NULL
+        pAggregatedRural = ZAggregatedRural
+        ZAggregatedRural = NULL
+      } else {
+        # if we do not use density, we must also aggregate the denominator to calculate 
+        # the aggregated empirical proportions
+        NAggregated = A %*% Ng
+        pAggregated = ZAggregated / NAggregated
+        pAggregated[NAggregated == 0] = NA
+        
+        NAggregatedUrban = AUrban %*% Ng
+        pAggregatedUrban = ZAggregatedUrban / NAggregatedUrban
+        pAggregatedUrban[NAggregatedUrban == 0] = NA
+        
+        NAggregatedRural = ARural %*% Ng
+        pAggregatedRural = ZAggregatedRural / NAggregatedRural
+        pAggregatedRural[NAggregatedRural == 0] = NA
+      }
+      
+      aggregationResults = list(region=sort(unique(areas)), p=pAggregated, Z=ZAggregated, N=NAggregated, 
+                                pUrban=pAggregatedUrban, ZUrban=ZAggregatedUrban, NUrban=NAggregatedUrban, 
+                                pRural=pAggregatedRural, ZRural=ZAggregatedRural, NRural=NAggregatedRural)
+      aggregationMatrices = list(A=A, AUrban=AUrban, ARural=ARural)
+    }
+    
+    list(aggregationResults=aggregationResults, aggregationMatrices=aggregationMatrices)
+  }
+  
+  areaResults = getIntegratedPredictions(predsArea)
+  
+  # return results
+  areaResults
+}
+
+#' @describeIn aggPop Aggregate areal populations to another areal level
+#' @export
+areaPopToArea = function(areaLevelPop, areasFrom, areasTo, 
+                         stratifyByUrban=TRUE, 
+                         doFineScaleRisk=!is.null(areaLevelPop$aggregationResults$pFineScaleRisk), 
+                         doSmoothRisk=!is.null(areaLevelPop$aggregationResults$pSmoothRisk)) {
+  
+  if(length(areasFrom) != length(unique(areasFrom))) {
+    stop("areasFrom must contain only unique names of areas to which we want to aggregate")
+  }
+  
+  uniqueNames = sort(unique(areasTo))
+  
+  # construct row of the aggregation matrix given toArea index
+  getMatrixHelper = function(i) {
+    # get areasTo associated with this fromArea
+    thisToArea = uniqueNames[i]
+    thisFromAreas = unique(areasFrom[areasTo == thisToArea])
+    areaI = areasFrom %in% thisFromAreas
+    
+    areaI
+  }
+  
+  # construct the aggregation matrix from areasFrom to areasTo
+  A = t(matrix(sapply(1:length(uniqueNames), getMatrixHelper), ncol=length(uniqueNames)))
+  rownames(A) = uniqueNames
+  colnames(A) = areasFrom
+  
+  ##### aggregate populations
+  # fine scale prevalence aggregation model
+  getaggregationResults = function(resultNameRoot="FineScalePrevalence") {
+    if(! (paste("N", resultNameRoot, sep="") %in% names(areaLevelPop$aggregationResults))) {
+      stop(paste0(resultNameRoot, " was not computed in input areaLevelPop"))
+    }
+    
+    nSamples = areaLevelPop$aggregationResults[[paste("N", resultNameRoot, sep="")]]
+    zSamples = areaLevelPop$aggregationResults[[paste("Z", resultNameRoot, sep="")]]
+    zSamples[is.na(zSamples)] = 0 # must set to zero temporarily so matrix multiplication works out
+    
+    ZAggregated =  A %*% zSamples
+    NAggregated =  A %*% nSamples
+    pAggregated = ZAggregated / NAggregated
+    pAggregated[NAggregated == 0] = NA
+    thisA=A %*% areaLevelPop$aggregationMatrices[[paste("A", resultNameRoot, sep="")]]
+    rownames(thisA) = uniqueNames
+    
+    if(stratifyByUrban) {
+      nSamplesUrban = areaLevelPop$aggregationResults[[paste("NUrban", resultNameRoot, sep="")]]
+      zSamplesUrban = areaLevelPop$aggregationResults[[paste("ZUrban", resultNameRoot, sep="")]]
+      zSamplesUrban[is.na(zSamplesUrban)] = 0 # must set to zero temporarily so matrix multiplication works out
+      
+      nSamplesRural = areaLevelPop$aggregationResults[[paste("NRural", resultNameRoot, sep="")]]
+      zSamplesRural = areaLevelPop$aggregationResults[[paste("ZRural", resultNameRoot, sep="")]]
+      zSamplesRural[is.na(zSamplesRural)] = 0 # must set to zero temporarily so matrix multiplication works out
+      
+      ZAggregatedUrban =  A %*% zSamplesUrban
+      NAggregatedUrban =  A %*% nSamplesUrban
+      pAggregatedUrban = ZAggregatedUrban / NAggregatedUrban
+      pAggregatedUrban[NAggregatedUrban == 0] = NA
+      thisAUrban=A %*% areaLevelPop$aggregationMatrices[[paste("AUrban", resultNameRoot, sep="")]]
+      rownames(thisAUrban) = uniqueNames
+      
+      ZAggregatedRural =  A %*% zSamplesRural
+      NAggregatedRural =  A %*% nSamplesRural
+      pAggregatedRural = ZAggregatedRural / NAggregatedRural
+      pAggregatedRural[NAggregatedRural == 0] = NA
+      thisARural=A %*% areaLevelPop$aggregationMatrices[[paste("ARural", resultNameRoot, sep="")]]
+      rownames(thisARural) = uniqueNames
+    } else {
+      ZAggregatedUrban = NULL
+      NAggregatedUrban = NULL
+      pAggregatedUrban = NULL
+      thisAUrban=NULL
+      
+      ZAggregatedRural = NULL
+      NAggregatedRural = NULL
+      pAggregatedRural = NULL
+      thisARural=NULL
+    }
+    
+    aggregationResults = list(region=uniqueNames, p=pAggregated, Z=ZAggregated, N=NAggregated, 
+                             pUrban=pAggregatedUrban, ZUrban=ZAggregatedUrban, NUrban=NAggregatedUrban, 
+                             pRural=pAggregatedRural, ZRural=ZAggregatedRural, NRural=NAggregatedRural)
+    aggregationMatrices = list(A=thisA, AUrban=thisAUrban, ARural=thisARural)
+    
+    capitalResultNameRoot = resultNameRoot
+    # capitalResultNameRoot = paste(toupper(substr(capitalResultNameRoot, 1, 1)), substr(capitalResultNameRoot, 2, nchar(capitalResultNameRoot)), sep="")
+    names(aggregationResults)[-1] = paste(names(aggregationResults)[-1], capitalResultNameRoot, sep="")
+    names(aggregationMatrices) = paste(names(aggregationMatrices)[-1], capitalResultNameRoot, sep="")
+    
+    list(aggregationResults=aggregationResults, aggregationMatrices=aggregationMatrices)
+  }
+  
+  # fine scale prevalence model 
+  out = getaggregationResults("FineScalePrevalence")
+  aggregationResults = out$aggregationResults
+  aggregationMatrices = out$aggregationMatrices
+  
+  # fine scale risk aggregation model
+  if(doFineScaleRisk) {
+    out = getaggregationResults("FineScaleRisk")
+    resFineScaleRisk = out$aggregationResults
+    resAggregationMatrices = out$aggregationMatrices
+    
+    # aggregationResults = merge(aggregationResults, resFineScaleRisk, by="region")
+    aggregationResults = c(aggregationResults, resFineScaleRisk)
+    aggregationMatrices = c(aggregationMatrices, resAggregationMatrices)
+  }
+  
+  if(doSmoothRisk) {
+    out = getaggregationResults("SmoothRisk")
+    resSmoothRisk = out$aggregationResults
+    resAggregationMatrices = out$aggregationMatrices
+    
+    # aggregationResults = merge(aggregationResults, resSmoothRisk, by="region")
+    aggregationResults = c(aggregationResults, resSmoothRisk)
+    aggregationMatrices = c(aggregationMatrices, resAggregationMatrices)
+  }
+  
+  list(aggregationResults=aggregationResults, aggregationMatrices=aggregationMatrices)
+}
+
+#' @describeIn simPop
+#' Simulate populations and population prevalences given census frame and population density 
+#' information. Uses custom spatial logit risk function and can include iid cluster 
+#' level effect.
+#' 
+#' @export
+simPopCustom = function(logitRiskDraws, sigmaEpsilonDraws, easpa, popMat, targetPopMat, 
+                        stratifyByUrban=TRUE, validationPixelI=NULL, validationClusterI=NULL, 
+                        clustersPerPixel=NULL, 
+                        doFineScaleRisk=FALSE, doSmoothRisk=FALSE, 
+                        doSmoothRiskLogisticApprox=FALSE, 
+                        poppsub=NULL, subareaLevel=FALSE, 
+                        min1PerSubarea=TRUE, 
+                        returnEAinfo=FALSE, epsc=NULL) {
+  
+  if(is.null(poppsub) && subareaLevel) {
+    stop("if subareaLevel is TRUE, user must specify poppsub")
+  }
+  
+  if(!is.null(validationPixelI) || !is.null(validationClusterI) || !is.null(clustersPerPixel)) {
+    stop("validationPixelI, validationClusterI, and clustersPerPixel not yet fully implemented")
+  }
+  
+  nDraws = ncol(logitRiskDraws)
+  
+  # set default inputs
+  totalEAs = sum(easpa$EATotal)
+  if(!is.null(clustersPerPixel)) {
+    emptyPixels = clustersPerPixel == 0
+    if(totalEAs != sum(clustersPerPixel))
+      stop("sum(easpa$EATotal) != sum(clustersPerPixel)")
+  }
+  
+  # get area names
+  areas = sort(unique(popMat$area))
+  if(any(areas != easpa$area))
+    stop("area names and easpa do not match popMat or are not in the correct order")
+  
+  # determine if we care about subareas (smallest areas we care about. No info of EAs per subarea)
+  subareaLevel = !is.null(popMat$subarea)
+  
+  ##### Line 1 (of the algorithm): take draws from the binomial process for each stratum (each row of easpa)
+  # get probabilities for each pixel (or at least something proportional within each stratum)
+  print("drawing EAs")
+  pixelProbs = popMat$pop
+  
+  # take draws from the stratified binomial process for each posterior sample
+  if(is.null(clustersPerPixel)) {
+    if(subareaLevel) {
+      eaSamples = rStratifiedMultnomialBySubarea(nDraws, popMat, easpa, stratifyByUrban, poppsub=poppsub, 
+                                                 min1PerSubarea=min1PerSubarea)
+    } else {
+      eaSamples = rStratifiedMultnomial(nDraws, popMat, easpa, stratifyByUrban)
+    }
+  }
+  
+  if(!is.null(clustersPerPixel) && !exists("eaSamples")) {
+    eaSamples = matrix(rep(clustersPerPixel, nDraws), ncol=nDraws)
+  }
+  
+  # make matrix (or list) of pixel indices mapping matrices of EA values to matrices of pixel values
+  if(!is.null(clustersPerPixel)) {
+    pixelIndices = rep(1:nrow(popMat), times=clustersPerPixel) # this contains repetitions and has length == nEAs
+    uniquePixelIndices = sort(unique(pixelIndices))
+  } else {
+    pixelIndexMat = matrix(rep(rep(1:nrow(popMat), nDraws), times=eaSamples), ncol=nDraws)
+  }
+  
+  # determine which EAs are urban if necessary
+  if(stratifyByUrban) {
+    # urbanMat = matrix(rep(rep(popMat$urban, nDraws), times=c(eaSamples)), ncol=nDraws)
+    if(!is.null(clustersPerPixel)) {
+      urbanVals = popMat$urban[pixelIndices]
+      uniqueUrbanVals = popMat$urban[uniquePixelIndices]
+    } else {
+      # urbanMat = matrix(popMat$urban[pixelIndexMat], ncol=nDraws)
+      urbanMat = NULL # don't calculate here for memory's sake
+    }
+  } else {
+    urbanMat = NULL
+  }
+  
+  # determine which EAs are from which area
+  if(!is.null(clustersPerPixel)) {
+    areaVals = popMat$area[pixelIndices]
+    uniqueAreaVals = popMat$area[uniquePixelIndices]
+  } else {
+    # areaMat = matrix(popMat$area[pixelIndexMat], ncol=nDraws)
+    areaMat = NULL # don't calculate here for memory's sake
+  }
+  
+  ##### Line 2: draw cluster effects, epsilon
+  # NOTE1: we assume there are many more EAs then sampled clusters, so that 
+  #       the cluster effects for each EA, including those sampled, are iid
+  print("simulating EA level risks, numerators, and denominators")
+  if(is.null(epsc)) {
+    epsc = matrix(stats::rnorm(totalEAs*nDraws, sd=rep(sigmaEpsilonDraws, each=totalEAs)), ncol=nDraws)
+  }
+  
+  ##### Line 3: draw EA population denominators, N
+  
+  if(!is.null(clustersPerPixel)) {
+    if(is.null(validationPixelI))
+      stop("clustersPerPixel must only be set for validation, but validationPixelI is NULL")
+    
+    # in this case, every left out cluster has exactly 25 households. Simply sample target population 
+    # with equal probability from each cluster/faux EA
+    Ncs = sampleNMultilevelMultinomialFixed(clustersPerPixel, nDraws=nDraws, pixelIndices=pixelIndices, 
+                                            urbanVals=urbanVals, areaVals=areaVals, easpa=easpa, popMat=popMat, stratifyByUrban=stratifyByUrban, 
+                                            verbose=TRUE)
+  } else {
+    if(returnEAinfo) {
+      out = sampleNMultilevelMultinomial(pixelIndexMat=pixelIndexMat, easpaList=list(easpa), 
+                                         popMat=popMat, stratifyByUrban=stratifyByUrban, verbose=TRUE, returnEAinfo=returnEAinfo)
+      householdDraws = out$householdDraws
+      Ncs = out$targetPopDraws
+    } else {
+      Ncs <- sampleNMultilevelMultinomial(pixelIndexMat=pixelIndexMat, easpaList=list(easpa), 
+                                       popMat=popMat, stratifyByUrban=stratifyByUrban, verbose=TRUE, returnEAinfo=returnEAinfo)
+      householdDraws = NULL
+    }
+  }
+  
+  ##### do part of Line 7 in advance
+  # calculate mu_{ic} for each EA in each pixel
+  if(!is.null(clustersPerPixel)) {
+    uc = logitRiskDraws[pixelIndices,]
+    muc = expit(uc + epsc)
+  } else {
+    uc = matrix(logitRiskDraws[cbind(rep(rep(1:nrow(logitRiskDraws), nDraws), times=c(eaSamples)), rep(1:nDraws, each=totalEAs))], ncol=nDraws)
+    muc = expit(uc + epsc)
+  }
+  
+  # calculate Z_{ic} for each EA in each pixel
+  Zc = matrix(stats::rbinom(n=totalEAs * nDraws, size=Ncs, prob=as.matrix(muc)), ncol=nDraws)
+  
+  ##### Line 4: Aggregate appropriate values from EAs to the grid cell level
+  
+  # function for aggregating values for each grid cell
+  getPixelColumnFromEAs = function(i, vals, applyFun=sum, popWeightMatrix=NULL) {
+    # calculate levels over which to aggregate
+    if(!is.null(clustersPerPixel)) {
+      indices = pixelIndices
+    } else {
+      indices = factor(as.character(pixelIndexMat[,i]))
+    }
+    
+    # in this case (the LCPb model), we calculate weighted means within factor levels using popWeightMatrix
+    if(!is.null(popWeightMatrix)) {
+      stop("using popWeightMatrix is no longer support, since this is much slower than calculating normalized 
+           weights separately and multiplying values by them outside this function")
+      # applyFun = function(x) {stats::weighted.mean(x, popWeightMatrix[,i], na.rm=TRUE)}
+      
+      Data = data.frame(v=vals[,i], w=popWeightMatrix[,i])
+      out = sapply(split(Data, indices), function(x) stats::weighted.mean(x$v,x$w))
+    } else {
+      if(!is.null(clustersPerPixel)) {
+        # out = tapply(vals[,i], factor(as.character(pixelIndices)), FUN=applyFun)
+        out = tapply(vals[,i], indices, FUN=applyFun)
+      } else {
+        out = tapply(vals[,i], indices, FUN=applyFun)
+      }
+    }
+    
+    if(!is.null(clustersPerPixel)) {
+      returnValues = out
+    } else {
+      indices = as.numeric(names(out))
+      
+      returnValues = rep(NA, nrow(logitRiskDraws))
+      returnValues[indices] = out
+    }
+    
+    returnValues
+  }
+  
+  ##### Line 5: We already did this, resulting in logitRiskDraws input
+  
+  ##### Line 6: aggregate population denominators for each grid cell to get N_{ig}
+  print("Aggregating from EA level to the pixel level")
+  Ng <- sapply(1:ncol(Ncs), getPixelColumnFromEAs, vals=Ncs)
+  Ng[is.na(Ng)] = 0
+  
+  ##### Line 7: aggregate response for each grid cell to get Z_{ig}
+  Zg <- sapply(1:ncol(Zc), getPixelColumnFromEAs, vals=Zc)
+  
+  ##### Line 8: Calculate empirical mortality proportions for each grid cell, p_{ig}. 
+  #####         Whenever N_{ig} is 0, set p_{ig} to NA as well
+  pg = Zg / Ng
+  pg[Ng == 0] = NA
+  
+  ##### calculate results for the other models if necessary
+  if(doFineScaleRisk || doSmoothRisk) {
+    nPerEA = getExpectedNperEA(easpa, targetPopMat)
+  }
+  
+  if(doFineScaleRisk) {
+    fineScaleRisk = sapply(1:ncol(muc), getPixelColumnFromEAs, vals=muc, applyFun=function(x) {mean(x, na.rm=TRUE)})
+    fineScaleRisk[!is.finite(fineScaleRisk)] = NA
+    
+    # in order to get valid count estimates, we also need the expected denominator per EA in each stratum:
+    nSamplesFineScaleRisk = sweep(eaSamples, 1, nPerEA, "*")
+    zSamplesFineScaleRisk = fineScaleRisk * nSamplesFineScaleRisk
+    zSamplesFineScaleRisk[is.na(zSamplesFineScaleRisk)] = 0
+  } else {
+    fineScaleRisk = NULL
+    zSamplesFineScaleRisk = NULL
+    nSamplesFineScaleRisk = NULL
+  }
+  
+  if(doSmoothRisk) {
+    # integrate out the cluster effect in the risk
+    smoothRisk = matrix(SUMMER::logitNormMean(cbind(c(as.matrix(logitRiskDraws)), rep(sigmaEpsilonDraws, each=nrow(logitRiskDraws))), logisticApprox=doSmoothRiskLogisticApprox), nrow=nrow(logitRiskDraws))
+    
+    # get expected numerator and denominators
+    nSamplesSmoothRisk = matrix(rep(targetPopMat$pop, nDraws), ncol=nDraws)
+    zSamplesSmoothRisk = sweep(smoothRisk, 1, targetPopMat$pop, "*")
+  } else {
+    smoothRisk = NULL
+    zSamplesSmoothRisk = NULL
+    nSamplesSmoothRisk = NULL
+  }
+  
+  ##### Extra steps: collect draws at each level and generate:
+  ##### areas, preds, 
+  pixelLevelPop = list(pFineScalePrevalence=pg, ZFineScalePrevalence=Zg, NFineScalePrevalence=Ng, 
+                       pFineScaleRisk=fineScaleRisk, ZFineScaleRisk=zSamplesFineScaleRisk, NFineScaleRisk=nSamplesFineScaleRisk, 
+                       pSmoothRisk=smoothRisk, ZSmoothRisk=zSamplesSmoothRisk, NSmoothRisk=nSamplesSmoothRisk)
+  
+  if(!returnEAinfo) {
+    pixelLevelPop
+  } else {
+    
+    
+    # return list of eaDat objects
+    getEADat = function(i) {
+      theseI = pixelIndexMat[,i]
+      
+      eaDat = data.frame(lon=popMat$lon[theseI], lat=popMat$lat[theseI], 
+                         area=popMat$area[theseI], subarea=rep("temp", length(theseI)), 
+                         urban=popMat$urban[theseI], east=popMat$east[theseI], north=popMat$north[theseI], 
+                         popDensity=popMat$pop[theseI], popDensityTarget=targetPopMat$pop[theseI], pixelIs=theseI, 
+                         nHH=householdDraws[,i], N=Ncs[,i], Z=Zc[,i], 
+                         pFineScalePrevalence=Zc[,i]/Ncs[,i])
+      if(doFineScaleRisk) {
+        eaDat$pFineScaleRisk=muc[,i]
+      }
+      if(doSmoothRisk) {
+        eaDat$pSmoothRisk=smoothRisk[theseI,i]
+      }
+      
+      if(subareaLevel) {
+        eaDat$subarea = popMat$subarea[theseI]
+      } else {
+        eaDat$subarea = NULL
+      }
+      
+      eaDat$pFineScalePrevalence[eaDat$N == 0] = NA
+      
+      eaDat
+    }
+    
+    print("Constructing list of simulated EA data.frames")
+    eaDatList = lapply(1:nDraws, getEADat)
+    
+    c(pixelLevelPop, list(eaDatList=eaDatList, eaSamples=eaSamples))
+  }
+}
+
+
+#' Internal functions for population simulation
+#' 
+#' Functions for calculating valuable quantities and for drawing from important 
+#' distributions for population simulation.
+#' 
+#' @param easpa Census frame. See \code{\link{simPopCustom}} for details
+#' @param popMat data.frame of pixellated grid of population densities. See \code{\link{simPopCustom}} for details
+#' @param i Index
+#' @param urban If TRUE, calculate only for urban part of the area. If FALSE, for only rural part
+#' @param stratifyByUrban whether or not to stratify calculations by urban/rural classification
+#' @param validationPixelI CURRENTLY FOR TESTING PURPOSES ONLY a set of indices of pixels for which we want to simulate populations (used for pixel level validation)
+#' @param n Number of samples
+#' @param poppsub Population per subarea. See \code{\link{simPopCustom}} for details
+#' @param min1PerSubarea Whether or not to ensure there is at least 1 EA per subarea. See \code{\link{simPopCustom}} for details
+#' @param method If min1PerSubarea is TRUE, the sampling method for the truncated multinomial to use with rmulitnom1. rmultinom1 automatically 
+#'         switches between them depending on the number of expected samples. The methods are:
+#' \describe{
+#'   \item{mult1}{rejection sampling from multinomial plus 1 in each category}
+#'   \item{mult}{rejection sampling from multinomial if any category has zero count}
+#'   \item{indepMH}{independent Metropolis-Hastings using multinomial plus 1 distribution as proposal}
+#' }
+#' @param minSample The minimum number of samples per `chunk` of samples for truncated multinomial sampling. Defaults to 1
+#' @param easpsub This could either be total EAs per subarea, or subarea crossed with urban or 
+#'          rural if stratifyByUrban is TRUE
+#' @param size Multinomial size parameter. See \code{\link[stats]{rmultinom}}
+#' @param prob Multinomial probability vector parameter. See \code{\link[stats]{rmultinom}}
+#' @param maxSize The maximum number of elements in a matrix drawn from the proposal distribution per sample chunk. 
+#' @param maxExpectedSizeBeforeSwitch Max expected number of samples / k, the number of categories, before switching method
+#' @param init Initial sample if method is `indepMH`
+#' @param burnIn Number of initial samples before samples are collected if method is `indepMH`
+#' @param filterEvery Store only every filterEvery samples if method is i`indepMH`
+#' @param zeroProbZeroSamples If TRUE, set samples for parts of prob vector that are zero to zero. Otherwise they are set to one.
+#' @param allowSizeLessThanK If TRUE, then if size < the number of categories (k), returns matrix where each 
+#'                     column is vector of size ones and k - size zeros. If FALSE, throws an error if size < k
+#' @param clustersPerPixel CURRENTLY FOR TESTING PURPOSES ONLY a vector of length nIntegrationPoints specifying the number of clusters per pixel if they are fixed
+#' @param pixelIndices A nEA x n matrix of pixel indices associated with each EA per simulation/draw
+#' @param urbanVals A nEA x n matrix of urbanicities associated with each EA per simulation/draw
+#' @param areaVals A nEA x n matrix of area names associated with each EA per simulation/draw
+#' @param easpaList A list of length n with each element being of the format of easpa 
+#'            giving the number of households and EAs 
+#'            per stratum. It is assumed that the number of EAs per stratum is 
+#'            the same in each list element. If easpaList is a data frame, 
+#'            number of households per stratum is assumed constant
+#' @param nDraws Number of draws
+#' @param pixelIndexMat Matrix of pixel indices associated with each EA and draw. Not 
+#' required by getExpectedNperEA unless level == "EA"
+#' @param urbanMat Matrix of urbanicities associated with each EA and draw
+#' @param areaMat Matrix of areas associated with each EA and draw
+#' @param verbose Whether to print progress as the function proceeds
+#' @param returnEAinfo Whether a data frame at the EA level is desired
+#' @param minHHPerEA The minimum number of households per EA (defaults to 25, since 
+#' that is the number of households sampled per DHS cluster)
+#' @param fixHHPerEA If not NULL, the fixed number of households per EA
+#' @param fixPopPerHH If not NULL, the fixed target population per household
+#' @param level Whether to calculate results at the integration grid or EA level
+#' @name simPopInternal
+NULL
+
+#' @describeIn simPopInternal Calculates expected denominator per enumeration area.
+getExpectedNperEA = function(easpa, popMat, level=c("grid", "EA"), pixelIndexMat=NULL) {
+  level = match.arg(level)
+  
+  # calculate the expected denominator per enumeration area in each stratum. 
+  nPerEAUrban = easpa$popUrb / easpa$EAUrb
+  nPerEARural = easpa$popRur / easpa$EARur
+  
+  # expanded the expected denominator values vector to be of length equal 
+  # to the number of grid cells
+  uniqueAreas = sort(unique(popMat$area))
+  outUrban = numeric(nrow(popMat))
+  outRural = numeric(nrow(popMat))
+  for(i in 1:length(uniqueAreas)) {
+    urbanI = getSortIndices(i, urban=TRUE, popMat=popMat, stratifyByUrban=TRUE)
+    ruralI = getSortIndices(i, urban=FALSE, popMat=popMat, stratifyByUrban=TRUE)
+    outUrban[urbanI] = nPerEAUrban[i]
+    outRural[ruralI] = nPerEARural[i]
+  }
+  
+  out = outUrban + outRural
+  
+  if(level == "EA") {
+    if(is.null(pixelIndexMat)) {
+      stop("if level == EA, must specify pixelIndexMat")
+    }
+    out = matrix(out[pixelIndexMat], ncol=ncol(pixelIndexMat))
+  }
+  
+  out
+}
+
+#' @describeIn simPopInternal For recombining separate multinomials into the draws over all grid points
+getSortIndices = function(i, urban=TRUE, popMat, stratifyByUrban=TRUE, validationPixelI=NULL) {
+  
+  # get area names
+  areas = sort(unique(popMat$area))
+  
+  # determine which pixels and how many EAs are in this stratum
+  if(stratifyByUrban) {
+    includeI = (popMat$area == areas[i]) & (popMat$urban == urban)
+  }
+  else {
+    includeI = popMat$area == areas[i]
+  }
+  
+  # include only indices included within validation if necessary
+  if(!is.null(validationPixelI)) {
+    
+    # convert validationPixelI into a logical
+    temp = rep(FALSE, length(includeI))
+    temp[validationPixelI] = TRUE
+    
+    # include only indices we are interested in for the validation
+    includeI = includeI & temp
+  }
+  
+  which(includeI)
+}
+
+#' @describeIn simPopInternal Gives nIntegrationPoints x n matrix of draws from the stratified multinomial with values 
+#' corresponding to the value of |C^g| for each pixel, g (the number of EAs/pixel)
+rStratifiedMultnomial = function(n, popMat, easpa, stratifyByUrban=TRUE) {
+  
+  # get area names
+  areas = sort(unique(popMat$area))
+  if(any(areas != easpa$area))
+    stop("area names and easpa do not match popMat or are not in the correct order")
+  
+  # we will need to draw separate multinomial for each stratum. Start by 
+  # creating matrix of all draws of |C^g|
+  eaSamples = matrix(NA, nrow=nrow(popMat), ncol=n)
+  
+  # now draw multinomials
+  if(stratifyByUrban) {
+    # draw for each area crossed with urban/rural
+    urbanSamples = do.call("rbind", lapply(1:length(areas), rMyMultinomial, n=n, urban=TRUE, 
+                                           stratifyByUrban=stratifyByUrban, popMat=popMat, easpa=easpa))
+    ruralSamples = do.call("rbind", lapply(1:length(areas), rMyMultinomial, n=n, urban=FALSE, 
+                                           stratifyByUrban=stratifyByUrban, popMat=popMat, easpa=easpa))
+    
+    # get the indices used to recombine into the full set of draws
+    urbanIndices = unlist(sapply(1:length(areas), getSortIndices, urban=TRUE, popMat=popMat, stratifyByUrban=stratifyByUrban))
+    ruralIndices = unlist(sapply(1:length(areas), getSortIndices, urban=FALSE, popMat=popMat, stratifyByUrban=stratifyByUrban))
+    
+    # recombine into eaSamples
+    eaSamples[urbanIndices,] = urbanSamples
+    eaSamples[ruralIndices,] = ruralSamples
+  } else {
+    # draw for each area
+    stratumSamples = rbind(sapply(1:length(areas), n=n, rMyMultinomial, 
+                                  stratifyByUrban=stratifyByUrban, popMat=popMat, easpa=easpa))
+    
+    # get the indices used to recombine into the full set of draws
+    stratumIndices = c(sapply(1:length(areas), getSortIndices, popMat=popMat, stratifyByUrban=stratifyByUrban))
+    
+    # recombine into eaSamples
+    eaSamples[stratumIndices,] = stratumSamples
+  }
+  
+  # return results
+  eaSamples
+}
+
+#' @describeIn simPopInternal Gives nIntegrationPoints x n matrix of draws from the stratified multinomial with values 
+# corresponding to the number of EAs in each pixel
+rStratifiedMultnomialBySubarea = function(n, popMat, easpa, stratifyByUrban=TRUE, poppsub=NULL, 
+                                          min1PerSubarea=TRUE, minSample=1) {
+  if(is.null(poppsub)) {
+    # use popMat to calculate poppsub
+    stop("Calculating poppsub with popMat not yet implemented")
+    
+    # poppsub: a table with the following variables:
+    # subarea
+    # area
+    # popUrb
+    # popRur
+    # popTotal
+  }
+  
+  # get area names
+  areas = sort(unique(popMat$area))
+  subareas = sort(unique(popMat$subarea))
+  if(any(areas != easpa$area))
+    stop("area names and easpa do not match popMat or are not in the correct order")
+  
+  # we will need to draw separate multinomial for each stratum
+  
+  # create temporary popMat, except with one row for each constituency
+  popSubareaMat = popMat[1:length(subareas),]
+  popSubareaMat$area = poppsub$area
+  popSubareaMat$subarea = poppsub$subarea
+  if(stratifyByUrban) {
+    popSubareaMat$urban = FALSE
+    popSubareaMat = rbind(popSubareaMat, popSubareaMat)
+    popSubareaMat$urban[1:length(subareas)] = TRUE
+    popSubareaMat$pop[1:length(subareas)] = poppsub$popUrb
+    popSubareaMat$pop[(length(subareas) + 1):(2 * length(subareas))] = poppsub$popRur
+  } else {
+    popSubareaMat$pop = poppsub$popTotal
+  }
+  
+  # now draw multinomials
+  if(stratifyByUrban) {
+    # draw for each constituency in each area crossed with urban/rural
+    urbanSamplesCon = do.call("rbind", lapply(1:length(areas), rMyMultinomial, n=n, urban=TRUE, 
+                                              stratifyByUrban=stratifyByUrban, popMat=popSubareaMat, easpa=easpa, 
+                                              min1PerSubarea=min1PerSubarea, method="mult", 
+                                              minSample=minSample))
+    ruralSamplesCon = do.call("rbind", lapply(1:length(areas), rMyMultinomial, n=n, urban=FALSE, 
+                                              stratifyByUrban=stratifyByUrban, popMat=popSubareaMat, easpa=easpa, 
+                                              min1PerSubarea=min1PerSubarea, method="mult", 
+                                              minSample=minSample))
+    
+    # get the indices used to recombine into the full set of draws for the subareas
+    urbanIndicesCon = unlist(sapply(1:length(areas), getSortIndices, urban=TRUE, popMat=popSubareaMat, stratifyByUrban=stratifyByUrban))
+    ruralIndicesCon = unlist(sapply(1:length(areas), getSortIndices, urban=FALSE, popMat=popSubareaMat, stratifyByUrban=stratifyByUrban)) - length(urbanIndicesCon)
+    
+    # recombine into eaSamples for the subareas
+    urbanSamplesCon[urbanIndicesCon,] = urbanSamplesCon
+    ruralSamplesCon[ruralIndicesCon,] = ruralSamplesCon
+    
+    # draw for each pixel crossed with urban/rural
+    urbanSamples = do.call("rbind", lapply(1:length(subareas), rMyMultinomialSubarea, n=n, urban=TRUE, 
+                                           stratifyByUrban=stratifyByUrban, popMat=popMat, easpsub=urbanSamplesCon))
+    ruralSamples = do.call("rbind", lapply(1:length(subareas), rMyMultinomialSubarea, n=n, urban=FALSE, 
+                                           stratifyByUrban=stratifyByUrban, popMat=popMat, easpsub=ruralSamplesCon))
+    
+    # get the indices used to recombine into the full set of draws
+    tempPopMat = popMat
+    tempPopMat$area = tempPopMat$subarea
+    urbanIndices = unlist(sapply(1:length(subareas), getSortIndices, urban=TRUE, popMat=tempPopMat, stratifyByUrban=stratifyByUrban))
+    ruralIndices = unlist(sapply(1:length(subareas), getSortIndices, urban=FALSE, popMat=tempPopMat, stratifyByUrban=stratifyByUrban))
+    
+    # create matrix of all draws of |C^g| and recombine urban/rural draws into eaSamples
+    eaSamples = matrix(NA, nrow=nrow(popMat), ncol=n)
+    eaSamples[urbanIndices,] = urbanSamples
+    eaSamples[ruralIndices,] = ruralSamples
+  } else {
+    # draw for each constituency in each area crossed with urban/rural
+    samplesCon = do.call("rbind", lapply(1:length(areas), rMyMultinomial, n=n, urban=TRUE, 
+                                         stratifyByUrban=stratifyByUrban, popMat=popSubareaMat, easpa=easpa, 
+                                         min1PerSubarea=min1PerSubarea, method="mult", 
+                                         minSample=minSample))
+    
+    # get the indices used to recombine into the full set of draws for the subareas
+    indicesCon = unlist(sapply(1:length(areas), getSortIndices, popMat=popSubareaMat, stratifyByUrban=stratifyByUrban))
+    
+    # recombine into eaSamples for the subareas
+    samplesCon[indicesCon,] = samplesCon
+    
+    # draw for each pixel in each constituency
+    stratumSamples = rbind(sapply(1:length(subareas), n=n, rMyMultinomialSubarea, 
+                                  stratifyByUrban=stratifyByUrban, popMat=popMat, easpsub=samplesCon))
+    
+    # get the indices used to recombine into the full set of draws
+    tempPopMat = popMat
+    tempPopMat$area = tempPopMat$subarea
+    stratumIndices = c(sapply(1:length(subareas), getSortIndices, popMat=tempPopMat, stratifyByUrban=stratifyByUrban))
+    
+    # create matrix of all draws of |C^g|
+    eaSamples = matrix(NA, nrow=nrow(popMat), ncol=n)
+    eaSamples[stratumIndices,] = stratumSamples
+  }
+  
+  # return results
+  eaSamples
+}
+
+#' @describeIn simPopInternal 
+rMyMultinomial = function(n, i, stratifyByUrban=TRUE, urban=TRUE, popMat=NULL, easpa=NULL, min1PerSubarea=FALSE, 
+                          method=c("mult1", "mult", "indepMH"), minSample=1) {
+  method = match.arg(method)
+  
+  # get area names
+  areas = sort(unique(popMat$area))
+  if(any(areas != easpa$area))
+    stop("area names and easpa do not match popMat or are not in the correct order")
+  
+  # determine which pixels and how many EAs are in this stratum
+  if(stratifyByUrban) {
+    includeI = (popMat$area == areas[i]) & (popMat$urban == urban)
+    nEA = ifelse(urban, easpa$EAUrb[i], easpa$EARur[i])
+  }
+  else {
+    includeI = popMat$area == areas[i]
+    nEA = ifelse(urban, easpa$EAUrb[i], easpa$EATotal[i])
+  }
+  
+  # sample from the pixels if this stratum exists
+  if(sum(includeI) == 0)
+    return(matrix(nrow=0, ncol=n))
+  thesePixelProbs = popMat$pop[includeI]
+  if(any(thesePixelProbs > 0)) {
+    if(!min1PerSubarea) {
+      stats::rmultinom(n, nEA, prob=thesePixelProbs)
+    } else {
+      rmultinom1(n, nEA, prob=thesePixelProbs, method=method, allowSizeLessThanK=TRUE, minSample=minSample)
+    }
+  } else {
+    matrix(0, nrow=length(thesePixelProbs), ncol=n)
+  }
+}
+
+#' @describeIn simPopInternal 
+rMyMultinomialSubarea = function(n, i, easpsub, stratifyByUrban=TRUE, urban=TRUE, popMat=NULL) {
+  
+  # get constituency names
+  subareas = sort(unique(popMat$subarea))
+  
+  # determine which pixels and how many EAs are in this stratum
+  if(stratifyByUrban) {
+    includeI = (popMat$subarea == subareas[i]) & (popMat$urban == urban)
+  }
+  else {
+    includeI = popMat$subarea == subareas[i]
+  }
+  nEA = easpsub[i,]
+  
+  # sample from the pixels if this stratum exists
+  if(sum(includeI) == 0){
+    if(any(nEA != 0))
+      stop(paste0("no valid pixels to put EAs in for subarea ", as.character(subareas[i]), " and urban level ", urban))
+    return(matrix(nrow=0, ncol=n))
+  }
+  
+  thesePixelProbs = popMat$pop[includeI]
+  nonZeroEAs = nEA != 0
+  out = matrix(0, nrow=sum(includeI), ncol=n)
+  if(any(nonZeroEAs)) {
+    out[,nonZeroEAs] = sapply(nEA[nonZeroEAs], stats::rmultinom, n=1, prob=thesePixelProbs)
+  }
+  out
+}
+
+#' @describeIn simPopInternal Random (truncated) multinomial draws conditional on the number of each type being at least one
+rmultinom1 = function(n=1, size, prob, maxSize=8000*8000, method=c("mult1", "mult", "indepMH"), verbose=FALSE, minSample=100, 
+                      maxExpectedSizeBeforeSwitch=1000*1e7, init=NULL, burnIn=floor(n/4), filterEvery=10, zeroProbZeroSamples=TRUE, 
+                      allowSizeLessThanK=FALSE) {
+  method = match.arg(method)
+  prob = prob*(1/sum(prob))
+  
+  if(zeroProbZeroSamples && any(prob == 0)) {
+    zero = prob == 0
+    out = matrix(0, nrow=length(prob), ncol=n)
+    
+    if(sum(!zero) > 0) {
+      out[!zero,] = rmultinom1(n, size, prob[!zero], maxSize, method, verbose, minSample, maxExpectedSizeBeforeSwitch, init, burnIn, 
+                               filterEvery, zeroProbZeroSamples, allowSizeLessThanK)
+    }
+    
+    return(out)
+  }
+  
+  k = length(prob)
+  if(allowSizeLessThanK && (size <= k)) {
+    return(replicate(n, as.numeric(1:k %in% sample(1:k, size, replace=FALSE))))
+  } else if(size < k) {
+    stop("size < k but rmultinom1 requires at least 1 sample per multinomial type")
+  }
+  
+  maxSamples = floor(maxSize / k)
+  averageProbMult = prod((size/k)*prob)
+  
+  if(method != "indepMH")
+    samples = matrix(NA, nrow=k, ncol=n)
+  else
+    samples = matrix(NA, nrow=k, ncol=round(n*filterEvery))
+  if(method == "mult1") {
+    averagex = 1 + (size-k)*prob
+    averageProb = (size-k) / (prod(averagex))
+    
+    while(any(is.na(samples))) {
+      # calculate the number of remaining samples
+      samplesLeft = sum(apply(samples, 2, function(x) {any(is.na(x))}))
+      
+      # approximate expected number of samples so that, after some are rejected, we will 
+      # have the right number of samples. Kappa is approximated for when p_1=p_2=...=p_k
+      expectedSamples = ceiling(samplesLeft/averageProb)
+      # logMStar = lfactorial(size) - lfactorial(size-k) + sum(log(prob)) - log(size-k+1)
+      # avgAcceptProb = exp(-logMStar)
+      # logKappaApprox = lchoose(size + k - 1, k - 1) - lchoose(size - 1, k - 1)
+      # logM = logMStar -logKappaApprox
+      # avgAcceptProb = exp(-logM)
+      # expectedSamples = ceiling(samplesLeft/avgAcceptProb)
+      
+      if(expectedSamples*k > maxExpectedSizeBeforeSwitch) {
+        warning("too many samples expected with method=='mult1'. Switching to method=='indepMH'")
+        return(rmultinom1(n, size, prob, maxSize, method="indepMH", verbose, minSample, 
+                          maxExpectedSizeBeforeSwitch, init, burnIn, filterEvery, 
+                          zeroProbZeroSamples, allowSizeLessThanK))
+      }
+      
+      # sample expectedSamples times a fudge factor, but make sure we don't get past memory limit
+      thisNumberOfSamples = max(minSample, min(maxSamples, expectedSamples * 1.1))
+      if(verbose)
+        print(paste0("Sampling ", thisNumberOfSamples, ". Sampled ", n-samplesLeft, "/", n, ". Expected remaining samples: ", expectedSamples))
+      thisSamples = matrix(1 + stats::rmultinom(thisNumberOfSamples, size-k, prob=prob), nrow=length(prob))
+      
+      # calculate accept probabilities
+      thisProbs = exp(log(size-k) - apply(log(thisSamples), 2, sum))
+      # thisProbs = (size-k) / apply(thisSamples, 2, prod)
+      if(verbose) {
+        print(paste0("Max sampled accept prob: ", max(thisProbs), ". Mean sampled accept prob: ", mean(thisProbs)))
+        print(paste0("Expected number of samples based on avg sampled acceptance prob: ", ceiling(samplesLeft/mean(thisProbs))))
+      }
+      
+      # reject relevant samples
+      u = stats::runif(thisNumberOfSamples)
+      thisSamples = matrix(thisSamples[,u<thisProbs], nrow=length(prob))
+      
+      # remove excess samples if necessary
+      totalSamples = ncol(thisSamples) + n - samplesLeft
+      if(totalSamples > n) {
+        thisSamples = matrix(thisSamples[,1:samplesLeft], nrow=length(prob))
+      }
+      
+      # add in accepted samples, if any
+      if(ncol(thisSamples) > 0) {
+        samples[,(n-samplesLeft+1):(n-samplesLeft+ncol(thisSamples))] = thisSamples
+      } else {
+        warning(paste0("no samples accepted this round out of ", thisNumberOfSamples, " total. Redrawing..."))
+      }
+    }
+  } else if(method == "mult") {
+    
+    while(any(is.na(samples))) {
+      # calculate the number of remaining samples
+      samplesLeft = sum(apply(samples, 2, function(x) {any(is.na(x))}))
+      
+      # approximate expected number of samples so that, after some are rejected, we will 
+      # have the right number of samples
+      expectedSamples = ceiling(samplesLeft/averageProbMult)
+      
+      if(expectedSamples*k > maxExpectedSizeBeforeSwitch) {
+        warning("too many samples expected with method=='mult'. Switching to method=='mult1'")
+        return(rmultinom1(n, size, prob, maxSize, method="mult1", verbose, minSample, maxExpectedSizeBeforeSwitch, 
+                          init, burnIn, filterEvery, 
+                          zeroProbZeroSamples, allowSizeLessThanK))
+      }
+      
+      # sample expectedSamples times a fudge factor, but make sure we don't get past memory limit
+      thisNumberOfSamples = max(minSample, min(maxSamples, expectedSamples * 1.1))
+      if(verbose)
+        print(paste0("Sampling ", thisNumberOfSamples, ". Sampled ", n-samplesLeft, "/", n, ". Expected remaining samples: ", expectedSamples))
+      thisSamples = matrix(stats::rmultinom(thisNumberOfSamples, size, prob=prob), ncol=thisNumberOfSamples)
+      
+      # reject relevant samples
+      accept = apply(thisSamples, 2, function(x) {all(x>0)})
+      thisSamples = matrix(thisSamples[,accept], nrow=length(prob))
+      
+      # remove excess samples if necessary
+      totalSamples = ncol(thisSamples) + n - samplesLeft
+      if(totalSamples > n) {
+        thisSamples = matrix(thisSamples[,1:samplesLeft], nrow=length(prob))
+      }
+      
+      # add in accepted samples, if any
+      if(ncol(thisSamples) > 0) {
+        samples[,(n-samplesLeft+1):(n-samplesLeft+ncol(thisSamples))] = thisSamples
+      } else {
+        warning(paste0("no samples accepted this round out of ", thisNumberOfSamples, " total. Redrawing..."))
+      }
+    }
+  } else if(method == "indepMH") {
+    # we use the mult1 method for independent proposals with independent Metropolis-Hastings
+    # (https://www.statlect.com/fundamentals-of-statistics/Metropolis-Hastings-algorithm#hid9)
+    
+    # initialize at something reasonable, if not set by user
+    if(is.null(init)) {
+      init = 1 + floor(size*prob)
+      while(sum(init) > size) {
+        tooMuchBy = sum(init) - size
+        numberReducible = sum(init > 1)
+        reduceNumber = min(tooMuchBy, numberReducible)
+        init[init > 1][1:reduceNumber] = init[init > 1][1:reduceNumber] - 1
+      }
+    }
+    if(sum(init) != size)
+      stop("sum(init) != size")
+    
+    # approximate target log-density
+    lp <- log(prob)
+    lf <- function(x) {
+      if(any(x < 1) || sum(x) != size)
+        return(-Inf)
+      sum(lp*x - lfactorial(x))
+    }
+    
+    # true proposal log-density
+    lq = function(x) {
+      if(sum(x) != size)
+        return(-Inf)
+      sum(lp*(x-1) - lfactorial(x-1)) + lfactorial(size-k)
+    }
+    
+    # proposal function
+    q <- function(x) {
+      1 + stats::rmultinom(1, size-k, prob)
+    }
+    
+    # do the sampling
+    tmp <- init
+    ar <- 0
+    for (i in 1:burnIn) {
+      proposal <- q(tmp)
+      p <- exp((lf(proposal) - lq(proposal)) - (lf(tmp) - lq(tmp)))
+      if (stats::runif(1) < p) {
+        tmp <- proposal
+      }
+    }
+    for (i in 1:ncol(samples)) {
+      proposal <- q(tmp)
+      p <- exp((lf(proposal) - lq(proposal)) - (lf(tmp) - lq(tmp)))
+      if (stats::runif(1) < p) {
+        tmp <- proposal
+        ar <- ar + 1
+      }
+      samples[,i] <- tmp
+    }
+    
+    # calculated acceptance percentage
+    if(verbose) {
+      print(paste0("acceptance percentage: ", ar/ncol(samples)))
+    }
+    
+    # estimate autocorrelation in samples (if it is na then all samples have the same value there)
+    calcCor = apply(samples, 1, function(x) {cor(x[-1], x[-length(x)])})
+    calcCor[is.na(calcCor)] = 1
+    print(paste0("estimated mean (max) lag 1 correlation after filtering: ", mean(calcCor^filterEvery), " (", max(calcCor^filterEvery), ")"))
+    
+    # filter out samples to reduce autocorrelation
+    samples = samples[,seq(from=1, to=ncol(samples), by=filterEvery)]
+  }
+  
+  samples
+}
+
+#' @describeIn simPopInternal Take multilevel multinomial draws first from joint distribution of 
+#' number of households per EA given the total per stratum, and then from the joint 
+#' distribution of the total target population per household given 
+#' the total per stratum
+sampleNMultilevelMultinomial = function(nDraws = ncol(pixelIndexMat), pixelIndexMat=NULL, urbanMat=NULL, areaMat=NULL, easpaList, 
+                                        popMat, stratifyByUrban=TRUE, verbose=TRUE, returnEAinfo=FALSE, 
+                                        minHHPerEA=25, fixHHPerEA=NULL, fixPopPerHH=NULL) {
+  
+  if(length(easpaList) == 1) {
+    easpaList = replicate(nDraws, easpaList[[1]], simplify=FALSE)
+  }
+  areas = easpaList[[1]]$area
+  
+  if((is.null(areaMat) || is.null(urbanMat)) && is.null(pixelIndexMat)) {
+    stop("user must either supply pixelIndexMat or both areaMat and urbanMat")
+  }
+  if(is.null(areaMat)) {
+    areaIs = match(popMat$area, sort(unique(areas))) # convert from character to indices to save memory
+    areaMat = matrix(areaIs[pixelIndexMat], ncol=nDraws)
+  }
+  if(is.null(urbanMat)) {
+    urbanMat = matrix(popMat$urban[pixelIndexMat], ncol=nDraws)
+  }
+  
+  # start by drawing the totals, then divide households amongst EAs, then divide target population amongst households. 
+  # Make sure there are at least 25 households per EA (only divide the rest randomly)
+  
+  ##### Draw the totals
+  
+  # get the total number of enumeration areas per stratum (this does not change between draws)
+  areasIs = match(areas, sort(unique(areas))) # convert from character to indices
+  totalEAsUrban = easpaList[[1]]$EAUrb
+  totalEAsRural = easpaList[[1]]$EARur
+  totalEAs = easpaList[[1]]$EATotal
+  nEAs = sum(totalEAs)
+  
+  ##### draw the total target population per enumeration area
+  
+  targetPopDraws = matrix(nrow=nEAs, ncol=nDraws)
+  if(returnEAinfo) {
+    householdDraws = matrix(nrow=nEAs, ncol=nDraws)
+  }
+  
+  # Draw the number of households per stratum area that will be randomly distributed (total minus the minimum minHHPerEA)
+  if(stratifyByUrban) {
+    totalHouseholdsUrban = sweep(matrix(sapply(easpaList, function(x) {x$HHUrb}), ncol=length(easpaList)), 1, -minHHPerEA*totalEAsUrban, "+")
+    totalHouseholdsRural = sweep(matrix(sapply(easpaList, function(x) {x$HHRur}), ncol=length(easpaList)), 1, -minHHPerEA*totalEAsRural, "+")
+    totalChildrenUrban = matrix(sapply(easpaList, function(x) {x$popUrb}), ncol=length(easpaList))
+    totalChildrenRural = matrix(sapply(easpaList, function(x) {x$popRur}), ncol=length(easpaList))
+  } else {
+    totalHouseholds = sweep(sapply(matrix(easpaList, function(x) {x$HHTotal}), ncol=length(easpaList)), 1, -minHHPerEA*totalEAs, "+")
+    totalChildren = matrix(sapply(easpaList, function(x) {x$popHHTotal}), ncol=length(easpaList))
+  }
+  
+  # distribute the households throughout the enumeration areas with multinomial distribution, then 
+  # distribute the target population amongst the households, also with a multinomial distribution
+  for(i in 1:length(areasIs)) {
+    thisArea = areasIs[i]
+    thisAreaL = areaMat==thisArea
+    
+    # print progress if in verbose mode
+    if(verbose) {
+      print(paste0("drawing Ns for each EA for area ", thisArea, " (", i, "/", length(areas), ")"))
+    }
+    
+    # draw households per EA (make sure there are any rural EAs)
+    if(stratifyByUrban) {
+      if(totalEAsUrban[i] != 0) {
+        householdDrawsUrban <- matrix(sapply(totalHouseholdsUrban[i,], stats::rmultinom, n=1, prob=rep(1/totalEAsUrban[i], totalEAsUrban[i])), nrow=totalEAsUrban[i], ncol=nDraws) + minHHPerEA
+      }
+      if(totalEAsRural[i] != 0) {
+        householdDrawsRural <- matrix(sapply(totalHouseholdsRural[i,], stats::rmultinom, n=1, prob=rep(1/totalEAsRural[i], totalEAsRural[i])), nrow=totalEAsRural[i], ncol=nDraws) + minHHPerEA
+      }
+      
+      # if we must return EA info, we must return the household draws for each EA:
+      if(returnEAinfo) {
+        if(totalEAsUrban[i] != 0) {
+          householdDraws[thisAreaL & urbanMat] = householdDrawsUrban
+        }
+        if(totalEAsRural[i] != 0) {
+          householdDraws[thisAreaL & !urbanMat] = householdDrawsRural
+        }
+      }
+    } else {
+      householdDraws = matrix(sapply(totalHouseholds[i,], stats::rmultinom, n=1, prob=rep(1/totalEAs[i], totalEAs[i])), nrow=totalEAs[i], ncol=nDraws) + minHHPerEA
+    }
+    
+    # drawing target population per EA
+    if(is.null(fixPopPerHH)) {
+      # draw target pop per EA with probability proportional to the number of households
+      if(stratifyByUrban) {
+        if(totalEAsUrban[i] != 0) {
+          probsUrban = sweep(householdDrawsUrban, 2, 1 / colSums(householdDrawsUrban), "*")
+          targetPopDraws[thisAreaL & urbanMat] = sapply(1:nDraws, function(j) {stats::rmultinom(1, totalChildrenUrban[i,j], probsUrban[,j])})
+        }
+        
+        if(totalEAsRural[i] != 0) {
+          probsRural = sweep(householdDrawsRural, 2, 1 / colSums(householdDrawsRural), "*")
+          targetPopDraws[thisAreaL & !urbanMat] = sapply(1:nDraws, function(j) {stats::rmultinom(1, totalChildrenRural[i,j], probsRural[,j])})
+        }
+      } else {
+        probs = sweep(householdDraws, 2, 1 / colSums(householdDraws), "*")
+        targetPopDraws[thisAreaL] = sapply(1:nDraws, function(j) {stats::rmultinom(1, totalChildren[i,j], probs[,j])})
+      }
+    } else {
+      # set target pop per EA based on fixed number per household
+      if(stratifyByUrban) {
+        if(totalEAsUrban[i] != 0) {
+          targetPopDraws[thisAreaL & urbanMat] = fixPopPerHH*householdDrawsUrban
+        }
+        if(totalEAsRural[i] != 0) {
+          targetPopDraws[thisAreaL & !urbanMat] = fixPopPerHH*householdDrawsRural
+        }
+      } else {
+        targetPopDraws[thisAreaL] = fixPopPerHH*householdDraws
+      }
+    }
+  }
+  
+  ##### Return results
+  if(!returnEAinfo) {
+    targetPopDraws
+  } else {
+    list(householdDraws=householdDraws, targetPopDraws=targetPopDraws)
+  }
+}
+
+#' @describeIn simPopInternal Same as sampleNMultilevelMultinomial, except the number of EAs per pixel is fixed
+sampleNMultilevelMultinomialFixed = function(clustersPerPixel, nDraws=ncol(pixelIndices), pixelIndices=NULL, 
+                                             urbanVals=NULL, areaVals=NULL, easpa, popMat, stratifyByUrban=TRUE, 
+                                             verbose=TRUE) {
+  
+  # set default inputs
+  if((is.null(areaVals) || is.null(urbanVals)) && is.null(pixelIndices)) {
+    stop("user must either supply pixelIndices or both areaVals and urbanVals")
+  }
+  if(is.null(areaVals)) {
+    areaVals = matrix(popMat$area[pixelIndices], ncol=nDraws)
+  }
+  if(is.null(urbanVals)) {
+    urbanVals = matrix(popMat$urban[pixelIndices], ncol=nDraws)
+  }
+  
+  # start by drawing the totals, then divide households amongst EAs, then divide target population amongst households. 
+  # Make sure there are at least 25 households per EA (only divide the rest randomly)
+  
+  ##### Draw the totals
+  
+  # get the total number of enumeration areas per stratum (this does not change between draws)
+  areas = easpa$area
+  totalEAsUrban = easpa$EAUrb
+  totalEAsRural = easpa$EARur
+  totalEAs = easpa$EATotal
+  nEAs = sum(totalEAs)
+  
+  if(nEAs != sum(clustersPerPixel)) {
+    stop("sum(easpa$EATotal) != sum(clustersPerPixel)")
+  }
+  
+  ##### draw the total target population per EA
+  targetPopDraws = matrix(nrow=nEAs, ncol=nDraws)
+  
+  # Draw the number of households per stratum area that will be randomly distributed (total minus the minimum 25)
+  if(stratifyByUrban) {
+    totalHouseholdsUrban = easpa$HHUrb -25*totalEAsUrban
+    totalHouseholdsRural = easpa$HHRur -25*totalEAsRural
+    totalChildrenUrban = easpa$popUrb
+    totalChildrenRural = easpa$popRur
+  } else {
+    totalHouseholds = easpa$HHTotal - 25*totalEAs
+    totalChildren = easpa$popHHTotal
+  }
+  
+  # distribute the households throughout the enumeration areas with multinomial distribution, then 
+  # distribute the target population amongst the households, also with a multinomial distribution
+  for(i in 1:length(areas)) {
+    thisArea = areas[i]
+    
+    # print progress if in verbose mode
+    if(verbose) {
+      print(paste0("drawing Ns for each EA for area ", thisArea, " (", i, "/", length(areas), ")"))
+    }
+    
+    # draw households per EA (make sure there are any rural EAs)
+    if(stratifyByUrban) {
+      if(totalEAsUrban[i] != 0) {
+        if(any(totalHouseholdsUrban != 0)) {
+          householdDrawsUrban = stats::rmultinom(n=nDraws, size=totalHouseholdsUrban[i], prob=rep(1/totalEAsUrban[i], totalEAsUrban[i])) + 25
+        } else {
+          householdDrawsUrban = matrix(rep(25, totalEAsUrban[i]*nDraws), ncol=nDraws)
+        }
+      }
+      
+      if(totalEAsRural[i] != 0) {
+        if(any(totalHouseholdsRural != 0)) {
+          householdDrawsRural = stats::rmultinom(n=nDraws, size=totalHouseholdsRural[i], prob=rep(1/totalEAsRural[i], totalEAsRural[i])) + 25
+        } else {
+          householdDrawsRural = matrix(rep(25, totalEAsRural[i]*nDraws), ncol=nDraws)
+        }
+      }
+    } else {
+      if(any(totalHouseholdsRural != 0)) {
+        householdDraws = stats::rmultinom(n=nDraws, size=totalHouseholds[i], prob=rep(1/totalEAs[i], totalEAs[i])) + 25
+      } else {
+        householdDraws = matrix(rep(25, totalEAs[i]*nDraws), ncol=nDraws)
+      }
+    }
+    
+    # drawing target population per EA, with probability proportional to the number of households
+    if(stratifyByUrban) {
+      
+      if(totalEAsUrban[i] != 0) {
+        probsUrban = sweep(householdDrawsUrban, 2, 1 / colSums(householdDrawsUrban), "*")
+        targetPopDraws[areaVals==thisArea & urbanVals] = sapply(1:nDraws, function(j) {stats::rmultinom(1, totalChildrenUrban[i], probsUrban[,j])})
+      }
+      
+      if(totalEAsRural[i] != 0) {
+        probsRural = sweep(householdDrawsRural, 2, 1 / colSums(householdDrawsRural), "*")
+        targetPopDraws[areaVals==thisArea & !urbanVals] = sapply(1:nDraws, function(j) {stats::rmultinom(1, totalChildrenRural[i], probsRural[,j])})
+      }
+      
+    } else {
+      probs = sweep(householdDraws, 2, 1 / colSums(householdDraws), "*")
+      targetPopDraws[areaVals==thisArea] = sapply(1:nDraws, function(j) {stats::rmultinom(1, totalChildren[i], probs[,j])})
+    }
+  }
+  
+  ##### Return results
+  targetPopDraws
+}
\ No newline at end of file
diff --git a/R/simSPDE.R b/R/simSPDE.R
new file mode 100755
index 0000000..79ae073
--- /dev/null
+++ b/R/simSPDE.R
@@ -0,0 +1,59 @@
+#' Simulate from the SPDE spatial model
+#' 
+#' Generates nCoords x nsim matrix of simulated 
+#' values of the SPDE spatial process
+#'
+#' @param coords 2 column matrix of spatial coordinates at which to simulate the spatial process
+#' @param nsim number of draws from the SPDE model
+#' @param mesh SPDE mesh
+#' @param effRange effective spatial range
+#' @param margVar marginal variance of the spatial process
+#' @param inla.seed seed input to inla.qsample. 0L sets seed intelligently, 
+#' > 0 sets a specific seed, < 0 keeps existing RNG
+#' 
+#' @author John Paige
+#' 
+#' @references Lindgren, F., Rue, H., Lindström, J., 2011. An explicit link between Gaussian fields and Gaussian Markov random fields: the stochastic differential equation approach (with discussion). Journal of the Royal Statistical Society, Series B 73, 423–498.
+#' 
+#' @examples
+#' \dontrun{
+#' set.seed(123)
+#' require(INLA)
+#' coords = matrix(runif(10*2), ncol=2)
+#' mesh = inla.mesh.2d(loc.domain=cbind(c(0, 0, 1, 1), c(0, 1, 0, 1)), 
+#'   n=3000, max.n=5000, max.edge=c(.01, .05), offset=-.1)
+#' simVals = simSPDE(coords, nsim=1, mesh, effRange=.2, inla.seed=1L)
+#' }
+#' 
+#' @export
+simSPDE = function(coords, nsim=1, mesh, effRange=(max(coords[,1])-min(coords[,1]))/3, margVar=1, inla.seed=0L) {
+  
+  # calculate SPDE model parameters based on Lindgren Rue (2015) "Bayesian Spatial Modelling with R-INLA"
+  meshSize <- min(c(diff(range(mesh$loc[, 1])), diff(range(mesh$loc[, 2]))))
+  # it is easier to use theta and set sigma0 to 1 then to set sigma0 and the effective range directly
+  # kappa0 <- sqrt(8)/effRange * meshSize # since nu = 1
+  # kappa0 <- sqrt(8)/effRange # since nu = 1
+  # kappa0 = sqrt(8) / 5
+  # logKappa = log(kappa0)
+  sigma0 = 1
+  # tau0 <- 1/(sqrt(4 * pi) * kappa0 * sigma0)
+  # logTau = log(tau0)
+  
+  # from page 5 of the paper listed above:
+  logKappa = 0.5 * log(8)
+  logTau = 0.5 * (lgamma(1) - (lgamma(2) + log(4*pi))) - logKappa
+  theta = c(log(sqrt(margVar)), log(effRange))
+  spde <- INLA::inla.spde2.matern(mesh, B.tau = cbind(logTau, -1, +1),
+                            B.kappa = cbind(logKappa, 0, -1), theta.prior.mean = theta,
+                            theta.prior.prec = c(0.1, 1))
+  
+  # generate A and Q precision matrix
+  Q = INLA::inla.spde2.precision(spde, theta = theta)
+  A = INLA::inla.spde.make.A(mesh, coords)
+  
+  # generate simulations
+  simField = INLA::inla.qsample(nsim, Q, seed=inla.seed)
+  simDat = as.matrix(A %*% simField)
+  
+  simDat
+}
\ No newline at end of file
diff --git a/README.md b/README.md
index 12067d8..5de2eb0 100755
--- a/README.md
+++ b/README.md
@@ -4,10 +4,25 @@
 
 SAE Unit/area Models and Methods for Estimation in R
 
+
 ## Next major update (version 1.4.0)
-+ New major functions for simulation and using SPDE spatial model and aggregation over grids.
-+ New datasets on Kenya population map.
-+ New methods for benchmarking.
++ New major functions for  population and prevalence simulation based on population frame and population density information, along with methods for aggregating pixel level prevalences and populations to the areal level. 
+
++ New major functions.
+    * ``simSPDE``: Simulates from the SPDE spatial model.
+    * ``simPopSPDE``: Simulates population in a country based on SPDE spatial model and population frame information.
+    * ``simPopPixel``: Simulates population in a country based on custom spatial model and population frame information.
++ New minor functions.
+    * ``aggPixelPreds`` and ``areaPopToArea``: Functions for aggregating populations and prevalences to the areal level.
+    * Functions for setting pixellated grid and population density grid.
+    * Functions for adjusting population density grid based on population frame information.
+    * Functions for projections to use with Kenya maps and for getting Admin areas given spatial locations.
++ New internal functions for helping with population simulation.
++ New datasets
+    * ``kenyaMaps``: Kenya administrative area map shapefiles, and a triangular mesh for the SPDE model for Kenya.
+    * ``kenyaPopulationData``: General and neonatal population frames for Kenya, and general and neonatal population density information for Kenya.
+    * ``popMatKenya`` and ``popMatKenyaNeonatal``: General and neonatal population density grids at 5km resolution.
+
 
 ## Major update (version 1.3.0)
 + New SAE functions and vignette.
@@ -17,6 +32,12 @@ SAE Unit/area Models and Methods for Estimation in R
 + Allows `smoothDirect` and `smoothCluster` to fit space-only models.
 + S3 class output.
 
+## Major update (version 1.1.0)
++ Major expansion for `smoothSurvey` to implement popular SAE methods. Syntax change to the function. 
++ Allows `smoothDirect` and `smoothCluster` to fit space-only models.
++ `smoothSurvey`, `smoothDirect`, and `smoothCluster` now returns S3 classed objects.
+ 
+
 ## Major update (version 1.1.0)
 + Major expansion for `smoothSurvey` to implement popular SAE methods. Syntax change to the function. 
 + Allows `smoothDirect` and `smoothCluster` to fit space-only models.
diff --git a/data/kenyaPopulationData.rda b/data/kenyaPopulationData.rda
new file mode 100755
index 0000000..6706e2d
Binary files /dev/null and b/data/kenyaPopulationData.rda differ
diff --git a/inst/.DS_Store b/inst/.DS_Store
new file mode 100755
index 0000000..ec1eba7
Binary files /dev/null and b/inst/.DS_Store differ
diff --git a/man/malawiMap.Rd b/man/MalawiMap.Rd
similarity index 100%
rename from man/malawiMap.Rd
rename to man/MalawiMap.Rd
diff --git a/man/aggPixelPreds.Rd b/man/aggPixelPreds.Rd
new file mode 100755
index 0000000..afc3f1f
--- /dev/null
+++ b/man/aggPixelPreds.Rd
@@ -0,0 +1,39 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/simPop.R
+\name{aggPixelPreds}
+\alias{aggPixelPreds}
+\title{Helper function of \code{\link{pixelPopToArea}}}
+\usage{
+aggPixelPreds(
+  Zg,
+  Ng,
+  areas,
+  urban = targetPopMat$urban,
+  targetPopMat = NULL,
+  useDensity = FALSE,
+  stratifyByUrban = TRUE,
+  normalize = useDensity
+)
+}
+\arguments{
+\item{Zg}{nIntegrationPoint x nsim matrix of simulated response (population numerators) for each pixel and sample}
+
+\item{Ng}{nIntegrationPoint x nsim matrix of simulated counts (population denominators) for each pixel and sample}
+
+\item{areas}{nIntegrationPoint length character vector of areas (or subareas)}
+
+\item{urban}{nIntegrationPoint length vector of indicators specifying whether or not pixels are urban or rural}
+
+\item{targetPopMat}{same as in \code{\link{simPopCustom}}}
+
+\item{useDensity}{whether to use population density as aggregation weights.}
+
+\item{stratifyByUrban}{whether or not to stratify simulations by urban/rural classification}
+
+\item{normalize}{if TRUE, pixel level aggregation weights within specified area are normalized to sum to 1. This produces an 
+average of the values in Zg rather than a sum. In general, should only be set to TRUE for smooth integrals of risk.}
+}
+\description{
+Aggregates population from the 
+pixel level to the level of the area of interest.
+}
diff --git a/man/aggPop.Rd b/man/aggPop.Rd
new file mode 100755
index 0000000..5ef77f5
--- /dev/null
+++ b/man/aggPop.Rd
@@ -0,0 +1,139 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/simPop.R
+\name{aggPop}
+\alias{aggPop}
+\alias{pixelPopToArea}
+\alias{areaPopToArea}
+\title{Aggregate populations to the specified areal level}
+\usage{
+pixelPopToArea(
+  pixelLevelPop,
+  eaSamples,
+  areas,
+  stratifyByUrban = TRUE,
+  targetPopMat = NULL,
+  doFineScaleRisk = !is.null(pixelLevelPop$fineScaleRisk$p),
+  doSmoothRisk = !is.null(pixelLevelPop$smoothRisk$p)
+)
+
+areaPopToArea(
+  areaLevelPop,
+  areasFrom,
+  areasTo,
+  stratifyByUrban = TRUE,
+  doFineScaleRisk = !is.null(areaLevelPop$aggregationResults$pFineScaleRisk),
+  doSmoothRisk = !is.null(areaLevelPop$aggregationResults$pSmoothRisk)
+)
+}
+\arguments{
+\item{pixelLevelPop}{pixel level population information that we want aggregate. In the same format as output from \code{\link{simPopCustom}}}
+
+\item{eaSamples}{nIntegrationPoint x nsim matrix of the number of enumeration areas per pixel sampled in the input pixel level population}
+
+\item{areas}{character vector of length nIntegrationPoints of area names over which we 
+want to aggregate. Can also be subareas}
+
+\item{stratifyByUrban}{whether or not to stratify simulations by urban/rural classification}
+
+\item{targetPopMat}{pixellated grid data frame with variables `lon`, `lat`, `pop` (target population), `area`, `subareas` (if subareaLevel is TRUE), `urban` (if stratifyByUrban is TRUE), `east`, and `north`}
+
+\item{doFineScaleRisk}{whether or not to calculate the fine scale risk in addition to the prevalence. See details}
+
+\item{doSmoothRisk}{Whether or not to calculate the smooth risk in addition to the prevalence. See details}
+
+\item{areaLevelPop}{output of \code{\link{simPopCustom}} containing pixel level information 
+about the population of interest}
+
+\item{areasFrom}{character vector of length equal to the number of areas from which 
+we would like to aggregate containing the unique names of the areas. 
+Can also be subareas, but these are smaller than the "to areas", and 
+each "from area" must be entirely contained in a single "to area"}
+
+\item{areasTo}{character vector of length equal to the number of areas from which 
+we would like to aggregate containing the names of the areas containing 
+with each respective `from' area. Can also be a set of subareas, 
+but these are larger than the "from areas".}
+}
+\value{
+A list containing elements `fineScalePrevalence` and `fineScaleRisk`. Each 
+of these are in turn lists with aggregated prevalence and risk for the area of 
+interest, containg the following elements, were paranethesis indicate the elements 
+for the fineScaleRisk model rather than fineScalePrevalence:
+\item{p}{Aggregated prevalence (risk), calculated as aggregate of Z divided by 
+aggregate of N}
+\item{Z}{Aggregated (expected) population numerator}
+\item{N}{Aggregated (expected) population denominator}
+\item{pUrban}{Aggregated prevalence (risk) in urban part of the area, calculated 
+as aggregate of Z divided by aggregate of N}
+\item{ZUrban}{Aggregated (expected) population numerator in urban part of the area}
+\item{NUrban}{Aggregated (expected) population denominator in urban part of the area}
+\item{pRural}{Aggregated prevalence (risk) in rural part of the area, calculated 
+as aggregate of Z divided by aggregate of N}
+\item{ZRural}{Aggregated (expected) population numerator in rural part of the area}
+\item{NRural}{Aggregated (expected) population denominator in rural part of the area}
+\item{A}{Aggregation matrix used to aggregate from pixel level to areal level}
+\item{AUrban}{Aggregation matrix used to aggregate from pixel level to urban part of the areal level}
+\item{ARural}{Aggregation matrix used to aggregate from pixel level to rural part of the areal level}
+}
+\description{
+Takes simulated populations and aggregates 
+them to the specified areal level. Also calculates the aggregated risk and prevalence.
+}
+\section{Functions}{
+\itemize{
+\item \code{pixelPopToArea()}: Aggregate from pixel to areal level
+
+\item \code{areaPopToArea()}: Aggregate areal populations to another areal level
+
+}}
+\examples{
+\dontrun{
+##### Now we make a model for the risk. We will use an SPDE model with these 
+##### parameters for the linear predictor on the logist scale, which are chosen 
+##### to be of practical interest:
+beta0=-2.9 # intercept
+gamma=-1 # urban effect
+rho=(1/3)^2 # spatial variance
+effRange = 400 # effective spatial range in km
+sigmaEpsilon=sqrt(1/2.5) # cluster (nugget) effect standard deviation
+
+# simulate the population! Note that this produces multiple dense 
+# nEA x nsim and nIntegrationPoint x nsim matrices. In the future 
+# sparse matrices will and chunk by chunk computations may be incorporated.
+simPop = simPopSPDE(nsim=1, easpa=easpaKenyaNeonatal, 
+                    popMat=popMatKenya, targetPopMat=popMatKenyaNeonatal, 
+                    poppsub=poppsubKenya, spdeMesh=kenyaMesh, 
+                    margVar=rho, sigmaEpsilonSq=sigmaEpsilon^2, 
+                    gamma=gamma, effRange=effRange, beta0=beta0, 
+                    seed=123, inla.seed=12, nHHSampled=25, 
+                    stratifyByUrban=TRUE, subareaLevel=TRUE, 
+                    doFineScaleRisk=TRUE, 
+                    min1PerSubarea=TRUE)
+
+pixelPop = simPop$pixelPop
+subareaPop = pixelPopToArea(pixelLevelPop=pixelPop, eaSamples=pixelPop$eaSamples, 
+  areas=popMatKenya$subarea, stratifyByUrban=TRUE, 
+  targetPopMat=popMatKenyaNeonatal, doFineScaleRisk=TRUE)
+
+# get areas associated with each subarea for aggregation
+tempAreasFrom = popMatKenya$subarea
+tempAreasTo = popMatKenya$area
+areasFrom = sort(unique(tempAreasFrom))
+areasToI = match(areasFrom, tempAreasFrom)
+areasTo = tempAreasTo[areasToI]
+
+# do the aggregation from subareas to areas
+outAreaLevel = areaPopToArea(areaLevelPop=subareaPop, 
+  areasFrom=areasFrom, areasTo=areasTo, 
+  stratifyByUrban=TRUE, doFineScaleRisk=TRUE)
+}
+}
+\references{
+In Preparation
+}
+\seealso{
+\code{\link{areaPopToArea}}
+}
+\author{
+John Paige
+}
diff --git a/man/calibrateByRegion.Rd b/man/calibrateByRegion.Rd
new file mode 100755
index 0000000..a9b4382
--- /dev/null
+++ b/man/calibrateByRegion.Rd
@@ -0,0 +1,39 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/popGrid.R
+\name{calibrateByRegion}
+\alias{calibrateByRegion}
+\title{Calibrate the point level totals so their sum matches the regional totals}
+\usage{
+calibrateByRegion(pointTotals, pointRegions, regions, regionTotals)
+}
+\arguments{
+\item{pointTotals}{Vector of point level totals that will be calibrated/normalized}
+
+\item{pointRegions}{Vector of regions associated with each point}
+
+\item{regions}{Vector of region names}
+
+\item{regionTotals}{Vector of desired region level totals associated with `regions`}
+}
+\value{
+A vector of same length as pointTotals and pointRegions containing 
+the calibrated/normalized point totals that sum to the correct regional totals
+
+Vector of updated point level totals, calibrated to match region totals
+}
+\description{
+Calibrate/normalize the point level totals so their sum matches the 
+regional totals. Technically, the totals can be at any level smaller  
+than the region level specified.
+}
+\examples{
+pointTotals = c(1, 1, 1, 2)
+pointRegions = c("a", "a", "b", "b")
+regionTotals = c(10, 20)
+regions = c("a", "b")
+calibrateByRegion(pointTotals, pointRegions, regions, regionTotals)
+
+}
+\author{
+John Paige
+}
diff --git a/man/getAreaName.Rd b/man/getAreaName.Rd
new file mode 100755
index 0000000..2a24066
--- /dev/null
+++ b/man/getAreaName.Rd
@@ -0,0 +1,71 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/getAreaName.R
+\name{getAreaName}
+\alias{getAreaName}
+\title{Determines which administrative areas contain the given points}
+\usage{
+getAreaName(
+  pts,
+  shapefile,
+  areaNameVar = "NAME_1",
+  delta = 0.05,
+  mean.neighbor = 50,
+  maxBytes = 3 * 2^30
+)
+}
+\arguments{
+\item{pts}{2 column matrix of lon/lat coordinates}
+
+\item{shapefile}{A SpatialPolygonsDataFrame object}
+
+\item{areaNameVar}{The column name in \code{slot(shapefile, "data")} 
+corresponding to the area level of interest}
+
+\item{delta}{Argument passed to fields::fields.rdist.near in fields package}
+
+\item{mean.neighbor}{Argument passed to fields::fields.rdist.near in fields 
+package}
+
+\item{maxBytes}{Maximum allowed memory in bytes (default is 3Gb). Determines 
+whether to call fields::fields.rdist.near which saves memory bute requires 
+delta and mean.neighbor inputs to be specified or fields::rdist.}
+}
+\value{
+A list of area IDs, area names, whether or not 
+points are in multiple areas, and whether or not points 
+are in no areas and assigned to the nearest one.
+}
+\description{
+For any points not in an area, they are assigned the nearest area using 
+fields::fields.rdist.near or fields::rdist depending on the number of points 
+and the maximum memory in bytes with a warning.
+}
+\details{
+delta and mean.neighbor arguments only used when some points 
+are not in areas, perhaps due to inconsistencies in shapefiles.
+}
+\examples{
+\dontrun{
+# download Kenya GADM shapefiles from SUMMERdata github repository
+githubURL <- "https://github.com/paigejo/SUMMERdata/blob/main/data/kenyaMaps.rda?raw=true"
+download.file(githubURL,"kenyaMaps.rda")
+
+# load it in
+load("kenyaMaps.rda")
+
+# use the shapefile data to see what Admin1 and 2 areas the 
+# points (0, 37) and (0.5, 38) are in
+# (these are longitude/latitude coordinates)
+pts = cbind(c(37, 38), c(0, .5))
+head(slot(adm1, "data"))
+admin1Areas = getAreaName(pts, adm1, "NAME_1")
+admin2Areas = getAreaName(pts, adm2, "NAME_2")
+}
+
+}
+\seealso{
+\code{\link{projKenya}}, \code{\link[fields]{fields.rdist.near}}
+}
+\author{
+John Paige
+}
diff --git a/man/kenyaPopulationData.Rd b/man/kenyaPopulationData.Rd
new file mode 100755
index 0000000..f3cb615
--- /dev/null
+++ b/man/kenyaPopulationData.Rd
@@ -0,0 +1,47 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/kenyaPopulationData.R
+\docType{data}
+\name{kenyaPopulationData}
+\alias{kenyaPopulationData}
+\alias{easpaKenya}
+\alias{easpaKenyaNeonatal}
+\alias{poppaKenya}
+\alias{poppsubKenya}
+\title{Kenya 2009 Census Frame and Related Datasets}
+\format{
+A number of data.frames with information about the 2009 Kenya Population and Housing Census and the population in Kenya at the time of the 2014 Demographic Health Survey. Some of the 
+data.frames have been adjusted to contain information about neonatals born from 2010-2014 rather than general population in 2014. 
+The dataset names are: easpaKenya, easpaKenyaNeonatal, 
+poppaKenya, and poppsubKenya.
+
+An object of class \code{data.frame} with 47 rows and 12 columns.
+
+An object of class \code{data.frame} with 47 rows and 6 columns.
+
+An object of class \code{data.frame} with 300 rows and 7 columns.
+}
+\source{
+<http://dhsprogram.com/pubs/pdf/FR308/FR308.pdf>
+}
+\usage{
+data(kenyaPopulationData)
+
+easpaKenyaNeonatal
+
+poppaKenya
+
+poppsubKenya
+}
+\description{
+Datasets related to the 2009 census frame for Kenya based on the 2009 Kenya Population and Housing 
+Census. General population totals are estimated for 2014. Based on 2014 population density 
+estimates interpolated with exponential growth rate between 2010 and 2015 from WorldPop data.
+}
+\references{
+Kenya National Bureau of Statistics, Ministry of Health/Kenya, National AIDS Control Council/Kenya, Kenya Medical Research Institute, and National Council For Population And Development/Kenya, 2015. Kenya Demographic and Health Survey 2014. Rockville, Maryland, USA. URL: http://dhsprogram.com/pubs/pdf/FR308/FR308.pdf.
+
+Stevens, F.R., Gaughan, A.E., Linard, C., Tatem, A.J., 2015. Disaggregat- ing census data for population mapping using random forests with remotely-sensed and ancillary data. PloS One 10, e0107042.
+
+Tatem, A.J., 2017. WorldPop, open data for spatial demography. Scientific Data 4.
+}
+\keyword{datasets}
diff --git a/man/logitNormMean.Rd b/man/logitNormMean.Rd
new file mode 100755
index 0000000..a596339
--- /dev/null
+++ b/man/logitNormMean.Rd
@@ -0,0 +1,46 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/logitNormMean.R
+\name{logitNormMean}
+\alias{logitNormMean}
+\title{Calculate the mean of a distribution whose 
+logit is Gaussian}
+\usage{
+logitNormMean(muSigmaMat, logisticApprox = FALSE, ...)
+}
+\arguments{
+\item{muSigmaMat}{An n x 2 matrix where each row is \eqn{\mu}{mu} and \eqn{\sigma}{sigma} 
+on the logit scale for an independent random variable.}
+
+\item{logisticApprox}{Whether or not to use logistic approximation to speed 
+up computation. See details for more information.}
+
+\item{...}{More arguments, passed to \code{integrate} function}
+}
+\value{
+A vector of expectations of the specified random variables
+}
+\description{
+Adapted from logitnorm package.  Calculates the mean of a distribution whose 
+logit is Gaussian. Each row of muSigmaMat is a mean and standard deviation 
+on the logit scale.
+}
+\details{
+If \eqn{\mbox{logit}(Y) \sim N(\mu, \sigma^2)}{logit(Y) ~ N(mu, sigma^2)}, 
+This function calculates \eqn{E[Y]}{E[Y]} via either numerical integration or by 
+assuming that Y follows a logistic distribution. Under this approximation, setting 
+\eqn{k = 16 \sqrt(3) / (15 \pi)}{k = 16 * sqrt(3) / (15 * pi)}, we approximate 
+the expectation as:
+\deqn{E[Y] = expit(\mu / \sqrt(1 + k^2 \sigma^2))}{E[Y] = expit(mu / sqrt(1 + k^2 * sigma^2))}.
+The above logistic approximation speeds up the computation, but also sacrifices 
+some accuracy.
+}
+\examples{
+mus = c(-5, 0, 5)
+sigmas = rep(1, 3)
+logitNormMean(cbind(mus, sigmas))
+logitNormMean(cbind(mus, sigmas), TRUE)
+
+}
+\author{
+John Paige
+}
diff --git a/man/makePopIntegrationTab.Rd b/man/makePopIntegrationTab.Rd
new file mode 100755
index 0000000..3fd6086
--- /dev/null
+++ b/man/makePopIntegrationTab.Rd
@@ -0,0 +1,368 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/popGrid.R
+\name{makePopIntegrationTab}
+\alias{makePopIntegrationTab}
+\alias{getPoppsub}
+\alias{adjustPopMat}
+\title{Generating pixellated populations, and population frames}
+\usage{
+makePopIntegrationTab(
+  kmRes = 5,
+  pop,
+  domainMapDat,
+  eastLim,
+  northLim,
+  mapProjection,
+  areaMapDat,
+  subareaMapDat,
+  areaNameVar = "NAME_1",
+  subareaNameVar = "NAME_2",
+  poppa = NULL,
+  poppsub = NULL,
+  stratifyByUrban = TRUE,
+  areapa = NULL,
+  areapsub = NULL,
+  customSubsetPolygons = NULL,
+  areaPolygonSubsetI = NULL,
+  subareaPolygonSubsetI = NULL,
+  mean.neighbor = 50,
+  delta = 0.1,
+  returnPoppTables = FALSE,
+  setNAsToZero = TRUE,
+  fixZeroPopDensitySubareas = FALSE,
+  extractMethod = "bilinear"
+)
+
+getPoppsub(
+  kmRes = 1,
+  pop,
+  domainMapDat,
+  eastLim,
+  northLim,
+  mapProjection,
+  poppa,
+  areapa = NULL,
+  areapsub,
+  subareaMapDat,
+  subareaNameVar = "NAME_2",
+  stratifyByUrban = TRUE,
+  areaMapDat = NULL,
+  areaNameVar = "NAME_1",
+  areaPolygonSubsetI = NULL,
+  subareaPolygonSubsetI = NULL,
+  customSubsetPolygons = NULL,
+  mean.neighbor = 50,
+  delta = 0.1,
+  setNAsToZero = TRUE,
+  fixZeroPopDensitySubareas = FALSE
+)
+
+adjustPopMat(
+  popMat,
+  poppaTarget = NULL,
+  adjustBy = c("area", "subarea"),
+  stratifyByUrban = TRUE
+)
+}
+\arguments{
+\item{kmRes}{The resolution of the pixelated grid in km}
+
+\item{pop}{Population density raster}
+
+\item{domainMapDat}{A shapefile representing the full spatial domain (e.g. country)}
+
+\item{eastLim}{Range in km easting over the spatial domain under the input projection}
+
+\item{northLim}{Range in km northing over the spatial domain under the input projection}
+
+\item{mapProjection}{A projection function taking longitude and latitude and returning easting and 
+northing in km. Or the inverse if inverse is set to TRUE. For example, 
+\code{\link{projKenya}}. Check https://epsg.io/ for example for best projection EPSG codes 
+for specific countries}
+
+\item{areaMapDat}{SpatialPolygonsDataFrame object with area level map information}
+
+\item{subareaMapDat}{SpatialPolygonsDataFrame object with subarea level map information}
+
+\item{areaNameVar}{The name of the area variable associated with \code{areaMapDat@data} 
+and \code{subareaMapDat@data}}
+
+\item{subareaNameVar}{The name of the subarea variable associated with \code{subareaMapDat@data}}
+
+\item{poppa}{data.frame of population per area separated by urban/rural. If `poppsub` 
+  is not included, this is used for normalization of populations associated with 
+  population integration points. Contains variables:
+\describe{
+  \item{area}{name of area}
+  \item{popUrb}{total urban (general) population of area}
+  \item{popRur}{total rural (general) population of area}
+  \item{popTotal}{total (general) population of area}
+  \item{pctUrb}{percentage of population in the area that is urban (between 0 and 100)}
+}}
+
+\item{poppsub}{data.frame of population per subarea separated by 
+urban/rural using for population normalization or urbanicity 
+classification. Often based on extra fine scale population density grid. 
+Has variables:
+\describe{
+  \item{subarea}{name of subarea}
+  \item{area}{name of area}
+  \item{popUrb}{total urban (general) population of subarea}
+  \item{popRur}{total rural (general) population of subarea}
+  \item{popTotal}{total (general) population of subarea}
+  \item{pctUrb}{percentage of population in the subarea that is urban (between 0 and 100)}
+}}
+
+\item{stratifyByUrban}{Whether to stratify the pixellated grid by urban/rural. If TRUE, 
+renormalizes population densities within areas or subareas crossed with urban/rural}
+
+\item{areapa}{A list with variables:
+\describe{
+  \item{area}{name of area}
+  \item{spatialArea}{spatial area of the subarea (e.g. in km^2)}
+}}
+
+\item{areapsub}{A list with variables:
+\describe{
+  \item{subarea}{name of subarea}
+  \item{spatialArea}{spatial area of the subarea (e.g. in km^2)}
+}}
+
+\item{customSubsetPolygons}{'SpatialPolygonsDataFrame' or 'SpatialPolygons' object to subset 
+the grid over. This option can help reduce computation time relative to 
+constructing the whole grid and subsetting afterwards. `areaPolygonSubsetI` or 
+`subareaPolygonSubsetI` can be used when subsetting by areas or subareas in 
+`areaMapDat` or `subareaMapDat`. Must be in latitude/longitude projection "EPSG:4326"}
+
+\item{areaPolygonSubsetI}{Index in areaMapDat for a specific area to subset the grid over. This 
+option can help reduce computation time relative to constructing the whole grid 
+and subsetting afterwards}
+
+\item{subareaPolygonSubsetI}{FOR EXPERIMENTAL PURPOSES ONLY. Index in subareaMapDat for a 
+specific area to subset the grid over. This 
+option can help reduce computation time relative to constructing the whole grid 
+and subsetting afterwards}
+
+\item{mean.neighbor}{For determining what area or subarea points are nearest to if they do not 
+directly fall into an area. See \code{\link[fields]{fields.rdist.near}} for details.}
+
+\item{delta}{For determining what area or subarea points are nearest to if they do not 
+directly fall into an area. See \code{\link[fields]{fields.rdist.near}} for details.}
+
+\item{returnPoppTables}{If TRUE, poppa and poppsub will be calculated based on the generated 
+population integration matrix and input area/subarea map data}
+
+\item{setNAsToZero}{If TRUE, sets NA populations to 0.}
+
+\item{fixZeroPopDensitySubareas}{If TRUE, if population density in a subarea is estimated to be 
+zero, but the total population in the subarea is nonzero, population is filled into the 
+area uniformly}
+
+\item{extractMethod}{Either 'bilinear' or 'simple'. see `method` from 
+\code{\link[terra]{extract}}}
+
+\item{popMat}{Pixellated grid data frame with variables `area` and `pop` such as that 
+generated by \code{\link{makePopIntegrationTab}}}
+
+\item{poppaTarget}{Target population per area stratified by urban rural. Same format as poppa}
+
+\item{adjustBy}{Whether to adjust population density by the `area` or `subarea` level}
+}
+\description{
+Functions for generating pixellated population information and 
+population frames at the `area` and `subarea` levels.  
+The `area` and `subarea` levels can be thought of as big 
+regions and little regions, where areas can be partitioned into 
+unique sets of subareas. For example, Admin-1 and Admin-2 
+areas might be areas and subareas respectively. The population totals are either 
+tabulated at the area x urban/rural level, the subarea x urban/rural 
+level, or at the pixel level of a specified resolution. Totals 
+are calculated using population density information, shapefiles, 
+and, possibly, preexisting population frames at different 
+areal levels. Note that area names should each be unique, and similarly for 
+subarea names.
+}
+\section{Functions}{
+\itemize{
+\item \code{makePopIntegrationTab()}: Generate pixellated `grid` of coordinates (both longitude/latitude and east/north) 
+over spatial domain of the given resolution with associated population totals, areas, subareas, 
+and urban/rural levels. For very small areas that might not 
+otherwise have a grid point in them, a custom integration point is added at their 
+centroid. Sets urbanicity classifications by thresholding input population density raster 
+using area and subarea population tables, and generates area and subarea population 
+tables from population density information if not already given. Can be used for integrating 
+predictions from the given coordinates to area and subarea levels using population weights.
+
+\item \code{getPoppsub()}: Generate table of estimates of population
+totals per subarea x urban/rural combination based on population density
+raster at `kmres` resolution "grid", including custom integration points
+for any subarea too small to include grid points at their centroids.
+
+\item \code{adjustPopMat()}: Adjust population densities in grid based on a population frame.
+
+}}
+\examples{
+\dontrun{
+# download Kenya GADM shapefiles from SUMMERdata github repository
+githubURL <- paste0("https://github.com/paigejo/SUMMERdata/blob/main/data/", 
+                    "kenyaMaps.rda?raw=true")
+tempDirectory = "~/"
+mapsFilename = paste0(tempDirectory, "/kenyaMaps.rda")
+if(!file.exists(mapsFilename)) {
+  download.file(githubURL,mapsFilename)
+}
+
+# load it in
+out = load(mapsFilename)
+out
+adm1@data$NAME_1 = as.character(adm1@data$NAME_1)
+adm1@data$NAME_1[adm1@data$NAME_1 == "Trans Nzoia"] = "Trans-Nzoia"
+adm1@data$NAME_1[adm1@data$NAME_1 == "Elgeyo-Marakwet"] = "Elgeyo Marakwet"
+adm2@data$NAME_1 = as.character(adm2@data$NAME_1)
+adm2@data$NAME_1[adm2@data$NAME_1 == "Trans Nzoia"] = "Trans-Nzoia"
+adm2@data$NAME_1[adm2@data$NAME_1 == "Elgeyo-Marakwet"] = "Elgeyo Marakwet"
+
+# some Admin-2 areas have the same name
+adm2@data$NAME_2 = as.character(adm2@data$NAME_2)
+adm2@data$NAME_2[(adm2@data$NAME_1 == "Bungoma") & 
+                   (adm2@data$NAME_2 == "Lugari")] = "Lugari, Bungoma"
+adm2@data$NAME_2[(adm2@data$NAME_1 == "Kakamega") & 
+                   (adm2@data$NAME_2 == "Lugari")] = "Lugari, Kakamega"
+adm2@data$NAME_2[(adm2@data$NAME_1 == "Meru") & 
+                   (adm2@data$NAME_2 == "Igembe South")] = "Igembe South, Meru"
+adm2@data$NAME_2[(adm2@data$NAME_1 == "Tharaka-Nithi") & 
+                   (adm2@data$NAME_2 == "Igembe South")] = "Igembe South, Tharaka-Nithi"
+
+# The spatial area of unknown 8 is so small, it causes problems unless its removed or 
+# unioned with another subarea. Union it with neighboring Kakeguria:
+newadm2 = adm2
+unknown8I = which(newadm2$NAME_2 == "unknown 8")
+newadm2$NAME_2[newadm2$NAME_2 \%in\% c("unknown 8", "Kapenguria")] <- 
+  "Kapenguria + unknown 8"
+admin2.IDs <- newadm2$NAME_2
+
+library(maptools)
+temp <- unionSpatialPolygons(newadm2, admin2.IDs)
+tempData = newadm2@data[-unknown8I,]
+tempData = tempData[order(tempData$NAME_2),]
+newadm2 <- SpatialPolygonsDataFrame(temp, tempData, match.ID = F)
+adm2 = newadm2
+
+# download 2014 Kenya population density and associated TIF file
+githubURL <- paste0("https://github.com/paigejo/SUMMERdata/blob/main/data/", 
+                    "Kenya2014Pop/pop.rda?raw=true")
+popFilename = paste0(tempDirectory, "/pop.rda")
+if(!file.exists(popFilename)) {
+  download.file(githubURL,popFilename)
+}
+
+githubURL <- paste0("https://github.com/paigejo/SUMMERdata/blob/main/data/", 
+                    "Kenya2014Pop/worldpop_total_1y_2014_00_00.tif?raw=true")
+popTIFFilename = paste0(tempDirectory, "/worldpop_total_1y_2014_00_00.tif")
+if(!file.exists(popTIFFilename)) {
+  download.file(githubURL,popTIFFilename)
+}
+
+# load it in
+require(terra)
+out = load(popFilename)
+out
+
+# make sure this is correct for re-projections
+pop@file@name = paste0(tempDirectory, "/worldpop_total_1y_2014_00_00.tif")
+
+eastLim = c(-110.6405, 832.4544)
+northLim = c(-555.1739, 608.7130)
+
+## Construct poppsubKenya, a table of urban/rural general population totals 
+## in each subarea. Technically, this is not necessary since we can load in 
+## poppsubKenya via data(kenyaPopulationData). First, we will need to calculate 
+## the areas in km^2 of the areas and subareas
+
+library(rgdal)
+library(sp)
+
+# use Lambert equal area projection of areas (Admin-1) and subareas (Admin-2)
+midLon = mean(adm1@bbox[1,])
+midLat = mean(adm1@bbox[2,])
+p4s = paste0("+proj=laea +x_0=0 +y_0=0 +lon_0=", midLon, 
+             " +lat_0=", midLat, " +units=km")
+
+library(rgdal)
+
+adm1proj <- spTransform(adm1, CRS(p4s))
+adm2proj <- spTransform(adm2, CRS(p4s))
+
+# now calculate spatial area in km^2
+library(rgeos)
+admin1Areas = gArea(adm1proj, TRUE)
+admin2Areas = gArea(adm2proj, TRUE)
+areapaKenya = data.frame(area=adm1proj@data$NAME_1, spatialArea=admin1Areas)
+areapsubKenya = data.frame(area=adm2proj@data$NAME_1, subarea=adm2proj@data$NAME_2, 
+                           spatialArea=admin2Areas)
+
+# Calculate general population totals at the subarea (Admin-2) x urban/rural 
+# level and using 1km resolution population grid. Assign urbanicity by 
+# thresholding population density based on estimated proportion population 
+# urban/rural, making sure total area (Admin-1) urban/rural populations in 
+# each area matches poppaKenya.
+require(fields)
+# NOTE: the following function will typically take ~20 minutes. Can speed up 
+#       by setting kmRes to be higher, but we recommend fine resolution for 
+#       this step, since it only needs to be done once. Instead of running this, 
+#       you can simply run data(kenyaPopulationData)
+system.time(poppsubKenya <- getPoppsub(
+  kmRes=1, pop=pop, domainMapDat=adm0,
+  eastLim=eastLim, northLim=northLim, mapProjection=projKenya,
+  poppa = poppaKenya, areapa=areapaKenya, areapsub=areapsubKenya, 
+  areaMapDat=adm1, subareaMapDat=adm2, 
+  areaNameVar = "NAME_1", subareaNameVar="NAME_2"))
+
+# Now generate a general population integration table at 5km resolution, 
+# based on subarea (Admin-2) x urban/rural population totals. This takes 
+# ~1 minute
+system.time(popMatKenya <- makePopIntegrationTab(
+  kmRes=5, pop=pop, domainMapDat=adm0,
+  eastLim=eastLim, northLim=northLim, mapProjection=projKenya,
+  poppa = poppaKenya, poppsub=poppsubKenya, 
+  areaMapDat = adm1, subareaMapDat = adm2,
+  areaNameVar = "NAME_1", subareaNameVar="NAME_2"))
+
+## Adjust popMat to be target (neonatal) rather than general population density. First
+## creat the target population frame
+## (these numbers are based on IPUMS microcensus data)
+mothersPerHouseholdUrb = 0.3497151
+childrenPerMotherUrb = 1.295917
+mothersPerHouseholdRur = 0.4787696
+childrenPerMotherRur = 1.455222
+targetPopPerStratumUrban = easpaKenya$HHUrb * mothersPerHouseholdUrb * childrenPerMotherUrb
+targetPopPerStratumRural = easpaKenya$HHRur * mothersPerHouseholdRur * childrenPerMotherRur
+easpaKenyaNeonatal = easpaKenya
+easpaKenyaNeonatal$popUrb = targetPopPerStratumUrban
+easpaKenyaNeonatal$popRur = targetPopPerStratumRural
+easpaKenyaNeonatal$popTotal = easpaKenyaNeonatal$popUrb + easpaKenyaNeonatal$popRur
+easpaKenyaNeonatal$pctUrb = 100 * easpaKenyaNeonatal$popUrb / easpaKenyaNeonatal$popTotal
+easpaKenyaNeonatal$pctTotal = 
+  100 * easpaKenyaNeonatal$popTotal / sum(easpaKenyaNeonatal$popTotal)
+
+# Generate the target population density by scaling the current population density grid 
+# at the Admin1 x urban/rural level
+popMatKenyaNeonatal = adjustPopMat(popMatKenya, easpaKenyaNeonatal)
+
+# Generate neonatal population table from the neonatal population integration matrix.
+# This is technically not necessary for population simulation purposes, but is here 
+# for illustrative purposes
+poppsubKenyaNeonatal = poppRegionFromPopMat(popMatKenyaNeonatal, popMatKenyaNeonatal$subarea)
+poppsubKenyaNeonatal = cbind(subarea=poppsubKenyaNeonatal$region, 
+                             area=adm2@data$NAME_1[match(poppsubKenyaNeonatal$region, 
+                               adm2@data$NAME_2)], 
+                             poppsubKenyaNeonatal[,-1])
+print(head(poppsubKenyaNeonatal))
+}
+}
+\seealso{
+\code{\link{setThresholdsByRegion}}, \code{\link{poppRegionFromPopMat}}, \code{\link{simPopSPDE}}, \code{\link{simPopCustom}}
+}
+\author{
+John Paige
+}
diff --git a/man/poppRegionFromPopMat.Rd b/man/poppRegionFromPopMat.Rd
new file mode 100755
index 0000000..12280b0
--- /dev/null
+++ b/man/poppRegionFromPopMat.Rd
@@ -0,0 +1,134 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/popGrid.R
+\name{poppRegionFromPopMat}
+\alias{poppRegionFromPopMat}
+\title{Generate a population frame of a similar format to poppa argument of \code{\link{simPopCustom}} with a custom set of regions}
+\usage{
+poppRegionFromPopMat(popMat, regions)
+}
+\arguments{
+\item{popMat}{Pixellated grid data frame with variables `area` and `pop`. Assumed to be stratified by urban/rural}
+
+\item{regions}{character vector of length nPixels giving a custom set of regions for which to generate 
+a population frame using population density}
+}
+\value{
+A table of population totals by region
+}
+\description{
+Generate a population frame of a similar format to poppa argument of \code{\link{simPopCustom}} with a custom set of regions
+}
+\details{
+Urbanicity thresholds are set based on that region's percent population 
+urban. Intended as a helper function of \code{\link{getPoppsub}}, but 
+can also be used for custom sets of regions (i.e. more than just 2 
+areal levels: area and subarea).
+}
+\examples{
+\dontrun{
+data(kenyaPopulationData)
+
+#' # download Kenya GADM shapefiles from SUMMERdata github repository
+githubURL <- "https://github.com/paigejo/SUMMERdata/blob/main/data/kenyaMaps.rda?raw=true"
+tempDirectory = "~/"
+mapsFilename = paste0(tempDirectory, "/kenyaMaps.rda")
+if(!file.exists(mapsFilename)) {
+  download.file(githubURL,mapsFilename)
+}
+
+# load it in
+out = load(mapsFilename)
+out
+adm1@data$NAME_1 = as.character(adm1@data$NAME_1)
+adm1@data$NAME_1[adm1@data$NAME_1 == "Trans Nzoia"] = "Trans-Nzoia"
+adm1@data$NAME_1[adm1@data$NAME_1 == "Elgeyo-Marakwet"] = "Elgeyo Marakwet"
+adm2@data$NAME_1 = as.character(adm2@data$NAME_1)
+adm2@data$NAME_1[adm2@data$NAME_1 == "Trans Nzoia"] = "Trans-Nzoia"
+adm2@data$NAME_1[adm2@data$NAME_1 == "Elgeyo-Marakwet"] = "Elgeyo Marakwet"
+
+# some Admin-2 areas have the same name
+adm2@data$NAME_2 = as.character(adm2@data$NAME_2)
+adm2@data$NAME_2[(adm2@data$NAME_1 == "Bungoma") & 
+  (adm2@data$NAME_2 == "Lugari")] = "Lugari, Bungoma"
+adm2@data$NAME_2[(adm2@data$NAME_1 == "Kakamega") & 
+  (adm2@data$NAME_2 == "Lugari")] = "Lugari, Kakamega"
+adm2@data$NAME_2[(adm2@data$NAME_1 == "Meru") & 
+  (adm2@data$NAME_2 == "Igembe South")] = "Igembe South, Meru"
+adm2@data$NAME_2[(adm2@data$NAME_1 == "Tharaka-Nithi") & 
+  (adm2@data$NAME_2 == "Igembe South")] = "Igembe South, Tharaka-Nithi"
+
+# The spatial area of unknown 8 is so small, it causes problems unless 
+# its removed or unioned with another subarea. Union it with neighboring 
+# Kakeguria:
+newadm2 = adm2
+unknown8I = which(newadm2$NAME_2 == "unknown 8")
+newadm2$NAME_2[newadm2$NAME_2 \%in\% c("unknown 8", "Kapenguria")] <- "Kapenguria + unknown 8"
+admin2.IDs <- newadm2$NAME_2
+
+library(maptools)
+temp <- unionSpatialPolygons(newadm2, admin2.IDs)
+tempData = newadm2@data[-unknown8I,]
+tempData = tempData[order(tempData$NAME_2),]
+newadm2 <- SpatialPolygonsDataFrame(temp, tempData, match.ID = F)
+adm2 = newadm2
+
+# download 2014 Kenya population density and associated TIF file
+githubURL <- 
+  "https://github.com/paigejo/SUMMERdata/blob/main/data/Kenya2014Pop/pop.rda?raw=true"
+popFilename = paste0(tempDirectory, "/pop.rda")
+if(!file.exists(popFilename)) {
+  download.file(githubURL,popFilename)
+}
+
+githubURL <- 
+  paste0("https://github.com/paigejo/SUMMERdata/blob/main/data/Kenya2014Pop/", 
+    "worldpop_total_1y_2014_00_00.tif?raw=true")
+popTIFFilename = paste0(tempDirectory, "/worldpop_total_1y_2014_00_00.tif")
+if(!file.exists(popTIFFilename)) {
+  download.file(githubURL,popTIFFilename)
+}
+
+# load it in
+require(terra)
+out = load(popFilename)
+out
+
+# make sure this is correct for re-projections
+pop@file@name = paste0(tempDirectory, "/worldpop_total_1y_2014_00_00.tif")
+
+eastLim = c(-110.6405, 832.4544)
+northLim = c(-555.1739, 608.7130)
+
+require(fields)
+
+# Now generate a general population integration table at 5km resolution, 
+# based on subarea (Admin-2) x urban/rural population totals. This takes 
+# ~1 minute
+system.time(popMatKenya <- makePopIntegrationTab(
+  kmRes=5, pop=pop, domainMapDat=adm0,
+  eastLim=eastLim, northLim=northLim, mapProjection=projKenya,
+  poppa = poppaKenya, poppsub=poppsubKenya, 
+  areaMapDat = adm1, subareaMapDat = adm2,
+  areaNameVar = "NAME_1", subareaNameVar="NAME_2"))
+  
+out = poppRegionFromPopMat(popMatKenya, popMatKenya$area)
+out
+poppaKenya
+
+out = poppRegionFromPopMat(popMatKenya, popMatKenya$subarea)
+out
+poppsubKenya
+
+popMatKenyaUnstratified = popMatKenya
+popMatKenyaUnstratified$urban = NULL
+out = poppRegionFromPopMat(popMatKenyaUnstratified, popMatKenyaUnstratified$area)
+out
+poppaKenya
+}
+}
+\seealso{
+\code{\link{getPoppsub}}
+}
+\author{
+John Paige
+}
diff --git a/man/projKenya.Rd b/man/projKenya.Rd
new file mode 100755
index 0000000..ca8a99a
--- /dev/null
+++ b/man/projKenya.Rd
@@ -0,0 +1,47 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/projKenya.R
+\name{projKenya}
+\alias{projKenya}
+\title{Map projection for Kenya}
+\usage{
+projKenya(lon, lat = NULL, inverse = FALSE)
+}
+\arguments{
+\item{lon}{either longitude or, if inverse == TRUE, easting in km}
+
+\item{lat}{either latitude or, if inverse == TRUE, northing in km}
+
+\item{inverse}{if FALSE, projects from lon/lat to easting/northing.  Else from easting/northing to lon/lat}
+}
+\value{
+A 2 column matrix of easting/northing coordinates in km if inverse == FALSE. Otherwise, a 2 column matrix of longitude/latitude coordinates.
+}
+\description{
+Projection specifically chosen for Kenya. Project from lat/lon to northing/easting 
+in kilometers.  Uses epsg=21097 with km units. May not work on all systems due to 
+differences in the behavior between different PROJ and GDAL versions.
+}
+\examples{
+eastLim = c(-110.6405, 832.4544)
+northLim = c(-555.1739, 608.7130)
+coordMatrixEN = cbind(eastLim, northLim)
+coordMatrixLL = projKenya(coordMatrixEN, inverse=TRUE)
+
+coordMatrixLL
+# if the coordMatrixLL isn't the following, projKenya may not support 
+# your installation of GDAL and/or PROJ:
+#      east north
+# [1,] 33.5  -5.0
+# [2,] 42.0   5.5
+
+projKenya(coordMatrixLL, inverse=FALSE)
+# regardless of your PROJ/GDAL installations, the result of the 
+# above line of could should be:
+#            lon       lat
+# [1,] -110.6405 -555.1739
+# [2,]  832.4544  608.7130
+
+}
+\author{
+John Paige
+}
diff --git a/man/setThresholdsByRegion.Rd b/man/setThresholdsByRegion.Rd
new file mode 100755
index 0000000..61b197f
--- /dev/null
+++ b/man/setThresholdsByRegion.Rd
@@ -0,0 +1,131 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/popGrid.R
+\name{setThresholdsByRegion}
+\alias{setThresholdsByRegion}
+\title{Set thresholds of population density for urbanicity classifications 
+within each region of the given type}
+\usage{
+setThresholdsByRegion(popMat, poppr, regionType = "area")
+}
+\arguments{
+\item{popMat}{pixellated population density data frame with variables 
+regionType and `pop`}
+
+\item{poppr}{A table with population totals by region of the given type 
+(e.g. poppa or poppsub from \code{\link{makePopIntegrationTab}})}
+
+\item{regionType}{The variable name from poppr giving the region names. 
+Defaults to "area"}
+}
+\value{
+A list of region names and their urbanicity thresholds in population density
+}
+\description{
+Set thresholds of population density for urbanicity classifications 
+within each region of the given type
+}
+\details{
+Thresholds are set based on that region's percent population 
+urban. Intended as a helper function of \code{\link{makePopIntegrationTab}}.
+}
+\examples{
+\dontrun{
+data(kenyaPopulationData)
+
+#' # download Kenya GADM shapefiles from SUMMERdata github repository
+githubURL <- "https://github.com/paigejo/SUMMERdata/blob/main/data/kenyaMaps.rda?raw=true"
+tempDirectory = "~/"
+mapsFilename = paste0(tempDirectory, "/kenyaMaps.rda")
+if(!file.exists(mapsFilename)) {
+  download.file(githubURL,mapsFilename)
+}
+
+# load it in
+out = load(mapsFilename)
+out
+adm1@data$NAME_1 = as.character(adm1@data$NAME_1)
+adm1@data$NAME_1[adm1@data$NAME_1 == "Trans Nzoia"] = "Trans-Nzoia"
+adm1@data$NAME_1[adm1@data$NAME_1 == "Elgeyo-Marakwet"] = "Elgeyo Marakwet"
+adm2@data$NAME_1 = as.character(adm2@data$NAME_1)
+adm2@data$NAME_1[adm2@data$NAME_1 == "Trans Nzoia"] = "Trans-Nzoia"
+adm2@data$NAME_1[adm2@data$NAME_1 == "Elgeyo-Marakwet"] = "Elgeyo Marakwet"
+
+# some Admin-2 areas have the same name
+adm2@data$NAME_2 = as.character(adm2@data$NAME_2)
+adm2@data$NAME_2[(adm2@data$NAME_1 == "Bungoma") & 
+  (adm2@data$NAME_2 == "Lugari")] = "Lugari, Bungoma"
+adm2@data$NAME_2[(adm2@data$NAME_1 == "Kakamega") & 
+  (adm2@data$NAME_2 == "Lugari")] = "Lugari, Kakamega"
+adm2@data$NAME_2[(adm2@data$NAME_1 == "Meru") & 
+  (adm2@data$NAME_2 == "Igembe South")] = "Igembe South, Meru"
+adm2@data$NAME_2[(adm2@data$NAME_1 == "Tharaka-Nithi") & 
+  (adm2@data$NAME_2 == "Igembe South")] = "Igembe South, Tharaka-Nithi"
+
+# The spatial area of unknown 8 is so small, it causes problems unless 
+# its removed or unioned with another subarea. Union it with neighboring 
+# Kakeguria:
+newadm2 = adm2
+unknown8I = which(newadm2$NAME_2 == "unknown 8")
+newadm2$NAME_2[newadm2$NAME_2 \%in\% c("unknown 8", "Kapenguria")] <- "Kapenguria + unknown 8"
+admin2.IDs <- newadm2$NAME_2
+
+library(maptools)
+temp <- unionSpatialPolygons(newadm2, admin2.IDs)
+tempData = newadm2@data[-unknown8I,]
+tempData = tempData[order(tempData$NAME_2),]
+newadm2 <- SpatialPolygonsDataFrame(temp, tempData, match.ID = F)
+adm2 = newadm2
+
+# download 2014 Kenya population density and associated TIF file
+githubURL <- 
+  "https://github.com/paigejo/SUMMERdata/blob/main/data/Kenya2014Pop/pop.rda?raw=true"
+popFilename = paste0(tempDirectory, "/pop.rda")
+if(!file.exists(popFilename)) {
+  download.file(githubURL,popFilename)
+}
+
+githubURL <- 
+  paste0("https://github.com/paigejo/SUMMERdata/blob/main/data/Kenya2014Pop/", 
+    "worldpop_total_1y_2014_00_00.tif?raw=true")
+popTIFFilename = paste0(tempDirectory, "/worldpop_total_1y_2014_00_00.tif")
+if(!file.exists(popTIFFilename)) {
+  download.file(githubURL,popTIFFilename)
+}
+
+# load it in
+require(terra)
+out = load(popFilename)
+out
+
+# make sure this is correct for re-projections
+pop@file@name = paste0(tempDirectory, "/worldpop_total_1y_2014_00_00.tif")
+
+eastLim = c(-110.6405, 832.4544)
+northLim = c(-555.1739, 608.7130)
+
+require(fields)
+
+# Now generate a general population integration table at 5km resolution, 
+# based on subarea (Admin-2) x urban/rural population totals. This takes 
+# ~1 minute
+system.time(popMatKenya <- makePopIntegrationTab(
+  kmRes=5, pop=pop, domainMapDat=adm0,
+  eastLim=eastLim, northLim=northLim, mapProjection=projKenya,
+  poppa = poppaKenya, poppsub=poppsubKenya, 
+  areaMapDat = adm1, subareaMapDat = adm2,
+  areaNameVar = "NAME_1", subareaNameVar="NAME_2"))
+  
+out = setThresholdsByRegion(popMatKenya, poppaKenya)
+out
+
+out = setThresholdsByRegion(popMatKenya, poppsubKenya, regionType="subarea")
+out
+}
+
+}
+\seealso{
+\code{\link{makePopIntegrationTab}}
+}
+\author{
+John Paige
+}
diff --git a/man/simPop.Rd b/man/simPop.Rd
new file mode 100755
index 0000000..249f25e
--- /dev/null
+++ b/man/simPop.Rd
@@ -0,0 +1,447 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/simPop.R
+\name{simPop}
+\alias{simPop}
+\alias{simPopSPDE}
+\alias{simPopCustom}
+\title{Simulate populations and areal prevalences}
+\usage{
+simPopSPDE(
+  nsim = 1,
+  easpa,
+  popMat,
+  targetPopMat,
+  poppsub,
+  spdeMesh,
+  margVar = 0.243,
+  sigmaEpsilon = sqrt(0.463),
+  gamma = 0.009,
+  effRange = 406.51,
+  beta0 = -3.922,
+  seed = NULL,
+  inla.seed = -1L,
+  nHHSampled = 25,
+  stratifyByUrban = TRUE,
+  subareaLevel = TRUE,
+  doFineScaleRisk = FALSE,
+  doSmoothRisk = FALSE,
+  doSmoothRiskLogisticApprox = FALSE,
+  min1PerSubarea = TRUE
+)
+
+simPopCustom(
+  logitRiskDraws,
+  sigmaEpsilonDraws,
+  easpa,
+  popMat,
+  targetPopMat,
+  stratifyByUrban = TRUE,
+  validationPixelI = NULL,
+  validationClusterI = NULL,
+  clustersPerPixel = NULL,
+  doFineScaleRisk = FALSE,
+  doSmoothRisk = FALSE,
+  doSmoothRiskLogisticApprox = FALSE,
+  poppsub = NULL,
+  subareaLevel = FALSE,
+  min1PerSubarea = TRUE,
+  returnEAinfo = FALSE,
+  epsc = NULL
+)
+}
+\arguments{
+\item{nsim}{Number of simulations}
+
+\item{easpa}{data.frame of enumeration area, households, and target population per area stratified by urban/rural with variables:
+\describe{
+  \item{area}{name of area}
+  \item{EAUrb}{number of urban enumeration areas in the area}
+  \item{EARur}{number of rural enumeration areas in the area}
+  \item{EATotal}{total number of enumeration areas in the area}
+  \item{HHUrb}{number of urban households in the area}
+  \item{HHRur}{number of rural households in the area}
+  \item{HHTotal}{total number of households in the area}
+  \item{popUrb}{total urban (target) population of area}
+  \item{popRur}{total rural (target) population of area}
+  \item{popTotal}{total (general) population of area}
+}}
+
+\item{popMat}{Pixellated grid data frame with variables `lon`, `lat`, `pop`, `area`, `subareas` (if subareaLevel is TRUE), `urban` (if stratifyByUrban is TRUE), `east`, and `north`}
+
+\item{targetPopMat}{Same as popMat, but `pop` variable gives target rather than general population}
+
+\item{poppsub}{data.frame of population per subarea separated by 
+urban/rural using for population density grid normalization or urbanicity 
+classification. Often based on extra fine scale population density grid. Has variables:}
+
+\item{spdeMesh}{Triangular mesh for the SPDE}
+
+\item{margVar}{Marginal variance of the spatial process, excluding cluster effects. 
+If 0, no spatial component is included}
+
+\item{sigmaEpsilon}{Standard deviation on the logit scale for iid Gaussian EA level random effects in the risk model}
+
+\item{gamma}{Effect of urban on logit scale for logit model for risk}
+
+\item{effRange}{Effective spatial range for the SPDE model}
+
+\item{beta0}{Intercept of logit model for risk}
+
+\item{seed}{Random number generator seed}
+
+\item{inla.seed}{Seed input to inla.qsample. 0L sets seed intelligently, 
+> 0 sets a specific seed, < 0 keeps existing RNG}
+
+\item{nHHSampled}{Number of households sampled per enumeration area. Default is 25 to match DHS surveys}
+
+\item{stratifyByUrban}{Whether or not to stratify simulations by urban/rural classification}
+
+\item{subareaLevel}{Whether or not to aggregate the population by subarea}
+
+\item{doFineScaleRisk}{Whether or not to calculate the fine scale risk at each aggregation level in addition to the prevalence}
+
+\item{doSmoothRisk}{Whether or not to calculate the smooth risk at each aggregation level in addition to the prevalence}
+
+\item{doSmoothRiskLogisticApprox}{Whether to use logistic approximation when calculating smooth risk. See 
+\code{\link{logitNormMean}} for details.}
+
+\item{min1PerSubarea}{If TRUE, ensures there is at least 1 EA per subarea. If subareas are particularly unlikely to 
+have enumeration areas since they have a very low proportion of the population in an area, then setting this to TRUE may be 
+computationally intensive.}
+
+\item{logitRiskDraws}{nIntegrationPoints x nsim dimension matrix of draws from the pixel leve risk field on logit scale, leaving out 
+potential nugget/cluster/EA level effects.}
+
+\item{sigmaEpsilonDraws}{nsim length vector of draws of cluster effect logit scale SD (joint draws with logitRiskDraws)}
+
+\item{validationPixelI}{CURRENTLY FOR TESTING PURPOSES ONLY a set of indices of pixels for which we want to simulate populations (used for pixel level validation)}
+
+\item{validationClusterI}{CURRENTLY FOR TESTING PURPOSES ONLY a set of indices of cluster for which we want to simulate populations (used for cluster level validation)}
+
+\item{clustersPerPixel}{CURRENTLY FOR TESTING PURPOSES ONLY Used for pixel level validation. Fixes the number of EAs per pixel.}
+
+\item{returnEAinfo}{If TRUE, returns information on every individual EA (BAU) for each simulated population}
+
+\item{epsc}{nEAs x nsim matrix of simulated EA (BAU) level iid effects representing fine scale variation in 
+      risk. If NULL, they are simulated as iid Gaussian on a logit scale with 
+      SD given by sigmaEpsilonDraws
+list(pixelPop=outPixelLevel, subareaPop=outSubareaLevel, areaPop=outAreaLevel, logitRiskDraws=logitRiskDraws)}
+}
+\value{
+The simulated population aggregated to the enumeration area, 
+pixel, subarea (generally Admin2), and area (generally Admin1) levels. Output includes:
+\item{pixelPop}{A list of pixel level population aggregates}
+\item{subareaPop}{A list of `subarea` level population aggregates}
+\item{areaPop}{A list of `area` level population aggregates}
+Each of these contains population numerator and denominator as well as prevalence and risk 
+information aggregated to the appropriate level.
+}
+\description{
+Given a spatial risk model, simulate populations and population prevalences at the 
+enumeration area level (represented as points), and aggregate to the pixel and 
+administrative areal level.
+}
+\details{
+For population simulation and aggregation, we consider three models: smooth  
+risk, fine scale risk, and the fine scale prevalence. All will be described in detail 
+in a paper in preparation. In the smooth risk model, pixel level risks are integrated 
+with respect to target population density when producing areal estimates on a prespecified 
+set of integration points. The target population may be, for example, neonatals rather 
+than the general population. In the fine scale models, enumeration areas (EAs) are simulated as 
+point locations and iid random effects in the EA level risk are allowed. EAs and populations are dispersed conditional on the (possibly 
+approximately) known number of EAs, households, and target population at a particular 
+areal level (these we call `areas`) using multilevel multinomial sampling, first 
+sampling the EAs, then distributing households among the EAs, then the target population 
+among the households. Any areal level below the `areas` we call `subareas`. For instance, 
+the `areas` might be Admin-1 if that is the smallest level at which the number of EAs, 
+households, and people is known, and the `subareas` might be Admin-2. The multilevel 
+multinomial sampling may be stratified by urban/rural within the areas if the number of 
+EAs, households, and people is also approximately known at that level.
+
+Within each EA we assume a fixed probability of an event occurring, which is the fine scale `risk`. 
+The fine scale `prevalence` is the empirical proportion of events within that EA. We assume EA 
+level logit scale iid N(0, sigmaEpsilon^2) random effects in the risk model. When averaged 
+with equal weights over all EAs in an areal unit, this forms the fine scale risk. When 
+instead the population numerators and denominators are aggregated, and are used to 
+calculate the empirical proportion of events occurring in an areal unit, the resulting 
+quantity is the fine scale prevalence in that areal unit.
+
+Note that these functions can be used for either simulating populations for simulation 
+studies, or for generating predictions accounting for uncertainty in EA locations 
+and fine scale variation occuring at the EA level due to EA level iid random effects. 
+Required, however, is a seperately fit EA level spatial risk model 
+and information on the spatial population density and the population frame.
+}
+\section{Functions}{
+\itemize{
+\item \code{simPopSPDE()}: Simulate populations and population prevalences given census frame and population density 
+information. Uses SPDE model for generating spatial risk and can include iid cluster 
+level effect.
+
+\item \code{simPopCustom()}: Simulate populations and population prevalences given census frame and population density 
+information. Uses custom spatial logit risk function and can include iid cluster 
+level effect.
+
+}}
+\examples{
+\dontrun{
+## In this script we will create 5km resolution pixellated grid over Kenya, 
+## and generate tables of estimated (both target and general) population 
+## totals at the area (e.g. Admin-1) and subarea (e.g. Admin-2) levels. Then 
+## we will use that to simulate populations of 
+
+# download Kenya GADM shapefiles from SUMMERdata github repository
+githubURL <- paste0("https://github.com/paigejo/SUMMERdata/blob/main/data/", 
+                    "kenyaMaps.rda?raw=true")
+tempDirectory = "~/"
+mapsFilename = paste0(tempDirectory, "/kenyaMaps.rda")
+if(!file.exists(mapsFilename)) {
+  download.file(githubURL,mapsFilename)
+}
+
+# load it in
+out = load(mapsFilename)
+out
+adm1@data$NAME_1 = as.character(adm1@data$NAME_1)
+adm1@data$NAME_1[adm1@data$NAME_1 == "Trans Nzoia"] = "Trans-Nzoia"
+adm1@data$NAME_1[adm1@data$NAME_1 == "Elgeyo-Marakwet"] = "Elgeyo Marakwet"
+adm2@data$NAME_1 = as.character(adm2@data$NAME_1)
+adm2@data$NAME_1[adm2@data$NAME_1 == "Trans Nzoia"] = "Trans-Nzoia"
+adm2@data$NAME_1[adm2@data$NAME_1 == "Elgeyo-Marakwet"] = "Elgeyo Marakwet"
+
+# some Admin-2 areas have the same name
+adm2@data$NAME_2 = as.character(adm2@data$NAME_2)
+adm2@data$NAME_2[(adm2@data$NAME_1 == "Bungoma") & 
+                   (adm2@data$NAME_2 == "Lugari")] = "Lugari, Bungoma"
+adm2@data$NAME_2[(adm2@data$NAME_1 == "Kakamega") & 
+                   (adm2@data$NAME_2 == "Lugari")] = "Lugari, Kakamega"
+adm2@data$NAME_2[(adm2@data$NAME_1 == "Meru") & 
+                   (adm2@data$NAME_2 == "Igembe South")] = "Igembe South, Meru"
+adm2@data$NAME_2[(adm2@data$NAME_1 == "Tharaka-Nithi") & 
+                   (adm2@data$NAME_2 == "Igembe South")] = "Igembe South, Tharaka-Nithi"
+
+# The spatial area of unknown 8 is so small, it causes problems unless its removed or 
+# unioned with another subarea. Union it with neighboring Kakeguria:
+newadm2 = adm2
+unknown8I = which(newadm2$NAME_2 == "unknown 8")
+newadm2$NAME_2[newadm2$NAME_2 \%in\% c("unknown 8", "Kapenguria")] <- 
+  "Kapenguria + unknown 8"
+admin2.IDs <- newadm2$NAME_2
+
+library(maptools)
+temp <- unionSpatialPolygons(newadm2, admin2.IDs)
+tempData = newadm2@data[-unknown8I,]
+tempData = tempData[order(tempData$NAME_2),]
+newadm2 <- SpatialPolygonsDataFrame(temp, tempData, match.ID = F)
+adm2 = newadm2
+
+# download 2014 Kenya population density and associated TIF file
+githubURL <- paste0("https://github.com/paigejo/SUMMERdata/blob/main/data/", 
+                    "Kenya2014Pop/pop.rda?raw=true")
+popFilename = paste0(tempDirectory, "/pop.rda")
+if(!file.exists(popFilename)) {
+  download.file(githubURL,popFilename)
+}
+
+githubURL <- paste0("https://github.com/paigejo/SUMMERdata/blob/main/data/", 
+                    "Kenya2014Pop/worldpop_total_1y_2014_00_00.tif?raw=true")
+popTIFFilename = paste0(tempDirectory, "/worldpop_total_1y_2014_00_00.tif")
+if(!file.exists(popTIFFilename)) {
+  download.file(githubURL,popTIFFilename)
+}
+
+# load it in
+require(raster)
+out = load(popFilename)
+out
+
+# make sure this is correct for re-projections
+pop@file@name = paste0(tempDirectory, "/worldpop_total_1y_2014_00_00.tif")
+
+eastLim = c(-110.6405, 832.4544)
+northLim = c(-555.1739, 608.7130)
+
+## Construct poppsubKenya, a table of urban/rural general population totals 
+## in each subarea. Technically, this is not necessary since we can load in 
+## poppsubKenya via data(kenyaPopulationData). First, we will need to calculate 
+## the areas in km^2 of the areas and subareas
+
+library(rgdal)
+library(sp)
+
+# use Lambert equal area projection of areas (Admin-1) and subareas (Admin-2)
+midLon = mean(adm1@bbox[1,])
+midLat = mean(adm1@bbox[2,])
+p4s = paste0("+proj=laea +x_0=0 +y_0=0 +lon_0=", midLon, 
+             " +lat_0=", midLat, " +units=km")
+
+library(rgdal)
+
+adm1proj <- spTransform(adm1, CRS(p4s))
+adm2proj <- spTransform(adm2, CRS(p4s))
+
+# now calculate spatial area in km^2
+library(rgeos)
+admin1Areas = gArea(adm1proj, TRUE)
+admin2Areas = gArea(adm2proj, TRUE)
+areapaKenya = data.frame(area=adm1proj@data$NAME_1, spatialArea=admin1Areas)
+areapsubKenya = data.frame(area=adm2proj@data$NAME_1, subarea=adm2proj@data$NAME_2, 
+                           spatialArea=admin2Areas)
+
+# Calculate general population totals at the subarea (Admin-2) x urban/rural 
+# level and using 1km resolution population grid. Assign urbanicity by 
+# thresholding population density based on estimated proportion population 
+# urban/rural, making sure total area (Admin-1) urban/rural populations in 
+# each area matches poppaKenya.
+require(fields)
+# NOTE: the following function will typically take ~20 minutes. Can speed up 
+#       by setting kmRes to be higher, but we recommend fine resolution for 
+#       this step, since it only needs to be done once.
+system.time(poppsubKenya <- getPoppsub(
+  kmRes=1, pop=pop, domainPoly=kenyaPoly,
+  eastLim=eastLim, northLim=northLim, mapProjection=projKenya,
+  poppa = poppaKenya, areapa=areapaKenya, areapsub=areapsubKenya, 
+  areaMapDat=adm1, subareaMapDat=adm2, 
+  areaNameVar = "NAME_1", subareaNameVar="NAME_2"))
+
+# Now generate a general population integration table at 5km resolution, 
+# based on subarea (Admin-2) x urban/rural population totals. This takes 
+# ~1 minute
+system.time(popMatKenya <- makePopIntegrationTab(
+  kmRes=5, pop=pop, domainPoly=kenyaPoly,
+  eastLim=eastLim, northLim=northLim, mapProjection=projKenya,
+  poppa = poppaKenya, poppsub=poppsubKenya, 
+  areaMapDat = adm1, subareaMapDat = adm2,
+  areaNameVar = "NAME_1", subareaNameVar="NAME_2"))
+
+## Adjust popMat to be target (neonatal) rather than general population 
+## density. First create the target population frame
+## (these numbers are based on IPUMS microcensus data)
+mothersPerHouseholdUrb = 0.3497151
+childrenPerMotherUrb = 1.295917
+mothersPerHouseholdRur = 0.4787696
+childrenPerMotherRur = 1.455222
+targetPopPerStratumUrban = easpaKenya$HHUrb * mothersPerHouseholdUrb * 
+  childrenPerMotherUrb
+targetPopPerStratumRural = easpaKenya$HHRur * mothersPerHouseholdRur * 
+  childrenPerMotherRur
+easpaKenyaNeonatal = easpaKenya
+easpaKenyaNeonatal$popUrb = targetPopPerStratumUrban
+easpaKenyaNeonatal$popRur = targetPopPerStratumRural
+easpaKenyaNeonatal$popTotal = easpaKenyaNeonatal$popUrb + 
+  easpaKenyaNeonatal$popRur
+easpaKenyaNeonatal$pctUrb = 100 * easpaKenyaNeonatal$popUrb / 
+  easpaKenyaNeonatal$popTotal
+easpaKenyaNeonatal$pctTotal = 
+  100 * easpaKenyaNeonatal$popTotal / sum(easpaKenyaNeonatal$popTotal)
+
+# Generate the target population density by scaling the current 
+# population density grid at the Admin1 x urban/rural level
+popMatKenyaNeonatal = adjustPopMat(popMatKenya, easpaKenyaNeonatal)
+
+# Generate neonatal population table from the neonatal population integration 
+# matrix. This is technically not necessary for population simulation purposes, 
+# but is here for illustrative purposes
+poppsubKenyaNeonatal = poppRegionFromPopMat(popMatKenyaNeonatal, 
+                                            popMatKenyaNeonatal$subarea)
+poppsubKenyaNeonatal = 
+  cbind(subarea=poppsubKenyaNeonatal$region, 
+        area=adm2@data$NAME_1[match(poppsubKenyaNeonatal$region, adm2@data$NAME_2)], 
+        poppsubKenyaNeonatal[,-1])
+print(head(poppsubKenyaNeonatal))
+
+## Now we're ready to simulate neonatal populations along with neonatal 
+## mortality risks and prevalences
+
+# use the following model to simulate the neonatal population based roughly 
+# on Paige et al. (2020) neonatal mortality modeling for Kenya.
+beta0=-2.9 # intercept
+gamma=-1 # urban effect
+rho=(1/3)^2 # spatial variance
+effRange = 400 # effective spatial range in km
+sigmaEpsilon=sqrt(1/2.5) # cluster (nugget) effect standard deviation
+
+# Run a simulation! This produces multiple dense nEA x nsim and nPixel x nsim 
+# matrices. In the future sparse matrices and chunk by chunk computations 
+# may be incorporated.
+simPop = simPopSPDE(nsim=1, easpa=easpaKenyaNeonatal, 
+                    popMat=popMatKenya, targetPopMat=popMatKenyaNeonatal, 
+                    poppsub=poppsubKenya, spdeMesh=kenyaMesh, 
+                    margVar=rho, sigmaEpsilon=sigmaEpsilon, 
+                    gamma=gamma, effRange=effRange, beta0=beta0, 
+                    seed=12, inla.seed=12, nHHSampled=25, 
+                    stratifyByUrban=TRUE, subareaLevel=TRUE, 
+                    doFineScaleRisk=TRUE, doSmoothRisk=TRUE, 
+                    min1PerSubarea=TRUE)
+
+# get average absolute percent error relative to fine scale prevalence at Admin-2 level
+tempDat = simPop$subareaPop$aggregationResults[,c("region", "pFineScalePrevalence", 
+                                                  "pFineScaleRisk", "pSmoothRisk")]
+100*mean(abs(tempDat$pFineScalePrevalence - tempDat$pFineScaleRisk) / 
+           tempDat$pFineScalePrevalence)
+100*mean(abs(tempDat$pFineScalePrevalence - tempDat$pSmoothRisk) / 
+           tempDat$pFineScalePrevalence)
+100*mean(abs(tempDat$pFineScaleRisk - tempDat$pSmoothRisk) / 
+           tempDat$pFineScalePrevalence)
+
+# verify number of EAs per area and subarea
+cbind(aggregate(simPop$eaPop$eaSamples[,1], by=list(area=popMatKenya$area), FUN=sum), 
+      trueNumEAs=easpaKenya$EATotal[order(easpaKenya$area)])
+aggregate(simPop$eaPop$eaSamples[,1], by=list(area=popMatKenya$subarea), FUN=sum)
+
+## plot simulated population
+# directory for plotting 
+# (mapPlot takes longer when it doesn't save to a file)
+tempDirectory = "~/"
+
+# pixel level
+zlim = c(0, quantile(probs=.995, c(simPop$pixelPop$pFineScalePrevalence, 
+                                   simPop$pixelPop$pFineScaleRisk, 
+                                   simPop$pixelPop$pSmoothRisk), na.rm=TRUE))
+pdf(file=paste0(tempDirectory, "simPopSPDEPixel.pdf"), width=8, height=8)
+par(mfrow=c(2,2))
+plot(adm2, asp=1)
+points(simPop$eaPop$eaDatList[[1]]$lon, simPop$eaPop$eaDatList[[1]]$lat, pch=".", col="blue")
+plot(adm2, asp=1)
+quilt.plot(popMatKenya$lon, popMatKenya$lat, simPop$pixelPop$pFineScalePrevalence, 
+           zlim=zlim, add=TRUE, FUN=function(x) {mean(x, na.rm=TRUE)})
+plot(adm2, asp=1)
+quilt.plot(popMatKenya$lon, popMatKenya$lat, simPop$pixelPop$pFineScaleRisk, 
+           zlim=zlim, add=TRUE, FUN=function(x) {mean(x, na.rm=TRUE)})
+quilt.plot(popMatKenya$lon, popMatKenya$lat, simPop$pixelPop$pSmoothRisk, 
+           zlim=zlim, FUN=function(x) {mean(x, na.rm=TRUE)}, asp=1)
+plot(adm2, add=TRUE)
+dev.off()
+
+range(simPop$eaPop$eaDatList[[1]]$N)
+
+# areal (Admin-1) level: these results should look essentially identical
+
+tempDat = simPop$areaPop$aggregationResults[,c("region", "pFineScalePrevalence", 
+                                               "pFineScaleRisk", "pSmoothRisk")]
+pdf(file=paste0(tempDirectory, "simPopSPDEAdmin-1.pdf"), width=7, height=7)
+mapPlot(tempDat, 
+        variables=c("pFineScalePrevalence", "pFineScaleRisk", "pSmoothRisk"), 
+        geo=adm1, by.geo="NAME_1", by.data="region", is.long=FALSE)
+dev.off()
+
+# subareal (Admin-2) level: these results should look subtley different 
+# depending on the type of prevalence/risk considered
+tempDat = simPop$subareaPop$aggregationResults[,c("region", "pFineScalePrevalence", 
+                                                  "pFineScaleRisk", "pSmoothRisk")]
+pdf(file=paste0(tempDirectory, "simPopSPDEAdmin-2.pdf"), width=7, height=7)
+mapPlot(tempDat, 
+        variables=c("pFineScalePrevalence", "pFineScaleRisk", "pSmoothRisk"), 
+        geo=adm2, by.geo="NAME_2", by.data="region", is.long=FALSE)
+dev.off()
+}
+}
+\references{
+In preparation
+}
+\seealso{
+\code{\link{simPopCustom}}, \code{\link{makePopIntegrationTab}}, \code{\link{adjustPopMat}}, \code{\link{simSPDE}}.
+}
+\author{
+John Paige
+}
diff --git a/man/simPopInternal.Rd b/man/simPopInternal.Rd
new file mode 100755
index 0000000..4932623
--- /dev/null
+++ b/man/simPopInternal.Rd
@@ -0,0 +1,217 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/simPop.R
+\name{simPopInternal}
+\alias{simPopInternal}
+\alias{getExpectedNperEA}
+\alias{getSortIndices}
+\alias{rStratifiedMultnomial}
+\alias{rStratifiedMultnomialBySubarea}
+\alias{rMyMultinomial}
+\alias{rMyMultinomialSubarea}
+\alias{rmultinom1}
+\alias{sampleNMultilevelMultinomial}
+\alias{sampleNMultilevelMultinomialFixed}
+\title{Internal functions for population simulation}
+\usage{
+getExpectedNperEA(easpa, popMat, level = c("grid", "EA"), pixelIndexMat = NULL)
+
+getSortIndices(
+  i,
+  urban = TRUE,
+  popMat,
+  stratifyByUrban = TRUE,
+  validationPixelI = NULL
+)
+
+rStratifiedMultnomial(n, popMat, easpa, stratifyByUrban = TRUE)
+
+rStratifiedMultnomialBySubarea(
+  n,
+  popMat,
+  easpa,
+  stratifyByUrban = TRUE,
+  poppsub = NULL,
+  min1PerSubarea = TRUE,
+  minSample = 1
+)
+
+rMyMultinomial(
+  n,
+  i,
+  stratifyByUrban = TRUE,
+  urban = TRUE,
+  popMat = NULL,
+  easpa = NULL,
+  min1PerSubarea = FALSE,
+  method = c("mult1", "mult", "indepMH"),
+  minSample = 1
+)
+
+rMyMultinomialSubarea(
+  n,
+  i,
+  easpsub,
+  stratifyByUrban = TRUE,
+  urban = TRUE,
+  popMat = NULL
+)
+
+rmultinom1(
+  n = 1,
+  size,
+  prob,
+  maxSize = 8000 * 8000,
+  method = c("mult1", "mult", "indepMH"),
+  verbose = FALSE,
+  minSample = 100,
+  maxExpectedSizeBeforeSwitch = 1000 * 1e+07,
+  init = NULL,
+  burnIn = floor(n/4),
+  filterEvery = 10,
+  zeroProbZeroSamples = TRUE,
+  allowSizeLessThanK = FALSE
+)
+
+sampleNMultilevelMultinomial(
+  nDraws = ncol(pixelIndexMat),
+  pixelIndexMat = NULL,
+  urbanMat = NULL,
+  areaMat = NULL,
+  easpaList,
+  popMat,
+  stratifyByUrban = TRUE,
+  verbose = TRUE,
+  returnEAinfo = FALSE,
+  minHHPerEA = 25,
+  fixHHPerEA = NULL,
+  fixPopPerHH = NULL
+)
+
+sampleNMultilevelMultinomialFixed(
+  clustersPerPixel,
+  nDraws = ncol(pixelIndices),
+  pixelIndices = NULL,
+  urbanVals = NULL,
+  areaVals = NULL,
+  easpa,
+  popMat,
+  stratifyByUrban = TRUE,
+  verbose = TRUE
+)
+}
+\arguments{
+\item{easpa}{Census frame. See \code{\link{simPopCustom}} for details}
+
+\item{popMat}{data.frame of pixellated grid of population densities. See \code{\link{simPopCustom}} for details}
+
+\item{level}{Whether to calculate results at the integration grid or EA level}
+
+\item{pixelIndexMat}{Matrix of pixel indices associated with each EA and draw. Not 
+required by getExpectedNperEA unless level == "EA"}
+
+\item{i}{Index}
+
+\item{urban}{If TRUE, calculate only for urban part of the area. If FALSE, for only rural part}
+
+\item{stratifyByUrban}{whether or not to stratify calculations by urban/rural classification}
+
+\item{validationPixelI}{CURRENTLY FOR TESTING PURPOSES ONLY a set of indices of pixels for which we want to simulate populations (used for pixel level validation)}
+
+\item{n}{Number of samples}
+
+\item{poppsub}{Population per subarea. See \code{\link{simPopCustom}} for details}
+
+\item{min1PerSubarea}{Whether or not to ensure there is at least 1 EA per subarea. See \code{\link{simPopCustom}} for details}
+
+\item{minSample}{The minimum number of samples per `chunk` of samples for truncated multinomial sampling. Defaults to 1}
+
+\item{method}{If min1PerSubarea is TRUE, the sampling method for the truncated multinomial to use with rmulitnom1. rmultinom1 automatically 
+        switches between them depending on the number of expected samples. The methods are:
+\describe{
+  \item{mult1}{rejection sampling from multinomial plus 1 in each category}
+  \item{mult}{rejection sampling from multinomial if any category has zero count}
+  \item{indepMH}{independent Metropolis-Hastings using multinomial plus 1 distribution as proposal}
+}}
+
+\item{easpsub}{This could either be total EAs per subarea, or subarea crossed with urban or 
+rural if stratifyByUrban is TRUE}
+
+\item{size}{Multinomial size parameter. See \code{\link[stats]{rmultinom}}}
+
+\item{prob}{Multinomial probability vector parameter. See \code{\link[stats]{rmultinom}}}
+
+\item{maxSize}{The maximum number of elements in a matrix drawn from the proposal distribution per sample chunk.}
+
+\item{verbose}{Whether to print progress as the function proceeds}
+
+\item{maxExpectedSizeBeforeSwitch}{Max expected number of samples / k, the number of categories, before switching method}
+
+\item{init}{Initial sample if method is `indepMH`}
+
+\item{burnIn}{Number of initial samples before samples are collected if method is `indepMH`}
+
+\item{filterEvery}{Store only every filterEvery samples if method is i`indepMH`}
+
+\item{zeroProbZeroSamples}{If TRUE, set samples for parts of prob vector that are zero to zero. Otherwise they are set to one.}
+
+\item{allowSizeLessThanK}{If TRUE, then if size < the number of categories (k), returns matrix where each 
+column is vector of size ones and k - size zeros. If FALSE, throws an error if size < k}
+
+\item{nDraws}{Number of draws}
+
+\item{urbanMat}{Matrix of urbanicities associated with each EA and draw}
+
+\item{areaMat}{Matrix of areas associated with each EA and draw}
+
+\item{easpaList}{A list of length n with each element being of the format of easpa 
+giving the number of households and EAs 
+per stratum. It is assumed that the number of EAs per stratum is 
+the same in each list element. If easpaList is a data frame, 
+number of households per stratum is assumed constant}
+
+\item{returnEAinfo}{Whether a data frame at the EA level is desired}
+
+\item{minHHPerEA}{The minimum number of households per EA (defaults to 25, since 
+that is the number of households sampled per DHS cluster)}
+
+\item{fixHHPerEA}{If not NULL, the fixed number of households per EA}
+
+\item{fixPopPerHH}{If not NULL, the fixed target population per household}
+
+\item{clustersPerPixel}{CURRENTLY FOR TESTING PURPOSES ONLY a vector of length nIntegrationPoints specifying the number of clusters per pixel if they are fixed}
+
+\item{pixelIndices}{A nEA x n matrix of pixel indices associated with each EA per simulation/draw}
+
+\item{urbanVals}{A nEA x n matrix of urbanicities associated with each EA per simulation/draw}
+
+\item{areaVals}{A nEA x n matrix of area names associated with each EA per simulation/draw}
+}
+\description{
+Functions for calculating valuable quantities and for drawing from important 
+distributions for population simulation.
+}
+\section{Functions}{
+\itemize{
+\item \code{getExpectedNperEA()}: Calculates expected denominator per enumeration area.
+
+\item \code{getSortIndices()}: For recombining separate multinomials into the draws over all grid points
+
+\item \code{rStratifiedMultnomial()}: Gives nIntegrationPoints x n matrix of draws from the stratified multinomial with values 
+corresponding to the value of |C^g| for each pixel, g (the number of EAs/pixel)
+
+\item \code{rStratifiedMultnomialBySubarea()}: Gives nIntegrationPoints x n matrix of draws from the stratified multinomial with values
+
+\item \code{rMyMultinomial()}: 
+
+\item \code{rMyMultinomialSubarea()}: 
+
+\item \code{rmultinom1()}: Random (truncated) multinomial draws conditional on the number of each type being at least one
+
+\item \code{sampleNMultilevelMultinomial()}: Take multilevel multinomial draws first from joint distribution of 
+number of households per EA given the total per stratum, and then from the joint 
+distribution of the total target population per household given 
+the total per stratum
+
+\item \code{sampleNMultilevelMultinomialFixed()}: Same as sampleNMultilevelMultinomial, except the number of EAs per pixel is fixed
+
+}}
diff --git a/man/simSPDE.Rd b/man/simSPDE.Rd
new file mode 100755
index 0000000..4d836d2
--- /dev/null
+++ b/man/simSPDE.Rd
@@ -0,0 +1,50 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/simSPDE.R
+\name{simSPDE}
+\alias{simSPDE}
+\title{Simulate from the SPDE spatial model}
+\usage{
+simSPDE(
+  coords,
+  nsim = 1,
+  mesh,
+  effRange = (max(coords[, 1]) - min(coords[, 1]))/3,
+  margVar = 1,
+  inla.seed = 0L
+)
+}
+\arguments{
+\item{coords}{2 column matrix of spatial coordinates at which to simulate the spatial process}
+
+\item{nsim}{number of draws from the SPDE model}
+
+\item{mesh}{SPDE mesh}
+
+\item{effRange}{effective spatial range}
+
+\item{margVar}{marginal variance of the spatial process}
+
+\item{inla.seed}{seed input to inla.qsample. 0L sets seed intelligently, 
+> 0 sets a specific seed, < 0 keeps existing RNG}
+}
+\description{
+Generates nCoords x nsim matrix of simulated 
+values of the SPDE spatial process
+}
+\examples{
+\dontrun{
+set.seed(123)
+require(INLA)
+coords = matrix(runif(10*2), ncol=2)
+mesh = inla.mesh.2d(loc.domain=cbind(c(0, 0, 1, 1), c(0, 1, 0, 1)), 
+  n=3000, max.n=5000, max.edge=c(.01, .05), offset=-.1)
+simVals = simSPDE(coords, nsim=1, mesh, effRange=.2, inla.seed=1L)
+}
+
+}
+\references{
+Lindgren, F., Rue, H., Lindström, J., 2011. An explicit link between Gaussian fields and Gaussian Markov random fields: the stochastic differential equation approach (with discussion). Journal of the Royal Statistical Society, Series B 73, 423–498.
+}
+\author{
+John Paige
+}