From a6cdbaf973d76b804e361993138c2988122565a6 Mon Sep 17 00:00:00 2001
From: John Paige <paigejo@gmail.com>
Date: Fri, 28 Apr 2023 14:17:05 +0200
Subject: [PATCH] fixed some typos and some documentation

---
 DESCRIPTION                  |  2 +-
 R/simPop.R                   | 44 +++++++++++++++++++++++++-----------
 man/aggPop.Rd                |  6 ++---
 man/calibrateByRegion.Rd     |  2 +-
 man/getBirths.Rd             |  4 ++--
 man/makePopIntegrationTab.Rd | 19 ++++++++++++----
 man/simPop.Rd                | 11 ++++-----
 man/simPopInternal.Rd        | 33 +++++++++++++++++++--------
 8 files changed, 79 insertions(+), 42 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 102f427..49b325a 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -22,7 +22,7 @@ License: GPL (>= 2)
 Imports: survey, stats, spdep, survival, ggplot2, utils, Matrix, reshape2, viridis, sp, shadowtext, ggridges, methods, data.table, RColorBrewer, grDevices, raster, fields, terra
 Encoding: UTF-8
 LazyData: true
-RoxygenNote: 7.1.2
+RoxygenNote: 7.2.2
 Additional_repositories: https://inla.r-inla-download.org/R/testing/
 Suggests: INLA, knitr, rmarkdown, readstata13, patchwork, rdhs, R.rsp
 VignetteBuilder: R.rsp, knitr
diff --git a/R/simPop.R b/R/simPop.R
index 9c97975..7a3d047 100755
--- a/R/simPop.R
+++ b/R/simPop.R
@@ -1256,7 +1256,7 @@ getSortIndices = function(i, urban=TRUE, popMat, stratifyByUrban=TRUE, validatio
   
   # determine which pixels and how many EAs are in this stratum
   if(stratifyByUrban) {
-    includeI = popMat$area == areas[i] & popMat$urban == urban
+    includeI = (popMat$area == areas[i]) & (popMat$urban == urban)
   }
   else {
     includeI = popMat$area == areas[i]
@@ -1357,7 +1357,7 @@ rStratifiedMultnomialBySubarea = function(n, popMat, easpa, stratifyByUrban=TRUE
   } else {
     popSubareaMat$pop = poppsub$popTotal
   }
-  # browser()
+  
   # now draw multinomials
   if(stratifyByUrban) {
     # draw for each constituency in each area crossed with urban/rural
@@ -1437,7 +1437,7 @@ rMyMultinomial = function(n, i, stratifyByUrban=TRUE, urban=TRUE, popMat=NULL, e
   
   # determine which pixels and how many EAs are in this stratum
   if(stratifyByUrban) {
-    includeI = popMat$area == areas[i] & popMat$urban == urban
+    includeI = (popMat$area == areas[i]) & (popMat$urban == urban)
     nEA = ifelse(urban, easpa$EAUrb[i], easpa$EARur[i])
   }
   else {
@@ -1468,7 +1468,7 @@ rMyMultinomialSubarea = function(n, i, easpsub, stratifyByUrban=TRUE, urban=TRUE
   
   # determine which pixels and how many EAs are in this stratum
   if(stratifyByUrban) {
-    includeI = popMat$subarea == subareas[i] & popMat$urban == urban
+    includeI = (popMat$subarea == subareas[i]) & (popMat$urban == urban)
   }
   else {
     includeI = popMat$subarea == subareas[i]
@@ -1478,15 +1478,21 @@ rMyMultinomialSubarea = function(n, i, easpsub, stratifyByUrban=TRUE, urban=TRUE
   # sample from the pixels if this stratum exists
   if(sum(includeI) == 0){
     if(any(nEA != 0))
-      stop(paste0("no valid pixels to put EAs in for constituency ", as.character(subareas[i]), " and urban level ", urban))
+      stop(paste0("no valid pixels to put EAs in for subarea ", as.character(subareas[i]), " and urban level ", urban))
     return(matrix(nrow=0, ncol=n))
   }
+  
   thesePixelProbs = popMat$pop[includeI]
-  sapply(nEA, stats::rmultinom, n=1, prob=thesePixelProbs)
+  nonZeroEAs = nEA != 0
+  out = matrix(0, nrow=sum(includeI), ncol=n)
+  if(any(nonZeroEAs)) {
+    out[,nonZeroEAs] = sapply(nEA[nonZeroEAs], stats::rmultinom, n=1, prob=thesePixelProbs)
+  }
+  out
 }
 
 #' @describeIn simPopInternal Random (truncated) multinomial draws conditional on the number of each type being at least one
-rmultinom1 = function(n=1, size, prob, maxSize=5000*5000, method=c("mult1", "mult", "indepMH"), verbose=FALSE, minSample=100, 
+rmultinom1 = function(n=1, size, prob, maxSize=8000*8000, method=c("mult1", "mult", "indepMH"), verbose=FALSE, minSample=100, 
                       maxExpectedSizeBeforeSwitch=1000*1e7, init=NULL, burnIn=floor(n/4), filterEvery=10, zeroProbZeroSamples=TRUE, 
                       allowSizeLessThanK=FALSE) {
   method = match.arg(method)
@@ -1527,8 +1533,14 @@ rmultinom1 = function(n=1, size, prob, maxSize=5000*5000, method=c("mult1", "mul
       samplesLeft = sum(apply(samples, 2, function(x) {any(is.na(x))}))
       
       # approximate expected number of samples so that, after some are rejected, we will 
-      # have the right number of samples
+      # have the right number of samples. Kappa is approximated for when p_1=p_2=...=p_k
       expectedSamples = ceiling(samplesLeft/averageProb)
+      # logMStar = lfactorial(size) - lfactorial(size-k) + sum(log(prob)) - log(size-k+1)
+      # avgAcceptProb = exp(-logMStar)
+      # logKappaApprox = lchoose(size + k - 1, k - 1) - lchoose(size - 1, k - 1)
+      # logM = logMStar -logKappaApprox
+      # avgAcceptProb = exp(-logM)
+      # expectedSamples = ceiling(samplesLeft/avgAcceptProb)
       
       if(expectedSamples*k > maxExpectedSizeBeforeSwitch) {
         warning("too many samples expected with method=='mult1'. Switching to method=='indepMH'")
@@ -1541,23 +1553,24 @@ rmultinom1 = function(n=1, size, prob, maxSize=5000*5000, method=c("mult1", "mul
       thisNumberOfSamples = max(minSample, min(maxSamples, expectedSamples * 1.1))
       if(verbose)
         print(paste0("Sampling ", thisNumberOfSamples, ". Sampled ", n-samplesLeft, "/", n, ". Expected remaining samples: ", expectedSamples))
-      thisSamples = 1 + stats::rmultinom(thisNumberOfSamples, size-k, prob=prob)
+      thisSamples = matrix(1 + stats::rmultinom(thisNumberOfSamples, size-k, prob=prob), nrow=length(prob))
       
       # calculate accept probabilities
-      thisProbs = (size-k) / apply(thisSamples, 2, prod)
+      thisProbs = exp(log(size-k) - apply(log(thisSamples), 2, sum))
+      # thisProbs = (size-k) / apply(thisSamples, 2, prod)
       if(verbose) {
         print(paste0("Max sampled accept prob: ", max(thisProbs), ". Mean sampled accept prob: ", mean(thisProbs)))
-        print(paste0("Max theoretical accept prob: ", 1, ". Mean 'theoretical' accept prob: ", averageProb))
+        print(paste0("Expected number of samples based on avg sampled acceptance prob: ", ceiling(samplesLeft/mean(thisProbs))))
       }
       
       # reject relevant samples
       u = stats::runif(thisNumberOfSamples)
-      thisSamples = thisSamples[,u<thisProbs]
+      thisSamples = matrix(thisSamples[,u<thisProbs], nrow=length(prob))
       
       # remove excess samples if necessary
       totalSamples = ncol(thisSamples) + n - samplesLeft
       if(totalSamples > n) {
-        thisSamples = thisSamples[,1:samplesLeft]
+        thisSamples = matrix(thisSamples[,1:samplesLeft], nrow=length(prob))
       }
       
       # add in accepted samples, if any
@@ -1669,6 +1682,11 @@ rmultinom1 = function(n=1, size, prob, maxSize=5000*5000, method=c("mult1", "mul
       print(paste0("acceptance percentage: ", ar/ncol(samples)))
     }
     
+    # estimate autocorrelation in samples (if it is na then all samples have the same value there)
+    calcCor = apply(samples, 1, function(x) {cor(x[-1], x[-length(x)])})
+    calcCor[is.na(calcCor)] = 1
+    print(paste0("estimated mean (max) lag 1 correlation after filtering: ", mean(calcCor^filterEvery), " (", max(calcCor^filterEvery), ")"))
+    
     # filter out samples to reduce autocorrelation
     samples = samples[,seq(from=1, to=ncol(samples), by=filterEvery)]
   }
diff --git a/man/aggPop.Rd b/man/aggPop.Rd
index e90a140..5ef77f5 100755
--- a/man/aggPop.Rd
+++ b/man/aggPop.Rd
@@ -81,11 +81,11 @@ them to the specified areal level. Also calculates the aggregated risk and preva
 }
 \section{Functions}{
 \itemize{
-\item \code{pixelPopToArea}: Aggregate from pixel to areal level
+\item \code{pixelPopToArea()}: Aggregate from pixel to areal level
 
-\item \code{areaPopToArea}: Aggregate areal populations to another areal level
-}}
+\item \code{areaPopToArea()}: Aggregate areal populations to another areal level
 
+}}
 \examples{
 \dontrun{
 ##### Now we make a model for the risk. We will use an SPDE model with these 
diff --git a/man/calibrateByRegion.Rd b/man/calibrateByRegion.Rd
index 2dc5822..a9b4382 100755
--- a/man/calibrateByRegion.Rd
+++ b/man/calibrateByRegion.Rd
@@ -13,7 +13,7 @@ calibrateByRegion(pointTotals, pointRegions, regions, regionTotals)
 
 \item{regions}{Vector of region names}
 
-\item{regionTotals}{Vector Of region level totals associated with `regions`}
+\item{regionTotals}{Vector of desired region level totals associated with `regions`}
 }
 \value{
 A vector of same length as pointTotals and pointRegions containing 
diff --git a/man/getBirths.Rd b/man/getBirths.Rd
index 3a607ca..d7131bd 100755
--- a/man/getBirths.Rd
+++ b/man/getBirths.Rd
@@ -8,8 +8,8 @@ getBirths(
   filepath = NULL,
   data = NULL,
   surveyyear = NA,
-  variables = c("caseid", "v001", "v002", "v004", "v005", "v021", "v022", "v023",
-    "v024", "v025", "v139", "bidx"),
+  variables = c("caseid", "v001", "v002", "v004", "v005", "v021", "v022", "v023", "v024",
+    "v025", "v139", "bidx"),
   strata = c("v024", "v025"),
   dob = "b3",
   alive = "b5",
diff --git a/man/makePopIntegrationTab.Rd b/man/makePopIntegrationTab.Rd
index faf954d..3fd6086 100755
--- a/man/makePopIntegrationTab.Rd
+++ b/man/makePopIntegrationTab.Rd
@@ -22,6 +22,7 @@ makePopIntegrationTab(
   stratifyByUrban = TRUE,
   areapa = NULL,
   areapsub = NULL,
+  customSubsetPolygons = NULL,
   areaPolygonSubsetI = NULL,
   subareaPolygonSubsetI = NULL,
   mean.neighbor = 50,
@@ -49,6 +50,7 @@ getPoppsub(
   areaNameVar = "NAME_1",
   areaPolygonSubsetI = NULL,
   subareaPolygonSubsetI = NULL,
+  customSubsetPolygons = NULL,
   mean.neighbor = 50,
   delta = 0.1,
   setNAsToZero = TRUE,
@@ -82,7 +84,8 @@ for specific countries}
 
 \item{subareaMapDat}{SpatialPolygonsDataFrame object with subarea level map information}
 
-\item{areaNameVar}{The name of the area variable associated with \code{areaMapDat@data} and \code{subareaMapDat@data}}
+\item{areaNameVar}{The name of the area variable associated with \code{areaMapDat@data} 
+and \code{subareaMapDat@data}}
 
 \item{subareaNameVar}{The name of the subarea variable associated with \code{subareaMapDat@data}}
 
@@ -125,6 +128,12 @@ renormalizes population densities within areas or subareas crossed with urban/ru
   \item{spatialArea}{spatial area of the subarea (e.g. in km^2)}
 }}
 
+\item{customSubsetPolygons}{'SpatialPolygonsDataFrame' or 'SpatialPolygons' object to subset 
+the grid over. This option can help reduce computation time relative to 
+constructing the whole grid and subsetting afterwards. `areaPolygonSubsetI` or 
+`subareaPolygonSubsetI` can be used when subsetting by areas or subareas in 
+`areaMapDat` or `subareaMapDat`. Must be in latitude/longitude projection "EPSG:4326"}
+
 \item{areaPolygonSubsetI}{Index in areaMapDat for a specific area to subset the grid over. This 
 option can help reduce computation time relative to constructing the whole grid 
 and subsetting afterwards}
@@ -175,7 +184,7 @@ subarea names.
 }
 \section{Functions}{
 \itemize{
-\item \code{makePopIntegrationTab}: Generate pixellated `grid` of coordinates (both longitude/latitude and east/north) 
+\item \code{makePopIntegrationTab()}: Generate pixellated `grid` of coordinates (both longitude/latitude and east/north) 
 over spatial domain of the given resolution with associated population totals, areas, subareas, 
 and urban/rural levels. For very small areas that might not 
 otherwise have a grid point in them, a custom integration point is added at their 
@@ -184,14 +193,14 @@ using area and subarea population tables, and generates area and subarea populat
 tables from population density information if not already given. Can be used for integrating 
 predictions from the given coordinates to area and subarea levels using population weights.
 
-\item \code{getPoppsub}: Generate table of estimates of population
+\item \code{getPoppsub()}: Generate table of estimates of population
 totals per subarea x urban/rural combination based on population density
 raster at `kmres` resolution "grid", including custom integration points
 for any subarea too small to include grid points at their centroids.
 
-\item \code{adjustPopMat}: Adjust population densities in grid based on a population frame.
-}}
+\item \code{adjustPopMat()}: Adjust population densities in grid based on a population frame.
 
+}}
 \examples{
 \dontrun{
 # download Kenya GADM shapefiles from SUMMERdata github repository
diff --git a/man/simPop.Rd b/man/simPop.Rd
index 4d4925b..3e8b9e5 100755
--- a/man/simPop.Rd
+++ b/man/simPop.Rd
@@ -174,15 +174,15 @@ and information on the spatial population density and the population frame.
 }
 \section{Functions}{
 \itemize{
-\item \code{simPopSPDE}: Simulate populations and population prevalences given census frame and population density 
+\item \code{simPopSPDE()}: Simulate populations and population prevalences given census frame and population density 
 information. Uses SPDE model for generating spatial risk and can include iid cluster 
 level effect.
 
-\item \code{simPopCustom}: Simulate populations and population prevalences given census frame and population density 
+\item \code{simPopCustom()}: Simulate populations and population prevalences given census frame and population density 
 information. Uses custom spatial logit risk function and can include iid cluster 
 level effect.
-}}
 
+}}
 \examples{
 \dontrun{
 ## In this script we will create 5km resolution pixellated grid over Kenya, 
@@ -251,7 +251,7 @@ if(!file.exists(popTIFFilename)) {
 }
 
 # load it in
-require(raster)
+require(terra)
 out = load(popFilename)
 out
 
@@ -266,7 +266,6 @@ northLim = c(-555.1739, 608.7130)
 ## poppsubKenya via data(kenyaPopulationData). First, we will need to calculate 
 ## the areas in km^2 of the areas and subareas
 
-library(rgdal)
 library(sp)
 
 # use Lambert equal area projection of areas (Admin-1) and subareas (Admin-2)
@@ -275,8 +274,6 @@ midLat = mean(adm1@bbox[2,])
 p4s = paste0("+proj=laea +x_0=0 +y_0=0 +lon_0=", midLon, 
              " +lat_0=", midLat, " +units=km")
 
-library(rgdal)
-
 adm1proj <- spTransform(adm1, CRS(p4s))
 adm2proj <- spTransform(adm2, CRS(p4s))
 
diff --git a/man/simPopInternal.Rd b/man/simPopInternal.Rd
index 8edd414..6a48d53 100755
--- a/man/simPopInternal.Rd
+++ b/man/simPopInternal.Rd
@@ -4,6 +4,7 @@
 \alias{simPopInternal}
 \alias{getExpectedNperEA}
 \alias{getSortIndices}
+\alias{getSortIndicesSubarea}
 \alias{rStratifiedMultnomial}
 \alias{rStratifiedMultnomialBySubarea}
 \alias{rMyMultinomial}
@@ -23,6 +24,14 @@ getSortIndices(
   validationPixelI = NULL
 )
 
+getSortIndicesSubarea(
+  i,
+  urban = TRUE,
+  popMat,
+  stratifyByUrban = TRUE,
+  validationPixelI = NULL
+)
+
 rStratifiedMultnomial(n, popMat, easpa, stratifyByUrban = TRUE)
 
 rStratifiedMultnomialBySubarea(
@@ -192,26 +201,30 @@ distributions for population simulation.
 }
 \section{Functions}{
 \itemize{
-\item \code{getExpectedNperEA}: Calculates expected denominator per enumeration area.
+\item \code{getExpectedNperEA()}: Calculates expected denominator per enumeration area.
+
+\item \code{getSortIndices()}: For recombining separate multinomials over 
+multiple areas into the draws over all grid points
 
-\item \code{getSortIndices}: For recombining separate multinomials into the draws over all grid points
+\item \code{getSortIndicesSubarea()}: For recombining separate multinomials over 
+multiple areas into the draws over all grid points
 
-\item \code{rStratifiedMultnomial}: Gives nIntegrationPoints x n matrix of draws from the stratified multinomial with values 
+\item \code{rStratifiedMultnomial()}: Gives nIntegrationPoints x n matrix of draws from the stratified multinomial with values 
 corresponding to the value of |C^g| for each pixel, g (the number of EAs/pixel)
 
-\item \code{rStratifiedMultnomialBySubarea}: Gives nIntegrationPoints x n matrix of draws from the stratified multinomial with values
+\item \code{rStratifiedMultnomialBySubarea()}: Gives nIntegrationPoints x n matrix of draws from the stratified multinomial with values
 
-\item \code{rMyMultinomial}: 
+\item \code{rMyMultinomial()}: 
 
-\item \code{rMyMultinomialSubarea}: 
+\item \code{rMyMultinomialSubarea()}: 
 
-\item \code{rmultinom1}: Random (truncated) multinomial draws conditional on the number of each type being at least one
+\item \code{rmultinom1()}: Random (truncated) multinomial draws conditional on the number of each type being at least one
 
-\item \code{sampleNMultilevelMultinomial}: Take multilevel multinomial draws first from joint distribution of 
+\item \code{sampleNMultilevelMultinomial()}: Take multilevel multinomial draws first from joint distribution of 
 number of households per EA given the total per stratum, and then from the joint 
 distribution of the total target population per household given 
 the total per stratum
 
-\item \code{sampleNMultilevelMultinomialFixed}: Same as sampleNMultilevelMultinomial, except the number of EAs per pixel is fixed
-}}
+\item \code{sampleNMultilevelMultinomialFixed()}: Same as sampleNMultilevelMultinomial, except the number of EAs per pixel is fixed
 
+}}