From a6cdbaf973d76b804e361993138c2988122565a6 Mon Sep 17 00:00:00 2001 From: John Paige Date: Fri, 28 Apr 2023 14:17:05 +0200 Subject: [PATCH] fixed some typos and some documentation --- DESCRIPTION | 2 +- R/simPop.R | 44 +++++++++++++++++++++++++----------- man/aggPop.Rd | 6 ++--- man/calibrateByRegion.Rd | 2 +- man/getBirths.Rd | 4 ++-- man/makePopIntegrationTab.Rd | 19 ++++++++++++---- man/simPop.Rd | 11 ++++----- man/simPopInternal.Rd | 33 +++++++++++++++++++-------- 8 files changed, 79 insertions(+), 42 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 102f427..49b325a 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,7 +22,7 @@ License: GPL (>= 2) Imports: survey, stats, spdep, survival, ggplot2, utils, Matrix, reshape2, viridis, sp, shadowtext, ggridges, methods, data.table, RColorBrewer, grDevices, raster, fields, terra Encoding: UTF-8 LazyData: true -RoxygenNote: 7.1.2 +RoxygenNote: 7.2.2 Additional_repositories: https://inla.r-inla-download.org/R/testing/ Suggests: INLA, knitr, rmarkdown, readstata13, patchwork, rdhs, R.rsp VignetteBuilder: R.rsp, knitr diff --git a/R/simPop.R b/R/simPop.R index 9c97975..7a3d047 100755 --- a/R/simPop.R +++ b/R/simPop.R @@ -1256,7 +1256,7 @@ getSortIndices = function(i, urban=TRUE, popMat, stratifyByUrban=TRUE, validatio # determine which pixels and how many EAs are in this stratum if(stratifyByUrban) { - includeI = popMat$area == areas[i] & popMat$urban == urban + includeI = (popMat$area == areas[i]) & (popMat$urban == urban) } else { includeI = popMat$area == areas[i] @@ -1357,7 +1357,7 @@ rStratifiedMultnomialBySubarea = function(n, popMat, easpa, stratifyByUrban=TRUE } else { popSubareaMat$pop = poppsub$popTotal } - # browser() + # now draw multinomials if(stratifyByUrban) { # draw for each constituency in each area crossed with urban/rural @@ -1437,7 +1437,7 @@ rMyMultinomial = function(n, i, stratifyByUrban=TRUE, urban=TRUE, popMat=NULL, e # determine which pixels and how many EAs are in this stratum 
if(stratifyByUrban) { - includeI = popMat$area == areas[i] & popMat$urban == urban + includeI = (popMat$area == areas[i]) & (popMat$urban == urban) nEA = ifelse(urban, easpa$EAUrb[i], easpa$EARur[i]) } else { @@ -1468,7 +1468,7 @@ rMyMultinomialSubarea = function(n, i, easpsub, stratifyByUrban=TRUE, urban=TRUE # determine which pixels and how many EAs are in this stratum if(stratifyByUrban) { - includeI = popMat$subarea == subareas[i] & popMat$urban == urban + includeI = (popMat$subarea == subareas[i]) & (popMat$urban == urban) } else { includeI = popMat$subarea == subareas[i] @@ -1478,15 +1478,21 @@ rMyMultinomialSubarea = function(n, i, easpsub, stratifyByUrban=TRUE, urban=TRUE # sample from the pixels if this stratum exists if(sum(includeI) == 0){ if(any(nEA != 0)) - stop(paste0("no valid pixels to put EAs in for constituency ", as.character(subareas[i]), " and urban level ", urban)) + stop(paste0("no valid pixels to put EAs in for subarea ", as.character(subareas[i]), " and urban level ", urban)) return(matrix(nrow=0, ncol=n)) } + thesePixelProbs = popMat$pop[includeI] - sapply(nEA, stats::rmultinom, n=1, prob=thesePixelProbs) + nonZeroEAs = nEA != 0 + out = matrix(0, nrow=sum(includeI), ncol=n) + if(any(nonZeroEAs)) { + out[,nonZeroEAs] = sapply(nEA[nonZeroEAs], stats::rmultinom, n=1, prob=thesePixelProbs) + } + out } #' @describeIn simPopInternal Random (truncated) multinomial draws conditional on the number of each type being at least one -rmultinom1 = function(n=1, size, prob, maxSize=5000*5000, method=c("mult1", "mult", "indepMH"), verbose=FALSE, minSample=100, +rmultinom1 = function(n=1, size, prob, maxSize=8000*8000, method=c("mult1", "mult", "indepMH"), verbose=FALSE, minSample=100, maxExpectedSizeBeforeSwitch=1000*1e7, init=NULL, burnIn=floor(n/4), filterEvery=10, zeroProbZeroSamples=TRUE, allowSizeLessThanK=FALSE) { method = match.arg(method) @@ -1527,8 +1533,14 @@ rmultinom1 = function(n=1, size, prob, maxSize=5000*5000, method=c("mult1", "mul 
samplesLeft = sum(apply(samples, 2, function(x) {any(is.na(x))})) # approximate expected number of samples so that, after some are rejected, we will - # have the right number of samples + # have the right number of samples. Kappa is approximated for when p_1=p_2=...=p_k expectedSamples = ceiling(samplesLeft/averageProb) + # logMStar = lfactorial(size) - lfactorial(size-k) + sum(log(prob)) - log(size-k+1) + # avgAcceptProb = exp(-logMStar) + # logKappaApprox = lchoose(size + k - 1, k - 1) - lchoose(size - 1, k - 1) + # logM = logMStar -logKappaApprox + # avgAcceptProb = exp(-logM) + # expectedSamples = ceiling(samplesLeft/avgAcceptProb) if(expectedSamples*k > maxExpectedSizeBeforeSwitch) { warning("too many samples expected with method=='mult1'. Switching to method=='indepMH'") @@ -1541,23 +1553,24 @@ rmultinom1 = function(n=1, size, prob, maxSize=5000*5000, method=c("mult1", "mul thisNumberOfSamples = max(minSample, min(maxSamples, expectedSamples * 1.1)) if(verbose) print(paste0("Sampling ", thisNumberOfSamples, ". Sampled ", n-samplesLeft, "/", n, ". Expected remaining samples: ", expectedSamples)) - thisSamples = 1 + stats::rmultinom(thisNumberOfSamples, size-k, prob=prob) + thisSamples = matrix(1 + stats::rmultinom(thisNumberOfSamples, size-k, prob=prob), nrow=length(prob)) # calculate accept probabilities - thisProbs = (size-k) / apply(thisSamples, 2, prod) + thisProbs = exp(log(size-k) - apply(log(thisSamples), 2, sum)) + # thisProbs = (size-k) / apply(thisSamples, 2, prod) if(verbose) { print(paste0("Max sampled accept prob: ", max(thisProbs), ". Mean sampled accept prob: ", mean(thisProbs))) - print(paste0("Max theoretical accept prob: ", 1, ". 
Mean 'theoretical' accept prob: ", averageProb)) + print(paste0("Expected number of samples based on avg sampled acceptance prob: ", ceiling(samplesLeft/mean(thisProbs)))) } # reject relevant samples u = stats::runif(thisNumberOfSamples) - thisSamples = thisSamples[,u<thisProbs] + thisSamples = matrix(thisSamples[,u<thisProbs], nrow=length(prob)) # remove excess samples if necessary totalSamples = ncol(thisSamples) + n - samplesLeft if(totalSamples > n) { - thisSamples = thisSamples[,1:samplesLeft] + thisSamples = matrix(thisSamples[,1:samplesLeft], nrow=length(prob)) } # add in accepted samples, if any @@ -1669,6 +1682,11 @@ rmultinom1 = function(n=1, size, prob, maxSize=5000*5000, method=c("mult1", "mul print(paste0("acceptance percentage: ", ar/ncol(samples))) } + # estimate autocorrelation in samples (if it is na then all samples have the same value there) + calcCor = apply(samples, 1, function(x) {cor(x[-1], x[-length(x)])}) + calcCor[is.na(calcCor)] = 1 + print(paste0("estimated mean (max) lag 1 correlation after filtering: ", mean(calcCor^filterEvery), " (", max(calcCor^filterEvery), ")")) + # filter out samples to reduce autocorrelation samples = samples[,seq(from=1, to=ncol(samples), by=filterEvery)] } diff --git a/man/aggPop.Rd b/man/aggPop.Rd index e90a140..5ef77f5 100755 --- a/man/aggPop.Rd +++ b/man/aggPop.Rd @@ -81,11 +81,11 @@ them to the specified areal level. Also calculates the aggregated risk and preva } \section{Functions}{ \itemize{ -\item \code{pixelPopToArea}: Aggregate from pixel to areal level +\item \code{pixelPopToArea()}: Aggregate from pixel to areal level -\item \code{areaPopToArea}: Aggregate areal populations to another areal level -}} +\item \code{areaPopToArea()}: Aggregate areal populations to another areal level +}} \examples{ \dontrun{ ##### Now we make a model for the risk. 
We will use an SPDE model with these diff --git a/man/calibrateByRegion.Rd b/man/calibrateByRegion.Rd index 2dc5822..a9b4382 100755 --- a/man/calibrateByRegion.Rd +++ b/man/calibrateByRegion.Rd @@ -13,7 +13,7 @@ calibrateByRegion(pointTotals, pointRegions, regions, regionTotals) \item{regions}{Vector of region names} -\item{regionTotals}{Vector Of region level totals associated with `regions`} +\item{regionTotals}{Vector of desired region level totals associated with `regions`} } \value{ A vector of same length as pointTotals and pointRegions containing diff --git a/man/getBirths.Rd b/man/getBirths.Rd index 3a607ca..d7131bd 100755 --- a/man/getBirths.Rd +++ b/man/getBirths.Rd @@ -8,8 +8,8 @@ getBirths( filepath = NULL, data = NULL, surveyyear = NA, - variables = c("caseid", "v001", "v002", "v004", "v005", "v021", "v022", "v023", - "v024", "v025", "v139", "bidx"), + variables = c("caseid", "v001", "v002", "v004", "v005", "v021", "v022", "v023", "v024", + "v025", "v139", "bidx"), strata = c("v024", "v025"), dob = "b3", alive = "b5", diff --git a/man/makePopIntegrationTab.Rd b/man/makePopIntegrationTab.Rd index faf954d..3fd6086 100755 --- a/man/makePopIntegrationTab.Rd +++ b/man/makePopIntegrationTab.Rd @@ -22,6 +22,7 @@ makePopIntegrationTab( stratifyByUrban = TRUE, areapa = NULL, areapsub = NULL, + customSubsetPolygons = NULL, areaPolygonSubsetI = NULL, subareaPolygonSubsetI = NULL, mean.neighbor = 50, @@ -49,6 +50,7 @@ getPoppsub( areaNameVar = "NAME_1", areaPolygonSubsetI = NULL, subareaPolygonSubsetI = NULL, + customSubsetPolygons = NULL, mean.neighbor = 50, delta = 0.1, setNAsToZero = TRUE, @@ -82,7 +84,8 @@ for specific countries} \item{subareaMapDat}{SpatialPolygonsDataFrame object with subarea level map information} -\item{areaNameVar}{The name of the area variable associated with \code{areaMapDat@data} and \code{subareaMapDat@data}} +\item{areaNameVar}{The name of the area variable associated with \code{areaMapDat@data} +and \code{subareaMapDat@data}} 
\item{subareaNameVar}{The name of the subarea variable associated with \code{subareaMapDat@data}} @@ -125,6 +128,12 @@ renormalizes population densities within areas or subareas crossed with urban/ru \item{spatialArea}{spatial area of the subarea (e.g. in km^2)} }} +\item{customSubsetPolygons}{'SpatialPolygonsDataFrame' or 'SpatialPolygons' object to subset +the grid over. This option can help reduce computation time relative to +constructing the whole grid and subsetting afterwards. `areaPolygonSubsetI` or +`subareaPolygonSubsetI` can be used when subsetting by areas or subareas in +`areaMapDat` or `subareaMapDat`. Must be in latitude/longitude projection "EPSG:4326"} + \item{areaPolygonSubsetI}{Index in areaMapDat for a specific area to subset the grid over. This option can help reduce computation time relative to constructing the whole grid and subsetting afterwards} @@ -175,7 +184,7 @@ subarea names. } \section{Functions}{ \itemize{ -\item \code{makePopIntegrationTab}: Generate pixellated `grid` of coordinates (both longitude/latitude and east/north) +\item \code{makePopIntegrationTab()}: Generate pixellated `grid` of coordinates (both longitude/latitude and east/north) over spatial domain of the given resolution with associated population totals, areas, subareas, and urban/rural levels. For very small areas that might not otherwise have a grid point in them, a custom integration point is added at their @@ -184,14 +193,14 @@ using area and subarea population tables, and generates area and subarea populat tables from population density information if not already given. Can be used for integrating predictions from the given coordinates to area and subarea levels using population weights. 
-\item \code{getPoppsub}: Generate table of estimates of population +\item \code{getPoppsub()}: Generate table of estimates of population totals per subarea x urban/rural combination based on population density raster at `kmres` resolution "grid", including custom integration points for any subarea too small to include grid points at their centroids. -\item \code{adjustPopMat}: Adjust population densities in grid based on a population frame. -}} +\item \code{adjustPopMat()}: Adjust population densities in grid based on a population frame. +}} \examples{ \dontrun{ # download Kenya GADM shapefiles from SUMMERdata github repository diff --git a/man/simPop.Rd b/man/simPop.Rd index 4d4925b..3e8b9e5 100755 --- a/man/simPop.Rd +++ b/man/simPop.Rd @@ -174,15 +174,15 @@ and information on the spatial population density and the population frame. } \section{Functions}{ \itemize{ -\item \code{simPopSPDE}: Simulate populations and population prevalences given census frame and population density +\item \code{simPopSPDE()}: Simulate populations and population prevalences given census frame and population density information. Uses SPDE model for generating spatial risk and can include iid cluster level effect. -\item \code{simPopCustom}: Simulate populations and population prevalences given census frame and population density +\item \code{simPopCustom()}: Simulate populations and population prevalences given census frame and population density information. Uses custom spatial logit risk function and can include iid cluster level effect. -}} +}} \examples{ \dontrun{ ## In this script we will create 5km resolution pixellated grid over Kenya, @@ -251,7 +251,7 @@ if(!file.exists(popTIFFilename)) { } # load it in -require(raster) +require(terra) out = load(popFilename) out @@ -266,7 +266,6 @@ northLim = c(-555.1739, 608.7130) ## poppsubKenya via data(kenyaPopulationData). 
First, we will need to calculate ## the areas in km^2 of the areas and subareas -library(rgdal) library(sp) # use Lambert equal area projection of areas (Admin-1) and subareas (Admin-2) @@ -275,8 +274,6 @@ midLat = mean(adm1@bbox[2,]) p4s = paste0("+proj=laea +x_0=0 +y_0=0 +lon_0=", midLon, " +lat_0=", midLat, " +units=km") -library(rgdal) - adm1proj <- spTransform(adm1, CRS(p4s)) adm2proj <- spTransform(adm2, CRS(p4s)) diff --git a/man/simPopInternal.Rd b/man/simPopInternal.Rd index 8edd414..6a48d53 100755 --- a/man/simPopInternal.Rd +++ b/man/simPopInternal.Rd @@ -4,6 +4,7 @@ \alias{simPopInternal} \alias{getExpectedNperEA} \alias{getSortIndices} +\alias{getSortIndicesSubarea} \alias{rStratifiedMultnomial} \alias{rStratifiedMultnomialBySubarea} \alias{rMyMultinomial} @@ -23,6 +24,14 @@ getSortIndices( validationPixelI = NULL ) +getSortIndicesSubarea( + i, + urban = TRUE, + popMat, + stratifyByUrban = TRUE, + validationPixelI = NULL +) + rStratifiedMultnomial(n, popMat, easpa, stratifyByUrban = TRUE) rStratifiedMultnomialBySubarea( @@ -192,26 +201,30 @@ distributions for population simulation. } \section{Functions}{ \itemize{ -\item \code{getExpectedNperEA}: Calculates expected denominator per enumeration area. +\item \code{getExpectedNperEA()}: Calculates expected denominator per enumeration area. 
+ +\item \code{getSortIndices()}: For recombining separate multinomials over +multiple areas into the draws over all grid points -\item \code{getSortIndices}: For recombining separate multinomials into the draws over all grid points +\item \code{getSortIndicesSubarea()}: For recombining separate multinomials over +multiple areas into the draws over all grid points -\item \code{rStratifiedMultnomial}: Gives nIntegrationPoints x n matrix of draws from the stratified multinomial with values +\item \code{rStratifiedMultnomial()}: Gives nIntegrationPoints x n matrix of draws from the stratified multinomial with values corresponding to the value of |C^g| for each pixel, g (the number of EAs/pixel) -\item \code{rStratifiedMultnomialBySubarea}: Gives nIntegrationPoints x n matrix of draws from the stratified multinomial with values +\item \code{rStratifiedMultnomialBySubarea()}: Gives nIntegrationPoints x n matrix of draws from the stratified multinomial with values -\item \code{rMyMultinomial}: +\item \code{rMyMultinomial()}: -\item \code{rMyMultinomialSubarea}: +\item \code{rMyMultinomialSubarea()}: -\item \code{rmultinom1}: Random (truncated) multinomial draws conditional on the number of each type being at least one +\item \code{rmultinom1()}: Random (truncated) multinomial draws conditional on the number of each type being at least one -\item \code{sampleNMultilevelMultinomial}: Take multilevel multinomial draws first from joint distribution of +\item \code{sampleNMultilevelMultinomial()}: Take multilevel multinomial draws first from joint distribution of number of households per EA given the total per stratum, and then from the joint distribution of the total target population per household given the total per stratum -\item \code{sampleNMultilevelMultinomialFixed}: Same as sampleNMultilevelMultinomial, except the number of EAs per pixel is fixed -}} +\item \code{sampleNMultilevelMultinomialFixed()}: Same as sampleNMultilevelMultinomial, except the number of EAs per 
pixel is fixed +}}