R/popGrid.R

Defines functions setThresholdsByRegion poppRegionFromPopMat calibrateByRegion adjustPopMat getPoppsub makePopIntegrationTab

Documented in adjustPopMat calibrateByRegion getPoppsub makePopIntegrationTab poppRegionFromPopMat setThresholdsByRegion

#' Generating pixellated populations, and population frames
#' 
#' Functions for generating pixellated population information and 
#' population frames at the `area` and `subarea` levels.  
#' The `area` and `subarea` levels can be thought of as big 
#' regions and little regions, where areas can be partitioned into 
#' unique sets of subareas. For example, Admin-1 and Admin-2 
#' areas might be areas and subareas respectively. The population totals are either 
#' tabulated at the area x urban/rural level, the subarea x urban/rural 
#' level, or at the pixel level of a specified resolution. Totals 
#' are calculated using population density information, shapefiles, 
#' and, possibly, preexisting population frames at different 
#' areal levels. Note that area names should each be unique, and similarly for 
#' subarea names.
#' 
#' @describeIn makePopIntegrationTab Generate pixellated `grid` of coordinates (both longitude/latitude and east/north) 
#' over spatial domain of the given resolution with associated population totals, areas, subareas, 
#' and urban/rural levels. For very small areas that might not 
#' otherwise have a grid point in them, a custom integration point is added at their 
#' centroid. Sets urbanicity classifications by thresholding input population density raster 
#' using area and subarea population tables, and generates area and subarea population 
#' tables from population density information if not already given. Can be used for integrating 
#' predictions from the given coordinates to area and subarea levels using population weights.
#' 
#' @param popMat Pixellated grid data frame with variables `area` and `pop` such as that 
#' generated by \code{\link{makePopIntegrationTab}}
#' @param poppa data.frame of population per area separated by urban/rural. If `poppsub` 
#'   is not included, this is used for normalization of populations associated with 
#'   population integration points. Contains variables:
#' \describe{
#'   \item{area}{name of area}
#'   \item{popUrb}{total urban (general) population of area}
#'   \item{popRur}{total rural (general) population of area}
#'   \item{popTotal}{total (general) population of area}
#'   \item{pctUrb}{percentage of population in the area that is urban (between 0 and 100)}
#' }
#' @param poppsub data.frame of population per subarea separated by 
#' urban/rural using for population normalization or urbanicity 
#' classification. Often based on extra fine scale population density grid. 
#' Has variables:
#' \describe{
#'   \item{subarea}{name of subarea}
#'   \item{area}{name of area}
#'   \item{popUrb}{total urban (general) population of subarea}
#'   \item{popRur}{total rural (general) population of subarea}
#'   \item{popTotal}{total (general) population of subarea}
#'   \item{pctUrb}{percentage of population in the subarea that is urban (between 0 and 100)}
#' }
#' @param areapa A list with variables:
#' \describe{
#'   \item{area}{name of area}
#'   \item{spatialArea}{spatial area of the subarea (e.g. in km^2)}
#' }
#' @param areapsub A list with variables:
#' \describe{
#'   \item{subarea}{name of subarea}
#'   \item{spatialArea}{spatial area of the subarea (e.g. in km^2)}
#' }
#' @param kmRes The resolution of the pixelated grid in km
#' @param domainMapDat A shapefile representing the full spatial domain (e.g. country)
#' @param eastLim Range in km easting over the spatial domain under the input projection
#' @param northLim Range in km northing over the spatial domain under the input projection
#' @param mapProjection A projection function taking longitude and latitude and returning easting and 
#'                northing in km. Or the inverse if inverse is set to TRUE. For example, 
#'                \code{\link{projKenya}}. Check https://epsg.io/ for example for best projection EPSG codes 
#'                for specific countries
#' @param pop Population density raster
#' @param areaMapDat SpatialPolygonsDataFrame object with area level map information
#' @param subareaMapDat SpatialPolygonsDataFrame object with subarea level map information
#' @param areaNameVar The name of the area variable associated with \code{areaMapDat@data} 
#'            and \code{subareaMapDat@data}
#' @param subareaNameVar The name of the subarea variable associated with \code{subareaMapDat@data}
#' @param stratifyByUrban Whether to stratify the pixellated grid by urban/rural. If TRUE, 
#'   renormalizes population densities within areas or subareas crossed with urban/rural
#' @param poppaTarget Target population per area stratified by urban rural. Same format as poppa
#' @param adjustBy Whether to adjust population density by the `area` or `subarea` level
#' @param areaPolygonSubsetI Index in areaMapDat for a specific area to subset the grid over. This 
#'            option can help reduce computation time relative to constructing the whole grid 
#'            and subsetting afterwards
#' @param subareaPolygonSubsetI FOR EXPERIMENTAL PURPOSES ONLY. Index in subareaMapDat for a 
#'            specific area to subset the grid over. This 
#'            option can help reduce computation time relative to constructing the whole grid 
#'            and subsetting afterwards
#' @param customSubsetPolygons 'SpatialPolygonsDataFrame' or 'SpatialPolygons' object to subset 
#'            the grid over. This option can help reduce computation time relative to 
#'            constructing the whole grid and subsetting afterwards. `areaPolygonSubsetI` or 
#'            `subareaPolygonSubsetI` can be used when subsetting by areas or subareas in 
#'            `areaMapDat` or `subareaMapDat`. Must be in latitude/longitude projection "EPSG:4326"
#' @param returnPoppTables If TRUE, poppa and poppsub will be calculated based on the generated 
#'                          population integration matrix and input area/subarea map data
#' @param mean.neighbor For determining what area or subarea points are nearest to if they do not 
#' directly fall into an area. See \code{\link[fields]{fields.rdist.near}} for details.
#' @param delta For determining what area or subarea points are nearest to if they do not 
#' directly fall into an area. See \code{\link[fields]{fields.rdist.near}} for details.
#' @param setNAsToZero If TRUE, sets NA populations to 0.
#' @param fixZeroPopDensitySubareas If TRUE, if population density in a subarea is estimated to be 
#' zero, but the total population in the subarea is nonzero, population is filled into the 
#' area uniformly
#' @param extractMethod Either 'bilinear' or 'simple'. see `method` from 
#' \code{\link[terra]{extract}}
#' 
#' @author John Paige
#' @seealso \code{\link{setThresholdsByRegion}}, \code{\link{poppRegionFromPopMat}}, \code{\link{simPopSPDE}}, \code{\link{simPopCustom}}
#' @examples 
#' \dontrun{
#' 
#' library(sp)
#' library(sf)
#' # download Kenya GADM shapefiles from SUMMERdata github repository
#' githubURL <- paste0("https://github.com/paigejo/SUMMERdata/blob/main/data/", 
#'                     "kenyaMaps.rda?raw=true")
#' tempDirectory = "~/"
#' mapsFilename = paste0(tempDirectory, "/kenyaMaps.rda")
#' if(!file.exists(mapsFilename)) {
#'   download.file(githubURL,mapsFilename)
#' }
#' 
#' # load it in
#' out = load(mapsFilename)
#' out
#' adm1@data$NAME_1 = as.character(adm1@data$NAME_1)
#' adm1@data$NAME_1[adm1@data$NAME_1 == "Trans Nzoia"] = "Trans-Nzoia"
#' adm1@data$NAME_1[adm1@data$NAME_1 == "Elgeyo-Marakwet"] = "Elgeyo Marakwet"
#' adm2@data$NAME_1 = as.character(adm2@data$NAME_1)
#' adm2@data$NAME_1[adm2@data$NAME_1 == "Trans Nzoia"] = "Trans-Nzoia"
#' adm2@data$NAME_1[adm2@data$NAME_1 == "Elgeyo-Marakwet"] = "Elgeyo Marakwet"
#' 
#' # some Admin-2 areas have the same name
#' adm2@data$NAME_2 = as.character(adm2@data$NAME_2)
#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Bungoma") & 
#'                    (adm2@data$NAME_2 == "Lugari")] = "Lugari, Bungoma"
#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Kakamega") & 
#'                    (adm2@data$NAME_2 == "Lugari")] = "Lugari, Kakamega"
#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Meru") & 
#'                    (adm2@data$NAME_2 == "Igembe South")] = "Igembe South, Meru"
#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Tharaka-Nithi") & 
#'                    (adm2@data$NAME_2 == "Igembe South")] = "Igembe South, Tharaka-Nithi"
#' 
#' # The spatial area of unknown 8 is so small, it causes problems unless its removed or 
#' # unioned with another subarea. Union it with neighboring Kakeguria:
#' newadm2 = adm2
#' unknown8I = which(newadm2$NAME_2 == "unknown 8")
#' newadm2$NAME_2[newadm2$NAME_2 %in% c("unknown 8", "Kapenguria")] <- 
#'   "Kapenguria + unknown 8"
#' admin2.IDs <- newadm2$NAME_2
#' 
#' newadm2@data = cbind(newadm2@data, NAME_2OLD = newadm2@data$NAME_2)
#' newadm2@data$NAME_2OLD = newadm2@data$NAME_2
#' newadm2@data$NAME_2 = admin2.IDs
#' newadm2$NAME_2 = admin2.IDs
#' temp <- terra::aggregate(as(newadm2, "SpatVector"), by="NAME_2")
#' 
#' temp <- sf::st_as_sf(temp)
#' temp <- sf::as_Spatial(temp)
#' 
#' tempData = newadm2@data[-unknown8I,]
#' tempData = tempData[order(tempData$NAME_2),]
#' newadm2 <- SpatialPolygonsDataFrame(temp, tempData, match.ID = F)
#' adm2 = newadm2
#' 
#' # download 2014 Kenya population density TIF file
#' 
#' githubURL <- paste0("https://github.com/paigejo/SUMMERdata/blob/main/data/", 
#'                     "Kenya2014Pop/worldpop_total_1y_2014_00_00.tif?raw=true")
#' popTIFFilename = paste0(tempDirectory, "/worldpop_total_1y_2014_00_00.tif")
#' if(!file.exists(popTIFFilename)) {
#'   download.file(githubURL,popTIFFilename)
#' }
#' 
#' # load it in
#' pop = terra::rast(popTIFFilename)
#' 
#' eastLim = c(-110.6405, 832.4544)
#' northLim = c(-555.1739, 608.7130)
#' 
#' ## Construct poppsubKenya, a table of urban/rural general population totals 
#' ## in each subarea. Technically, this is not necessary since we can load in 
#' ## poppsubKenya via data(kenyaPopulationData). First, we will need to calculate 
#' ## the areas in km^2 of the areas and subareas
#' 
#' 
#' # use Lambert equal area projection of areas (Admin-1) and subareas (Admin-2)
#' midLon = mean(adm1@bbox[1,])
#' midLat = mean(adm1@bbox[2,])
#' p4s = paste0("+proj=laea +x_0=0 +y_0=0 +lon_0=", midLon, 
#'              " +lat_0=", midLat, " +units=km")
#' 
#' adm1_sf = st_as_sf(adm1)
#' adm1proj_sf = st_transform(adm1_sf, p4s)
#' adm1proj = as(adm1proj_sf, "Spatial")
#' 
#' adm2_sf = st_as_sf(adm2)
#' adm2proj_sf = st_transform(adm2_sf, p4s)
#' adm2proj = as(adm2proj_sf, "Spatial")
#' 
#' # now calculate spatial area in km^2
#' admin1Areas = as.numeric(st_area(adm1proj_sf))
#' admin2Areas = as.numeric(st_area(adm2proj_sf))
#' 
#' areapaKenya = data.frame(area=adm1proj@data$NAME_1, spatialArea=admin1Areas)
#' areapsubKenya = data.frame(area=adm2proj@data$NAME_1, subarea=adm2proj@data$NAME_2, 
#'                            spatialArea=admin2Areas)
#' 
#' # Calculate general population totals at the subarea (Admin-2) x urban/rural 
#' # level and using 1km resolution population grid. Assign urbanicity by 
#' # thresholding population density based on estimated proportion population 
#' # urban/rural, making sure total area (Admin-1) urban/rural populations in 
#' # each area matches poppaKenya.
#' require(fields)
#' # NOTE: the following function will typically take ~15-20 minutes. Can speed up 
#' #       by setting kmRes to be higher, but we recommend fine resolution for 
#' #       this step, since it only needs to be done once. Instead of running this, 
#' #       you can simply run data(kenyaPopulationData)
#' system.time(poppsubKenya <- getPoppsub(
#'   kmRes=1, pop=pop, domainMapDat=adm0,
#'   eastLim=eastLim, northLim=northLim, mapProjection=projKenya,
#'   poppa = poppaKenya, areapa=areapaKenya, areapsub=areapsubKenya, 
#'   areaMapDat=adm1, subareaMapDat=adm2, 
#'   areaNameVar = "NAME_1", subareaNameVar="NAME_2"))
#' 
#' # Now generate a general population integration table at 5km resolution, 
#' # based on subarea (Admin-2) x urban/rural population totals. This takes 
#' # ~1 minute
#' system.time(popMatKenya <- makePopIntegrationTab(
#'   kmRes=5, pop=pop, domainMapDat=adm0,
#'   eastLim=eastLim, northLim=northLim, mapProjection=projKenya,
#'   poppa = poppaKenya, poppsub=poppsubKenya, 
#'   areaMapDat = adm1, subareaMapDat = adm2,
#'   areaNameVar = "NAME_1", subareaNameVar="NAME_2"))
#' 
#' ## Adjust popMat to be target (neonatal) rather than general population density. First
#' ## creat the target population frame
#' ## (these numbers are based on IPUMS microcensus data)
#' mothersPerHouseholdUrb = 0.3497151
#' childrenPerMotherUrb = 1.295917
#' mothersPerHouseholdRur = 0.4787696
#' childrenPerMotherRur = 1.455222
#' targetPopPerStratumUrban = easpaKenya$HHUrb * mothersPerHouseholdUrb * childrenPerMotherUrb
#' targetPopPerStratumRural = easpaKenya$HHRur * mothersPerHouseholdRur * childrenPerMotherRur
#' easpaKenyaNeonatal = easpaKenya
#' easpaKenyaNeonatal$popUrb = targetPopPerStratumUrban
#' easpaKenyaNeonatal$popRur = targetPopPerStratumRural
#' easpaKenyaNeonatal$popTotal = easpaKenyaNeonatal$popUrb + easpaKenyaNeonatal$popRur
#' easpaKenyaNeonatal$pctUrb = 100 * easpaKenyaNeonatal$popUrb / easpaKenyaNeonatal$popTotal
#' easpaKenyaNeonatal$pctTotal = 
#'   100 * easpaKenyaNeonatal$popTotal / sum(easpaKenyaNeonatal$popTotal)
#' 
#' # Generate the target population density by scaling the current population density grid 
#' # at the Admin1 x urban/rural level
#' popMatKenyaNeonatal = adjustPopMat(popMatKenya, easpaKenyaNeonatal)
#' 
#' # Generate neonatal population table from the neonatal population integration matrix.
#' # This is technically not necessary for population simulation purposes, but is here 
#' # for illustrative purposes
#' poppsubKenyaNeonatal = poppRegionFromPopMat(popMatKenyaNeonatal, popMatKenyaNeonatal$subarea)
#' poppsubKenyaNeonatal = cbind(subarea=poppsubKenyaNeonatal$region, 
#'                              area=adm2@data$NAME_1[match(poppsubKenyaNeonatal$region, 
#'                                adm2@data$NAME_2)], 
#'                              poppsubKenyaNeonatal[,-1])
#' print(head(poppsubKenyaNeonatal))
#' }
#' @importFrom terra extract
#' @importFrom terra gdal
#' @importFrom terra crs<-
#' @importFrom sp SpatialPoints
#' @importFrom sp CRS
#' @importFrom sp over
#' @importFrom sp coordinates
#' @importFrom sp as.SpatialPolygons.PolygonsList
#' @importFrom fields make.surface.grid
#' @export
makePopIntegrationTab = function(kmRes=5, pop, domainMapDat, eastLim, northLim, mapProjection, 
                                 areaMapDat, subareaMapDat, 
                                 areaNameVar="NAME_1", subareaNameVar="NAME_2", 
                                 poppa=NULL, poppsub=NULL, stratifyByUrban=TRUE, 
                                 areapa=NULL, areapsub=NULL, customSubsetPolygons=NULL, 
                                 areaPolygonSubsetI=NULL, subareaPolygonSubsetI=NULL, 
                                 mean.neighbor=50, delta=.1, returnPoppTables=FALSE, 
                                 setNAsToZero=TRUE, fixZeroPopDensitySubareas=FALSE, 
                                 extractMethod="bilinear") {
  thresholdUrbanBy = ifelse(is.null(poppsub), "area", "subarea")
  
  # some basic checks to make sure inputs are correct
  
  # make sure area and subarea names are unique
  nameCounts = aggregate(areaMapDat@data[[areaNameVar]], by=list(area=areaMapDat@data[[areaNameVar]]), FUN=length)
  if(any(nameCounts$x > 1)) {
    stop("area names are not unique in areaMapDat")
  }
  if(!is.null(subareaMapDat)) {
    nameCounts = aggregate(subareaMapDat@data[[subareaNameVar]], by=list(area=subareaMapDat@data[[subareaNameVar]]), FUN=length)
    if(any(nameCounts$x > 1)) {
      stop("subarea names are not unique in subareaMapDat")
    }
  }
  if(!is.null(poppa)) {
    nameCounts = aggregate(poppa$area, by=list(area=poppa$area), FUN=length)
    if(any(nameCounts$x > 1)) {
      stop("area names are not unique in poppa")
    }
    
    # make sure area names match up
    if(!all.equal(sort(poppa$area), sort(areaMapDat@data[[areaNameVar]]))) {
      stop("area names in poppa do not match those in areaMapDat@data")
    }
  }
  if(!is.null(poppsub)) {
    nameCounts = aggregate(poppsub$subarea, by=list(area=poppsub$subarea), FUN=length)
    if(any(nameCounts$x > 1)) {
      stop("subarea names are not unique in poppsub")
    }
    
    # make sure subarea names match up
    if(!is.null(subareaMapDat)) {
      if(!all.equal(sort(poppsub$subarea), sort(subareaMapDat@data[[subareaNameVar]]))) {
        stop("subarea names in poppsub do not match those in subareaMapDat@data")
      }
    }
  }
  
  # get a rectangular grid
  eastGrid = seq(eastLim[1], eastLim[2], by=kmRes)
  northGrid = seq(northLim[1], northLim[2], by=kmRes)
  
  if(!is.null(areaPolygonSubsetI)) {
    # get range of the grid that we actually need
    temp = areaMapDat@polygons[[areaPolygonSubsetI]]
    allPolygons = temp@Polygons
    eastNorthCoords = do.call("rbind", lapply(1:length(allPolygons), function(i) {mapProjection(allPolygons[[i]]@coords)}))
    eastSubRange = range(eastNorthCoords[,1])
    northSubRange = range(eastNorthCoords[,2])
    
    # subset grid to the range we need
    eastGrid = eastGrid[eastGrid >= eastSubRange[1]]
    eastGrid = eastGrid[eastGrid <= eastSubRange[2]]
    northGrid = northGrid[northGrid >= northSubRange[1]]
    northGrid = northGrid[northGrid <= northSubRange[2]]
  }
  
  if(!is.null(subareaPolygonSubsetI)) {
    # get range of the grid that we actually need
    temp = subareaMapDat@polygons[[subareaPolygonSubsetI]]
    allPolygons = temp@Polygons
    eastNorthCoords = do.call("rbind", lapply(1:length(allPolygons), function(i) {mapProjection(allPolygons[[i]]@coords)}))
    eastSubRange = range(eastNorthCoords[,1])
    northSubRange = range(eastNorthCoords[,2])
    
    # subset grid to the range we need
    eastGrid = eastGrid[eastGrid >= eastSubRange[1]]
    eastGrid = eastGrid[eastGrid <= eastSubRange[2]]
    northGrid = northGrid[northGrid >= northSubRange[1]]
    northGrid = northGrid[northGrid <= northSubRange[2]]
  }
  
  if(!is.null(customSubsetPolygons)) {
    if(("SpatialPolygons" %in% class(customSubsetPolygons)) || ("SpatialPolygonsDataFrame" %in% class(customSubsetPolygons))) {
      
      # calculate the east/north range of each polygon
      getPolygonENrange = function(pol) {
        # get range of the grid that we actually need
        temp = pol
        allPolygons = temp@Polygons
        eastNorthCoords = do.call("rbind", lapply(1:length(allPolygons), function(i) {mapProjection(allPolygons[[i]]@coords)}))
        eastSubRange = range(eastNorthCoords[,1])
        northSubRange = range(eastNorthCoords[,2])
        
        # subset grid to the range we need
        rbind(eastSubRange, 
              northSubRange)
      }
      allRanges = lapply(getPolygonENrange, customSubsetPolygons@polygons)
      
      # calculate the full range based on each individual polygon range
      eastings = sapply(allRanges, function(x) {x[1,]})
      northings = sapply(allRanges, function(x) {x[2,]})
      eastSubRange = range(eastings)
      northSubRange = range(northings)
      
      # subset grid to the range we need
      eastGrid = eastGrid[eastGrid >= eastSubRange[1]]
      eastGrid = eastGrid[eastGrid <= eastSubRange[2]]
      northGrid = northGrid[northGrid >= northSubRange[1]]
      northGrid = northGrid[northGrid <= northSubRange[2]]
    } else {
      stop("customSubsetPolygons must be of class 'SpatialPolygons' or 'SpatialPolygonsDataFrame'")
    }
  }
  
  utmGrid = matrix(fields::make.surface.grid(list(east=eastGrid, north=northGrid)), ncol=2)
  
  # project coordinates into lat/lon
  if(length(utmGrid) > 0) {
    lonLatGrid = matrix(mapProjection(utmGrid, inverse=TRUE), ncol=2)
  } else {
    warning(paste0("no grid cell centroid are in the areas of interest. ", 
                   "Integration grid will be composed entirely of custom ", 
                   "integration points at the centroids of the areas of interest"))
    lonLatGrid = utmGrid
  }
  
  # subset grid so it's in the domain
  # inDomain = in.poly(lonLatGrid, domainPoly)
  # determine version of PROJ
  ver = terra::gdal(lib="proj")
  PROJ6 <- as.numeric(substr(ver, 1, 1)) >= 6
  
  # from lon/lat coords to easting/northing
  # if(!PROJ6) {
  #   lonLatCoords = sp::SpatialPoints(lonLatGrid, proj4string=sp::CRS("+proj=longlat"))
  # } else {
  #   lonLatCoords = sp::SpatialPoints(lonLatGrid, proj4string=sp::CRS(SRS_string="EPSG:4326"))
  # }
  lonLatCoords = sp::SpatialPoints(lonLatGrid, proj4string=domainMapDat@proj4string)
  inDomain = sp::over(lonLatCoords, domainMapDat)
  inDomain = !is.na(inDomain[,1])
  utmGrid = matrix(utmGrid[inDomain,], ncol=2)
  lonLatGrid = matrix(lonLatGrid[inDomain,], ncol=2)
  
  # compute areas associated with locations
  if(length(lonLatGrid) > 0) {
    if(!is.null(subareaMapDat)) {
      subareas = SUMMER::getAreaName(lonLatGrid, subareaMapDat, areaNameVar=subareaNameVar, mean.neighbor=mean.neighbor, delta=delta)$areaNames
    } else {
      subareas = NULL
    }
    
    if(!is.null(areaMapDat)) {
      areas = SUMMER::getAreaName(lonLatGrid, areaMapDat, areaNameVar=areaNameVar, mean.neighbor=mean.neighbor, delta=delta)$areaNames
    } else {
      areas = NULL
    }
    
  } else {
    subareas = character(0)
    areas = character(0)
  }
  
  if(!is.null(areaPolygonSubsetI)) {
    areaSubsetName = areaMapDat@data[areaPolygonSubsetI,areaNameVar]
    insideArea = areas == areaSubsetName
    
    # subset grid and area/subarea names so they're in the area of interest
    utmGrid = matrix(utmGrid[insideArea,], ncol=2)
    lonLatGrid = matrix(lonLatGrid[insideArea,], ncol=2)
    areas = areas[insideArea]
    subareas = subareas[insideArea]
  }
  
  if(!is.null(subareaPolygonSubsetI)) {
    subareaSubsetName = subareaMapDat@data[subareaPolygonSubsetI,subareaNameVar]
    insideSubarea = subareas == subareaSubsetName
    
    # subset grid and area/subarea names so they're in the area of interest
    utmGrid = matrix(utmGrid[insideSubarea,], ncol=2)
    lonLatGrid = matrix(lonLatGrid[insideSubarea,], ncol=2)
    areas = areas[insideSubarea]
    subareas = subareas[insideSubarea]
  }
  
  if(!is.null(customSubsetPolygons)) {
    if(!grepl("+proj=longlat", customSubsetPolygons@proj4string@projargs)) {
      warning(paste0("customSubsetPolygons does not use longitude/latitude ", 
                     "coordinate system? Using makePopIntegrationTab outside ", 
                     "its use case, which may result in errors"))
    }
    
    # if(!PROJ6) {
    #   lonLatCoords = sp::SpatialPoints(lonLatGrid, proj4string=sp::CRS("+proj=longlat"))
    # } else {
    #   lonLatCoords = sp::SpatialPoints(lonLatGrid, proj4string=sp::CRS(SRS_string="EPSG:4326"))
    # }
    lonLatCoords = sp::SpatialPoints(lonLatGrid, proj4string=customSubsetPolygons@proj4string)
    insideCustomSubset = sp::over(lonLatCoords, customSubsetPolygons)
    
    # subset grid and area/subarea names so they're in the area of interest
    utmGrid = matrix(utmGrid[insideCustomSubset,], ncol=2)
    lonLatGrid = matrix(lonLatGrid[insideCustomSubset,], ncol=2)
    areas = areas[insideCustomSubset]
    
    if(!is.null(subareaMapDat)) {
      subareas = subareas[insideCustomSubset]
    }
  }
  
  # determine what subareas we want our grid to contain
  if(!is.null(subareaMapDat)) {
    allSubareas = sort(unique(subareaMapDat@data[[subareaNameVar]]))
    if(!is.null(areaPolygonSubsetI)) {
      allSubareas = sort(unique(subareaMapDat@data[subareaMapDat@data[[areaNameVar]] == areaSubsetName,][[subareaNameVar]]))
    }
    if(!is.null(subareaPolygonSubsetI)) {
      allSubareas = subareaSubsetName
    }
    if(!is.null(customSubsetPolygons)) {
      allSubareas = sort(unique(subareas))
    }
  }
  
  # filter out parts of poppsub that are irrelevant for the subareas of 
  # interest
  if(!is.null(poppsub)) {
    poppsub = poppsub[poppsub$subarea %in% allSubareas,]
  }
  
  if(!is.null(subareaMapDat)) {
    # check to make sure every subarea has at least 2 pixels
    subareasFactor = factor(subareas, levels=allSubareas)
    if(length(lonLatGrid) > 0) {
      out = aggregate(subareas, by=list(subarea=subareasFactor), FUN=length, drop=FALSE)
    } else {
      out = data.frame(subarea=sort(unique(subareaMapDat@data[[subareaNameVar]])), 
                       x=NA)
    }
    noPixels = is.na(out$x)
    onePixel = out$x == 1
    onePixel[is.na(onePixel)] = FALSE
    onePixelNames = out$subarea[onePixel]
    badSubareas = noPixels | onePixel
    badSubareaNames = as.character(out$subarea[badSubareas])
    
    if(any(badSubareas)) {
      badSubareaString = paste(badSubareaNames, collapse=", ")
      warning(paste0("The following subareas have < 2 regular grid points ", 
                     "at the given resolution: ", badSubareaString, ", so ", 
                     "they will be given custom integration points"))
      
      # get centroids of the subareas (or it's single pixel coordinates)
      thisSpatialPolyList = sp::as.SpatialPolygons.PolygonsList(subareaMapDat@polygons)
      centroidsLonLat = matrix(ncol=2, nrow=length(allSubareas))
      
      for(i in 1:length(allSubareas)) {
        thisSubarea = allSubareas[i]
        if(thisSubarea %in% onePixelNames) {
          thisCentroid = lonLatGrid[subareas == thisSubarea,]
        } else {
          subareaI = match(as.character(thisSubarea), as.character(subareaMapDat@data[[subareaNameVar]]))
          thisCentroid = sp::coordinates(thisSpatialPolyList[subareaI])
        }
        
        centroidsLonLat[i,] = thisCentroid
      }
      
      # sort to match results of aggregate (alphabetical order)
      # sortI = sort(as.character(subareaMapDat@data[[subareaNameVar]]), index.return=TRUE)$ix
      sortI = sort(as.character(allSubareas), index.return=TRUE)$ix
      centroidsLonLat = centroidsLonLat[sortI,]
      
      # remove the one pixel for subareas with only one pixel 
      # (we will add it in again later, twice if stratified: both urban and rural)
      onePixel = which(subareas %in% onePixelNames)
      if(length(onePixel) > 0) {
        lonLatGrid = lonLatGrid[-onePixel,]
        utmGrid = utmGrid[-onePixel,]
        subareas = subareas[-onePixel]
        areas = areas[-onePixel]
      }
      
      # add centroids of only the bad subareas
      centroidsLonLat = matrix(centroidsLonLat[badSubareas,], ncol=2)
      
      # convert to east/north
      centroidsEastNorth = projKenya(centroidsLonLat[,1], centroidsLonLat[,2])
      
      # only add centroid in stratum if bad subareas have any population in the stratum. 
      # If poppsub not included and resolution not high enough to have multiple points 
      # in a subarea, treat it as entirely urban or rural
      if(is.null(poppsub)) {
        hasUrbanPop = rep(TRUE, sum(badSubareas))
        hasRuralPop = rep(FALSE, sum(badSubareas))
      } else {
        hasUrbanPop = (poppsub$popUrb > 0)[badSubareas]
        hasRuralPop = (poppsub$popRur > 0)[badSubareas]
      }
      
      # add centroids to the matrices of pixellated grid coordinates. 
      # Add them twice: once for urban, once for rural
      lonLatGrid = rbind(lonLatGrid, centroidsLonLat[hasUrbanPop,], centroidsLonLat[hasRuralPop,])
      utmGrid = rbind(utmGrid, centroidsEastNorth[hasUrbanPop,], centroidsEastNorth[hasRuralPop,])
      
      # add associated consituencies, areas, provinces to respective vectors
      # Normally, we could just caluclate what subarea/area the points are in. 
      # However, since centroids might not be in the associated subarea, we 
      # instead just assign the known subareas directly
      # newSubareas = getAreaName(rbind(centroidsLonLat[hasUrbanPop,], 
      #                                 centroidsLonLat[hasRuralPop,]), 
      #                           subareaMapDat, subareaNameVar, delta, mean.neighbor)$areaNames
      # newAreas = getAreaName(rbind(centroidsLonLat[hasUrbanPop,], 
      #                              centroidsLonLat[hasRuralPop,]), 
      #                        areaMapDat, areaNameVar, delta, mean.neighbor)$areaNames
      newSubareas = c(badSubareaNames[hasUrbanPop], badSubareaNames[hasRuralPop])
      newAreas = subareaMapDat@data[[areaNameVar]][match(newSubareas, subareaMapDat@data[[subareaNameVar]])]
      
      subareas = c(as.character(subareas), as.character(newSubareas))
      areas = c(as.character(areas), as.character(newAreas))
    }
  } else {
    badSubareas = FALSE
  }
  
  # get population density at those coordinates
  if(!PROJ6) {
    # extract the raster values for each chunk of points
    interpPopVals <- tryCatch(
      {
        terra::extract(pop, lonLatGrid,method=extractMethod)
      },
      error=function(cond) {
        message(cond)
        stop(paste0("Error extracting raster values. In case of memory limitations, see ", 
                    "terra::terraOptions()"))
        # Choose a return value in case of error
        return(NA)
      }
    )
  } else {
    # make sure CRS string of population density SpatRaster is "EPSG:4326", i.e. 
    # longitude + latitude
    tempCRSstr = sp::CRS(SRS_string="EPSG:4326")
    crsStr = attr(tempCRSstr, "comment")
    if(is.null(crsStr)) {
      crsStr <- tempCRSstr@projargs
      if(is.na(crsStr)) {
        crsStr <- ""
      }
    }
    terra::crs(pop) = crsStr
    
    # get population density values from the raster
    interpPopVals <- tryCatch(
      {
        terra::extract(pop, lonLatGrid, method="bilinear")
      },
      error=function(cond) {
        message(cond)
        stop(paste0("Error extracting raster values. In case of memory limitations, see ", 
                    "terra::aggregate, terra::resample, terra::projectRaster, and terra::terraOptions"))
        # Choose a return value in case of error
        return(NA)
      }
    )
  }
  interpPopVals = unlist(interpPopVals)
  names(interpPopVals) = NULL
  
  if(setNAsToZero) {
    interpPopVals[is.na(interpPopVals)] = 0
  }
  
  # if requested, fix subareas with entirely zero population density
  if(fixZeroPopDensitySubareas && !is.null(poppsub)) {
    newPop = data.frame(list(lon=lonLatGrid[,1], lat=lonLatGrid[,2], pop=interpPopVals, area=areas, subarea=subareas, urban=NA))
    poppsubCurrent = poppRegionFromPopMat(newPop, newPop$subarea)
    
    zeroPopSubareas = poppsubCurrent$region[poppsubCurrent$popTotal == 0]
    if(length(zeroPopSubareas) > 0) {
      warning(paste0("The following subareas have entirely zero population density ", 
                     "but nonzero total population, and their population will be filled ", 
                     "in uniformly: ", paste(zeroPopSubareas, collapse=", ")))
      # fill in population density uniformly in these subareas
      for(i in 1:length(zeroPopSubareas)) {
        thisSub = zeroPopSubareas[i]
        thisSubI = newPop$subarea == thisSub
        thisSubPop = poppsubCurrent$popTotal[poppsubCurrent$region == thisSub]
        newPop$pop[thisSubI] = thisSubPop/sum(thisSubI)
      }
    }
    
    interpPop = newPop$pop
  }
  
  if(any(badSubareas)) {
    # make sure population densities in the bad subareas 
    # are slightly different so one will be classified as urban and 
    # the other as rural. They will be renormalized later based on poppsub, 
    # or, if poppsub doesn't exist, only new/custom "urban" points will be kept, 
    # so no densities are modified here
    nUnits = length(interpPopVals)
    nNewRural = sum(hasRuralPop)
    if(nNewRural >= 1) {
      interpPopVals[(nUnits-nNewRural + 1):nUnits] = interpPopVals[(nUnits-nNewRural + 1):nUnits] / 2
    }
  }
  
  # determine which points are urban via population density thresholding
  newPop = data.frame(list(lon=lonLatGrid[,1], lat=lonLatGrid[,2], pop=interpPopVals, area=areas, subarea=subareas, urban=NA))
  if(thresholdUrbanBy == "area") {
    threshes = SUMMER::setThresholdsByRegion(newPop, poppa, regionType="area")
    popThreshes = sapply(1:nrow(newPop), function(i) {threshes$threshes[as.character(threshes$regions) == as.character(newPop$area[i])]})
    urban = newPop$pop >= unlist(popThreshes)
    newPop$urban = urban
  } else {
    tempSubarea = poppsub
    tempPop = newPop
    allSubareas = sort(unique(tempPop$subarea))
    tempSubarea = tempSubarea[tempSubarea$subarea %in% allSubareas,]
    threshes = SUMMER::setThresholdsByRegion(tempPop, poppr = tempSubarea, regionType="subarea")
    popThreshes = sapply(1:nrow(newPop), function(i) {threshes$threshes[as.character(threshes$regions) == as.character(newPop$subarea[i])]})
    urban = newPop$pop >= unlist(popThreshes)
    newPop$urban = urban
  }
  
  newPop$east = utmGrid[,1]
  newPop$north = utmGrid[,2]
  
  pointAreaNames = as.character(newPop$area)
  pointAreaNamesU = as.character(newPop$area)
  pointAreaNamesU[!newPop$urban] = "DoNotUseThis"
  pointAreaNamesR = as.character(newPop$area)
  pointAreaNamesR[newPop$urban] = "DoNotUseThis"
  
  if(!is.null(subareaMapDat)) {
    pointSubareaNames = as.character(newPop$subarea)
    pointSubareaNamesU = as.character(newPop$subarea)
    pointSubareaNamesU[!newPop$urban] = "DoNotUseThis"
    pointSubareaNamesR = as.character(newPop$subarea)
    pointSubareaNamesR[newPop$urban] = "DoNotUseThis"
  }
  
  # if necessary, renormalize population values within subareas or areas 
  # crossed with urban/rural to be the correct value
  if(!is.null(poppsub)) {
    # In this case, areapsub and areapa are not necessary
    
    if(stratifyByUrban) {
      pointPopUrb = calibrateByRegion(newPop$pop, pointSubareaNamesU, poppsub$subarea, poppsub$popUrb)
      pointPopRur = calibrateByRegion(newPop$pop, pointSubareaNamesR, poppsub$subarea, poppsub$popRur)
      newPop$pop = (pointPopUrb + pointPopRur)
    } else {
      newPop$pop = calibrateByRegion(newPop$pop, pointSubareaNames, poppsub$subarea, poppsub$popTotal)
    }
  } else if(!is.null(poppa)) {
    # i.e. we don't have poppsub, so now we need to be careful about the spatial areas 
    # (e.g. area in km^2) associated with each point
    
    # determine how to use spatial areas (e.g. in km^2) of areas/subareas:
    # Case 1: We don't have subareaMapDat, but do have areapa
    # Case 2: We don't have subareaMapDat, and don't have areapa
    # Case 3: We have subareaMapDat and areapsub
    # Case 4: We have subareaMapDat but don't have areapsub (this is sketchy if 
    #         there's any custom integration points)
    
    # set the areas of each point to be equal to start out. This must be 
    # proportional to, but not necessarily equal to, the actual area 
    # associated with each point
    pointAreas = rep(1, nrow(newPop))
    if(is.null(subareaMapDat) && !is.null(areapa)) {
      pointAreas = calibrateByRegion(pointAreas, pointAreaNames, areapa$area, areapa$spatialArea)
    } else if(is.null(subareaMapDat) && is.null(areapa)) {
      # do nothing, since pointAreas = rep(1, nrow(newPop)) is fine
    } else if(!is.null(subareaMapDat) && !is.null(areapsub)) {
      pointAreas = calibrateByRegion(pointAreas, pointSubareaNames, areapsub$subarea, areapsub$spatialArea)
    } else if(!is.null(subareaMapDat) && is.null(areapsub)) {
      if(any(badSubareas)) {
        stop("poppsub and areapsub are NULL, but subareaMapDat is included")
      }
      # could continue here, since pointAreas = rep(1, nrow(newPop)) might not be disastrous, 
      # but it would be wrong
    }
    
    # use the population per stratum to renormalize population densities
    if(stratifyByUrban) {
      popUrb = calibrateByRegion(newPop$pop*pointAreas, pointAreaNamesU, poppa$area, poppa$popUrb)
      popRur = calibrateByRegion(newPop$pop*pointAreas, pointAreaNamesR, poppa$area, poppa$popRur)
      
      newPop$pop = popUrb + popRur
    } else {
      newPop$pop = calibrateByRegion(newPop$pop*pointAreas, pointAreaNames, poppa$area, poppa$popTotal)
    }
  } else {
    stop("must either include poppsub, or poppa")
  }
  
  # if requested by user, return the population integration matrix as well 
  # as the associate population per area and subarea tables
  if(returnPoppTables) {
    if(!is.null(areaMapDat)) {
      areas = getAreaName(cbind(newPop$lon, newPop$lat), shapefile=areaMapDat, areaNameVar=areaNameVar)$areaNames
      newpoppa = poppRegionFromPopMat(newPop, areas)
      names(newpoppa)[1] = "area"
      newpoppa$pctTotal = 100 * newpoppa$popTotal/sum(newpoppa$popTotal)
      newpoppa$pctUrb = 100 * newpoppa$popUrb/newpoppa$popTotal
    } else {
      if(is.null(poppa)) {
        warning(paste0("returnPoppTables set to TRUE, but areaMapDat is NULL, ", 
                       "so no area level population table will be calculated"))
      }
      newpoppa = NULL
    }
    if(!is.null(subareaMapDat)) {
      subareas = getAreaName(cbind(newPop$lon, newPop$lat), shapefile=subareaMapDat, areaNameVar=subareaNameVar)$areaNames
      newpoppsub = poppRegionFromPopMat(newPop, subareas)
      names(newpoppsub)[1] = "subarea"
      
      # get area associated with subareas
      associatedAreas = sapply(newpoppsub$subarea, function(subareaName) {newPop$area[match(subareaName, newPop$subarea)]})
      newpoppsub$area = associatedAreas
      
      newpoppsub = newpoppsub[,c("subarea", "area", "popUrb", "popRur", "popTotal")]
      
      # calculate percent urban and percent of total population
      newpoppsub$pctUrb = 100 * newpoppsub$popUrb / newpoppsub$popTotal
      newpoppsub$pctTotal = 100 * newpoppsub$popTotal / sum(newpoppsub$popTotal)
    } else {
      if(is.null(poppsub)) {
        warning(paste0("returnPoppTables set to TRUE, but subareaMapDat is NULL, ", 
                       "so no subarea level population table will be calculated"))
      }
      newpoppsub = NULL
    }
    
    return(list(popMat=newPop, poppa=newpoppa, poppsub=newpoppsub))
  }
  
  newPop
}

#' @describeIn makePopIntegrationTab Generate table of estimates of population
#'   totals per subarea x urban/rural combination based on population density
#'   raster at `kmres` resolution "grid", including custom integration points
#'   for any subarea too small to include grid points at their centroids.
#' @export
getPoppsub = function(kmRes=1, pop, domainMapDat, eastLim, northLim, mapProjection, 
                      poppa, areapa=NULL, areapsub, subareaMapDat, subareaNameVar="NAME_2", 
                      stratifyByUrban=TRUE, areaMapDat=NULL, areaNameVar="NAME_1", 
                      areaPolygonSubsetI=NULL, subareaPolygonSubsetI=NULL, 
                      customSubsetPolygons=NULL, 
                      mean.neighbor=50, delta=.1, setNAsToZero=TRUE, fixZeroPopDensitySubareas=FALSE) {
  
  out = SUMMER::makePopIntegrationTab(kmRes=kmRes, pop=pop, domainMapDat=domainMapDat, 
                              areapa=areapa, areapsub=areapsub, 
                              eastLim=eastLim, northLim=northLim, mapProjection=mapProjection, 
                              subareaMapDat=subareaMapDat, areaMapDat=areaMapDat, 
                              areaNameVar=areaNameVar, subareaNameVar=subareaNameVar, 
                              poppa=poppa, stratifyByUrban=stratifyByUrban, 
                              areaPolygonSubsetI=areaPolygonSubsetI, 
                              subareaPolygonSubsetI=subareaPolygonSubsetI, 
                              customSubsetPolygons=customSubsetPolygons, 
                              mean.neighbor=mean.neighbor, delta=delta, 
                              setNAsToZero=setNAsToZero, 
                              fixZeroPopDensitySubareas=fixZeroPopDensitySubareas, 
                              returnPoppTables=TRUE)
  out$poppsub
}

#' @describeIn makePopIntegrationTab Adjust population densities in grid based on a population frame.
#' @export
adjustPopMat = function(popMat, poppaTarget=NULL, adjustBy=c("area", "subarea"), stratifyByUrban=TRUE) {
  adjustBy = match.arg(adjustBy)
  
  # sort get population per stratum from poppaTarget
  if(adjustBy == "area") {
    areas=sort(unique(poppaTarget$area))
  } else {
    areas=sort(unique(poppaTarget$subarea))
  }
  
  if(stratifyByUrban) {
    targetPopPerStratumUrban = poppaTarget$popUrb
    targetPopPerStratumRural = poppaTarget$popRur
    
    # generate 2 nArea x nPixels matrices for urban and rural strata integrating pixels with respect to population density to get area estimates
    getAreaStratumIntegrationMatrix = function(getUrban=TRUE) {
      areas = as.character(areas)
      
      if(adjustBy == "area") {
        mat = t(sapply(areas, function(area) {
          popMat$area == area
        }))
      } else {
        mat = t(sapply(areas, function(area) {
          popMat$subarea == area
        }))
      }
      
      mat = sweep(mat, 2, popMat$pop, "*")
      
      sweep(mat, 2, popMat$urban == getUrban, "*")
    }
    urbanIntegrationMat = getAreaStratumIntegrationMatrix()
    ruralIntegrationMat = getAreaStratumIntegrationMatrix(FALSE)
    
    # calculate number of people per stratum by integrating the population density surface
    urbanPopulations = rowSums(urbanIntegrationMat)
    ruralPopulations = rowSums(ruralIntegrationMat)
    
    # adjust each row of the integration matrices to get the correct expected number of children per stratum
    urbanIntegrationMat = sweep(urbanIntegrationMat, 1, targetPopPerStratumUrban / urbanPopulations, "*")
    urbanIntegrationMat[urbanPopulations == 0,] = 0
    ruralIntegrationMat = sweep(ruralIntegrationMat, 1, targetPopPerStratumRural / ruralPopulations, "*")
    ruralIntegrationMat[ruralPopulations == 0,] = 0
    
    # the column sums of the matrices give the correct modified population densities
    popMat$pop = colSums(urbanIntegrationMat) + colSums(ruralIntegrationMat)
  } else {
    targetPopPerArea = poppaTarget$popTotal
    
    # generate 2 nArea x nPixels matrices for urban and rural strata integrating pixels with respect to population density to get area estimates
    getAreaIntegrationMatrix = function() {
      areas = as.character(areas)
      
      if(adjustBy == "area") {
        mat = t(sapply(areas, function(area) {
          popMat$area == area
        }))
      } else {
        mat = t(sapply(areas, function(area) {
          popMat$subarea == area
        }))
      }
      
      mat = sweep(mat, 2, popMat$pop, "*")
      mat
    }
    integrationMat = getAreaIntegrationMatrix()
    
    # calculate number of people per stratum by integrating the population density surface
    totalPopulations = rowSums(integrationMat)
    
    # adjust each row of the integration matrices to get the correct expected number of children per stratum
    urbanIntegrationMat = sweep(integrationMat, 1, targetPopPerArea / totalPopulations, "*")
    urbanIntegrationMat[totalPopulations == 0,] = 0
    
    # the column sums of the matrices give the correct modified population densities
    popMat$pop = colSums(integrationMat)
  }
  
  
  popMat
}

#' Calibrate the point level totals so their sum matches the regional totals
#' 
#' Calibrate/normalize the point level totals so their sum matches the 
#' regional totals. Technically, the totals can be at any level smaller  
#' than the region level specified.
#' 
#' @param pointTotals Vector of point level totals that will be calibrated/normalized
#' @param pointRegions Vector of regions associated with each point
#' @param regions Vector of region names
#' @param regionTotals Vector of desired region level totals associated with `regions`
#' 
#' @return A vector of same length as pointTotals and pointRegions containing 
#' the calibrated/normalized point totals that sum to the correct regional totals
#' 
#' @author John Paige
#' 
#' @examples
#' pointTotals = c(1, 1, 1, 2)
#' pointRegions = c("a", "a", "b", "b")
#' regionTotals = c(10, 20)
#' regions = c("a", "b")
#' calibrateByRegion(pointTotals, pointRegions, regions, regionTotals)
#' 
#' @return Vector of updated point level totals, calibrated to match region totals
#' 
#' @export
calibrateByRegion = function(pointTotals, pointRegions, regions, regionTotals) {
  regions = as.character(regions)
  pointRegions = as.character(pointRegions)
  
  # generate nRegions x nPoints matrix that separates points totals to each region
  getRegionIntegrationMatrix = function() {
    
    mat = t(sapply(regions, function(regionName) {pointRegions == regionName}))
    mat = sweep(mat, 2, pointTotals, "*")
    mat
  }
  integrationMat = getRegionIntegrationMatrix()
  
  # calculate total per region before calibration
  tempRegionTotals = rowSums(integrationMat)
  
  # adjust each row of the integration matrices to get the correct totals per region
  integrationMat = sweep(integrationMat, 1, regionTotals / tempRegionTotals, "*")
  integrationMat[tempRegionTotals == 0,] = 0
  
  # check to make sure all zero population totals are in zero population regions. If 
  # any extra, spread the region population in the associated pointTotals
  badRegions = (tempRegionTotals == 0) & (regionTotals != 0)
  if(any(badRegions)) {
    badRegionIs = which(badRegions)
    for(i in 1:length(badRegionIs)) {
      thisRegionI = badRegionIs[i]
      thisRegion = pointRegions == regions[thisRegionI]
      integrationMat[thisRegionI,thisRegion] = regionTotals[thisRegionI] * (1/sum(thisRegion))
    }
  }
  
  # the column sums of the matrix give the correct calibrated regional totals
  colSums(integrationMat)
}

#' Generate a population frame of a similar format to poppa argument of \code{\link{simPopCustom}} with a custom set of regions
#' 
#' @param popMat Pixellated grid data frame with variables `area` and `pop`. Assumed to be stratified by urban/rural
#' @param regions character vector of length nPixels giving a custom set of regions for which to generate 
#'  a population frame using population density
#' 
#' @details Urbanicity thresholds are set based on that region's percent population 
#' urban. Intended as a helper function of \code{\link{getPoppsub}}, but 
#' can also be used for custom sets of regions (i.e. more than just 2 
#' areal levels: area and subarea). 
#' 
#' @author John Paige
#' 
#' @seealso \code{\link{getPoppsub}}
#' 
#' @examples
#' \dontrun{
#' data(kenyaPopulationData)
#' 
#' #' # download Kenya GADM shapefiles from SUMMERdata github repository
#' githubURL <- "https://github.com/paigejo/SUMMERdata/blob/main/data/kenyaMaps.rda?raw=true"
#' tempDirectory = "~/"
#' mapsFilename = paste0(tempDirectory, "/kenyaMaps.rda")
#' if(!file.exists(mapsFilename)) {
#'   download.file(githubURL,mapsFilename)
#' }
#' 
#' # load it in
#' out = load(mapsFilename)
#' out
#' adm1@data$NAME_1 = as.character(adm1@data$NAME_1)
#' adm1@data$NAME_1[adm1@data$NAME_1 == "Trans Nzoia"] = "Trans-Nzoia"
#' adm1@data$NAME_1[adm1@data$NAME_1 == "Elgeyo-Marakwet"] = "Elgeyo Marakwet"
#' adm2@data$NAME_1 = as.character(adm2@data$NAME_1)
#' adm2@data$NAME_1[adm2@data$NAME_1 == "Trans Nzoia"] = "Trans-Nzoia"
#' adm2@data$NAME_1[adm2@data$NAME_1 == "Elgeyo-Marakwet"] = "Elgeyo Marakwet"
#' 
#' # some Admin-2 areas have the same name
#' adm2@data$NAME_2 = as.character(adm2@data$NAME_2)
#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Bungoma") & 
#'   (adm2@data$NAME_2 == "Lugari")] = "Lugari, Bungoma"
#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Kakamega") & 
#'   (adm2@data$NAME_2 == "Lugari")] = "Lugari, Kakamega"
#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Meru") & 
#'   (adm2@data$NAME_2 == "Igembe South")] = "Igembe South, Meru"
#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Tharaka-Nithi") & 
#'   (adm2@data$NAME_2 == "Igembe South")] = "Igembe South, Tharaka-Nithi"
#' 
#' # The spatial area of unknown 8 is so small, it causes problems unless 
#' # its removed or unioned with another subarea. Union it with neighboring 
#' # Kakeguria:
#' newadm2 = adm2
#' unknown8I = which(newadm2$NAME_2 == "unknown 8")
#' newadm2$NAME_2[newadm2$NAME_2 %in% c("unknown 8", "Kapenguria")] <- "Kapenguria + unknown 8"
#' admin2.IDs <- newadm2$NAME_2
#' 
#' newadm2@data = cbind(newadm2@data, NAME_2OLD = newadm2@data$NAME_2)
#' newadm2@data$NAME_2OLD = newadm2@data$NAME_2
#' newadm2@data$NAME_2 = admin2.IDs
#' newadm2$NAME_2 = admin2.IDs
#' temp <- terra::aggregate(as(newadm2, "SpatVector"), by="NAME_2")
#' 
#' library(sf)
#' temp <- sf::st_as_sf(temp)
#' temp <- sf::as_Spatial(temp)
#' 
#' tempData = newadm2@data[-unknown8I,]
#' tempData = tempData[order(tempData$NAME_2),]
#' newadm2 <- SpatialPolygonsDataFrame(temp, tempData, match.ID = F)
#' adm2 = newadm2
#' 
#' # download 2014 Kenya population density TIF file
#' 
#' githubURL <- paste0("https://github.com/paigejo/SUMMERdata/blob/main/data/", 
#'                     "Kenya2014Pop/worldpop_total_1y_2014_00_00.tif?raw=true")
#' popTIFFilename = paste0(tempDirectory, "/worldpop_total_1y_2014_00_00.tif")
#' if(!file.exists(popTIFFilename)) {
#'   download.file(githubURL,popTIFFilename)
#' }
#' 
#' # load it in
#' pop = terra::rast(popTIFFilename)
#' 
#' eastLim = c(-110.6405, 832.4544)
#' northLim = c(-555.1739, 608.7130)
#' 
#' require(fields)
#' 
#' # Now generate a general population integration table at 5km resolution, 
#' # based on subarea (Admin-2) x urban/rural population totals. This takes 
#' # ~1 minute
#' system.time(popMatKenya <- makePopIntegrationTab(
#'   kmRes=5, pop=pop, domainMapDat=adm0,
#'   eastLim=eastLim, northLim=northLim, mapProjection=projKenya,
#'   poppa = poppaKenya, poppsub=poppsubKenya, 
#'   areaMapDat = adm1, subareaMapDat = adm2,
#'   areaNameVar = "NAME_1", subareaNameVar="NAME_2"))
#'   
#' out = poppRegionFromPopMat(popMatKenya, popMatKenya$area)
#' out
#' poppaKenya
#' 
#' out = poppRegionFromPopMat(popMatKenya, popMatKenya$subarea)
#' out
#' poppsubKenya
#' 
#' popMatKenyaUnstratified = popMatKenya
#' popMatKenyaUnstratified$urban = NULL
#' out = poppRegionFromPopMat(popMatKenyaUnstratified, popMatKenyaUnstratified$area)
#' out
#' poppaKenya
#' }
#' @return A table of population totals by region
#' 
#' @export
poppRegionFromPopMat = function(popMat, regions) {
  stratifyByUrban = ("urban" %in% names(popMat)) && !all(is.na(popMat$urban))
  
  if(stratifyByUrban) {
    # in this case, generate urban, rural, and overall totals within each region
    out = aggregate(popMat$pop, by=list(region=as.character(regions), urban=popMat$urban), FUN=sum, drop=FALSE)
    regions = sort(unique(out$region))
    poppr = data.frame(region=regions, popUrb=out[(length(regions) + 1):(2*length(regions)), 3], 
                       popRur=out[1:length(regions), 3])
    poppr$popUrb[is.na(poppr$popUrb)] = 0
    poppr$popRur[is.na(poppr$popRur)] = 0
    poppr$popTotal = poppr$popUrb + poppr$popRur
  } else {
    # in this case, only generate overall totals within each region
    out = aggregate(popMat$pop, by=list(region=as.character(regions)), FUN=sum, drop=FALSE)
    regions = sort(unique(out$region))
    poppr = data.frame(region=regions, popTotal=out[1:length(regions), 2])
    poppr$popTotal[is.na(poppr$popTotal)] = 0
  }
  
  poppr
}

#' Set thresholds of population density for urbanicity classifications 
#' within each region of the given type 
#' 
#' @param popMat pixellated population density data frame with variables 
#' regionType and `pop`
#' @param poppr A table with population totals by region of the given type 
#' (e.g. poppa or poppsub from \code{\link{makePopIntegrationTab}})
#' @param regionType The variable name from poppr giving the region names. 
#' Defaults to "area"
#' 
#' @details Thresholds are set based on that region's percent population 
#' urban. Intended as a helper function of \code{\link{makePopIntegrationTab}}. 
#' 
#' @author John Paige
#' 
#' @seealso \code{\link{makePopIntegrationTab}}
#' 
#' @examples
#' \dontrun{
#' data(kenyaPopulationData)
#' 
#' #' # download Kenya GADM shapefiles from SUMMERdata github repository
#' githubURL <- "https://github.com/paigejo/SUMMERdata/blob/main/data/kenyaMaps.rda?raw=true"
#' tempDirectory = "~/"
#' mapsFilename = paste0(tempDirectory, "/kenyaMaps.rda")
#' if(!file.exists(mapsFilename)) {
#'   download.file(githubURL,mapsFilename)
#' }
#' 
#' # load it in
#' out = load(mapsFilename)
#' out
#' adm1@data$NAME_1 = as.character(adm1@data$NAME_1)
#' adm1@data$NAME_1[adm1@data$NAME_1 == "Trans Nzoia"] = "Trans-Nzoia"
#' adm1@data$NAME_1[adm1@data$NAME_1 == "Elgeyo-Marakwet"] = "Elgeyo Marakwet"
#' adm2@data$NAME_1 = as.character(adm2@data$NAME_1)
#' adm2@data$NAME_1[adm2@data$NAME_1 == "Trans Nzoia"] = "Trans-Nzoia"
#' adm2@data$NAME_1[adm2@data$NAME_1 == "Elgeyo-Marakwet"] = "Elgeyo Marakwet"
#' 
#' # some Admin-2 areas have the same name
#' adm2@data$NAME_2 = as.character(adm2@data$NAME_2)
#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Bungoma") & 
#'   (adm2@data$NAME_2 == "Lugari")] = "Lugari, Bungoma"
#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Kakamega") & 
#'   (adm2@data$NAME_2 == "Lugari")] = "Lugari, Kakamega"
#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Meru") & 
#'   (adm2@data$NAME_2 == "Igembe South")] = "Igembe South, Meru"
#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Tharaka-Nithi") & 
#'   (adm2@data$NAME_2 == "Igembe South")] = "Igembe South, Tharaka-Nithi"
#' 
#' # The spatial area of unknown 8 is so small, it causes problems unless 
#' # its removed or unioned with another subarea. Union it with neighboring 
#' # Kakeguria:
#' newadm2 = adm2
#' unknown8I = which(newadm2$NAME_2 == "unknown 8")
#' newadm2$NAME_2[newadm2$NAME_2 %in% c("unknown 8", "Kapenguria")] <- "Kapenguria + unknown 8"
#' admin2.IDs <- newadm2$NAME_2
#' 
#' newadm2@data = cbind(newadm2@data, NAME_2OLD = newadm2@data$NAME_2)
#' newadm2@data$NAME_2OLD = newadm2@data$NAME_2
#' newadm2@data$NAME_2 = admin2.IDs
#' newadm2$NAME_2 = admin2.IDs
#' temp <- terra::aggregate(as(newadm2, "SpatVector"), by="NAME_2")
#' 
#' library(sf)
#' temp <- sf::st_as_sf(temp)
#' temp <- sf::as_Spatial(temp)
#' 
#' tempData = newadm2@data[-unknown8I,]
#' tempData = tempData[order(tempData$NAME_2),]
#' newadm2 <- SpatialPolygonsDataFrame(temp, tempData, match.ID = F)
#' adm2 = newadm2
#' 
#' # download 2014 Kenya population density TIF file
#' 
#' githubURL <- paste0("https://github.com/paigejo/SUMMERdata/blob/main/data/", 
#'                     "Kenya2014Pop/worldpop_total_1y_2014_00_00.tif?raw=true")
#' popTIFFilename = paste0(tempDirectory, "/worldpop_total_1y_2014_00_00.tif")
#' if(!file.exists(popTIFFilename)) {
#'   download.file(githubURL,popTIFFilename)
#' }
#' 
#' # load it in
#' pop = terra::rast(popTIFFilename)
#' 
#' eastLim = c(-110.6405, 832.4544)
#' northLim = c(-555.1739, 608.7130)
#' 
#' require(fields)
#' 
#' # Now generate a general population integration table at 5km resolution, 
#' # based on subarea (Admin-2) x urban/rural population totals. This takes 
#' # ~1 minute
#' system.time(popMatKenya <- makePopIntegrationTab(
#'   kmRes=5, pop=pop, domainMapDat=adm0,
#'   eastLim=eastLim, northLim=northLim, mapProjection=projKenya,
#'   poppa = poppaKenya, poppsub=poppsubKenya, 
#'   areaMapDat = adm1, subareaMapDat = adm2,
#'   areaNameVar = "NAME_1", subareaNameVar="NAME_2"))
#' 
#' out = setThresholdsByRegion(popMatKenya, poppaKenya)
#' out
#' 
#' out = setThresholdsByRegion(popMatKenya, poppsubKenya, regionType="subarea")
#' out
#' }
#' 
#' @return A list of region names and their urbanicity thresholds in population density
#' 
#' @export
setThresholdsByRegion = function(popMat, poppr, regionType="area") {
  
  getRegionThresh = function(regionName) {
    # do the setup
    thisRegion = as.character(popMat[[regionType]]) == regionName
    thisPop = popMat$pop[thisRegion]
    thisTot = sum(thisPop, na.rm=TRUE)
    pctUrb = poppr$pctUrb[poppr[[regionType]] == regionName]/100
    pctRural = 1 - pctUrb
    
    # must round to avoid numerical inaccuracies
    if(round(pctUrb, 10) == 1) {
      return(-Inf)
    } else if(round(pctUrb, 10) == 0) {
      return(Inf)
    }
    
    # calculate threshold by integrating ecdf via sorted value cumulative sum
    sortedPop = sort(thisPop)
    cumsumPop = cumsum(sortedPop)
    threshI = match(1, cumsumPop >= thisTot*pctRural)
    if((threshI != 1) && (threshI != length(thisPop))) {
      thresh = sortedPop[threshI]
    } else {
      # make sure not all pixels are urban or all are rural
      if(threshI == 1) {
        thresh = mean(c(sortedPop[1], sortedPop[2]), na.rm=TRUE)
      } else {
        thresh = mean(c(sortedPop[length(thisPop)], sortedPop[length(thisPop)-1]), na.rm=TRUE)
      }
    }
    
    thresh
  }
  
  # compute threshold for each area
  regions = poppr[[regionType]]
  threshes = sapply(regions, getRegionThresh)
  
  list(regions=regions, threshes=threshes)
}
bryandmartin/SUMMER documentation built on April 9, 2024, 10:27 a.m.