#' Generating pixellated populations, and population frames
#'
#' Functions for generating pixellated population information and
#' population frames at the `area` and `subarea` levels.
#' The `area` and `subarea` levels can be thought of as big
#' regions and little regions, where areas can be partitioned into
#' unique sets of subareas. For example, Admin-1 and Admin-2
#' areas might be areas and subareas respectively. The population totals are either
#' tabulated at the area x urban/rural level, the subarea x urban/rural
#' level, or at the pixel level of a specified resolution. Totals
#' are calculated using population density information, shapefiles,
#' and, possibly, preexisting population frames at different
#' areal levels. Note that area names should each be unique, and similarly for
#' subarea names.
#'
#' @describeIn makePopIntegrationTab Generate pixellated `grid` of coordinates (both longitude/latitude and east/north)
#' over spatial domain of the given resolution with associated population totals, areas, subareas,
#' and urban/rural levels. For very small areas that might not
#' otherwise have a grid point in them, a custom integration point is added at their
#' centroid. Sets urbanicity classifications by thresholding input population density raster
#' using area and subarea population tables, and generates area and subarea population
#' tables from population density information if not already given. Can be used for integrating
#' predictions from the given coordinates to area and subarea levels using population weights.
#'
#' @param popMat Pixellated grid data frame with variables `area` and `pop` such as that
#' generated by \code{\link{makePopIntegrationTab}}
#' @param poppa data.frame of population per area separated by urban/rural. If `poppsub`
#' is not included, this is used for normalization of populations associated with
#' population integration points. Contains variables:
#' \describe{
#' \item{area}{name of area}
#' \item{popUrb}{total urban (general) population of area}
#' \item{popRur}{total rural (general) population of area}
#' \item{popTotal}{total (general) population of area}
#' \item{pctUrb}{percentage of population in the area that is urban (between 0 and 100)}
#' }
#' @param poppsub data.frame of population per subarea separated by
#' urban/rural using for population normalization or urbanicity
#' classification. Often based on extra fine scale population density grid.
#' Has variables:
#' \describe{
#' \item{subarea}{name of subarea}
#' \item{area}{name of area}
#' \item{popUrb}{total urban (general) population of subarea}
#' \item{popRur}{total rural (general) population of subarea}
#' \item{popTotal}{total (general) population of subarea}
#' \item{pctUrb}{percentage of population in the subarea that is urban (between 0 and 100)}
#' }
#' @param areapa A list with variables:
#' \describe{
#' \item{area}{name of area}
#' \item{spatialArea}{spatial area of the subarea (e.g. in km^2)}
#' }
#' @param areapsub A list with variables:
#' \describe{
#' \item{subarea}{name of subarea}
#' \item{spatialArea}{spatial area of the subarea (e.g. in km^2)}
#' }
#' @param kmRes The resolution of the pixelated grid in km
#' @param domainMapDat A shapefile representing the full spatial domain (e.g. country)
#' @param eastLim Range in km easting over the spatial domain under the input projection
#' @param northLim Range in km northing over the spatial domain under the input projection
#' @param mapProjection A projection function taking longitude and latitude and returning easting and
#' northing in km. Or the inverse if inverse is set to TRUE. For example,
#' \code{\link{projKenya}}. Check https://epsg.io/ for example for best projection EPSG codes
#' for specific countries
#' @param pop Population density raster
#' @param areaMapDat SpatialPolygonsDataFrame object with area level map information
#' @param subareaMapDat SpatialPolygonsDataFrame object with subarea level map information
#' @param areaNameVar The name of the area variable associated with \code{areaMapDat@data}
#' and \code{subareaMapDat@data}
#' @param subareaNameVar The name of the subarea variable associated with \code{subareaMapDat@data}
#' @param stratifyByUrban Whether to stratify the pixellated grid by urban/rural. If TRUE,
#' renormalizes population densities within areas or subareas crossed with urban/rural
#' @param poppaTarget Target population per area stratified by urban rural. Same format as poppa
#' @param adjustBy Whether to adjust population density by the `area` or `subarea` level
#' @param areaPolygonSubsetI Index in areaMapDat for a specific area to subset the grid over. This
#' option can help reduce computation time relative to constructing the whole grid
#' and subsetting afterwards
#' @param subareaPolygonSubsetI FOR EXPERIMENTAL PURPOSES ONLY. Index in subareaMapDat for a
#' specific area to subset the grid over. This
#' option can help reduce computation time relative to constructing the whole grid
#' and subsetting afterwards
#' @param customSubsetPolygons 'SpatialPolygonsDataFrame' or 'SpatialPolygons' object to subset
#' the grid over. This option can help reduce computation time relative to
#' constructing the whole grid and subsetting afterwards. `areaPolygonSubsetI` or
#' `subareaPolygonSubsetI` can be used when subsetting by areas or subareas in
#' `areaMapDat` or `subareaMapDat`. Must be in latitude/longitude projection "EPSG:4326"
#' @param returnPoppTables If TRUE, poppa and poppsub will be calculated based on the generated
#' population integration matrix and input area/subarea map data
#' @param mean.neighbor For determining what area or subarea points are nearest to if they do not
#' directly fall into an area. See \code{\link[fields]{fields.rdist.near}} for details.
#' @param delta For determining what area or subarea points are nearest to if they do not
#' directly fall into an area. See \code{\link[fields]{fields.rdist.near}} for details.
#' @param setNAsToZero If TRUE, sets NA populations to 0.
#' @param fixZeroPopDensitySubareas If TRUE, if population density in a subarea is estimated to be
#' zero, but the total population in the subarea is nonzero, population is filled into the
#' area uniformly
#' @param extractMethod Either 'bilinear' or 'simple'. see `method` from
#' \code{\link[terra]{extract}}
#'
#' @author John Paige
#' @seealso \code{\link{setThresholdsByRegion}}, \code{\link{poppRegionFromPopMat}}, \code{\link{simPopSPDE}}, \code{\link{simPopCustom}}
#' @examples
#' \dontrun{
#'
#' library(sp)
#' library(sf)
#' # download Kenya GADM shapefiles from SUMMERdata github repository
#' githubURL <- paste0("https://github.com/paigejo/SUMMERdata/blob/main/data/",
#' "kenyaMaps.rda?raw=true")
#' tempDirectory = "~/"
#' mapsFilename = paste0(tempDirectory, "/kenyaMaps.rda")
#' if(!file.exists(mapsFilename)) {
#' download.file(githubURL,mapsFilename)
#' }
#'
#' # load it in
#' out = load(mapsFilename)
#' out
#' adm1@data$NAME_1 = as.character(adm1@data$NAME_1)
#' adm1@data$NAME_1[adm1@data$NAME_1 == "Trans Nzoia"] = "Trans-Nzoia"
#' adm1@data$NAME_1[adm1@data$NAME_1 == "Elgeyo-Marakwet"] = "Elgeyo Marakwet"
#' adm2@data$NAME_1 = as.character(adm2@data$NAME_1)
#' adm2@data$NAME_1[adm2@data$NAME_1 == "Trans Nzoia"] = "Trans-Nzoia"
#' adm2@data$NAME_1[adm2@data$NAME_1 == "Elgeyo-Marakwet"] = "Elgeyo Marakwet"
#'
#' # some Admin-2 areas have the same name
#' adm2@data$NAME_2 = as.character(adm2@data$NAME_2)
#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Bungoma") &
#' (adm2@data$NAME_2 == "Lugari")] = "Lugari, Bungoma"
#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Kakamega") &
#' (adm2@data$NAME_2 == "Lugari")] = "Lugari, Kakamega"
#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Meru") &
#' (adm2@data$NAME_2 == "Igembe South")] = "Igembe South, Meru"
#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Tharaka-Nithi") &
#' (adm2@data$NAME_2 == "Igembe South")] = "Igembe South, Tharaka-Nithi"
#'
#' # The spatial area of unknown 8 is so small, it causes problems unless its removed or
#' # unioned with another subarea. Union it with neighboring Kakeguria:
#' newadm2 = adm2
#' unknown8I = which(newadm2$NAME_2 == "unknown 8")
#' newadm2$NAME_2[newadm2$NAME_2 %in% c("unknown 8", "Kapenguria")] <-
#' "Kapenguria + unknown 8"
#' admin2.IDs <- newadm2$NAME_2
#'
#' newadm2@data = cbind(newadm2@data, NAME_2OLD = newadm2@data$NAME_2)
#' newadm2@data$NAME_2OLD = newadm2@data$NAME_2
#' newadm2@data$NAME_2 = admin2.IDs
#' newadm2$NAME_2 = admin2.IDs
#' temp <- terra::aggregate(as(newadm2, "SpatVector"), by="NAME_2")
#'
#' temp <- sf::st_as_sf(temp)
#' temp <- sf::as_Spatial(temp)
#'
#' tempData = newadm2@data[-unknown8I,]
#' tempData = tempData[order(tempData$NAME_2),]
#' newadm2 <- SpatialPolygonsDataFrame(temp, tempData, match.ID = F)
#' adm2 = newadm2
#'
#' # download 2014 Kenya population density TIF file
#'
#' githubURL <- paste0("https://github.com/paigejo/SUMMERdata/blob/main/data/",
#' "Kenya2014Pop/worldpop_total_1y_2014_00_00.tif?raw=true")
#' popTIFFilename = paste0(tempDirectory, "/worldpop_total_1y_2014_00_00.tif")
#' if(!file.exists(popTIFFilename)) {
#' download.file(githubURL,popTIFFilename)
#' }
#'
#' # load it in
#' pop = terra::rast(popTIFFilename)
#'
#' eastLim = c(-110.6405, 832.4544)
#' northLim = c(-555.1739, 608.7130)
#'
#' ## Construct poppsubKenya, a table of urban/rural general population totals
#' ## in each subarea. Technically, this is not necessary since we can load in
#' ## poppsubKenya via data(kenyaPopulationData). First, we will need to calculate
#' ## the areas in km^2 of the areas and subareas
#'
#'
#' # use Lambert equal area projection of areas (Admin-1) and subareas (Admin-2)
#' midLon = mean(adm1@bbox[1,])
#' midLat = mean(adm1@bbox[2,])
#' p4s = paste0("+proj=laea +x_0=0 +y_0=0 +lon_0=", midLon,
#' " +lat_0=", midLat, " +units=km")
#'
#' adm1_sf = st_as_sf(adm1)
#' adm1proj_sf = st_transform(adm1_sf, p4s)
#' adm1proj = as(adm1proj_sf, "Spatial")
#'
#' adm2_sf = st_as_sf(adm2)
#' adm2proj_sf = st_transform(adm2_sf, p4s)
#' adm2proj = as(adm2proj_sf, "Spatial")
#'
#' # now calculate spatial area in km^2
#' admin1Areas = as.numeric(st_area(adm1proj_sf))
#' admin2Areas = as.numeric(st_area(adm2proj_sf))
#'
#' areapaKenya = data.frame(area=adm1proj@data$NAME_1, spatialArea=admin1Areas)
#' areapsubKenya = data.frame(area=adm2proj@data$NAME_1, subarea=adm2proj@data$NAME_2,
#' spatialArea=admin2Areas)
#'
#' # Calculate general population totals at the subarea (Admin-2) x urban/rural
#' # level and using 1km resolution population grid. Assign urbanicity by
#' # thresholding population density based on estimated proportion population
#' # urban/rural, making sure total area (Admin-1) urban/rural populations in
#' # each area matches poppaKenya.
#' require(fields)
#' # NOTE: the following function will typically take ~15-20 minutes. Can speed up
#' # by setting kmRes to be higher, but we recommend fine resolution for
#' # this step, since it only needs to be done once. Instead of running this,
#' # you can simply run data(kenyaPopulationData)
#' system.time(poppsubKenya <- getPoppsub(
#' kmRes=1, pop=pop, domainMapDat=adm0,
#' eastLim=eastLim, northLim=northLim, mapProjection=projKenya,
#' poppa = poppaKenya, areapa=areapaKenya, areapsub=areapsubKenya,
#' areaMapDat=adm1, subareaMapDat=adm2,
#' areaNameVar = "NAME_1", subareaNameVar="NAME_2"))
#'
#' # Now generate a general population integration table at 5km resolution,
#' # based on subarea (Admin-2) x urban/rural population totals. This takes
#' # ~1 minute
#' system.time(popMatKenya <- makePopIntegrationTab(
#' kmRes=5, pop=pop, domainMapDat=adm0,
#' eastLim=eastLim, northLim=northLim, mapProjection=projKenya,
#' poppa = poppaKenya, poppsub=poppsubKenya,
#' areaMapDat = adm1, subareaMapDat = adm2,
#' areaNameVar = "NAME_1", subareaNameVar="NAME_2"))
#'
#' ## Adjust popMat to be target (neonatal) rather than general population density. First
#' ## creat the target population frame
#' ## (these numbers are based on IPUMS microcensus data)
#' mothersPerHouseholdUrb = 0.3497151
#' childrenPerMotherUrb = 1.295917
#' mothersPerHouseholdRur = 0.4787696
#' childrenPerMotherRur = 1.455222
#' targetPopPerStratumUrban = easpaKenya$HHUrb * mothersPerHouseholdUrb * childrenPerMotherUrb
#' targetPopPerStratumRural = easpaKenya$HHRur * mothersPerHouseholdRur * childrenPerMotherRur
#' easpaKenyaNeonatal = easpaKenya
#' easpaKenyaNeonatal$popUrb = targetPopPerStratumUrban
#' easpaKenyaNeonatal$popRur = targetPopPerStratumRural
#' easpaKenyaNeonatal$popTotal = easpaKenyaNeonatal$popUrb + easpaKenyaNeonatal$popRur
#' easpaKenyaNeonatal$pctUrb = 100 * easpaKenyaNeonatal$popUrb / easpaKenyaNeonatal$popTotal
#' easpaKenyaNeonatal$pctTotal =
#' 100 * easpaKenyaNeonatal$popTotal / sum(easpaKenyaNeonatal$popTotal)
#'
#' # Generate the target population density by scaling the current population density grid
#' # at the Admin1 x urban/rural level
#' popMatKenyaNeonatal = adjustPopMat(popMatKenya, easpaKenyaNeonatal)
#'
#' # Generate neonatal population table from the neonatal population integration matrix.
#' # This is technically not necessary for population simulation purposes, but is here
#' # for illustrative purposes
#' poppsubKenyaNeonatal = poppRegionFromPopMat(popMatKenyaNeonatal, popMatKenyaNeonatal$subarea)
#' poppsubKenyaNeonatal = cbind(subarea=poppsubKenyaNeonatal$region,
#' area=adm2@data$NAME_1[match(poppsubKenyaNeonatal$region,
#' adm2@data$NAME_2)],
#' poppsubKenyaNeonatal[,-1])
#' print(head(poppsubKenyaNeonatal))
#' }
#' @importFrom terra extract
#' @importFrom terra gdal
#' @importFrom terra crs<-
#' @importFrom sp SpatialPoints
#' @importFrom sp CRS
#' @importFrom sp over
#' @importFrom sp coordinates
#' @importFrom sp as.SpatialPolygons.PolygonsList
#' @importFrom fields make.surface.grid
#' @export
makePopIntegrationTab = function(kmRes=5, pop, domainMapDat, eastLim, northLim, mapProjection,
areaMapDat, subareaMapDat,
areaNameVar="NAME_1", subareaNameVar="NAME_2",
poppa=NULL, poppsub=NULL, stratifyByUrban=TRUE,
areapa=NULL, areapsub=NULL, customSubsetPolygons=NULL,
areaPolygonSubsetI=NULL, subareaPolygonSubsetI=NULL,
mean.neighbor=50, delta=.1, returnPoppTables=FALSE,
setNAsToZero=TRUE, fixZeroPopDensitySubareas=FALSE,
extractMethod="bilinear") {
thresholdUrbanBy = ifelse(is.null(poppsub), "area", "subarea")
# some basic checks to make sure inputs are correct
# make sure area and subarea names are unique
nameCounts = aggregate(areaMapDat@data[[areaNameVar]], by=list(area=areaMapDat@data[[areaNameVar]]), FUN=length)
if(any(nameCounts$x > 1)) {
stop("area names are not unique in areaMapDat")
}
if(!is.null(subareaMapDat)) {
nameCounts = aggregate(subareaMapDat@data[[subareaNameVar]], by=list(area=subareaMapDat@data[[subareaNameVar]]), FUN=length)
if(any(nameCounts$x > 1)) {
stop("subarea names are not unique in subareaMapDat")
}
}
if(!is.null(poppa)) {
nameCounts = aggregate(poppa$area, by=list(area=poppa$area), FUN=length)
if(any(nameCounts$x > 1)) {
stop("area names are not unique in poppa")
}
# make sure area names match up
if(!all.equal(sort(poppa$area), sort(areaMapDat@data[[areaNameVar]]))) {
stop("area names in poppa do not match those in areaMapDat@data")
}
}
if(!is.null(poppsub)) {
nameCounts = aggregate(poppsub$subarea, by=list(area=poppsub$subarea), FUN=length)
if(any(nameCounts$x > 1)) {
stop("subarea names are not unique in poppsub")
}
# make sure subarea names match up
if(!is.null(subareaMapDat)) {
if(!all.equal(sort(poppsub$subarea), sort(subareaMapDat@data[[subareaNameVar]]))) {
stop("subarea names in poppsub do not match those in subareaMapDat@data")
}
}
}
# get a rectangular grid
eastGrid = seq(eastLim[1], eastLim[2], by=kmRes)
northGrid = seq(northLim[1], northLim[2], by=kmRes)
if(!is.null(areaPolygonSubsetI)) {
# get range of the grid that we actually need
temp = areaMapDat@polygons[[areaPolygonSubsetI]]
allPolygons = temp@Polygons
eastNorthCoords = do.call("rbind", lapply(1:length(allPolygons), function(i) {mapProjection(allPolygons[[i]]@coords)}))
eastSubRange = range(eastNorthCoords[,1])
northSubRange = range(eastNorthCoords[,2])
# subset grid to the range we need
eastGrid = eastGrid[eastGrid >= eastSubRange[1]]
eastGrid = eastGrid[eastGrid <= eastSubRange[2]]
northGrid = northGrid[northGrid >= northSubRange[1]]
northGrid = northGrid[northGrid <= northSubRange[2]]
}
if(!is.null(subareaPolygonSubsetI)) {
# get range of the grid that we actually need
temp = subareaMapDat@polygons[[subareaPolygonSubsetI]]
allPolygons = temp@Polygons
eastNorthCoords = do.call("rbind", lapply(1:length(allPolygons), function(i) {mapProjection(allPolygons[[i]]@coords)}))
eastSubRange = range(eastNorthCoords[,1])
northSubRange = range(eastNorthCoords[,2])
# subset grid to the range we need
eastGrid = eastGrid[eastGrid >= eastSubRange[1]]
eastGrid = eastGrid[eastGrid <= eastSubRange[2]]
northGrid = northGrid[northGrid >= northSubRange[1]]
northGrid = northGrid[northGrid <= northSubRange[2]]
}
if(!is.null(customSubsetPolygons)) {
if(("SpatialPolygons" %in% class(customSubsetPolygons)) || ("SpatialPolygonsDataFrame" %in% class(customSubsetPolygons))) {
# calculate the east/north range of each polygon
getPolygonENrange = function(pol) {
# get range of the grid that we actually need
temp = pol
allPolygons = temp@Polygons
eastNorthCoords = do.call("rbind", lapply(1:length(allPolygons), function(i) {mapProjection(allPolygons[[i]]@coords)}))
eastSubRange = range(eastNorthCoords[,1])
northSubRange = range(eastNorthCoords[,2])
# subset grid to the range we need
rbind(eastSubRange,
northSubRange)
}
allRanges = lapply(getPolygonENrange, customSubsetPolygons@polygons)
# calculate the full range based on each individual polygon range
eastings = sapply(allRanges, function(x) {x[1,]})
northings = sapply(allRanges, function(x) {x[2,]})
eastSubRange = range(eastings)
northSubRange = range(northings)
# subset grid to the range we need
eastGrid = eastGrid[eastGrid >= eastSubRange[1]]
eastGrid = eastGrid[eastGrid <= eastSubRange[2]]
northGrid = northGrid[northGrid >= northSubRange[1]]
northGrid = northGrid[northGrid <= northSubRange[2]]
} else {
stop("customSubsetPolygons must be of class 'SpatialPolygons' or 'SpatialPolygonsDataFrame'")
}
}
utmGrid = matrix(fields::make.surface.grid(list(east=eastGrid, north=northGrid)), ncol=2)
# project coordinates into lat/lon
if(length(utmGrid) > 0) {
lonLatGrid = matrix(mapProjection(utmGrid, inverse=TRUE), ncol=2)
} else {
warning(paste0("no grid cell centroid are in the areas of interest. ",
"Integration grid will be composed entirely of custom ",
"integration points at the centroids of the areas of interest"))
lonLatGrid = utmGrid
}
# subset grid so it's in the domain
# inDomain = in.poly(lonLatGrid, domainPoly)
# determine version of PROJ
ver = terra::gdal(lib="proj")
PROJ6 <- as.numeric(substr(ver, 1, 1)) >= 6
# from lon/lat coords to easting/northing
# if(!PROJ6) {
# lonLatCoords = sp::SpatialPoints(lonLatGrid, proj4string=sp::CRS("+proj=longlat"))
# } else {
# lonLatCoords = sp::SpatialPoints(lonLatGrid, proj4string=sp::CRS(SRS_string="EPSG:4326"))
# }
lonLatCoords = sp::SpatialPoints(lonLatGrid, proj4string=domainMapDat@proj4string)
inDomain = sp::over(lonLatCoords, domainMapDat)
inDomain = !is.na(inDomain[,1])
utmGrid = matrix(utmGrid[inDomain,], ncol=2)
lonLatGrid = matrix(lonLatGrid[inDomain,], ncol=2)
# compute areas associated with locations
if(length(lonLatGrid) > 0) {
if(!is.null(subareaMapDat)) {
subareas = SUMMER::getAreaName(lonLatGrid, subareaMapDat, areaNameVar=subareaNameVar, mean.neighbor=mean.neighbor, delta=delta)$areaNames
} else {
subareas = NULL
}
if(!is.null(areaMapDat)) {
areas = SUMMER::getAreaName(lonLatGrid, areaMapDat, areaNameVar=areaNameVar, mean.neighbor=mean.neighbor, delta=delta)$areaNames
} else {
areas = NULL
}
} else {
subareas = character(0)
areas = character(0)
}
if(!is.null(areaPolygonSubsetI)) {
areaSubsetName = areaMapDat@data[areaPolygonSubsetI,areaNameVar]
insideArea = areas == areaSubsetName
# subset grid and area/subarea names so they're in the area of interest
utmGrid = matrix(utmGrid[insideArea,], ncol=2)
lonLatGrid = matrix(lonLatGrid[insideArea,], ncol=2)
areas = areas[insideArea]
subareas = subareas[insideArea]
}
if(!is.null(subareaPolygonSubsetI)) {
subareaSubsetName = subareaMapDat@data[subareaPolygonSubsetI,subareaNameVar]
insideSubarea = subareas == subareaSubsetName
# subset grid and area/subarea names so they're in the area of interest
utmGrid = matrix(utmGrid[insideSubarea,], ncol=2)
lonLatGrid = matrix(lonLatGrid[insideSubarea,], ncol=2)
areas = areas[insideSubarea]
subareas = subareas[insideSubarea]
}
if(!is.null(customSubsetPolygons)) {
if(!grepl("+proj=longlat", customSubsetPolygons@proj4string@projargs)) {
warning(paste0("customSubsetPolygons does not use longitude/latitude ",
"coordinate system? Using makePopIntegrationTab outside ",
"its use case, which may result in errors"))
}
# if(!PROJ6) {
# lonLatCoords = sp::SpatialPoints(lonLatGrid, proj4string=sp::CRS("+proj=longlat"))
# } else {
# lonLatCoords = sp::SpatialPoints(lonLatGrid, proj4string=sp::CRS(SRS_string="EPSG:4326"))
# }
lonLatCoords = sp::SpatialPoints(lonLatGrid, proj4string=customSubsetPolygons@proj4string)
insideCustomSubset = sp::over(lonLatCoords, customSubsetPolygons)
# subset grid and area/subarea names so they're in the area of interest
utmGrid = matrix(utmGrid[insideCustomSubset,], ncol=2)
lonLatGrid = matrix(lonLatGrid[insideCustomSubset,], ncol=2)
areas = areas[insideCustomSubset]
if(!is.null(subareaMapDat)) {
subareas = subareas[insideCustomSubset]
}
}
# determine what subareas we want our grid to contain
if(!is.null(subareaMapDat)) {
allSubareas = sort(unique(subareaMapDat@data[[subareaNameVar]]))
if(!is.null(areaPolygonSubsetI)) {
allSubareas = sort(unique(subareaMapDat@data[subareaMapDat@data[[areaNameVar]] == areaSubsetName,][[subareaNameVar]]))
}
if(!is.null(subareaPolygonSubsetI)) {
allSubareas = subareaSubsetName
}
if(!is.null(customSubsetPolygons)) {
allSubareas = sort(unique(subareas))
}
}
# filter out parts of poppsub that are irrelevant for the subareas of
# interest
if(!is.null(poppsub)) {
poppsub = poppsub[poppsub$subarea %in% allSubareas,]
}
if(!is.null(subareaMapDat)) {
# check to make sure every subarea has at least 2 pixels
subareasFactor = factor(subareas, levels=allSubareas)
if(length(lonLatGrid) > 0) {
out = aggregate(subareas, by=list(subarea=subareasFactor), FUN=length, drop=FALSE)
} else {
out = data.frame(subarea=sort(unique(subareaMapDat@data[[subareaNameVar]])),
x=NA)
}
noPixels = is.na(out$x)
onePixel = out$x == 1
onePixel[is.na(onePixel)] = FALSE
onePixelNames = out$subarea[onePixel]
badSubareas = noPixels | onePixel
badSubareaNames = as.character(out$subarea[badSubareas])
if(any(badSubareas)) {
badSubareaString = paste(badSubareaNames, collapse=", ")
warning(paste0("The following subareas have < 2 regular grid points ",
"at the given resolution: ", badSubareaString, ", so ",
"they will be given custom integration points"))
# get centroids of the subareas (or it's single pixel coordinates)
thisSpatialPolyList = sp::as.SpatialPolygons.PolygonsList(subareaMapDat@polygons)
centroidsLonLat = matrix(ncol=2, nrow=length(allSubareas))
for(i in 1:length(allSubareas)) {
thisSubarea = allSubareas[i]
if(thisSubarea %in% onePixelNames) {
thisCentroid = lonLatGrid[subareas == thisSubarea,]
} else {
subareaI = match(as.character(thisSubarea), as.character(subareaMapDat@data[[subareaNameVar]]))
thisCentroid = sp::coordinates(thisSpatialPolyList[subareaI])
}
centroidsLonLat[i,] = thisCentroid
}
# sort to match results of aggregate (alphabetical order)
# sortI = sort(as.character(subareaMapDat@data[[subareaNameVar]]), index.return=TRUE)$ix
sortI = sort(as.character(allSubareas), index.return=TRUE)$ix
centroidsLonLat = centroidsLonLat[sortI,]
# remove the one pixel for subareas with only one pixel
# (we will add it in again later, twice if stratified: both urban and rural)
onePixel = which(subareas %in% onePixelNames)
if(length(onePixel) > 0) {
lonLatGrid = lonLatGrid[-onePixel,]
utmGrid = utmGrid[-onePixel,]
subareas = subareas[-onePixel]
areas = areas[-onePixel]
}
# add centroids of only the bad subareas
centroidsLonLat = matrix(centroidsLonLat[badSubareas,], ncol=2)
# convert to east/north
centroidsEastNorth = projKenya(centroidsLonLat[,1], centroidsLonLat[,2])
# only add centroid in stratum if bad subareas have any population in the stratum.
# If poppsub not included and resolution not high enough to have multiple points
# in a subarea, treat it as entirely urban or rural
if(is.null(poppsub)) {
hasUrbanPop = rep(TRUE, sum(badSubareas))
hasRuralPop = rep(FALSE, sum(badSubareas))
} else {
hasUrbanPop = (poppsub$popUrb > 0)[badSubareas]
hasRuralPop = (poppsub$popRur > 0)[badSubareas]
}
# add centroids to the matrices of pixellated grid coordinates.
# Add them twice: once for urban, once for rural
lonLatGrid = rbind(lonLatGrid, centroidsLonLat[hasUrbanPop,], centroidsLonLat[hasRuralPop,])
utmGrid = rbind(utmGrid, centroidsEastNorth[hasUrbanPop,], centroidsEastNorth[hasRuralPop,])
# add associated consituencies, areas, provinces to respective vectors
# Normally, we could just caluclate what subarea/area the points are in.
# However, since centroids might not be in the associated subarea, we
# instead just assign the known subareas directly
# newSubareas = getAreaName(rbind(centroidsLonLat[hasUrbanPop,],
# centroidsLonLat[hasRuralPop,]),
# subareaMapDat, subareaNameVar, delta, mean.neighbor)$areaNames
# newAreas = getAreaName(rbind(centroidsLonLat[hasUrbanPop,],
# centroidsLonLat[hasRuralPop,]),
# areaMapDat, areaNameVar, delta, mean.neighbor)$areaNames
newSubareas = c(badSubareaNames[hasUrbanPop], badSubareaNames[hasRuralPop])
newAreas = subareaMapDat@data[[areaNameVar]][match(newSubareas, subareaMapDat@data[[subareaNameVar]])]
subareas = c(as.character(subareas), as.character(newSubareas))
areas = c(as.character(areas), as.character(newAreas))
}
} else {
badSubareas = FALSE
}
# get population density at those coordinates
if(!PROJ6) {
# extract the raster values for each chunk of points
interpPopVals <- tryCatch(
{
terra::extract(pop, lonLatGrid,method=extractMethod)
},
error=function(cond) {
message(cond)
stop(paste0("Error extracting raster values. In case of memory limitations, see ",
"terra::terraOptions()"))
# Choose a return value in case of error
return(NA)
}
)
} else {
# make sure CRS string of population density SpatRaster is "EPSG:4326", i.e.
# longitude + latitude
tempCRSstr = sp::CRS(SRS_string="EPSG:4326")
crsStr = attr(tempCRSstr, "comment")
if(is.null(crsStr)) {
crsStr <- tempCRSstr@projargs
if(is.na(crsStr)) {
crsStr <- ""
}
}
terra::crs(pop) = crsStr
# get population density values from the raster
interpPopVals <- tryCatch(
{
terra::extract(pop, lonLatGrid, method="bilinear")
},
error=function(cond) {
message(cond)
stop(paste0("Error extracting raster values. In case of memory limitations, see ",
"terra::aggregate, terra::resample, terra::projectRaster, and terra::terraOptions"))
# Choose a return value in case of error
return(NA)
}
)
}
interpPopVals = unlist(interpPopVals)
names(interpPopVals) = NULL
if(setNAsToZero) {
interpPopVals[is.na(interpPopVals)] = 0
}
# if requested, fix subareas with entirely zero population density
if(fixZeroPopDensitySubareas && !is.null(poppsub)) {
newPop = data.frame(list(lon=lonLatGrid[,1], lat=lonLatGrid[,2], pop=interpPopVals, area=areas, subarea=subareas, urban=NA))
poppsubCurrent = poppRegionFromPopMat(newPop, newPop$subarea)
zeroPopSubareas = poppsubCurrent$region[poppsubCurrent$popTotal == 0]
if(length(zeroPopSubareas) > 0) {
warning(paste0("The following subareas have entirely zero population density ",
"but nonzero total population, and their population will be filled ",
"in uniformly: ", paste(zeroPopSubareas, collapse=", ")))
# fill in population density uniformly in these subareas
for(i in 1:length(zeroPopSubareas)) {
thisSub = zeroPopSubareas[i]
thisSubI = newPop$subarea == thisSub
thisSubPop = poppsubCurrent$popTotal[poppsubCurrent$region == thisSub]
newPop$pop[thisSubI] = thisSubPop/sum(thisSubI)
}
}
interpPop = newPop$pop
}
if(any(badSubareas)) {
# make sure population densities in the bad subareas
# are slightly different so one will be classified as urban and
# the other as rural. They will be renormalized later based on poppsub,
# or, if poppsub doesn't exist, only new/custom "urban" points will be kept,
# so no densities are modified here
nUnits = length(interpPopVals)
nNewRural = sum(hasRuralPop)
if(nNewRural >= 1) {
interpPopVals[(nUnits-nNewRural + 1):nUnits] = interpPopVals[(nUnits-nNewRural + 1):nUnits] / 2
}
}
# determine which points are urban via population density thresholding
newPop = data.frame(list(lon=lonLatGrid[,1], lat=lonLatGrid[,2], pop=interpPopVals, area=areas, subarea=subareas, urban=NA))
if(thresholdUrbanBy == "area") {
threshes = SUMMER::setThresholdsByRegion(newPop, poppa, regionType="area")
popThreshes = sapply(1:nrow(newPop), function(i) {threshes$threshes[as.character(threshes$regions) == as.character(newPop$area[i])]})
urban = newPop$pop >= unlist(popThreshes)
newPop$urban = urban
} else {
tempSubarea = poppsub
tempPop = newPop
allSubareas = sort(unique(tempPop$subarea))
tempSubarea = tempSubarea[tempSubarea$subarea %in% allSubareas,]
threshes = SUMMER::setThresholdsByRegion(tempPop, poppr = tempSubarea, regionType="subarea")
popThreshes = sapply(1:nrow(newPop), function(i) {threshes$threshes[as.character(threshes$regions) == as.character(newPop$subarea[i])]})
urban = newPop$pop >= unlist(popThreshes)
newPop$urban = urban
}
newPop$east = utmGrid[,1]
newPop$north = utmGrid[,2]
pointAreaNames = as.character(newPop$area)
pointAreaNamesU = as.character(newPop$area)
pointAreaNamesU[!newPop$urban] = "DoNotUseThis"
pointAreaNamesR = as.character(newPop$area)
pointAreaNamesR[newPop$urban] = "DoNotUseThis"
if(!is.null(subareaMapDat)) {
pointSubareaNames = as.character(newPop$subarea)
pointSubareaNamesU = as.character(newPop$subarea)
pointSubareaNamesU[!newPop$urban] = "DoNotUseThis"
pointSubareaNamesR = as.character(newPop$subarea)
pointSubareaNamesR[newPop$urban] = "DoNotUseThis"
}
# if necessary, renormalize population values within subareas or areas
# crossed with urban/rural to be the correct value
if(!is.null(poppsub)) {
# In this case, areapsub and areapa are not necessary
if(stratifyByUrban) {
pointPopUrb = calibrateByRegion(newPop$pop, pointSubareaNamesU, poppsub$subarea, poppsub$popUrb)
pointPopRur = calibrateByRegion(newPop$pop, pointSubareaNamesR, poppsub$subarea, poppsub$popRur)
newPop$pop = (pointPopUrb + pointPopRur)
} else {
newPop$pop = calibrateByRegion(newPop$pop, pointSubareaNames, poppsub$subarea, poppsub$popTotal)
}
} else if(!is.null(poppa)) {
# i.e. we don't have poppsub, so now we need to be careful about the spatial areas
# (e.g. area in km^2) associated with each point
# determine how to use spatial areas (e.g. in km^2) of areas/subareas:
# Case 1: We don't have subareaMapDat, but do have areapa
# Case 2: We don't have subareaMapDat, and don't have areapa
# Case 3: We have subareaMapDat and areapsub
# Case 4: We have subareaMapDat but don't have areapsub (this is sketchy if
# there's any custom integration points)
# set the areas of each point to be equal to start out. This must be
# proportional to, but not necessarily equal to, the actual area
# associated with each point
pointAreas = rep(1, nrow(newPop))
if(is.null(subareaMapDat) && !is.null(areapa)) {
pointAreas = calibrateByRegion(pointAreas, pointAreaNames, areapa$area, areapa$spatialArea)
} else if(is.null(subareaMapDat) && is.null(areapa)) {
# do nothing, since pointAreas = rep(1, nrow(newPop)) is fine
} else if(!is.null(subareaMapDat) && !is.null(areapsub)) {
pointAreas = calibrateByRegion(pointAreas, pointSubareaNames, areapsub$subarea, areapsub$spatialArea)
} else if(!is.null(subareaMapDat) && is.null(areapsub)) {
if(any(badSubareas)) {
stop("poppsub and areapsub are NULL, but subareaMapDat is included")
}
# could continue here, since pointAreas = rep(1, nrow(newPop)) might not be disastrous,
# but it would be wrong
}
# use the population per stratum to renormalize population densities
if(stratifyByUrban) {
popUrb = calibrateByRegion(newPop$pop*pointAreas, pointAreaNamesU, poppa$area, poppa$popUrb)
popRur = calibrateByRegion(newPop$pop*pointAreas, pointAreaNamesR, poppa$area, poppa$popRur)
newPop$pop = popUrb + popRur
} else {
newPop$pop = calibrateByRegion(newPop$pop*pointAreas, pointAreaNames, poppa$area, poppa$popTotal)
}
} else {
stop("must either include poppsub, or poppa")
}
# if requested by user, return the population integration matrix as well
# as the associate population per area and subarea tables
if(returnPoppTables) {
if(!is.null(areaMapDat)) {
areas = getAreaName(cbind(newPop$lon, newPop$lat), shapefile=areaMapDat, areaNameVar=areaNameVar)$areaNames
newpoppa = poppRegionFromPopMat(newPop, areas)
names(newpoppa)[1] = "area"
newpoppa$pctTotal = 100 * newpoppa$popTotal/sum(newpoppa$popTotal)
newpoppa$pctUrb = 100 * newpoppa$popUrb/newpoppa$popTotal
} else {
if(is.null(poppa)) {
warning(paste0("returnPoppTables set to TRUE, but areaMapDat is NULL, ",
"so no area level population table will be calculated"))
}
newpoppa = NULL
}
if(!is.null(subareaMapDat)) {
subareas = getAreaName(cbind(newPop$lon, newPop$lat), shapefile=subareaMapDat, areaNameVar=subareaNameVar)$areaNames
newpoppsub = poppRegionFromPopMat(newPop, subareas)
names(newpoppsub)[1] = "subarea"
# get area associated with subareas
associatedAreas = sapply(newpoppsub$subarea, function(subareaName) {newPop$area[match(subareaName, newPop$subarea)]})
newpoppsub$area = associatedAreas
newpoppsub = newpoppsub[,c("subarea", "area", "popUrb", "popRur", "popTotal")]
# calculate percent urban and percent of total population
newpoppsub$pctUrb = 100 * newpoppsub$popUrb / newpoppsub$popTotal
newpoppsub$pctTotal = 100 * newpoppsub$popTotal / sum(newpoppsub$popTotal)
} else {
if(is.null(poppsub)) {
warning(paste0("returnPoppTables set to TRUE, but subareaMapDat is NULL, ",
"so no subarea level population table will be calculated"))
}
newpoppsub = NULL
}
return(list(popMat=newPop, poppa=newpoppa, poppsub=newpoppsub))
}
newPop
}
#' @describeIn makePopIntegrationTab Generate table of estimates of population
#' totals per subarea x urban/rural combination based on population density
#' raster at `kmres` resolution "grid", including custom integration points
#' for any subarea too small to include grid points at their centroids.
#' @export
getPoppsub = function(kmRes=1, pop, domainMapDat, eastLim, northLim, mapProjection,
poppa, areapa=NULL, areapsub, subareaMapDat, subareaNameVar="NAME_2",
stratifyByUrban=TRUE, areaMapDat=NULL, areaNameVar="NAME_1",
areaPolygonSubsetI=NULL, subareaPolygonSubsetI=NULL,
customSubsetPolygons=NULL,
mean.neighbor=50, delta=.1, setNAsToZero=TRUE, fixZeroPopDensitySubareas=FALSE) {
out = SUMMER::makePopIntegrationTab(kmRes=kmRes, pop=pop, domainMapDat=domainMapDat,
areapa=areapa, areapsub=areapsub,
eastLim=eastLim, northLim=northLim, mapProjection=mapProjection,
subareaMapDat=subareaMapDat, areaMapDat=areaMapDat,
areaNameVar=areaNameVar, subareaNameVar=subareaNameVar,
poppa=poppa, stratifyByUrban=stratifyByUrban,
areaPolygonSubsetI=areaPolygonSubsetI,
subareaPolygonSubsetI=subareaPolygonSubsetI,
customSubsetPolygons=customSubsetPolygons,
mean.neighbor=mean.neighbor, delta=delta,
setNAsToZero=setNAsToZero,
fixZeroPopDensitySubareas=fixZeroPopDensitySubareas,
returnPoppTables=TRUE)
out$poppsub
}
#' @describeIn makePopIntegrationTab Adjust population densities in grid based on a population frame.
#' @export
adjustPopMat = function(popMat, poppaTarget=NULL, adjustBy=c("area", "subarea"), stratifyByUrban=TRUE) {
adjustBy = match.arg(adjustBy)
# sort get population per stratum from poppaTarget
if(adjustBy == "area") {
areas=sort(unique(poppaTarget$area))
} else {
areas=sort(unique(poppaTarget$subarea))
}
if(stratifyByUrban) {
targetPopPerStratumUrban = poppaTarget$popUrb
targetPopPerStratumRural = poppaTarget$popRur
# generate 2 nArea x nPixels matrices for urban and rural strata integrating pixels with respect to population density to get area estimates
getAreaStratumIntegrationMatrix = function(getUrban=TRUE) {
areas = as.character(areas)
if(adjustBy == "area") {
mat = t(sapply(areas, function(area) {
popMat$area == area
}))
} else {
mat = t(sapply(areas, function(area) {
popMat$subarea == area
}))
}
mat = sweep(mat, 2, popMat$pop, "*")
sweep(mat, 2, popMat$urban == getUrban, "*")
}
urbanIntegrationMat = getAreaStratumIntegrationMatrix()
ruralIntegrationMat = getAreaStratumIntegrationMatrix(FALSE)
# calculate number of people per stratum by integrating the population density surface
urbanPopulations = rowSums(urbanIntegrationMat)
ruralPopulations = rowSums(ruralIntegrationMat)
# adjust each row of the integration matrices to get the correct expected number of children per stratum
urbanIntegrationMat = sweep(urbanIntegrationMat, 1, targetPopPerStratumUrban / urbanPopulations, "*")
urbanIntegrationMat[urbanPopulations == 0,] = 0
ruralIntegrationMat = sweep(ruralIntegrationMat, 1, targetPopPerStratumRural / ruralPopulations, "*")
ruralIntegrationMat[ruralPopulations == 0,] = 0
# the column sums of the matrices give the correct modified population densities
popMat$pop = colSums(urbanIntegrationMat) + colSums(ruralIntegrationMat)
} else {
targetPopPerArea = poppaTarget$popTotal
# generate 2 nArea x nPixels matrices for urban and rural strata integrating pixels with respect to population density to get area estimates
getAreaIntegrationMatrix = function() {
areas = as.character(areas)
if(adjustBy == "area") {
mat = t(sapply(areas, function(area) {
popMat$area == area
}))
} else {
mat = t(sapply(areas, function(area) {
popMat$subarea == area
}))
}
mat = sweep(mat, 2, popMat$pop, "*")
mat
}
integrationMat = getAreaIntegrationMatrix()
# calculate number of people per stratum by integrating the population density surface
totalPopulations = rowSums(integrationMat)
# adjust each row of the integration matrices to get the correct expected number of children per stratum
urbanIntegrationMat = sweep(integrationMat, 1, targetPopPerArea / totalPopulations, "*")
urbanIntegrationMat[totalPopulations == 0,] = 0
# the column sums of the matrices give the correct modified population densities
popMat$pop = colSums(integrationMat)
}
popMat
}
#' Calibrate the point level totals so their sum matches the regional totals
#'
#' Calibrate/normalize the point level totals so their sum matches the
#' regional totals. Technically, the totals can be at any level smaller
#' than the region level specified.
#'
#' @param pointTotals Vector of point level totals that will be calibrated/normalized
#' @param pointRegions Vector of regions associated with each point
#' @param regions Vector of region names
#' @param regionTotals Vector of desired region level totals associated with `regions`
#'
#' @return A vector of same length as pointTotals and pointRegions containing
#' the calibrated/normalized point totals that sum to the correct regional totals
#'
#' @author John Paige
#'
#' @examples
#' pointTotals = c(1, 1, 1, 2)
#' pointRegions = c("a", "a", "b", "b")
#' regionTotals = c(10, 20)
#' regions = c("a", "b")
#' calibrateByRegion(pointTotals, pointRegions, regions, regionTotals)
#'
#' @return Vector of updated point level totals, calibrated to match region totals
#'
#' @export
calibrateByRegion = function(pointTotals, pointRegions, regions, regionTotals) {
regions = as.character(regions)
pointRegions = as.character(pointRegions)
# generate nRegions x nPoints matrix that separates points totals to each region
getRegionIntegrationMatrix = function() {
mat = t(sapply(regions, function(regionName) {pointRegions == regionName}))
mat = sweep(mat, 2, pointTotals, "*")
mat
}
integrationMat = getRegionIntegrationMatrix()
# calculate total per region before calibration
tempRegionTotals = rowSums(integrationMat)
# adjust each row of the integration matrices to get the correct totals per region
integrationMat = sweep(integrationMat, 1, regionTotals / tempRegionTotals, "*")
integrationMat[tempRegionTotals == 0,] = 0
# check to make sure all zero population totals are in zero population regions. If
# any extra, spread the region population in the associated pointTotals
badRegions = (tempRegionTotals == 0) & (regionTotals != 0)
if(any(badRegions)) {
badRegionIs = which(badRegions)
for(i in 1:length(badRegionIs)) {
thisRegionI = badRegionIs[i]
thisRegion = pointRegions == regions[thisRegionI]
integrationMat[thisRegionI,thisRegion] = regionTotals[thisRegionI] * (1/sum(thisRegion))
}
}
# the column sums of the matrix give the correct calibrated regional totals
colSums(integrationMat)
}
#' Generate a population frame of a similar format to poppa argument of \code{\link{simPopCustom}} with a custom set of regions
#'
#' @param popMat Pixellated grid data frame with variables `area` and `pop`. Assumed to be stratified by urban/rural
#' @param regions character vector of length nPixels giving a custom set of regions for which to generate
#' a population frame using population density
#'
#' @details Urbanicity thresholds are set based on that region's percent population
#' urban. Intended as a helper function of \code{\link{getPoppsub}}, but
#' can also be used for custom sets of regions (i.e. more than just 2
#' areal levels: area and subarea).
#'
#' @author John Paige
#'
#' @seealso \code{\link{getPoppsub}}
#'
#' @examples
#' \dontrun{
#' data(kenyaPopulationData)
#'
#' #' # download Kenya GADM shapefiles from SUMMERdata github repository
#' githubURL <- "https://github.com/paigejo/SUMMERdata/blob/main/data/kenyaMaps.rda?raw=true"
#' tempDirectory = "~/"
#' mapsFilename = paste0(tempDirectory, "/kenyaMaps.rda")
#' if(!file.exists(mapsFilename)) {
#' download.file(githubURL,mapsFilename)
#' }
#'
#' # load it in
#' out = load(mapsFilename)
#' out
#' adm1@data$NAME_1 = as.character(adm1@data$NAME_1)
#' adm1@data$NAME_1[adm1@data$NAME_1 == "Trans Nzoia"] = "Trans-Nzoia"
#' adm1@data$NAME_1[adm1@data$NAME_1 == "Elgeyo-Marakwet"] = "Elgeyo Marakwet"
#' adm2@data$NAME_1 = as.character(adm2@data$NAME_1)
#' adm2@data$NAME_1[adm2@data$NAME_1 == "Trans Nzoia"] = "Trans-Nzoia"
#' adm2@data$NAME_1[adm2@data$NAME_1 == "Elgeyo-Marakwet"] = "Elgeyo Marakwet"
#'
#' # some Admin-2 areas have the same name
#' adm2@data$NAME_2 = as.character(adm2@data$NAME_2)
#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Bungoma") &
#' (adm2@data$NAME_2 == "Lugari")] = "Lugari, Bungoma"
#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Kakamega") &
#' (adm2@data$NAME_2 == "Lugari")] = "Lugari, Kakamega"
#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Meru") &
#' (adm2@data$NAME_2 == "Igembe South")] = "Igembe South, Meru"
#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Tharaka-Nithi") &
#' (adm2@data$NAME_2 == "Igembe South")] = "Igembe South, Tharaka-Nithi"
#'
#' # The spatial area of unknown 8 is so small, it causes problems unless
#' # its removed or unioned with another subarea. Union it with neighboring
#' # Kakeguria:
#' newadm2 = adm2
#' unknown8I = which(newadm2$NAME_2 == "unknown 8")
#' newadm2$NAME_2[newadm2$NAME_2 %in% c("unknown 8", "Kapenguria")] <- "Kapenguria + unknown 8"
#' admin2.IDs <- newadm2$NAME_2
#'
#' newadm2@data = cbind(newadm2@data, NAME_2OLD = newadm2@data$NAME_2)
#' newadm2@data$NAME_2OLD = newadm2@data$NAME_2
#' newadm2@data$NAME_2 = admin2.IDs
#' newadm2$NAME_2 = admin2.IDs
#' temp <- terra::aggregate(as(newadm2, "SpatVector"), by="NAME_2")
#'
#' library(sf)
#' temp <- sf::st_as_sf(temp)
#' temp <- sf::as_Spatial(temp)
#'
#' tempData = newadm2@data[-unknown8I,]
#' tempData = tempData[order(tempData$NAME_2),]
#' newadm2 <- SpatialPolygonsDataFrame(temp, tempData, match.ID = F)
#' adm2 = newadm2
#'
#' # download 2014 Kenya population density TIF file
#'
#' githubURL <- paste0("https://github.com/paigejo/SUMMERdata/blob/main/data/",
#' "Kenya2014Pop/worldpop_total_1y_2014_00_00.tif?raw=true")
#' popTIFFilename = paste0(tempDirectory, "/worldpop_total_1y_2014_00_00.tif")
#' if(!file.exists(popTIFFilename)) {
#' download.file(githubURL,popTIFFilename)
#' }
#'
#' # load it in
#' pop = terra::rast(popTIFFilename)
#'
#' eastLim = c(-110.6405, 832.4544)
#' northLim = c(-555.1739, 608.7130)
#'
#' require(fields)
#'
#' # Now generate a general population integration table at 5km resolution,
#' # based on subarea (Admin-2) x urban/rural population totals. This takes
#' # ~1 minute
#' system.time(popMatKenya <- makePopIntegrationTab(
#' kmRes=5, pop=pop, domainMapDat=adm0,
#' eastLim=eastLim, northLim=northLim, mapProjection=projKenya,
#' poppa = poppaKenya, poppsub=poppsubKenya,
#' areaMapDat = adm1, subareaMapDat = adm2,
#' areaNameVar = "NAME_1", subareaNameVar="NAME_2"))
#'
#' out = poppRegionFromPopMat(popMatKenya, popMatKenya$area)
#' out
#' poppaKenya
#'
#' out = poppRegionFromPopMat(popMatKenya, popMatKenya$subarea)
#' out
#' poppsubKenya
#'
#' popMatKenyaUnstratified = popMatKenya
#' popMatKenyaUnstratified$urban = NULL
#' out = poppRegionFromPopMat(popMatKenyaUnstratified, popMatKenyaUnstratified$area)
#' out
#' poppaKenya
#' }
#' @return A table of population totals by region
#'
#' @export
poppRegionFromPopMat = function(popMat, regions) {
stratifyByUrban = ("urban" %in% names(popMat)) && !all(is.na(popMat$urban))
if(stratifyByUrban) {
# in this case, generate urban, rural, and overall totals within each region
out = aggregate(popMat$pop, by=list(region=as.character(regions), urban=popMat$urban), FUN=sum, drop=FALSE)
regions = sort(unique(out$region))
poppr = data.frame(region=regions, popUrb=out[(length(regions) + 1):(2*length(regions)), 3],
popRur=out[1:length(regions), 3])
poppr$popUrb[is.na(poppr$popUrb)] = 0
poppr$popRur[is.na(poppr$popRur)] = 0
poppr$popTotal = poppr$popUrb + poppr$popRur
} else {
# in this case, only generate overall totals within each region
out = aggregate(popMat$pop, by=list(region=as.character(regions)), FUN=sum, drop=FALSE)
regions = sort(unique(out$region))
poppr = data.frame(region=regions, popTotal=out[1:length(regions), 2])
poppr$popTotal[is.na(poppr$popTotal)] = 0
}
poppr
}
#' Set thresholds of population density for urbanicity classifications
#' within each region of the given type
#'
#' @param popMat pixellated population density data frame with variables
#' regionType and `pop`
#' @param poppr A table with population totals by region of the given type
#' (e.g. poppa or poppsub from \code{\link{makePopIntegrationTab}})
#' @param regionType The variable name from poppr giving the region names.
#' Defaults to "area"
#'
#' @details Thresholds are set based on that region's percent population
#' urban. Intended as a helper function of \code{\link{makePopIntegrationTab}}.
#'
#' @author John Paige
#'
#' @seealso \code{\link{makePopIntegrationTab}}
#'
#' @examples
#' \dontrun{
#' data(kenyaPopulationData)
#'
#' #' # download Kenya GADM shapefiles from SUMMERdata github repository
#' githubURL <- "https://github.com/paigejo/SUMMERdata/blob/main/data/kenyaMaps.rda?raw=true"
#' tempDirectory = "~/"
#' mapsFilename = paste0(tempDirectory, "/kenyaMaps.rda")
#' if(!file.exists(mapsFilename)) {
#' download.file(githubURL,mapsFilename)
#' }
#'
#' # load it in
#' out = load(mapsFilename)
#' out
#' adm1@data$NAME_1 = as.character(adm1@data$NAME_1)
#' adm1@data$NAME_1[adm1@data$NAME_1 == "Trans Nzoia"] = "Trans-Nzoia"
#' adm1@data$NAME_1[adm1@data$NAME_1 == "Elgeyo-Marakwet"] = "Elgeyo Marakwet"
#' adm2@data$NAME_1 = as.character(adm2@data$NAME_1)
#' adm2@data$NAME_1[adm2@data$NAME_1 == "Trans Nzoia"] = "Trans-Nzoia"
#' adm2@data$NAME_1[adm2@data$NAME_1 == "Elgeyo-Marakwet"] = "Elgeyo Marakwet"
#'
#' # some Admin-2 areas have the same name
#' adm2@data$NAME_2 = as.character(adm2@data$NAME_2)
#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Bungoma") &
#' (adm2@data$NAME_2 == "Lugari")] = "Lugari, Bungoma"
#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Kakamega") &
#' (adm2@data$NAME_2 == "Lugari")] = "Lugari, Kakamega"
#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Meru") &
#' (adm2@data$NAME_2 == "Igembe South")] = "Igembe South, Meru"
#' adm2@data$NAME_2[(adm2@data$NAME_1 == "Tharaka-Nithi") &
#' (adm2@data$NAME_2 == "Igembe South")] = "Igembe South, Tharaka-Nithi"
#'
#' # The spatial area of unknown 8 is so small, it causes problems unless
#' # its removed or unioned with another subarea. Union it with neighboring
#' # Kakeguria:
#' newadm2 = adm2
#' unknown8I = which(newadm2$NAME_2 == "unknown 8")
#' newadm2$NAME_2[newadm2$NAME_2 %in% c("unknown 8", "Kapenguria")] <- "Kapenguria + unknown 8"
#' admin2.IDs <- newadm2$NAME_2
#'
#' newadm2@data = cbind(newadm2@data, NAME_2OLD = newadm2@data$NAME_2)
#' newadm2@data$NAME_2OLD = newadm2@data$NAME_2
#' newadm2@data$NAME_2 = admin2.IDs
#' newadm2$NAME_2 = admin2.IDs
#' temp <- terra::aggregate(as(newadm2, "SpatVector"), by="NAME_2")
#'
#' library(sf)
#' temp <- sf::st_as_sf(temp)
#' temp <- sf::as_Spatial(temp)
#'
#' tempData = newadm2@data[-unknown8I,]
#' tempData = tempData[order(tempData$NAME_2),]
#' newadm2 <- SpatialPolygonsDataFrame(temp, tempData, match.ID = F)
#' adm2 = newadm2
#'
#' # download 2014 Kenya population density TIF file
#'
#' githubURL <- paste0("https://github.com/paigejo/SUMMERdata/blob/main/data/",
#' "Kenya2014Pop/worldpop_total_1y_2014_00_00.tif?raw=true")
#' popTIFFilename = paste0(tempDirectory, "/worldpop_total_1y_2014_00_00.tif")
#' if(!file.exists(popTIFFilename)) {
#' download.file(githubURL,popTIFFilename)
#' }
#'
#' # load it in
#' pop = terra::rast(popTIFFilename)
#'
#' eastLim = c(-110.6405, 832.4544)
#' northLim = c(-555.1739, 608.7130)
#'
#' require(fields)
#'
#' # Now generate a general population integration table at 5km resolution,
#' # based on subarea (Admin-2) x urban/rural population totals. This takes
#' # ~1 minute
#' system.time(popMatKenya <- makePopIntegrationTab(
#' kmRes=5, pop=pop, domainMapDat=adm0,
#' eastLim=eastLim, northLim=northLim, mapProjection=projKenya,
#' poppa = poppaKenya, poppsub=poppsubKenya,
#' areaMapDat = adm1, subareaMapDat = adm2,
#' areaNameVar = "NAME_1", subareaNameVar="NAME_2"))
#'
#' out = setThresholdsByRegion(popMatKenya, poppaKenya)
#' out
#'
#' out = setThresholdsByRegion(popMatKenya, poppsubKenya, regionType="subarea")
#' out
#' }
#'
#' @return A list of region names and their urbanicity thresholds in population density
#'
#' @export
setThresholdsByRegion = function(popMat, poppr, regionType="area") {
getRegionThresh = function(regionName) {
# do the setup
thisRegion = as.character(popMat[[regionType]]) == regionName
thisPop = popMat$pop[thisRegion]
thisTot = sum(thisPop, na.rm=TRUE)
pctUrb = poppr$pctUrb[poppr[[regionType]] == regionName]/100
pctRural = 1 - pctUrb
# must round to avoid numerical inaccuracies
if(round(pctUrb, 10) == 1) {
return(-Inf)
} else if(round(pctUrb, 10) == 0) {
return(Inf)
}
# calculate threshold by integrating ecdf via sorted value cumulative sum
sortedPop = sort(thisPop)
cumsumPop = cumsum(sortedPop)
threshI = match(1, cumsumPop >= thisTot*pctRural)
if((threshI != 1) && (threshI != length(thisPop))) {
thresh = sortedPop[threshI]
} else {
# make sure not all pixels are urban or all are rural
if(threshI == 1) {
thresh = mean(c(sortedPop[1], sortedPop[2]), na.rm=TRUE)
} else {
thresh = mean(c(sortedPop[length(thisPop)], sortedPop[length(thisPop)-1]), na.rm=TRUE)
}
}
thresh
}
# compute threshold for each area
regions = poppr[[regionType]]
threshes = sapply(regions, getRegionThresh)
list(regions=regions, threshes=threshes)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.