#' Reapportion data from one geography to another
#'
#' This function allows to reapportion data from one geography to another, for example in the context of working with different administrative units.
#' @author Joel Gombin
#' @note Inspiration from http://stackoverflow.com/a/17703903 and http://rstudio-pubs-static.s3.amazonaws.com/6577_3b66f8d8f4984fb2807e91224defa854.html. All mistakes are mine, obviously.
#' @param old_geom a `SpatialPolygonsDataFrame` representing the initial geometry.
#' @param new_geom a `SpatialPolygonsDataFrame` representing the geometry you want to reapportion data to.
#' @param data a `data.frame` containing the data to reapportion, and an ID allowing to match it to the `old_geom`.
#' @param old_ID a string, the name of the ID variable in the `old_geom`.
#' @param new_ID a string, the name of the ID variable in the `new_geom`.
#' @param data_ID a string, the name of the ID variable in the `data`.
#' @param variables a character vector, representing the names of the variables in the `data` set to reapportion. By default, all data variables except for the ID.
#' @param mode either `"count"` or `"proportion"`. `"count"` is for absolute values, `"proportion"` is for, well, proportions (expressed between 0 and 1). If `"proportion"`, you need to provide a weights variable.
#' @param weights In case the variables are proportions, the name of the variable containing weights (i.e. the total number of observations per unit in the `old_geom`).
#' @param weight_matrix a SpatialPointsDataFrame indicating where are the observations (inhabitants, voters, etc.) (optional).
#' @param weight_matrix_var the name of the variable in \code{weight_matrix} containing the weights.
#' @export
#' @import sp maptools rgeos purrr
#'
spReapportion <- function(old_geom, new_geom, data, old_ID, new_ID, data_ID, variables = names(data)[-which(names(data) %in% data_ID)], mode = "count", weights = NULL, weight_matrix = NULL, weight_matrix_var = NULL) {
if (!(old_ID %in% names(old_geom@data))) stop(paste(old_ID, "is not a variable from", deparse(substitute(old_geom)),"!", sep=" "))
if (!(new_ID %in% names(new_geom@data))) stop(paste(new_ID, "is not a variable from", deparse(substitute(new_geom)),"!", sep=" "))
if (sum(!(variables %in% names(data))) > 0) stop(paste(variables[!(variables %in% names(data))], "is not a variable from", deparse(substitute(data)),"!",sep=" "))
if (mode %in% "proportion" & is.null(weights)) stop("When mode = 'proportion', you must provide weights.")
if (mode %in% "proportion")
if (!(weights %in% names(data)))
stop(paste0(weights, " is not a variable from ", deparse(substitute(data)), "!"))
if (!is.null(weight_matrix) & !inherits(weight_matrix, "SpatialPointsDataFrame", which = FALSE))
stop("The weight_matrix argument is not a SpatialPointsDataFrame!")
# if several polygons with the same ID, merge them
if (length(old_geom@data[,old_ID]) > length(unique(old_geom@data[,old_ID]))) {
df <- old_geom@data[match(unique(old_geom@data[,old_ID]),old_geom@data[,old_ID]),]
old_geom <- unionSpatialPolygons(old_geom, old_geom@data[,old_ID])
old_geom <- SpatialPolygonsDataFrame(old_geom, df, old_ID)
}
if (length(new_geom@data[,new_ID]) > length(unique(new_geom@data[,new_ID]))) {
df <- new_geom@data[match(unique(new_geom@data[,new_ID]),new_geom@data[,new_ID]),]
new_geom <- unionSpatialPolygons(new_geom, new_geom@data[,new_ID])
new_geom <- SpatialPolygonsDataFrame(new_geom, df, new_ID)
}
# make sure SPDF IDs are OK
old_geom <- spChFIDs(old_geom, as.character(old_geom@data[,old_ID]))
new_geom <- spChFIDs(new_geom, as.character(new_geom@data[,new_ID]))
names(data)[names(data) %in% data_ID] <- "old_ID"
# make sure both SPDFs have the same projection
if (!identicalCRS(old_geom, new_geom)) {
message("Reprojecting new_geom to the same projection as old_geom...")
new_geom <- spTransform(new_geom, old_geom@proj4string)
}
# use weight matrix if provided
if (!is.null(weight_matrix)) {
if (old_ID %in% names(weight_matrix@data)) {
weight_matrix@data <- weight_matrix@data[, -match(old_ID, names(weight_matrix@data))]
}
weight_matrix <- weight_matrix[colSums(gWithin(weight_matrix, old_geom, byid = TRUE)) > 0,]
weight_matrix_total <- sum(weight_matrix@data[, weight_matrix_var], na.rm = TRUE)
weight_matrix@data <- cbind(weight_matrix@data, over(weight_matrix, old_geom))
}
# start by trimming out areas that don't intersect
old_geom_sub <- rgeos::gIntersects(old_geom, new_geom, byid = TRUE) # test for areas that don't intersect
old_geom_sub2 <- apply(old_geom_sub, 2, function(x) {sum(x)}) # test across all polygons in the SpatialPolygon whether it intersects or not
old_geom_sub3 <- old_geom[old_geom_sub2 > 0,] # keep only the ones that actually intersect
# perform the intersection. This takes a while since it also calculates area and other things, which is why we trimmed out irrelevant areas first
int <- rgeos::gIntersection(old_geom_sub3, new_geom, byid = TRUE, drop_lower_td = TRUE) # intersect the polygon and your administrative boundaries
intdf <- data.frame(intname = names(int)) # make a data frame for the intersected SpatialPolygon, using names from the output list from int
intdf$intname <- as.character(intdf$intname) # convert the name to character
splitid <- strsplit(intdf$intname, " ", fixed=TRUE) # split the names
splitid <- do.call("rbind", splitid) # rbind those back together
colnames(splitid) <- c("old_ID", "new_ID") # now you have the administrative area ID and the polygonID as separate variables in a dataframe that correspond to the int SpatialPolygon.
intdf <- data.frame(intdf, splitid) # make that into a dataframe
intdf$old_ID <- as.character(intdf$old_ID) # convert to character
intdf$new_ID <- as.character(intdf$new_ID) # convert to character.
# now you have a dataframe corresponding to the intersected SpatialPolygon object
if (!is.null(weight_matrix)) {
# check in which intersected polygon each point stands
weight_matrix_int <- over(weight_matrix, int)
# use points weights to reapportion
intdf$polyarea <- map_int(1:length(int), ~ sum(weight_matrix@data[weight_matrix_int %in% .x, weight_matrix_var]))
data$departarea <- map_int(old_geom@data[, old_ID], ~ sum(weight_matrix@data[weight_matrix@data[, old_ID] %in% .x, weight_matrix_var]))[match(data[["old_ID"]], old_geom@data[[old_ID]])]
} else {
# if we don't have weights we just use areas
intdf$polyarea <- gArea(int, byid = TRUE) # get area from the polygon SP object and put it in the df
data$departarea <- gArea(old_geom, byid = TRUE)[match(data$old_ID, old_geom@data[, old_ID])]
}
intdf2 <- dplyr::left_join(intdf, data, by = c("old_ID" = "old_ID")) # join together the two dataframes by the administrative ID
if (mode %in% "count") {
intdf2[,paste(variables,"inpoly",sep="")] <- plyr::numcolwise(function(x) {x * (intdf2$polyarea / intdf2$departarea)})(as.data.frame(intdf2[,variables]))
intpop <- plyr::ddply(intdf2, "new_ID", function(x) {plyr::numcolwise(sum, na.rm = TRUE)(as.data.frame(x[,paste(variables,"inpoly",sep="")]))}) # sum population lying within each polygon
names(intpop)[-1] <- variables
} else {
intdf2[,paste(variables,"inpoly",sep="")] <- plyr::numcolwise(function(x) {x * (intdf2$polyarea / intdf2$departarea)})(as.data.frame(intdf2[,variables]) * intdf2[,weights])
intdf2$weights <- intdf2[,weights] * (intdf2$polyarea / intdf2$departarea)
intpop <- plyr::ddply(intdf2, "new_ID", function(x) {plyr::numcolwise(sum, na.rm = TRUE)(as.data.frame(x[,c(paste(variables,"inpoly",sep=""), "weights")]))}) # sum population lying within each polygon
intpop[,paste0(variables, "inpoly")] <- intpop[,paste0(variables, "inpoly")] / intpop$weights
names(intpop)[-c(1, length(names(intpop)))] <- variables
names(intpop)[length(names(intpop))] <- weights
}
names(intpop)[1] <- new_ID
return(intpop) # done!
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.