This document contains replication code for the examples provided in our Journal of Open Source Software manuscript.
This notebook requires
# primary package library(areal) # tidyverse packages library(dplyr) # spatial packages library(sf) library(tidycensus) library(tigris) # other packages library(gridExtra) library(microbenchmark) library(testthat)
sf
First, we'll create three spatially extensive estimates for comparison. Two will use the areal
package, varying the type of weight applied to the estimate:
# areal package, spatially extensive using total areal_exT <- aw_interpolate(ar_stl_wards, tid = WARD, source = ar_stl_race, sid = GEOID, weight = "total", output = "tibble", extensive = "TOTAL_E") # areal package, spatially extensive using sum areal_exS <- aw_interpolate(ar_stl_wards, tid = WARD, source = ar_stl_race, sid = GEOID, weight = "sum", output = "tibble", extensive = "TOTAL_E")
Next, we'll replicate the process using sf
:
# sf package, spatially extensive sf_ex <- st_interpolate_aw(ar_stl_race["TOTAL_E"], ar_stl_wards, extensive = TRUE)
We'll also produce a spatially intensive estimate using areal
:
# areal package, spatially intensive areal_in <- aw_interpolate(ar_stl_wards, tid = WARD, source = ar_stl_asthma, sid = GEOID, weight = "sum", output = "tibble", intensive = "ASTHMA")
And finally, we'll replicate the spatially intensive estimate using sf
:
# sf package, spatially intensive sf_in <- st_interpolate_aw(ar_stl_asthma["ASTHMA"], ar_stl_wards, extensive = FALSE)
First, we'll compile the extensive results:
# areal, extensive sum areal_exS <- areal_exS %>% select(WARD, TOTAL_E) %>% rename(areal_exS = TOTAL_E) # areal, extensive total areal_exT <- areal_exT %>% select(WARD, TOTAL_E) %>% rename(areal_exT = TOTAL_E) # sf, extensive total sf_ex <- sf_ex %>% rename(sf_ex = TOTAL_E) st_geometry(sf_ex) <- NULL # combine extensive <- left_join(sf_ex, areal_exT, by = c("Group.1" = "WARD")) %>% left_join(., areal_exS, by = c("Group.1" = "WARD")) %>% mutate(delta = areal_exT-areal_exS) %>% rename(Ward = Group.1) %>% as_tibble()
We'll make a similar compliation of the intensive results:
# areal, intensive areal_in <- areal_in %>% select(WARD, ASTHMA) %>% rename(areal_in = ASTHMA) # sf, intensive sf_in <- sf_in %>% rename(sf_in = ASTHMA) st_geometry(sf_in) <- NULL # combine intensive <- left_join(sf_in, areal_in, by = c("Group.1" = "WARD")) %>% rename(Ward = Group.1) %>% as_tibble()
The following code chunk produces two tables for the manuscript:
# produce rounded extensive estimates extensiveSub <- extensive %>% filter(Ward >= 1 & Ward <= 10) %>% mutate( sf_ex = round(sf_ex, digits = 3), areal_exT = round(areal_exT, digits = 3), areal_exS = round(areal_exS, digits = 3), delta = round(delta, digits = 3) ) %>% rename( `sf` = sf_ex, `areal, total weight` = areal_exT, `areal, sum weight` = areal_exS ) # print extensive table png(filename = "paper/extensiveTable.png", width = 480, height = 300, bg = "white", type = "cairo-png") grid.arrange(tableGrob(extensiveSub, rows = NULL), top = "Comparison of sf and areal Output\nSpatially Extensive Interpolation") dev.off() # produce rounded intensive estimates intensiveSub <- intensive %>% filter(Ward >= 1 & Ward <= 10) %>% mutate( sf_in = round(sf_in, digits = 3), areal_in = round(areal_in, digits = 3) ) %>% rename( `sf` = sf_in, `areal` = areal_in ) # print intensive table png(filename = "paper/intensiveTable.png", width = 480, height = 300, bg = "white", type = "cairo-png") grid.arrange(tableGrob(intensiveSub, rows = NULL), top = "Comparison of sf and areal Output\nSpatially Intensive Interpolation") dev.off()
We can verify that the areal
workflow with weight = "total"
matches the sf
extensive output:
expect_equal(extensive$sf_ex, extensive$areal_exT)
We can do the same for the intensive interpolations:
expect_equal(intensive$sf_in, intensive$areal_in)
Next, we'll benchmark the extensive estimation times:
# compare spatially extensive interpolations microbenchmark( aw_interpolate(ar_stl_wards, tid = WARD, source = ar_stl_race, sid = GEOID, weight = "total", output = "tibble", extensive = "TOTAL_E"), suppressWarnings(st_interpolate_aw(ar_stl_race["TOTAL_E"], ar_stl_wards, extensive = TRUE)) )
We'll repeat the process for the intensive estimations:
# compare spatially intensive interpolations microbenchmark( aw_interpolate(ar_stl_wards, tid = WARD, source = ar_stl_asthma, sid = GEOID, weight = "sum", output = "tibble", intensive = "ASTHMA"), suppressWarnings(st_interpolate_aw(ar_stl_asthma["ASTHMA"], ar_stl_wards, extensive = FALSE)) )
Finally, we'll provide an example of a more intensive estimation process that also triggers the geometry collection workflow, which will add to the estimation time. We need to download several data sets using tigris
and tidycensus
:
# county populations moPop <- get_acs(geography = "county", variables = "B01003_001", output = "wide", state = 29, geometry = TRUE) %>% st_transform(crs = 26915) %>% select(GEOID, B01003_001E) %>% rename(totalPop = B01003_001E) # block group geometry moBlockGroups <- block_groups(state = 29, class = "sf") %>% st_transform(crs = 26915) %>% select(GEOID)
Here are the sample sizes for both data sets:
nrow(moPop) nrow(moBlockGroups)
Here is the benchmark for the estimates produced with these data:
microbenchmark( aw_interpolate(moBlockGroups, tid = GEOID, source = moPop, sid = GEOID, weight = "sum", output = "tibble", intensive = "totalPop") )
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.