In slu-openGIS/areal: Areal Weighted Interpolation

Introduction

This document contains replication code for the examples provided in our Journal of Open Source Software manuscript.

Dependencies

This notebook requires

# primary package
library(areal)

# tidyverse packages
library(dplyr)

# spatial packages
library(sf)
library(tidycensus)
library(tigris)

# other packages
library(gridExtra)
library(microbenchmark)
library(testthat)

Comparisons with `sf`

Produce Estimates

First, we'll create three spatially extensive estimates for comparison. Two will use the areal package, varying the type of weight applied to the estimate:

# areal package, spatially extensive using total
areal_exT <- aw_interpolate(ar_stl_wards, tid = WARD, source = ar_stl_race, sid = GEOID,
               weight = "total", output = "tibble", extensive = "TOTAL_E")

# areal package, spatially extensive using sum
areal_exS <- aw_interpolate(ar_stl_wards, tid = WARD, source = ar_stl_race, sid = GEOID,
                            weight = "sum", output = "tibble", extensive = "TOTAL_E")

Next, we'll replicate the process using sf:

# sf package, spatially extensive
sf_ex <- st_interpolate_aw(ar_stl_race["TOTAL_E"], ar_stl_wards, extensive = TRUE)

We'll also produce a spatially intensive estimate using areal:

# areal package, spatially intensive
areal_in <- aw_interpolate(ar_stl_wards, tid = WARD, source = ar_stl_asthma, sid = GEOID,
                            weight = "sum", output = "tibble", intensive = "ASTHMA")

And finally, we'll replicate the spatially intensive estimate using sf:

# sf package, spatially intensive
sf_in <- st_interpolate_aw(ar_stl_asthma["ASTHMA"], ar_stl_wards, extensive = FALSE)

Compile Results

First, we'll compile the extensive results:

# areal, extensive sum
areal_exS <- areal_exS %>%
  select(WARD, TOTAL_E) %>%
  rename(areal_exS = TOTAL_E)

# areal, extensive total
areal_exT <- areal_exT %>%
  select(WARD, TOTAL_E) %>%
  rename(areal_exT = TOTAL_E)

# sf, extensive total
sf_ex <- sf_ex %>%
  rename(sf_ex = TOTAL_E)
st_geometry(sf_ex) <- NULL

# combine
extensive <- left_join(sf_ex, areal_exT, by = c("Group.1" = "WARD")) %>%
  left_join(., areal_exS, by = c("Group.1" = "WARD")) %>%
  mutate(delta = areal_exT-areal_exS) %>%
  rename(Ward = Group.1) %>%
  as_tibble()

We'll make a similar compliation of the intensive results:

# areal, intensive
areal_in <- areal_in %>%
  select(WARD, ASTHMA) %>%
  rename(areal_in = ASTHMA)

# sf, intensive
sf_in <- sf_in %>%
  rename(sf_in = ASTHMA)
st_geometry(sf_in) <- NULL

# combine
intensive <- left_join(sf_in, areal_in, by = c("Group.1" = "WARD")) %>%
  rename(Ward = Group.1) %>%
  as_tibble()

Print Tables

The following code chunk produces two tables for the manuscript:

# produce rounded extensive estimates
extensiveSub <- extensive %>%
  filter(Ward >= 1 & Ward <= 10) %>%
  mutate(
    sf_ex = round(sf_ex, digits = 3),
    areal_exT = round(areal_exT, digits = 3),
    areal_exS = round(areal_exS, digits = 3),
    delta = round(delta, digits = 3)
  ) %>%
  rename(
    `sf` = sf_ex,
    `areal, total weight` = areal_exT,
    `areal, sum weight` = areal_exS
  )

# print extensive table
png(filename = "paper/extensiveTable.png", width = 480, height = 300, bg = "white", type = "cairo-png")
grid.arrange(tableGrob(extensiveSub, rows = NULL), top = "Comparison of sf and areal Output\nSpatially Extensive Interpolation")
dev.off()

# produce rounded intensive estimates
intensiveSub <- intensive %>%
  filter(Ward >= 1 & Ward <= 10) %>%
  mutate(
    sf_in = round(sf_in, digits = 3),
    areal_in = round(areal_in, digits = 3)
  ) %>%
  rename(
    `sf` = sf_in,
    `areal` = areal_in
  )

# print intensive table
png(filename = "paper/intensiveTable.png", width = 480, height = 300, bg = "white", type = "cairo-png")
grid.arrange(tableGrob(intensiveSub, rows = NULL), top = "Comparison of sf and areal Output\nSpatially Intensive Interpolation")
dev.off()

Compare Results

We can verify that the areal workflow with weight = "total" matches the sf extensive output:

expect_equal(extensive$sf_ex, extensive$areal_exT)

We can do the same for the intensive interpolations:

expect_equal(intensive$sf_in, intensive$areal_in)

Benchmark

Next, we'll benchmark the extensive estimation times:

# compare spatially extensive interpolations
microbenchmark(
  aw_interpolate(ar_stl_wards, tid = WARD, source = ar_stl_race, sid = GEOID,
                 weight = "total", output = "tibble", extensive = "TOTAL_E"),
  suppressWarnings(st_interpolate_aw(ar_stl_race["TOTAL_E"], ar_stl_wards, extensive = TRUE))
)

We'll repeat the process for the intensive estimations:

# compare spatially intensive interpolations
microbenchmark(
  aw_interpolate(ar_stl_wards, tid = WARD, source = ar_stl_asthma, sid = GEOID,
                 weight = "sum", output = "tibble", intensive = "ASTHMA"),
  suppressWarnings(st_interpolate_aw(ar_stl_asthma["ASTHMA"], ar_stl_wards, extensive = FALSE))
)

Geometry Collections

Finally, we'll provide an example of a more intensive estimation process that also triggers the geometry collection workflow, which will add to the estimation time. We need to download several data sets using tigris and tidycensus:

# county populations
moPop <- get_acs(geography = "county", variables = "B01003_001", output = "wide", state = 29, geometry = TRUE) %>%
  st_transform(crs = 26915) %>%
  select(GEOID, B01003_001E) %>%
  rename(totalPop = B01003_001E)

# block group geometry
moBlockGroups <- block_groups(state = 29, class = "sf") %>%
  st_transform(crs = 26915) %>%
  select(GEOID)

Here are the sample sizes for both data sets:

nrow(moPop)
nrow(moBlockGroups)

Here is the benchmark for the estimates produced with these data:

microbenchmark(
  aw_interpolate(moBlockGroups, tid = GEOID, source = moPop, sid = GEOID,
                 weight = "sum", output = "tibble", intensive = "totalPop")
)