Introduction

This document contains replication code for the examples provided in our Journal of Open Source Software manuscript.

Dependencies

This notebook requires

# primary package
library(areal)

# tidyverse packages
library(dplyr)

# spatial packages
library(sf)
library(tidycensus)
library(tigris)

# other packages
library(gridExtra)
library(microbenchmark)
library(testthat)

Comparisons with sf

Produce Estimates

First, we'll create three spatially extensive estimates for comparison. Two will use the areal package, varying the type of weight applied to the estimate:

# areal package, spatially extensive using total
areal_exT <- aw_interpolate(ar_stl_wards, tid = WARD, source = ar_stl_race, sid = GEOID,
               weight = "total", output = "tibble", extensive = "TOTAL_E")

# areal package, spatially extensive using sum
areal_exS <- aw_interpolate(ar_stl_wards, tid = WARD, source = ar_stl_race, sid = GEOID,
                            weight = "sum", output = "tibble", extensive = "TOTAL_E")

Next, we'll replicate the process using sf:

# sf package, spatially extensive
sf_ex <- st_interpolate_aw(ar_stl_race["TOTAL_E"], ar_stl_wards, extensive = TRUE)

We'll also produce a spatially intensive estimate using areal:

# areal package, spatially intensive
areal_in <- aw_interpolate(ar_stl_wards, tid = WARD, source = ar_stl_asthma, sid = GEOID,
                            weight = "sum", output = "tibble", intensive = "ASTHMA")

And finally, we'll replicate the spatially intensive estimate using sf:

# sf package, spatially intensive
sf_in <- st_interpolate_aw(ar_stl_asthma["ASTHMA"], ar_stl_wards, extensive = FALSE)

Compile Results

First, we'll compile the extensive results:

# areal, extensive sum
areal_exS <- areal_exS %>%
  select(WARD, TOTAL_E) %>%
  rename(areal_exS = TOTAL_E)

# areal, extensive total
areal_exT <- areal_exT %>%
  select(WARD, TOTAL_E) %>%
  rename(areal_exT = TOTAL_E)

# sf, extensive total
sf_ex <- sf_ex %>%
  rename(sf_ex = TOTAL_E)
st_geometry(sf_ex) <- NULL

# combine
extensive <- left_join(sf_ex, areal_exT, by = c("Group.1" = "WARD")) %>%
  left_join(., areal_exS, by = c("Group.1" = "WARD")) %>%
  mutate(delta = areal_exT-areal_exS) %>%
  rename(Ward = Group.1) %>%
  as_tibble()

We'll make a similar compliation of the intensive results:

# areal, intensive
areal_in <- areal_in %>%
  select(WARD, ASTHMA) %>%
  rename(areal_in = ASTHMA)

# sf, intensive
sf_in <- sf_in %>%
  rename(sf_in = ASTHMA)
st_geometry(sf_in) <- NULL

# combine
intensive <- left_join(sf_in, areal_in, by = c("Group.1" = "WARD")) %>%
  rename(Ward = Group.1) %>%
  as_tibble()

Print Tables

The following code chunk produces two tables for the manuscript:

# produce rounded extensive estimates
extensiveSub <- extensive %>%
  filter(Ward >= 1 & Ward <= 10) %>%
  mutate(
    sf_ex = round(sf_ex, digits = 3),
    areal_exT = round(areal_exT, digits = 3),
    areal_exS = round(areal_exS, digits = 3),
    delta = round(delta, digits = 3)
  ) %>%
  rename(
    `sf` = sf_ex,
    `areal, total weight` = areal_exT,
    `areal, sum weight` = areal_exS
  )

# print extensive table
png(filename = "paper/extensiveTable.png", width = 480, height = 300, bg = "white", type = "cairo-png")
grid.arrange(tableGrob(extensiveSub, rows = NULL), top = "Comparison of sf and areal Output\nSpatially Extensive Interpolation")
dev.off()

# produce rounded intensive estimates
intensiveSub <- intensive %>%
  filter(Ward >= 1 & Ward <= 10) %>%
  mutate(
    sf_in = round(sf_in, digits = 3),
    areal_in = round(areal_in, digits = 3)
  ) %>%
  rename(
    `sf` = sf_in,
    `areal` = areal_in
  )

# print intensive table
png(filename = "paper/intensiveTable.png", width = 480, height = 300, bg = "white", type = "cairo-png")
grid.arrange(tableGrob(intensiveSub, rows = NULL), top = "Comparison of sf and areal Output\nSpatially Intensive Interpolation")
dev.off()

Compare Results

We can verify that the areal workflow with weight = "total" matches the sf extensive output:

expect_equal(extensive$sf_ex, extensive$areal_exT)

We can do the same for the intensive interpolations:

expect_equal(intensive$sf_in, intensive$areal_in)

Benchmark

Next, we'll benchmark the extensive estimation times:

# compare spatially extensive interpolations
microbenchmark(
  aw_interpolate(ar_stl_wards, tid = WARD, source = ar_stl_race, sid = GEOID,
                 weight = "total", output = "tibble", extensive = "TOTAL_E"),
  suppressWarnings(st_interpolate_aw(ar_stl_race["TOTAL_E"], ar_stl_wards, extensive = TRUE))
)

We'll repeat the process for the intensive estimations:

# compare spatially intensive interpolations
microbenchmark(
  aw_interpolate(ar_stl_wards, tid = WARD, source = ar_stl_asthma, sid = GEOID,
                 weight = "sum", output = "tibble", intensive = "ASTHMA"),
  suppressWarnings(st_interpolate_aw(ar_stl_asthma["ASTHMA"], ar_stl_wards, extensive = FALSE))
)

Geometry Collections

Finally, we'll provide an example of a more intensive estimation process that also triggers the geometry collection workflow, which will add to the estimation time. We need to download several data sets using tigris and tidycensus:

# county populations
moPop <- get_acs(geography = "county", variables = "B01003_001", output = "wide", state = 29, geometry = TRUE) %>%
  st_transform(crs = 26915) %>%
  select(GEOID, B01003_001E) %>%
  rename(totalPop = B01003_001E)

# block group geometry
moBlockGroups <- block_groups(state = 29, class = "sf") %>%
  st_transform(crs = 26915) %>%
  select(GEOID)

Here are the sample sizes for both data sets:

nrow(moPop)
nrow(moBlockGroups)

Here is the benchmark for the estimates produced with these data:

microbenchmark(
  aw_interpolate(moBlockGroups, tid = GEOID, source = moPop, sid = GEOID,
                 weight = "sum", output = "tibble", intensive = "totalPop")
)


slu-openGIS/areal documentation built on June 10, 2022, 11:29 a.m.