knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

This vignette presents an example workflow for using the anthromes R package for analyzing long-term human populations and land use.

First load the anthromes package.

# analysis packages
library(anthromes)
library(stars)
library(dplyr)
library(ggplot2)
# visualization packages
library(gganimate)
library(patchwork)

Although the package is able to work with global data, we'll focus just on a subset of the eastern Mediterranean for simplicity.

bbox <- st_bbox(c(xmin = 29, xmax = 39, ymin = 30, ymax = 40), crs = 4326)

Download HYDE 3.2 data

Read in the anthromes data as a stars object. stars in an R package for working with space-time cubes like the HYDE 3.2 data, which are spatial rasters representing multiple time steps.

hyde_med

A stars object prints two pieces of information, the attribute data (which is essentially a tibble that can be manipulated via typical tidyverse functions) and dimension information (which records the spatial and temporal dimensions of the object).

hyde_med

Stars objects are useful for many reasons. One is that they can take units.

hyde_med %>% dplyr::mutate(crops = units::set_units(crops, km2))

Also load in the HYDE 3.2 supporting grids: land area, potential vegetation, and potential villages.

inputs_med

You can easily plot these objects in ggplot using geom_stars().

ggplot() +
  geom_stars(data = hyde_med) +
  facet_wrap(~time) +
  coord_quickmap() +
  scale_fill_viridis_c(na.value = NA, name = expression(km^2)) +
  theme_bw() +
  labs(title = 'HYDE 3.2 cropland', x = 'Latitude', y = 'Longitude')

You can easily animate these data using gganimate.

ggplot() +
  geom_stars(data = filter(hyde_med)) +
  coord_quickmap() +
  scale_fill_viridis_c(na.value = NA, name = expression(km^2)) +
  theme_bw() +
  # use transition_states() from gganimate instead of facet_wrap 
  gganimate::transition_states(time) +
  labs(title = 'HYDE 3.2 land use', subtitle = 'Cropland at {closest_state}', x = 'Latitude', y = 'Longitude')

By default, geom_stars will only plot the first attribute. If you'd like to plot multiple attributes at a time, the easiest way is to convert the attributes to an extra dimension.

ggplot() +
  geom_stars(data = merge(hyde_med[c(1:2,5),,,])) +
  facet_grid(attributes~time) +
  coord_quickmap() +
  scale_fill_viridis_c(na.value = NA, name = expression(km^2)) +
  theme_bw() +
  labs(title = 'HYDE 3.2 land use', x = 'Latitude', y = 'Longitude')

Anthromes classification

anthromes <- anthrome_classify(hyde_med, inputs_med)

ggplot() +
  geom_stars(data = anthromes) +
  facet_wrap(~time) +
  coord_quickmap() +
  scale_fill_manual(values = anthrome_colors(), drop = TRUE) +
  theme_bw() +
  labs(title = 'Anthromes-12k', x = 'Latitude', y = 'Longitude')

Discrete Global Grids

Rasters are great, but a hexagonal discrete global grid system would keep the cell areas the same and better represent shapes over the Earth's surface.

ggplot(dgg_med) +
  geom_sf() +
  theme_bw()

Use the dgg_extract() function to extract the HYDE 3.2 data using the DGG hexagons. It uses exactextractr under the hood, which prints a progress bar by default.

crops <-  dgg_extract(hyde_med, dgg_med, 'crops', fun = 'sum')
ggplot() +
  geom_stars(data = crops) +
  facet_wrap(~time) +
  scale_fill_viridis_c(na.value = NA, name = expression(km^2)) +
  theme_bw() +
  labs(title = 'HYDE 3.2 cropland', subtitle = 'Discrete global grid system', x = 'Latitude', y = 'Longitude')

Let's compare it to the 5' raster

a <- ggplot() +
  geom_stars(data = hyde_med[,,,5]) +
  coord_quickmap() +
  scale_fill_viridis_c(na.value = NA, name = expression(km^2)) +
  theme_bw() +
  labs(title = 'HYDE 3.2 cropland', x = 'Latitude', y = 'Longitude')

b <- ggplot() +
  geom_stars(data = crops[,,5]) +
  scale_fill_viridis_c(na.value = NA, name = expression(km^2)) +
  theme_bw() +
  labs(title = '', subtitle = 'Discrete global grid system', x = 'Latitude', y = 'Longitude')

a + b

Map this function over the list of HYDE variable names.

hyde_dgg <- names(hyde_med) %>%
  purrr::map(~dgg_extract(hyde_med, dgg_med, var = ., fun = 'sum')) %>%
  do.call(c, .)
ggplot() +
  geom_stars(data = merge(hyde_dgg[c(1:2,5),,])) +
  facet_grid(attributes~time) +
  scale_fill_viridis_c(na.value = NA, name = expression(km^2)) +
  theme_bw() +
  labs(title = 'HYDE 3.2 land use', x = 'Latitude', y = 'Longitude')

And extract the fixed inputs as well.

# fold this into dgg_extract too?
inputs_dgg <- dgg_extract(merge(inputs_med), dgg_med, fun = c('sum', 'mode')) %>%
   dplyr::select(land_area = sum.land_area, 
                pot_veg =  mode.pot_veg, 
                pot_vill = mode.pot_vill,
                regions = mode.regions,
                iso = mode.iso)

Apply the anthromes classifier to each level

anthromes_dgg <- anthrome_classify(hyde_dgg, inputs_dgg)
ggplot() +
  geom_stars(data = anthromes_dgg) +
  facet_wrap(~time) +
  scale_fill_manual(values = anthrome_colors(), drop = TRUE) +
  theme_bw() +
  labs(title = 'Anthromes-12k DGG', x = 'Latitude', y = 'Longitude')
 ggplot() +
  geom_stars(data = anthromes[,,,6]) +
  coord_quickmap() +
  scale_fill_manual(values = anthrome_colors(), drop = TRUE) +
  theme_bw() +
  labs(title = 'Anthromes-12k', x = 'Latitude', y = 'Longitude')

ggplot() +
  geom_stars(data = anthromes_dgg[,,6]) +
  scale_fill_manual(values = anthrome_colors(), drop = TRUE) +
  theme_bw() +
  labs(title = 'Anthromes-12k DGG', x = 'Latitude', y = 'Longitude')

Summary statistics

Use the anthrome_summary() function to produce formatted summary tables of the percent land area of under each anthrome type for each time period.

anthrome_summary(anthromes_dgg, inputs_dgg)

anthrome_summary() also has a "by" argument, which adds an additional grouping factor. The resulting summaries are now in percent of land area under each anthrome in each time period in each of the grouping variables.

anthrome_summary(anthromes_dgg, 
                 mutate(inputs_dgg, pot_veg = as.factor(pot_veg)), 
                 by = pot_veg)
anthrome_summary(anthromes_dgg, 
                 mutate(inputs_dgg, iso = as.factor(iso)), 
                 by = iso)

You can also use the hyde_summary() function to extract population estimates from HYDE.

hyde_summary(hyde_dgg, inputs_dgg) %>%
  ggplot(aes(time, pop, group = 1)) +
  geom_point() +
  stat_summary(fun.y=sum, geom="line") +
  theme_bw()
hyde_summary(hyde_dgg, inputs_dgg, by = iso) %>%
  ggplot(aes(time, pop, group = iso)) +
  geom_point() +
  stat_summary(fun.y=sum, geom="line", aes(color = as.factor(iso))) +
  theme_bw()

Statistical modeling

As stars objects, the data can be easily coerced to a tibble for statistical analysis.

pc <- hyde_dgg %>%
  as_tibble() %>%
  select(-geometry, -time) %>%
  prcomp(scale. = TRUE)

pc
knitr::knit_exit()

Global analyses

get_hyde('inputs', dir = '..') %>% merge %>% plot
hyde <- get_hyde(dir = '..') %>%
  st_crop(bbox) %>%
  # current bug in stars loses offset info when filtered
  #filter(time %in% time_steps_millennia) %>% 
  # so subset instead
  .[,,,c(8,9,10,11,21,58)] %>%
  st_as_stars() %>% 
  # change the names to something more readable
  setNames(c('crops', 'grazing', 'rice', 'pop', 'irrigation', 'urban'))
inputs <- get_hyde('inputs', dir = '..') %>%
  st_crop(bbox)

the rest

why does exact_extractr error here?

test <- aggregate(hyde[,,,1], dgg, sum) 
test2 <- aggregate(hyde[,,,1], dgg, sum, as_points = FALSE) 
test3 <- aggregate(hyde[,,,1], dgg, sum, exact = TRUE) 
test4 <- aggregate(hyde[,,,1], dgg, sum, as_points = FALSE, exact = TRUE) 

Why is get dgg anthromes so much slower for dgg

    # the problem here is that the slice and map approach repeats the geometry vector a bunch of times ... slow!

st_get_dimension_values(hyde_dgg, 'time') %>%
  seq_along() %>% # iterate along the time steps

  map(~anthromes_classify(slice(hyde, 'time', .), inputs)) %>%
  do.call(c, .) 

    dat %>%
    `/`(inputs['land_area']) %>% # area_weight
    mutate(used = crops + grazing + urban) %>%
    c(inputs) 
anthromes_classify2 <- function(crops, grazing, rice, pop, irrigation, urban, used, land_area, pot_veg, pot_vill){
  case_when(
    urban >= 0.2 | pop >= 2500 ~ 11L,
    pop >= 100 & pot_vill == FALSE ~ 12L,
    pop >= 100 & rice >= 0.2 ~ 21L,
    pop >= 100 & irrigation >= 0.2 ~ 22L,
    pop >= 100 & crops >= 0.2 ~ 23L,
    pop >= 100 & grazing >= 0.2 ~ 24L,
    pop >= 100 ~ 12L,
    crops >= 0.2 & pop >= 10 & pop < 100 & irrigation >= 0.2 ~ 31L,
    crops >= 0.2 & pop >= 10 & pop < 100 ~ 32L,
    crops >= 0.2 & pop >= 1 & pop < 10 ~ 33L,
    crops >= 0.2 & pop > 0 & pop < 1 ~ 34L,
    grazing >= 0.2 & pop >= 10 & pop < 100 ~ 41L,
    grazing >= 0.2 & pop >= 1 & pop < 10 ~ 42L,
    grazing >= 0.2 & pop > 0 & pop < 1 ~ 43L,
    trees == TRUE & pop >= 10 & pop < 100 ~ 51L,
    trees == TRUE & pop >= 1 & pop < 10 ~ 52L,
    trees == TRUE & pop > 0 & pop < 1 & used < 0.2 ~ 53L,
    trees == TRUE & pop > 0 & pop < 1 & used >= 0.2 & crops >= grazing ~ 34L,
    trees == TRUE & pop > 0 & pop < 1 & used >= 0.2 & crops < grazing ~ 43L,
    pop > 0 & trees == FALSE & used < 0.2 ~ 54L,
    pop > 0 & trees == FALSE & used >= 0.2 & crops >= grazing ~ 34L,
    pop > 0 & trees == FALSE & used >= 0.2 & crops < grazing ~ 43L,
    used >= 0.2 & crops >= 0.2 ~ 34L, # in python code but not paper
    used >= 0.2 & grazing >= 0.2 ~ 43L, # in python code but not paper
    used >= 0.2 & crops >= grazing ~ 34L,
    used >= 0.2 & crops < grazing ~ 43L,
    pot_veg != 15 & trees == TRUE ~ 61L,
    pot_veg != 15 & trees == FALSE ~ 62L,
    pot_veg == 15 & used > 0 ~ 62L, # in python code but not paper
    pot_veg == 15 ~ 63L,
    TRUE ~ 70L)
}

anthromes_classify2(c(0,1),c(0,1),c(0,1),c(0,1),c(0,1),c(0,1),c(0,1),c(0,1),c(0,1),c(0,1))


anthrome_lookup[anthromes_classify2(1,1,1,1,1,0,1,0,1,1)]
anthrome_lookup['11']

And animate it.

ggplot() +
  geom_stars(data = filter(hyde_dgg, time %in% time_steps_centuries)) +
  scale_fill_viridis_c(na.value = NA, name = expression(km^2)) +
  theme_bw() +
  # use transition_states() from gganimate instead of facet_wrap 
  gganimate::transition_states(time) +
  labs(title = 'HYDE 3.2 land use', subtitle = 'Cropland at {closest_state}', x = 'Latitude', y = 'Longitude')
saveRDS(hyde, 'hyde.rds')
write_stars(hyde %>% merge, 'hyde.nc')
library(gdalcubes)

gdalcubes::write_ncdf(hyde, 'hyde.nc')

# there looks like a bug so that filtering looses dimension information
hyde;
hyde %>% filter(time %in% time_steps_centuries)
hyde[,,,which(st_get_dimension_values(hyde, 'time') %in% time_steps_centuries)]
readRDS('hyde.rds')
slice(hyde, 'time', 1) %>% c(inputs)
#yes, because the following works
slice(hyde, 'time', 1) %>% c(filter(inputs))


nick-gauthier/anthromes-12k documentation built on June 26, 2022, 3:21 p.m.