knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
This vignette presents an example workflow for using the anthromes
R package for analyzing long-term human populations and land use.
First load the anthromes
package.
# analysis packages library(anthromes) library(stars) library(dplyr) library(ggplot2) # visualization packages library(gganimate) library(patchwork)
Although the package is able to work with global data, we'll focus just on a subset of the eastern Mediterranean for simplicity.
bbox <- st_bbox(c(xmin = 29, xmax = 39, ymin = 30, ymax = 40), crs = 4326)
Read in the anthromes data as a stars object. stars in an R package for working with space-time cubes like the HYDE 3.2 data, which are spatial rasters representing multiple time steps.
hyde_med
A stars object prints two pieces of information, the attribute data (which is essentially a tibble that can be manipulated via typical tidyverse functions) and dimension information (which records the spatial and temporal dimensions of the object).
hyde_med
Stars objects are useful for many reasons. One is that they can take units.
hyde_med %>% dplyr::mutate(crops = units::set_units(crops, km2))
Also load in the HYDE 3.2 supporting grids: land area, potential vegetation, and potential villages.
inputs_med
You can easily plot these objects in ggplot using geom_stars()
.
ggplot() + geom_stars(data = hyde_med) + facet_wrap(~time) + coord_quickmap() + scale_fill_viridis_c(na.value = NA, name = expression(km^2)) + theme_bw() + labs(title = 'HYDE 3.2 cropland', x = 'Latitude', y = 'Longitude')
You can easily animate these data using gganimate.
ggplot() + geom_stars(data = filter(hyde_med)) + coord_quickmap() + scale_fill_viridis_c(na.value = NA, name = expression(km^2)) + theme_bw() + # use transition_states() from gganimate instead of facet_wrap gganimate::transition_states(time) + labs(title = 'HYDE 3.2 land use', subtitle = 'Cropland at {closest_state}', x = 'Latitude', y = 'Longitude')
By default, geom_stars will only plot the first attribute. If you'd like to plot multiple attributes at a time, the easiest way is to convert the attributes to an extra dimension.
ggplot() + geom_stars(data = merge(hyde_med[c(1:2,5),,,])) + facet_grid(attributes~time) + coord_quickmap() + scale_fill_viridis_c(na.value = NA, name = expression(km^2)) + theme_bw() + labs(title = 'HYDE 3.2 land use', x = 'Latitude', y = 'Longitude')
anthromes <- anthrome_classify(hyde_med, inputs_med)
ggplot() + geom_stars(data = anthromes) + facet_wrap(~time) + coord_quickmap() + scale_fill_manual(values = anthrome_colors(), drop = TRUE) + theme_bw() + labs(title = 'Anthromes-12k', x = 'Latitude', y = 'Longitude')
Rasters are great, but a hexagonal discrete global grid system would keep the cell areas the same and better represent shapes over the Earth's surface.
ggplot(dgg_med) + geom_sf() + theme_bw()
Use the dgg_extract()
function to extract the HYDE 3.2 data using the DGG hexagons. It uses exactextractr
under the hood, which prints a progress bar by default.
crops <- dgg_extract(hyde_med, dgg_med, 'crops', fun = 'sum')
ggplot() + geom_stars(data = crops) + facet_wrap(~time) + scale_fill_viridis_c(na.value = NA, name = expression(km^2)) + theme_bw() + labs(title = 'HYDE 3.2 cropland', subtitle = 'Discrete global grid system', x = 'Latitude', y = 'Longitude')
Let's compare it to the 5' raster
a <- ggplot() + geom_stars(data = hyde_med[,,,5]) + coord_quickmap() + scale_fill_viridis_c(na.value = NA, name = expression(km^2)) + theme_bw() + labs(title = 'HYDE 3.2 cropland', x = 'Latitude', y = 'Longitude') b <- ggplot() + geom_stars(data = crops[,,5]) + scale_fill_viridis_c(na.value = NA, name = expression(km^2)) + theme_bw() + labs(title = '', subtitle = 'Discrete global grid system', x = 'Latitude', y = 'Longitude') a + b
Map this function over the list of HYDE variable names.
hyde_dgg <- names(hyde_med) %>% purrr::map(~dgg_extract(hyde_med, dgg_med, var = ., fun = 'sum')) %>% do.call(c, .)
ggplot() + geom_stars(data = merge(hyde_dgg[c(1:2,5),,])) + facet_grid(attributes~time) + scale_fill_viridis_c(na.value = NA, name = expression(km^2)) + theme_bw() + labs(title = 'HYDE 3.2 land use', x = 'Latitude', y = 'Longitude')
And extract the fixed inputs as well.
# fold this into dgg_extract too? inputs_dgg <- dgg_extract(merge(inputs_med), dgg_med, fun = c('sum', 'mode')) %>% dplyr::select(land_area = sum.land_area, pot_veg = mode.pot_veg, pot_vill = mode.pot_vill, regions = mode.regions, iso = mode.iso)
Apply the anthromes classifier to each level
anthromes_dgg <- anthrome_classify(hyde_dgg, inputs_dgg)
ggplot() + geom_stars(data = anthromes_dgg) + facet_wrap(~time) + scale_fill_manual(values = anthrome_colors(), drop = TRUE) + theme_bw() + labs(title = 'Anthromes-12k DGG', x = 'Latitude', y = 'Longitude')
ggplot() + geom_stars(data = anthromes[,,,6]) + coord_quickmap() + scale_fill_manual(values = anthrome_colors(), drop = TRUE) + theme_bw() + labs(title = 'Anthromes-12k', x = 'Latitude', y = 'Longitude') ggplot() + geom_stars(data = anthromes_dgg[,,6]) + scale_fill_manual(values = anthrome_colors(), drop = TRUE) + theme_bw() + labs(title = 'Anthromes-12k DGG', x = 'Latitude', y = 'Longitude')
Use the anthrome_summary()
function to produce formatted summary tables of the percent land area of under each anthrome type for each time period.
anthrome_summary(anthromes_dgg, inputs_dgg)
anthrome_summary()
also has a "by" argument, which adds an additional grouping factor. The resulting summaries are now in percent of land area under each anthrome in each time period in each of the grouping variables.
anthrome_summary(anthromes_dgg, mutate(inputs_dgg, pot_veg = as.factor(pot_veg)), by = pot_veg)
anthrome_summary(anthromes_dgg, mutate(inputs_dgg, iso = as.factor(iso)), by = iso)
You can also use the hyde_summary()
function to extract population estimates from HYDE.
hyde_summary(hyde_dgg, inputs_dgg) %>% ggplot(aes(time, pop, group = 1)) + geom_point() + stat_summary(fun.y=sum, geom="line") + theme_bw()
hyde_summary(hyde_dgg, inputs_dgg, by = iso) %>% ggplot(aes(time, pop, group = iso)) + geom_point() + stat_summary(fun.y=sum, geom="line", aes(color = as.factor(iso))) + theme_bw()
As stars
objects, the data can be easily coerced to a tibble
for statistical analysis.
pc <- hyde_dgg %>% as_tibble() %>% select(-geometry, -time) %>% prcomp(scale. = TRUE) pc
knitr::knit_exit()
get_hyde('inputs', dir = '..') %>% merge %>% plot
hyde <- get_hyde(dir = '..') %>% st_crop(bbox) %>% # current bug in stars loses offset info when filtered #filter(time %in% time_steps_millennia) %>% # so subset instead .[,,,c(8,9,10,11,21,58)] %>% st_as_stars() %>% # change the names to something more readable setNames(c('crops', 'grazing', 'rice', 'pop', 'irrigation', 'urban'))
inputs <- get_hyde('inputs', dir = '..') %>% st_crop(bbox)
why does exact_extractr error here?
test <- aggregate(hyde[,,,1], dgg, sum) test2 <- aggregate(hyde[,,,1], dgg, sum, as_points = FALSE) test3 <- aggregate(hyde[,,,1], dgg, sum, exact = TRUE) test4 <- aggregate(hyde[,,,1], dgg, sum, as_points = FALSE, exact = TRUE)
Why is get dgg anthromes so much slower for dgg
# the problem here is that the slice and map approach repeats the geometry vector a bunch of times ... slow! st_get_dimension_values(hyde_dgg, 'time') %>% seq_along() %>% # iterate along the time steps map(~anthromes_classify(slice(hyde, 'time', .), inputs)) %>% do.call(c, .) dat %>% `/`(inputs['land_area']) %>% # area_weight mutate(used = crops + grazing + urban) %>% c(inputs)
anthromes_classify2 <- function(crops, grazing, rice, pop, irrigation, urban, used, land_area, pot_veg, pot_vill){ case_when( urban >= 0.2 | pop >= 2500 ~ 11L, pop >= 100 & pot_vill == FALSE ~ 12L, pop >= 100 & rice >= 0.2 ~ 21L, pop >= 100 & irrigation >= 0.2 ~ 22L, pop >= 100 & crops >= 0.2 ~ 23L, pop >= 100 & grazing >= 0.2 ~ 24L, pop >= 100 ~ 12L, crops >= 0.2 & pop >= 10 & pop < 100 & irrigation >= 0.2 ~ 31L, crops >= 0.2 & pop >= 10 & pop < 100 ~ 32L, crops >= 0.2 & pop >= 1 & pop < 10 ~ 33L, crops >= 0.2 & pop > 0 & pop < 1 ~ 34L, grazing >= 0.2 & pop >= 10 & pop < 100 ~ 41L, grazing >= 0.2 & pop >= 1 & pop < 10 ~ 42L, grazing >= 0.2 & pop > 0 & pop < 1 ~ 43L, trees == TRUE & pop >= 10 & pop < 100 ~ 51L, trees == TRUE & pop >= 1 & pop < 10 ~ 52L, trees == TRUE & pop > 0 & pop < 1 & used < 0.2 ~ 53L, trees == TRUE & pop > 0 & pop < 1 & used >= 0.2 & crops >= grazing ~ 34L, trees == TRUE & pop > 0 & pop < 1 & used >= 0.2 & crops < grazing ~ 43L, pop > 0 & trees == FALSE & used < 0.2 ~ 54L, pop > 0 & trees == FALSE & used >= 0.2 & crops >= grazing ~ 34L, pop > 0 & trees == FALSE & used >= 0.2 & crops < grazing ~ 43L, used >= 0.2 & crops >= 0.2 ~ 34L, # in python code but not paper used >= 0.2 & grazing >= 0.2 ~ 43L, # in python code but not paper used >= 0.2 & crops >= grazing ~ 34L, used >= 0.2 & crops < grazing ~ 43L, pot_veg != 15 & trees == TRUE ~ 61L, pot_veg != 15 & trees == FALSE ~ 62L, pot_veg == 15 & used > 0 ~ 62L, # in python code but not paper pot_veg == 15 ~ 63L, TRUE ~ 70L) } anthromes_classify2(c(0,1),c(0,1),c(0,1),c(0,1),c(0,1),c(0,1),c(0,1),c(0,1),c(0,1),c(0,1)) anthrome_lookup[anthromes_classify2(1,1,1,1,1,0,1,0,1,1)] anthrome_lookup['11']
And animate it.
ggplot() + geom_stars(data = filter(hyde_dgg, time %in% time_steps_centuries)) + scale_fill_viridis_c(na.value = NA, name = expression(km^2)) + theme_bw() + # use transition_states() from gganimate instead of facet_wrap gganimate::transition_states(time) + labs(title = 'HYDE 3.2 land use', subtitle = 'Cropland at {closest_state}', x = 'Latitude', y = 'Longitude')
saveRDS(hyde, 'hyde.rds') write_stars(hyde %>% merge, 'hyde.nc') library(gdalcubes) gdalcubes::write_ncdf(hyde, 'hyde.nc') # there looks like a bug so that filtering looses dimension information hyde; hyde %>% filter(time %in% time_steps_centuries) hyde[,,,which(st_get_dimension_values(hyde, 'time') %in% time_steps_centuries)] readRDS('hyde.rds') slice(hyde, 'time', 1) %>% c(inputs) #yes, because the following works slice(hyde, 'time', 1) %>% c(filter(inputs))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.