This RMarkdown document performs all of the analyses reported in

d'Alpoim Guedes, Jade and R. Kyle Bocinsky. Climate change stimulated agricultural innovation and exchange across Asia. In review.

# params <- list(cores = 2,
#                clean = FALSE)

# Set the knitting behavior
knitr::opts_chunk$set(echo = TRUE,
                      warning = FALSE,
                      message = FALSE,
                      collapse = TRUE, 
                      # cache = FALSE,
                      cache = !params$clean,
                      cache.lazy = FALSE,
                      results = 'hold',
                      out.width = "100%",
                      fig.height = 5,  
                      comment = "#>",
                      fig.path = "./figures/"
)

library(guedesbocinsky2018)
library(sf)
library(magrittr)
library(fields)

# Set the behavior for printing scientific notation
# options(digits = 3, scipen = -2)
options(scipen = 999,
        knitr.table.format = "html")

# Force Raster to load large rasters into memory
raster::rasterOptions(chunksize = 2e+08,
                      maxmemory = 2e+09)

# Create the plan for (possibly parallel) execution
future::plan(future::multisession, 
             workers = min(future::availableCores(), 
                           params$cores),
             gc = TRUE)

# Create output directories
dir.create("./data/raw_data",
           recursive = TRUE,
           showWarnings = FALSE)
dir.create("./data/derived_data",
           recursive = TRUE,
           showWarnings = FALSE)
dir.create("./figures",
           recursive = TRUE,
           showWarnings = FALSE)

Set parameters {-}

In this section we define the study area by a bounding box, the calendar years used to define climatology (here, 1961--1990), and a plotting theme for ggplot.

# Set a series of colors for the crops
crop_colors <- RColorBrewer::brewer.pal(6,"Dark2")

# A function to set an objects class
set_class <- function(x, classes){
  class(x) <- classes
  return(x)
}

#Define the study region
ASIA_poly <-
  raster::extent(55,125,5,60) %>% ## THE REAL ASIAN POLYGON
  # raster::extent(66,72,37,43) %>% ## FOR TESTING PURPOSES ONLY
  # raster::extent(94,104.5,26,39) %>% ## Eastern Tibetan Plateau FOR TESTING PURPOSES ONLY
  FedData::polygon_from_extent("+proj=longlat") %>%
  sf::st_as_sf()

# Set the calibration period for paleoclimate reconstructions
calibration.years <- 1961:1990

# A ggplot2 theme for Nature Publishing Group
nature_theme <- ggplot2::theme(panel.grid.major = ggplot2::element_line(size = 0.5, 
                                                                        color = "grey"),
                               axis.line = ggplot2::element_line(size = 0.7, 
                                                                 color = "black"),
                               legend.position = c(0.85, 0.7),
                               text = ggplot2::element_text(size = 14))

Load ETOPO5 grids {-}

Indicator kriging takes place on the ETOPO5 digital elevation model. Here, we load the ETOPO5 data from the geomapdata package, then mask out the oceans adn large lakes, as well as islands outside of mainland Asia.

message("Preparing the ETOPO5 grid-aligned dataset")

time_check <-  Sys.time()

if(!params$clean & file.exists("./data/derived_data/ASIA_rast_etopo5.tif")){

  ASIA_rast_etopo5 <- raster::raster("./data/derived_data/ASIA_rast_etopo5.tif")

}else{

  # Get the Natural Earth country lakes data
  dir.create("./data/derived_data/NaturalEarth/", 
             showWarnings = FALSE, 
             recursive = TRUE)

  FedData::download_data(
    url = "http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/physical/ne_10m_lakes.zip",
    destdir = "./data/derived_data/NaturalEarth/"
  )

  unzip(
    "./data/derived_data/NaturalEarth/ne_10m_lakes.zip", 
    exdir = "./data/derived_data/NaturalEarth/ne_10m_lakes/"
  )

  ne_10m_lakes <- "./data/derived_data/NaturalEarth/ne_10m_lakes/ne_10m_lakes.shp" %>%
    sf::st_read() %>%
    sf::st_transform(sf::st_crs(ASIA_poly)) %>%
    sf::st_intersection(ASIA_poly)

  # Get the Natural Earth country boundaries data
  dir.create(
    "./data/derived_data/NaturalEarth/ne_10m_admin_0_countries_lakes/",
    showWarnings = FALSE, 
    recursive = TRUE
  )
  FedData::download_data(
    url = "http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries_lakes.zip",
    destdir = "./data/derived_data/NaturalEarth/"
  )

  unzip(
    "./data/derived_data/NaturalEarth/ne_10m_admin_0_countries_lakes.zip", 
    exdir = "./data/derived_data/NaturalEarth/ne_10m_admin_0_countries_lakes/"
  )

  ne_10m_admin_0_countries_lakes <- "./data/derived_data/NaturalEarth/ne_10m_admin_0_countries_lakes/ne_10m_admin_0_countries_lakes.shp" %>%
    sf::st_read() %>%
    sf::st_transform(sf::st_crs(ASIA_poly)) %>%
    sf::st_intersection(ASIA_poly) %>%
    dplyr::filter(!(NAME %in% c("Indonesia",
                                "Scarborough Reef",
                                "Malaysia",
                                "Philippines",
                                "Maldives",
                                "Spratly Is.",
                                "Oman",
                                "United Arab Emirates",
                                "Saudi Arabia")))

  library(geomapdata)
  data("ETOPO5")

  ASIA_rast_etopo5 <- ETOPO5 %>%
    t() %>%
    raster::raster(xmn = 0,
                   xmx = 360,
                   ymn = -90,
                   ymx = 90,
                   crs = sp::CRS("+proj=longlat +ellps=clrk66 +no_defs")) %>%
    raster::crop(ASIA_poly %>%
                   sf::st_transform("+proj=longlat +ellps=clrk66 +no_defs") %>%
                   as("Spatial")) %>%
    raster::mask(ne_10m_admin_0_countries_lakes %>%
                   sf::st_transform("+proj=longlat +ellps=clrk66 +no_defs") %>%
                   as("Spatial")) %>%
    raster::mask(ne_10m_lakes %>%
                   sf::st_transform("+proj=longlat +ellps=clrk66 +no_defs") %>%
                   as("Spatial"),
                 inverse = TRUE) %T>%
    raster::writeRaster(filename = "./data/derived_data/ASIA_rast_etopo5.tif",
                        datatype="INT2S",
                        options=c("COMPRESS=DEFLATE", "ZLEVEL=9", "INTERLEAVE=BAND", "PHOTOMETRIC=MINISWHITE"),
                        overwrite=T,
                        setStatistics=FALSE)
  rm(ETOPO5)
}

countries <- "./data/derived_data/NaturalEarth/ne_10m_admin_0_countries_lakes/ne_10m_admin_0_countries_lakes.shp" %>%
  sf::st_read()  %>%
  sf::st_transform(sf::st_crs(ASIA_poly)) %>%
  sf::st_intersection(ASIA_poly) %>%
  dplyr::filter(!(NAME %in% c("Indonesia",
                              "Scarborough Reef",
                              "Malaysia",
                              "Philippines",
                              "Maldives",
                              "Spratly Is.",
                              "Oman",
                              "United Arab Emirates",
                              "Saudi Arabia"))) %>%
  dplyr::select(NAME, geometry)

message("ETOPO5 grid-aligned dataset preparation complete: ", capture.output(Sys.time() - time_check))

Prepare the GHCN data {-}

Here, we download daily weather records from the Global Historical Climatology Network,

## Downloads and cleans daily climate records from the Global Historical Climate Database.
message("Preparing the daily climate records from the Global Historical Climate Database")
time_check <-  Sys.time()
GHCN.data.final <- prepare_ghcn(region = ASIA_poly %>%
                                  as("Spatial"),
                                label = "ASIA_poly",
                                calibration.years = calibration.years,
                                google_maps_elevation_api_key = params$google_maps_elevation_api_key,
                                raw_dir = "./data/raw_data/ghcn/",
                                derived_dir = "./data/derived_data/ghcn/",
                                force.redo = params$clean)
message("GHCN preparation complete: ", capture.output(Sys.time() - time_check))
## An example of plotting the GHCN data
# climate_plotter(data = GHCN.data.final, station = "CHM00051334", element = "TMIN")

Prepare the Marcott data {-}

# Run the script that transforms the Marcott et al. 2013 data into standard scores.
message("Preparing the Marcott et al. 2013 data")
time_check <-  Sys.time()
marcott2013 <- prepare_marcott(calibration.years = calibration.years)
message("Marcott et al. 2013 data preparation complete: ", capture.output(Sys.time() - time_check))

Modulating climatology by SD {-}

#### Calculating growing degree days ####
message("Modulating local climatology by Marcott et al. 2013 data")
time_check <-  Sys.time()
# How often to sample GDD, in z-space, for model tuning
# Here, we sample from -20 to 20 SD, at 1 SD interval
sample.points <- -20:20

# Read in data on different crop GDD needs
crop_GDD <- readr::read_csv("../inst/crops.csv",
                            col_types = readr::cols(
                              cultivar_long = readr::col_character(),
                              cultivar = readr::col_character(),
                              crop_long = readr::col_character(),
                              crop = readr::col_character(),
                              t_base = readr::col_double(),
                              min_gdd = readr::col_integer()
                            ))

crop_GDD %>%
  readr::write_csv("./data/derived_data/Table_S1_crops.csv")

# Transform GHCN data to GDDs of each base, and modulate to Marcott
GDDs <- sort(unique(crop_GDD$t_base))

GHCN.GDD.incremented.sd <-
  furrr::future_map(GDDs,
                    # purrr::map(GDDs,
                    .options = furrr::future_options(
                      packages = c("magrittr",
                                   "guedesbocinsky2018"),
                      globals = c("sample.points",
                                  "GHCN.data.final")),
                    .f = function(base){
                      purrr::map_dfr(sample.points,
                                     function(change){

                                       GHCN.GDDs <-
                                         purrr::map_dbl(GHCN.data.final$climatology,
                                                        function(station){
                                                          return(sdModulator(data.df = station,
                                                                             temp.change.sd = change,
                                                                             t.base = base,
                                                                             t.cap = 30))
                                                        })


                                       names(GHCN.GDDs) <- names(GHCN.data.final$climatology)

                                       return(tibble::tibble(SD_change = change,
                                                             ID = names(GHCN.GDDs),
                                                             GDD = GHCN.GDDs))

                                     }) %>%
                        dplyr::left_join(GHCN.data.final$spatial %>%
                                           dplyr::as_data_frame() %>%
                                           magrittr::set_names(c("ID",
                                                                 "NAME",
                                                                 "elevation",
                                                                 "x",
                                                                 "y")),
                                         by = "ID")

                    })

names(GHCN.GDD.incremented.sd) <- GDDs

# stop the cluster (will free memory)
# stopCluster(cl)
message("Modulation of local climatology by Marcott et al. 2013 data complete: ", capture.output(Sys.time() - time_check))

Generate the crop niche models {-}

message("Calculating indicator Krige models")
time_check <-  Sys.time()
# Create a spatialPointsDataFrame of the etopo5 data, and convert to WGS84 ellipsoid
ASIA_rast_etopo5.sp <-
  ASIA_rast_etopo5 %>%
  magrittr::set_names("elevation") %>%
  raster::rasterToPoints(spatial = T) %>%
  sp::spTransform(sp::CRS(raster::projection(GHCN.data.final$spatial)))

# A function that generates the kriging model, then predicts
krige_and_predict <- function(dt){
  model <- fields::mKrig(x = dt[,c("x","y")],
                         y = dt$GDD_thresh,
                         Z = dt$elevation,
                         Covariance = "Exponential",
                         Distance = "rdist.earth")

  prediction <- ASIA_rast_etopo5.sp %>%
    tibble::as_tibble() %>%
    dplyr::mutate(chunk = rep(1:ceiling(nrow(.)/chunk_size),length.out = nrow(.)) %>%
                    sort()
    ) %>%
    dplyr::group_by(chunk) %>%
    dplyr::do(prediction = fields::predict.mKrig(model,
                                                 xnew = .[,c("x","y")],
                                                 Z = .$elevation) %>%
                as.vector()) %$%
    prediction %>%
    unlist()

  return(list(model = model, prediction = prediction))

}

# Calculate gdd kriging models for each crop
gdd_model_files <- paste0("./data/derived_data/models/",crop_GDD$cultivar,"_models.rds")

if(params$clean){
  unlink("./data/derived_data/models/", recursive = TRUE, force = TRUE)
  dir.create("./data/derived_data/models/", recursive = TRUE, showWarnings = F)
  crop_GDD_run <- crop_GDD
}else{
  dir.create("./data/derived_data/models/", recursive = TRUE, showWarnings = F)
  crop_GDD_run <- crop_GDD[!file.exists(gdd_model_files),]
}

if(nrow(crop_GDD_run) == 0){
  message("All indicator Krige models have already been calculated. Continuing.")
}else message("Calculating indicator Krige models for ",
              nrow(crop_GDD_run),
              " cultivars:\n",
              paste0(capture.output(crop_GDD_run), collapse = "\n"))

# A function to reduce the size of a loess model by half
skinny.loess <- function(x){
  x[c("fitted",
      "residuals",
      "enp",
      "one.delta",
      "two.delta",
      "trace.hat",
      "call",
      "terms",
      "xnames")] <- NULL
  return(x)
}

# A function of correct the indication predictions and estimate a smooth
# monotonic function
# This first uses isotonic regression, then loess smoothing with a degree of 1
smooth.preds <- function(y){
  y[y<0] <- 0
  y[y>1] <- 1
  y <- loess(isoreg(y~sample.points)$yf~sample.points, span=0.1, degree=1) %>%
    skinny.loess()
  return(y)
}

if(nrow(crop_GDD_run) > 0){
  options(dplyr.show_progress = FALSE)
  chunk_size <- 10000

  gdd.models <-
    furrr::future_map(1:nrow(crop_GDD_run),
                      .options = furrr::future_options(
                        packages = c("fields",
                                     "dplyr",
                                     "magrittr",
                                     "readr",
                                     "guedesbocinsky2018")),
                      .f = function(crop){

                        # Threshold for indicator kriging
                        GHCN.GDD.incremented.sd[[as.character(crop_GDD_run[crop,"t_base"])]] %>%
                          dplyr::mutate(GDD_thresh = {GDD >= as.numeric(crop_GDD_run[crop,"min_gdd"])}) %>%
                          dplyr::group_by(SD_change) %>%
                          dplyr::do(out_preds = krige_and_predict(.)) %$%
                          out_preds %>%
                          sapply("[[","prediction") %>%
                          apply(1,smooth.preds) %>%
                          tibble::tibble(model = .) %>%
                          maptools::spCbind(ASIA_rast_etopo5.sp,.) %>%
                          readr::write_rds(paste0("./data/derived_data/models/",
                                                  crop_GDD_run[crop,"cultivar"],
                                                  "_models.rds"), 
                                           compress = "xz")

                        return(paste0("./data/derived_data/models/",
                                      crop_GDD_run[crop,"cultivar"],
                                      "_models.rds"))

                      })

}

rm(ASIA_rast_etopo5,
   ASIA_rast_etopo5.sp,
   GHCN.data.final,
   GHCN.GDD.incremented.sd)

message("Calculation indicator Krige models complete: ", capture.output(Sys.time() - time_check))

Predict crop niches through time {-}

## Predicting crop niche from smoothed Krige models
# Calculate niches for each crop using the Marcott et al. 2013.
# create the cluster for parallel computation
message("Generating niche reconstructions")
time_check <-  Sys.time()
if(params$clean){
  unlink("./data/derived_data/recons/", recursive = TRUE, force = TRUE)
}
dir.create("./data/derived_data/recons/", recursive = TRUE, showWarnings = F)

# Create the plan for (possibly parallel) execution
future::plan(future::multisession, 
             workers = max(
               floor(
                 min(
                   future::availableCores(), 
                   params$cores)/2), 
               1),
             gc = TRUE)

junk <- furrr::future_map(crop_GDD$cultivar,    
                          .options = furrr::future_options(
                            packages = c("fields",
                                         "dplyr",
                                         "magrittr",
                                         "sp",
                                         "readr",
                                         "guedesbocinsky2018")),
                          function(crop){

                            if(file.exists(paste0("./data/derived_data/recons/",
                                                  crop,
                                                  "_recons.rds")))
                              return()

                            if(!file.exists(paste0("./data/derived_data/models/",
                                                   crop,
                                                   "_models.rds"))) 
                              stop("Models for ",
                                   crop,
                                   " are missing! Aborting.")

                            Zs <- c("Z_Lower","Z","Z_Upper")

                            crop.models <- readr::read_rds(paste0("./data/derived_data/models/",
                                                                  crop,
                                                                  "_models.rds"))

                            out <- purrr::map(Zs, 
                                              function(z){
                                                suppressWarnings(crop.models@data %$%
                                                                   model %>%
                                                                   purrr::map(
                                                                     function(x){
                                                                       x %>%
                                                                         predict(newdata = marcott2013[[z]]) %>%
                                                                         magrittr::multiply_by(100) %>%
                                                                         round() %>%
                                                                         as.integer()
                                                                     }) %>%
                                                                   do.call(rbind, .) %>%
                                                                   tibble::as_tibble() %>%
                                                                   magrittr::set_colnames(marcott2013$YearBP) %>%
                                                                   new("SpatialPointsDataFrame",
                                                                       data = .,
                                                                       coords.nrs = crop.models@coords.nrs,
                                                                       coords = crop.models@coords,
                                                                       bbox = crop.models@bbox,
                                                                       proj4string = crop.models@proj4string) %>%
                                                                   as("SpatialPixelsDataFrame") %>%
                                                                   raster::brick() %>%
                                                                   raster::setZ(marcott2013$YearBP,
                                                                                name="Years BP"))
                                              }) %>%
                              magrittr::set_names(Zs) %>%
                              readr::write_rds(paste0("./data/derived_data/recons/",
                                                      crop,
                                                      "_recons.rds"),
                                               compress = "xz",
                                               compression = 9)

                            return()
                          })

message("Generation of niche reconstructions complete: ", capture.output(Sys.time() - time_check))

Combine similar crop niches {-}

## Combining crop niches from similar crops by taking an arithmatic mean
message("Combining like crop niches")
time_check <-  Sys.time()

purrr::walk(crop_GDD %>%
              split(as.factor(crop_GDD$crop)),
            function(x){
              if(file.exists(paste0("./data/derived_data/recons/all_",x$crop[[1]],".rds"))) return()
              n_crops <- length(x$crop)
              if(n_crops == 1){
                file.copy(paste0("./data/derived_data/recons/",
                                 x$cultivar[[1]],
                                 "_recons.rds"),
                          paste0("./data/derived_data/recons/all_",
                                 x$crop[[1]],
                                 ".rds"))
                return()
              }

              test <- purrr::map(x$cultivar, function(cultivar){
                paste0("./data/derived_data/recons/",cultivar,"_recons.rds") %>%
                  readr::read_rds()
              }) %>%
                purrr::transpose() %>%
                purrr::map(function(x){
                  x %>%
                    Reduce(f = "+", x = .) %>%
                    magrittr::divide_by(n_crops) %>%
                    round() %>%
                    as.integer()
                }) %>%
                readr::write_rds(paste0("./data/derived_data/recons/all_",
                                        x$crop[[1]],
                                        ".rds"),
                                 compress = "xz",
                                 compression = 9)
            })

message("Combining like crop niches complete: ", capture.output(Sys.time() - time_check))

1961--1990 Niche Analysis {-}

# Create the plan for (possibly parallel) execution
future::plan(future::multisession, 
             workers = max(
               floor(
                 min(
                   future::availableCores(), 
                   params$cores)/2), 
               1),
             gc = TRUE)

if(!file.exists("./data/derived_data/recons/modern_niches.rds")){
  modern_niches <- 
    purrr::map(crop_GDD$cultivar,
               function(crop){
                 paste0("./data/derived_data/models/",
                        crop,
                        "_models.rds") %>%
                   readr::read_rds() %>%
                   slot("data") %$%
                   model %>%
                   purrr::map_int(
                     function(x){
                       x %>%
                         predict(newdata = 0) %>%
                         magrittr::multiply_by(100) %>%
                         round() %>%
                         as.integer()
                     })
               }) %>%
    magrittr::set_names(crop_GDD$cultivar)

  crop.models <- paste0("./data/derived_data/models/foxtail_millet_110-125day_models.rds") %>%
    readr::read_rds()

  modern_niches %>%
    tibble::as_tibble() %>%
    new("SpatialPointsDataFrame",
        data = .,
        coords.nrs = crop.models@coords.nrs,
        coords = crop.models@coords,
        bbox = crop.models@bbox,
        proj4string = crop.models@proj4string) %>%
    as("SpatialPixelsDataFrame") %>%
    raster::brick() %>%
    readr::write_rds("./data/derived_data/recons/modern_niches.rds",
                     compress = "xz",
                     compression = 9)

  rm(crop.models)
  gc()
  gc()

}
modern_niches <- readr::read_rds("./data/derived_data/recons/modern_niches.rds")

modern_niches %<>%
  magrittr::extract2(which(!(names(modern_niches) %in% c("buckwheat","barley_alaska","barley_juskiw"))))

# Log in to tDAR API
tdar::tdar_login(tdar_user = params$tdar_un,
                 tdar_password = params$tdar_pw)

## Read the China Vegetation Atlas data from tDAR
vegetation_atlas_data <- 
  get_restricted_vegetation_atlas_data(tdar_user = params$tdar_un,
                                       tdar_password = params$tdar_pw)

vegetation_atlas_data %<>%
  sf::st_intersection(countries %>%
                        sf::st_union() %>%
                        sf::st_transform(4326))

modern_vegetation_type_niche_percent <- 
  vegetation_atlas_data$crop %>%
  unique() %>%
  purrr::map(function(crop_name){
    niche_vx <- modern_niches %>%
      magrittr::extract2(grep(crop_name,names(modern_niches))) %>%
      raster::mean() %>%
      round() %>%
      as.integer() %>%
      velox::velox()

    out <- niche_vx$extract(vegetation_atlas_data %>%
                              dplyr::filter(crop == crop_name) %>%
                              sf::st_cast(),
                            small = TRUE) %>%
      purrr::map(as.integer)

    vegetation_atlas_data %>%
      dplyr::filter(crop == crop_name) %>%
      dplyr::mutate(count = purrr::map_int(out %>%
                                             na.omit(), length)) %>%
      dplyr::bind_cols(purrr::map(out, stats::quantile, na.rm = TRUE) %>%
                         purrr::map(as.matrix) %>%
                         purrr::map(t) %>%
                         purrr::map(tibble::as_tibble) %>%
                         dplyr::bind_rows())
  }) %>%
  do.call(rbind, .) %>%
  # sf::st_cast() %>%
  dplyr::arrange(crop,
                 -count) %>%
  dplyr::select(crop,
                dplyr::everything())

vegetation_atlas_data_crops <- 
  vegetation_atlas_data %>%
  dplyr::select(crop) %>%
  dplyr::group_by(crop) %>%
  dplyr::summarise() %>%
  sf::st_cast()

modern_crop_niche_percent <- 
  vegetation_atlas_data_crops$crop %>%
  unique() %>%
  purrr::map(function(crop_name){
    niche_vx <- modern_niches %>%
      magrittr::extract2(grep(crop_name,names(modern_niches))) %>%
      raster::mean() %>%
      round() %>%
      as.integer() %>%
      velox::velox()

    out <- niche_vx$extract(vegetation_atlas_data_crops %>%
                              dplyr::filter(crop == crop_name) %>%
                              sf::st_cast(),
                            small = TRUE) %>%
      purrr::map(as.integer)

    vegetation_atlas_data_crops %>%
      dplyr::filter(crop == crop_name) %>%
      dplyr::mutate(count = purrr::map_int(out %>%
                                             na.omit(), length)) %>%
      dplyr::bind_cols(purrr::map(out, stats::quantile, na.rm = TRUE) %>%
                         purrr::map(as.matrix) %>%
                         purrr::map(t) %>%
                         purrr::map(tibble::as_tibble) %>%
                         dplyr::bind_rows())
  }) %>%
  do.call(rbind, .) %>%
  dplyr::arrange(crop,
                 -count) %>%
  dplyr::select(crop,
                dplyr::everything())

vegetation_atlas_data_crops_simple <- 
  vegetation_atlas_data_crops %>%
  rmapshaper::ms_simplify() %>%
  sf::st_intersection(countries %>%
                        sf::st_union() %>%
                        sf::st_transform(4326))

# Map the sites
cairo_pdf("./figures/Figure_5_modern_crops.pdf", width = 7.2, height = 7)
print(vegetation_atlas_data_crops_simple %>%
        dplyr::mutate(Crop = as.factor(crop),
                      Crop = dplyr::recode_factor(Crop,
                                                  millet = "Millet",
                                                  wheat = "Wheat",
                                                  barley = "Barley",
                                                  rice = "Rice")) %>%
        ggplot2::ggplot() +
        ggplot2::geom_sf(data = countries %>%
                           sf::st_transform(4326),
                         inherit.aes = FALSE,
                         show.legend = FALSE) +
        ggplot2::geom_sf(ggplot2::aes(fill = Crop),
                         colour = NA,
                         inherit.aes = FALSE) +
        ggplot2::scale_x_continuous(expand = c(0, 0)) +
        ggplot2::scale_y_continuous(expand = c(0, 0)) +
        ggplot2::theme_minimal(base_size = 7) +
        ggplot2::guides(fill = FALSE) +
        ggplot2::theme(legend.position = "none",
                       strip.text.x = ggplot2::element_text(size = 9, face = "bold")) +
        ggplot2::facet_wrap( ~ Crop, ncol = 2))
dev.off()

# Distributions
modern_crop_niche_data <- 
  vegetation_atlas_data_crops$crop %>%
  unique() %>%
  purrr::map(function(crop_name){
    niche_vx <- modern_niches %>%
      magrittr::extract2(grep(crop_name,names(modern_niches))) %>%
      raster::mean() %>%
      round() %>%
      as.integer() %>%
      velox::velox()

    niche_vx$extract(vegetation_atlas_data_crops %>%
                       dplyr::filter(crop == crop_name) %>%
                       sf::st_cast(),
                     small = TRUE) %>%
      purrr::map(as.integer) %>%
      unlist() %>%
      tibble::tibble(Values = .) %>%
      dplyr::mutate(Crop = crop_name)

  }) %>%
  dplyr::bind_rows()


cairo_pdf("./figures/Figure_6_modern_crops_density.pdf", width = 4.6, height = 3)
print(modern_crop_niche_data %>%
        dplyr::mutate(Crop = as.factor(Crop),
                      Crop = dplyr::recode_factor(Crop,
                                                  millet = "Millet",
                                                  wheat = "Wheat",
                                                  barley = "Barley",
                                                  rice = "Rice")) %>%
        ggplot2::ggplot(ggplot2::aes(Values,
                                     ..density..,
                                     color = Crop,
                                     fill = Crop)) +
        ggplot2::geom_density(n = 1000,
                              bw = 2,
                              alpha = 0.1) +
        ggplot2::scale_x_continuous(limits = c(0, 100),
                                    expand = c(0, 0)) +
        ggplot2::scale_y_continuous(expand = c(0, 0, 0, 0.001)) +
        ggplot2::theme_minimal(base_size = 7) +
        ggplot2::xlab("Probability of being in the thermal niche (%)") +
        ggplot2::ylab("Probability density"))
dev.off()
## Plotting cultivar niche
# create the cluster for parallel computation
message("Plotting cultivar niche reconstructions")
time_check <-  Sys.time()

dir.create("./figures/cultivar_niches/",
           recursive = TRUE,
           showWarnings = FALSE)

gdd.recons <- 
  purrr::map_chr(1:nrow(crop_GDD),
                 function(n){

                   cultivar <- crop_GDD[n,]$cultivar

                   title <- stringr::str_c(crop_GDD[n,]$cultivar_long,
                                           " \u2014 T_base: ",
                                           crop_GDD[n,]$t_base,
                                           "°C, Required GDD: ",
                                           crop_GDD[n,]$min_gdd)

                   if(file.exists(paste0("./figures/cultivar_niches/",cultivar,".pdf")) & file.exists(paste0("./figures/cultivar_niches/",cultivar,".mp4")))
                     return(paste0("./figures/cultivar_niches/",cultivar,".pdf"))

                   rasts <- readr::read_rds(paste0("./data/derived_data/recons/",cultivar,"_recons.rds")) %>%
                     purrr::map(function(x){
                       x %>%
                         magrittr::extract2(which(.@z$`Years BP` > 1000)) %>%
                         # raster:::readAll() %>%
                         magrittr::extract2(raster::nlayers(.):1) 
                     })


                   years <- rasts[[1]] %>%
                     names() %>%
                     gsub(pattern = "X",
                          replacement = "",
                          x = .) %>%
                     as.numeric()

                   breaks <- seq(0, 100, 10)

                   pal <- (RColorBrewer::brewer.pal(10, "Spectral") %>%
                             rev() %>%
                             colorRampPalette(.))(length(breaks) - 1)

                   if(!file.exists(paste0("./figures/cultivar_niches/",cultivar,".pdf")))
                     guedesbocinsky2018:::space_time_plot(
                       the_brick = rasts$Z,
                       the_brick_lower = rasts$Z_Lower,
                       the_brick_upper = rasts$Z_Upper,
                       out_file = paste0("./figures/cultivar_niches/",cultivar,".pdf"),
                       title = title,
                       time = years,
                       timelim = c(max(years),min(years)),
                       timeaxis =  seq(from = max(years)-500,
                                       to = min(years),
                                       by = -500),
                       timelab = "Years BP",
                       zbreaks = breaks,
                       zlab = "Probability of being in niche",
                       zaxis = seq(0,100,10),
                       zcolors = pal
                     )

                   if(!file.exists(paste0("./figures/cultivar_niches/",cultivar,".mp4")))
                     guedesbocinsky2018:::space_time_video(                       the_brick = rasts$Z,
                                                                                  the_brick_lower = rasts$Z_Lower,
                                                                                  the_brick_upper = rasts$Z_Upper,
                                                                                  out_file = paste0("./figures/cultivar_niches/",cultivar,".mp4"),
                                                                                  title = title,
                                                                                  time = years,
                                                                                  timelim = c(max(years),min(years)),
                                                                                  timeaxis =  seq(from = max(years)-500,
                                                                                                  to = min(years),
                                                                                                  by = -500),
                                                                                  timelab = "Years BP",
                                                                                  zbreaks = breaks,
                                                                                  zlab = "Probability of being in niche",
                                                                                  zaxis = seq(0,100,10),
                                                                                  zcolors = pal
                     )

                   return(paste0("./figures/cultivar_niches/",cultivar,".pdf"))

                 })

message("Plotting of cultivar niche reconstructions complete: ", capture.output(Sys.time() - time_check))

Chronometric analysis {-}

message("Beginning chronometric co-analysis")

# Log in to tDAR API
tdar::tdar_login(tdar_user = params$tdar_un,
                 tdar_password = params$tdar_pw)

# Read in the site/chronometric data
chronometric_data <- 
  get_restricted_site_data(tdar_user = params$tdar_un,
                           tdar_password = params$tdar_pw)

labs <- chronometric_data$labs
refs <- chronometric_data$refs
chronometric_data <- chronometric_data$data

# Filter out sites not in study area
chronometric_data %<>%
  sf::st_as_sf(coords = c("Longitude", "Latitude"), 
               remove = FALSE) %>%
  sf::st_set_crs("+proj=longlat") %>%
  sf::st_intersection(ASIA_poly %>%
                        sf::st_as_sf() %>%
                        sf::st_transform("+proj=longlat") %>%
                        sf::st_geometry()) %>%
  sf::st_intersection(countries %>%
                        sf::st_union() %>%
                        sf::st_transform("+proj=longlat")) %>%
  sf::st_set_geometry(NULL) %>%
  magrittr::set_names(names(chronometric_data))

# Write table of dates
list(`Chronometric Data` = 
       chronometric_data %>%
       dplyr::mutate(`Estimated age range (BP)` = stringr::str_c(`Age range lower (BP)`,"–",
                                                                 `Age range upper (BP)`)) %>%
       dplyr::select(Site,
                     Period,
                     `Lab sample identifier`,
                     Material,
                     `14C age BP`,
                     `1-sigma uncertainty`,
                     `Estimated age range (BP)`,
                     Reference) %>%
       # dplyr::filter(!is.na(`14C age BP`)) %>%
       dplyr::arrange(Site, Period),

     `Radiocarbon Labs` = labs,

     `References` = refs) %>%
  writexl::write_xlsx("./data/derived_data/Data_file_S1_sites_dates.xlsx")

# Write some stats about the sites data used in this analysis
readr::write_lines(c("Sites data (after spatial and crop filtering)",
                     paste0("Number of entries: ",
                            chronometric_data %>%
                              nrow()),
                     paste0("Number of sites: ",
                            chronometric_data %$%
                              Site %>%
                              unique() %>%
                              length()),
                     paste0("Number of distinct occupations: ",
                            chronometric_data %>%
                              dplyr::select(Site,Period) %>%
                              dplyr::distinct() %>%
                              nrow()),
                     paste0("Number of radiocarbon dates: ",
                            chronometric_data %>%
                              dplyr::filter(!is.na(`14C age BP`)) %>%
                              nrow()),
                     paste0("Number of radiocarbon sites: ",
                            chronometric_data %>%
                              dplyr::filter(!is.na(`14C age BP`)) %$%
                              Site %>%
                              unique() %>%
                              length())
),
"./data/derived_data/data_stats.txt")


# A function to calibrate either 14C dates (using BchronCalibrate), or
# site age estimates (using a flat density estimation)
calibrate_density <- function(x){
  if(!is.na(x$`14C age BP`)){
    out_calib <- Bchron::BchronCalibrate(ages = x$`14C age BP`,
                                         ageSds = x$`1-sigma uncertainty`,
                                         calCurves = 'intcal13') %>%
      magrittr::extract2(1)
    out_calib <- out_calib[c("ageGrid","densities")]
  } else {
    out_calib <- list()
    out_calib$ageGrid <- x$`Age range lower (BP)` : x$`Age range upper (BP)`
    out_calib$densities <- rep(1 / (x$`Age range lower (BP)` - x$`Age range upper (BP)`), length(out_calib$ageGrid))
  }
  return(out_calib)
}

# A function to extract density estimates over a vector of ages (years)
extract_density <- function(dens, vect){
  out_extract <- rep(0,length(vect))
  out_extract[which(vect %in% dens$ageGrid)] <- dens$densities[which(dens$ageGrid %in% vect)]
  return(out_extract)
}

densities <- chronometric_data %>%
  dplyr::select(Site,
                Period,
                `14C age BP`,
                `1-sigma uncertainty`,
                `Age range lower (BP)`,
                `Age range upper (BP)`) %>%
  purrrlyr::by_row(calibrate_density, # Generate probability densities for 14C dates and age ranges
                   .to = "Density") %>%
  dplyr::mutate(Prediction = purrr::map(Density, # Extract probabilities for Marcott years
                                        extract_density,
                                        vect = marcott2013$YearBP)) %>%
  dplyr::select(Site, Period, Prediction) %>%
  dplyr::group_by(Site, Period) %>%
  purrrlyr::by_slice(purrr::map,
                     ~ Reduce(x = .x, f = "+"), 
                     .to = "Density") %>% # Sum probabilities over sites and periods (as in Oxcal SUM command)
  dplyr::mutate(Density = purrr::map(Density, "Prediction"),
                Density = purrr::map(Density, ~ .x / sum(.x, na.rm = T))) # re-normalize to 1

# Filter out sites that don't have any occupation during the Marcott reconstruction
densities %<>%
  dplyr::filter(purrr::map_lgl(Density,
                               ~any(.x > 0)))

# Plot each site's probablility distribution
dir.create("./figures/site_densities/",
           recursive = TRUE,
           showWarnings = FALSE)
purrr::walk(unique(densities$Site), 
            function(site){
              if(!file.exists(paste0("./figures/site_densities/",site,".pdf"))){
                cairo_pdf(filename = paste0("./figures/site_densities/",site,".pdf"))
                g <- densities %>%
                  dplyr::filter(`Site` == site) %$%
                  Density %>%
                  magrittr::set_names(1:length(.)) %>%
                  tibble::as_tibble() %>%
                  dplyr::mutate(`Years BP` = marcott2013$YearBP) %>%
                  tidyr::gather(Period, 
                                Density, 
                                dplyr::num_range("",
                                                 1:(ncol(.)-1))
                  ) %>%
                  ggplot2::ggplot(ggplot2::aes(x = `Years BP`,
                                               y = Density,
                                               colour = Period)) +
                  ggplot2::geom_line() +
                  ggplot2::xlim(6000,0) +
                  ggplot2::ggtitle(site)
                print(g)
                dev.off()
              }
            })

# # Summing across all distributions yields the net distribution
# cairo_pdf(filename = "./figures/all_sites_density.pdf")
# g <- densities %$%
#   Density %>%
#   do.call(cbind,.) %>%
#   {rowSums(.)/ncol(.)} %>%
#   magrittr::multiply_by(20) %>%
#   tibble::tibble(`Years BP` = marcott2013$YearBP,
#                  Density = .) %>%
#   ggplot2::ggplot(ggplot2::aes(x = `Years BP`,
#                                y = Density)) +
#   ggplot2::geom_line() +
#   ggplot2::xlim(6000,0)
# print(g)
# dev.off()

# Create a spatial object of the sites
sites <- chronometric_data %>%
  dplyr::mutate(foxtail_millet = ifelse(is.na(`Foxtail millet`), FALSE, TRUE),
                broomcorn_millet = ifelse(is.na(`Broomcorn millet`), FALSE, TRUE),
                wheat = ifelse(is.na(Wheat), FALSE, TRUE),
                barley = ifelse(is.na(Barley), FALSE, TRUE),
                buckwheat = ifelse(is.na(Buckwheat), FALSE, TRUE),
                rice = ifelse(is.na(`Rice`), FALSE, TRUE)) %>%
  dplyr::select(Site,
                Period,
                Longitude,
                Latitude,
                `14C date on cereal?`,
                foxtail_millet,
                broomcorn_millet,
                wheat,
                barley,
                buckwheat,
                rice) %>%
  dplyr::ungroup() %>%
  dplyr::distinct() %>%
  tidyr::gather(Crop, Present, -Site, -Period, -Longitude, -Latitude, -`14C date on cereal?`) %>%
  dplyr::filter(Present) %>%
  dplyr::select(-Present) %>%
  dplyr::distinct() %>%
  dplyr::arrange(Site, Period) %>%
  sf::st_as_sf(coords = c("Longitude", "Latitude")) %>%
  sf::st_set_crs("+proj=longlat")

# Map the sites
cairo_pdf("./figures/Figure_1_crop_map.pdf", width = 7.2, height = 10.5)
print(sites %>%
        dplyr::mutate(Crop = dplyr::recode_factor(Crop,
                                                  foxtail_millet = "Foxtail millet",
                                                  broomcorn_millet = "Broomcorn millet",
                                                  wheat = "Wheat",
                                                  barley = "Barley",
                                                  buckwheat = "Buckwheat",
                                                  rice = "Rice")) %>%
        ggplot2::ggplot() +
        ggplot2::geom_sf(data = countries,
                         inherit.aes = FALSE,
                         show.legend = FALSE) +
        ggplot2::geom_sf(ggplot2::aes(colour = Crop,
                                      shape = `14C date on cereal?`),
                         show.legend = "point",
                         inherit.aes = FALSE,
                         size = 1) +
        ggplot2::scale_x_continuous(expand = c(0, 0)) +
        ggplot2::scale_y_continuous(expand = c(0, 0)) +
        ggplot2::theme_minimal(base_size = 7) +
        ggplot2::guides(colour=FALSE) +
        ggplot2::theme(legend.position="none",
                       strip.text.x = ggplot2::element_text(size = 9, 
                                                            face = "bold")) +
        ggplot2::facet_wrap( ~ Crop, ncol = 2))
dev.off()

# A function to extract niches for each crop
extract_niches <- function(x){
  out_niche <- paste0("./data/derived_data/recons/all_",x$Crop[[1]],".rds") %>%
    readr::read_rds() %$%
    Z %>%
    raster::extract(x %>%
                      as("Spatial")) %>%
    split(row(.))

  x %<>%
    dplyr::mutate(Niche = out_niche)

  return(x)
}

# Extract niches for each crop
niches <- sites %>%
  split(as.factor(sites$Crop)) %>%
  purrr::map(extract_niches) %>%
  do.call(rbind, args = .)

# A function to extract the 95% confidence interval of a distribution
get_CI <- function(dens){
  dens[is.na(dens)] <- 0
  cum_dens <- dens %>%
    cumsum() %>%
    magrittr::divide_by(sum(dens, na.rm = T))
  lower <- which(cum_dens >= 0.025)[1]
  mid <- which(cum_dens >= 0.5)[1]
  upper <- which(cum_dens >= 0.975)[1]
  return(list(Density = dens, 
              Median = mid, 
              CI = c(lower = lower, 
                     upper = upper)))
}

# A function to calculate local niches
local_niche <- function(niche, dens){
  if(is.na(dens$CI[["lower"]]) | is.na(dens$CI[["upper"]])) return(NULL)
  out_niche <- list()
  out_niche$Niche <- rep(NA, length(niche))
  out_niche$Niche[dens$CI[["lower"]]:dens$CI[["upper"]]] <- niche[dens$CI[["lower"]]:dens$CI[["upper"]]]
  out_niche$Niche <- out_niche$Niche / 100 # convert to probability
  out_niche$Median <- quantile(out_niche$Niche, probs = 0.5, na.rm = TRUE)
  out_niche$CI <- c(quantile(out_niche$Niche, probs = 0.025, na.rm = TRUE),
                    quantile(out_niche$Niche, probs = 0.975, na.rm = TRUE))
  names(out_niche$CI) <- c("lower","upper")
  return(out_niche)
}

# Get the "Local" niche densities, by multiplying the site occupation probability densities
# by the crop niche probabilities
# Join the niche and density tables
niche_densities <- niches %>%
  dplyr::left_join(densities, by = c("Site", "Period")) %>%
  dplyr::select(Site:Niche, Density) %>%
  # Get local densities
  dplyr::mutate(Density = purrr::map(Density, get_CI),
                Niche = purrr::map2(Niche, Density, local_niche)
  )

# Create biplots of each crop/site
p <- niche_densities %>%
  dplyr::mutate(`Site, Period` = stringr::str_c(Site,
                                                ifelse(is.na(Period),
                                                       "",
                                                       stringr::str_c(", ",Period))
  )) %>%
  dplyr::filter(!purrr::map_lgl(Niche, is.null)) %>%
  dplyr::mutate(Crop = dplyr::recode_factor(Crop,
                                            foxtail_millet = "Foxtail millet",
                                            broomcorn_millet = "Broomcorn millet",
                                            wheat = "Wheat",
                                            barley = "Barley",
                                            buckwheat = "Buckwheat",
                                            rice = "Rice")) %>%
  dplyr::mutate(Density_Median = marcott2013$YearBP[purrr::map_int(Density, "Median")],
                Density_Lower = marcott2013$YearBP[purrr::map(Density, "CI") %>%
                                                     purrr::map_dbl("lower")],
                Density_Upper = marcott2013$YearBP[purrr::map(Density, "CI") %>%
                                                     purrr::map_dbl("upper")],
                Crop_Median = purrr::map_dbl(Niche, "Median"),
                Crop_Lower = purrr::map(Niche, "CI") %>%
                  purrr::map_dbl("lower"),
                Crop_Upper = purrr::map(Niche, "CI") %>%
                  purrr::map_dbl("upper")) %>%
  ggplot2::ggplot(ggplot2::aes(x = Density_Median,
                               y = Crop_Median,
                               colour = Crop,
                               label = `Site, Period`)) +
  ggplot2::geom_point(na.rm = TRUE) +
  ggplot2::geom_errorbarh(ggplot2::aes(xmin = Density_Lower,
                                       xmax = Density_Upper),
                          na.rm = TRUE) +
  ggplot2::geom_errorbar(ggplot2::aes(ymin = Crop_Lower,
                                      ymax = Crop_Upper),
                         na.rm = TRUE) +
  ggplot2::xlim(6000,0) +
  ggplot2::ylim(0,1) +
  ggplot2::xlab("Years BP") +
  ggplot2::ylab(stringr::str_c("Probability of being in the thermal niche (%)"))

cairo_pdf("./figures/Figure_3_all_crossplot.pdf", width = 7.2, height = 7)
print(p +
        ggplot2::theme_minimal(base_size = 7) +
        ggplot2::theme(legend.position="none",
                       strip.text.x = ggplot2::element_text(size = 9, face = "bold")) +
        ggplot2::facet_wrap( ~ Crop, ncol=2))
dev.off()

pp <- plotly::ggplotly(tooltip = c("label"))
htmlwidgets::saveWidget(widget = plotly::as_widget(pp),
                        file = "Data_file_S2_all_crossplot.html")
file.copy("Data_file_S2_all_crossplot.html",
          to = "./figures/Data_file_S2_all_crossplot.html",
          overwrite = TRUE)
unlink("Data_file_S2_all_crossplot.html")

# Write out a table
niche_densities_extract <- niche_densities %>%
  dplyr::filter(!purrr::map_lgl(Niche, is.null)) %>%
  dplyr::mutate(`Age median (BP)` = marcott2013$YearBP[purrr::map_int(Density, "Median")],
                `Age lower CI (BP)` = marcott2013$YearBP[purrr::map(Density, "CI") %>%
                                                           purrr::map_dbl("lower")],
                `Age upper CI (BP)` = marcott2013$YearBP[purrr::map(Density, "CI") %>%
                                                           purrr::map_dbl("upper")],
                `Niche probability median` = purrr::map_dbl(Niche, "Median"),
                `Niche probability lower CI` = purrr::map(Niche, "CI") %>%
                  purrr::map_dbl("lower"),
                `Niche probability upper CI` = purrr::map(Niche, "CI") %>%
                  purrr::map_dbl("upper")) %>%
  dplyr::select(-Niche:-Density) %>%
  dplyr::distinct() %>%
  dplyr::mutate(Crop = dplyr::recode_factor(Crop,
                                            foxtail_millet = "Foxtail millet",
                                            broomcorn_millet = "Broomcorn millet",
                                            wheat = "Wheat",
                                            barley = "Barley",
                                            buckwheat = "Buckwheat",
                                            rice = "Rice")) %>%
  dplyr::arrange(Site, Period, Crop) 

niche_densities_extract %>%
  sf::st_set_geometry(NULL) %>%
  readr::write_csv("./data/derived_data/Table_S2_age_niche_estimates.csv")

# # Calculate percent of sites whose median niche probabilities are >0.75 for each crop
# niche_densities_extract %>%
#   dplyr::mutate(`Niche probability median` = `Niche probability median` > 0.75) %>%
#   dplyr::summarise(`Percent above 0.75 niche probability` = mean(`Niche probability median`, na.rm = T))
# 
# 
# cairo_pdf("./figures/Figure_7_ancient_crops_density.pdf", width = 4.6, height = 3)
# print(niche_densities_extract %>%
#         dplyr::filter(Crop != "Buckwheat") %>%
#         dplyr::mutate(Crop = as.factor(Crop),
#                       Crop = dplyr::recode_factor(Crop,
#                                                   `Foxtail millet` = "Millet",
#                                                   `Broomcorn millet` = "Millet")) %>%
#         ggplot2::ggplot(ggplot2::aes(`Niche probability median` * 100,
#                                      ..density..,
#                                      color = Crop,
#                                      fill = Crop)) +
#         ggplot2::geom_density(n = 1000,
#                               bw = 2,
#                               alpha = 0.1) +
#         ggplot2::scale_x_continuous(limits = c(0, 100),
#                                     expand = c(0, 0)) +
#         ggplot2::scale_y_continuous(expand = c(0, 0, 0, 0.001)) +
#         ggplot2::theme_minimal(base_size = 7) +
#         ggplot2::xlab("Probability of being in the thermal niche (%)") +
#         ggplot2::ylab("Probability density"))
# dev.off()

# # Plot ECDF for each crop of median local niche density among sites
# cairo_pdf("./figures/niche_ecdf.pdf",
#           width = 3.5,
#           height = 2)
# print({
#   niche_densities_extract %>%
#     ggplot2::ggplot(ggplot2::aes(x = `Niche probability median`,
#                                  color = Crop)) + 
#     ggplot2::stat_ecdf(geom = "step") +
#     ggplot2::ylab("Empirical cumulative probability") +
#     ggplot2::theme_minimal(base_size = 7)
# })
# dev.off()

# # Plot relationship between number of crops and age
# cairo_pdf("./figures/age_counts.pdf",
#           width = 3.5,
#           height = 2)
# print({
#   niche_densities_extract %>%
#     dplyr::select(-Crop) %>%
#     dplyr::group_by(Site, Period) %>%
#     sf::st_set_geometry(NULL) %>%
#     dplyr::summarise_all(.funs = mean) %>%
#     dplyr::left_join(niche_densities_extract %>%
#                        dplyr::group_by(Site, Period) %>%
#                        dplyr::count() %>%
#                        sf::st_set_geometry(NULL),
#                      by = c("Site", "Period")) %>%
#     dplyr::rename(`Number of crops` = n) %>%
#     dplyr::ungroup() %>%
#     ggplot2::ggplot() +
#     ggplot2::geom_point(ggplot2::aes(x = `Number of crops`,
#                                      y = `Niche probability median`),
#                         size = 1) +
#     ggplot2::geom_smooth(ggplot2::aes(x = `Number of crops`,
#                                       y = `Niche probability median`),
#                          method = "lm")+
#     ggplot2::theme_minimal(base_size = 7)
# })
# dev.off()

Generate niche videos {-}

## Plotting crop niche with associated sites
# create the cluster for parallel computation
message("Plotting crop niche reconstructions")
time_check <-  Sys.time()

# Combine the sites with the density data
site_densities <- niche_densities %>%
  dplyr::filter(!purrr::map_lgl(Niche, is.null)) %>%
  dplyr::mutate(Density_Lower = marcott2013$YearBP[purrr::map(Density, "CI") %>%
                                                     purrr::map_dbl("lower")],
                Density_Upper = marcott2013$YearBP[purrr::map(Density, "CI") %>%
                                                     purrr::map_dbl("upper")]) %>%
  dplyr::select(-Density) %>%
  dplyr::filter(!is.na(Density_Lower)) %>%
  dplyr::left_join(sites %>%
                     sf::st_set_geometry(NULL), 
                   by = c("Site", "Period", "Crop")) %>%
  dplyr::mutate(Niche = purrr::map(Niche, function(x){
    x$Niche <-
      tibble::tibble(`Year BP` = marcott2013$YearBP, 
                     Niche = x$Niche)
    return(x)
  }))

crops <- crop_GDD %>%
  dplyr::filter(crop %in% site_densities$Crop) %>%
  dplyr::select(crop_long, crop) %>%
  dplyr::distinct()

message("Generating niche videos.")
dir.create("./figures/crop_niches",
           recursive = TRUE,
           showWarnings = FALSE)

for(n in 1:nrow(crops)){

  crop <- crops[n,]$crop

  title <- stringr::str_c(crops[n,]$crop_long)

  if(file.exists(paste0("./figures/crop_niches/all_",crop,".pdf")) & file.exists(paste0("./figures/crop_niches/all_",crop,".mp4")))
    next

  rasts <- readr::read_rds(paste0("./data/derived_data/recons/all_",crop,".rds")) %>%
    purrr::map(function(x){
      x %>%
        magrittr::extract2(which((x %>%
                                    names() %>%
                                    gsub(pattern = "X",
                                         replacement = "",
                                         x = .) %>%
                                    as.numeric()) > 1000)) %>%
        # raster:::readAll() %>%
        magrittr::extract2(raster::nlayers(.):1)
    })

  years <- rasts[[1]] %>%
    names() %>%
    gsub(pattern = "X",
         replacement = "",
         x = .) %>%
    as.numeric()

  breaks <- seq(0, 100, 10)

  pal <- (RColorBrewer::brewer.pal(10, "Spectral") %>%
            rev() %>%
            colorRampPalette(.))(length(breaks) - 1)

  if(!file.exists(paste0("./figures/crop_niches/all_",crop,".pdf")))
    guedesbocinsky2018:::space_time_plot(the_brick = rasts$Z,
                                         the_brick_lower = rasts$Z_Lower,
                                         the_brick_upper = rasts$Z_Upper,
                                         out_file = paste0("./figures/crop_niches/all_",crop,".pdf"),
                                         title = title,
                                         time = years,
                                         timelim = c(max(years),min(years)),
                                         timeaxis =  seq(from = max(years)-500,
                                                         to = min(years),
                                                         by = -500),
                                         timelab = "Years BP",
                                         zbreaks = breaks,
                                         zlab = "Probability of being in niche",
                                         zaxis = seq(0,100,10),
                                         zcolors = pal
    )


  if(!file.exists(paste0("./figures/crop_niches/all_",crop,".mp4"))){
    # A function to extract site locations for a given year and crop, and plot them
    sites_plot <- function(years = NULL,
                           crops = NULL,
                           scale = scales::area_pal(c(0,1))){
      x_plot <- site_densities
      year_idx <- which(marcott2013$YearBP == years)

      if(!is.null(crops)){
        x_plot %<>%
          dplyr::filter(Crop %in% crops)
      }

      cexs <- purrr::map(x_plot$Niche,c("Niche","Niche")) %>%
        purrr::map(year_idx) %>%
        unlist() %>%
        scale()

      if(cexs %>%
         is.na() %>%
         all()) return() #Don't plot if no sites during this period

      x_plot %<>%
        dplyr::filter(!is.na(cexs))

      cexs <- cexs[!is.na(cexs)]

      x_plot %$%
        plot(geometry,
             cex = cexs,
             pch = 1,
             lwd = 3,
             col = "white",
             add = T)
      x_plot %$%
        plot(geometry,
             cex = cexs,
             pch = 1,
             lwd = 1.5,
             col = "black",
             add = T)
    }

    sites_legend <- function(){
      legend("bottomright",
             title = "Site % probability \nof being in niche",
             legend = seq(0,100,20),
             pch = 1,
             pt.lwd = 1.5,
             col = "black",
             pt.cex = scales::area_pal(c(0.5,3))(seq(0,1,0.2)),
             bty = "n",
             y.intersp = 1.5,
             cex = 0.8,
             text.font = 2
      )
    }

    guedesbocinsky2018:::space_time_video(the_brick = rasts$Z,
                                          the_brick_lower = rasts$Z_Lower,
                                          the_brick_upper = rasts$Z_Upper,
                                          out_file = paste0("./figures/crop_niches/all_",crop,".mp4"),
                                          title = title,
                                          time = years,
                                          timelim = c(max(years),min(years)),
                                          timeaxis =  seq(from = max(years)-500,
                                                          to = min(years),
                                                          by = -500),
                                          timelab = "Years BP",
                                          zbreaks = breaks,
                                          zlab = "Probability of being in niche",
                                          zaxis = seq(0,100,10),
                                          zcolors = pal,
                                          extra_plot_fun = purrr::partial(sites_plot,
                                                                          crops = crop,
                                                                          scale = scales::area_pal(c(0.5,3))),
                                          extra_legend_fun = sites_legend
    )

  }
}

# Create a static plot for paper publication
# A function to extract data from a crop raster brick
tidyCrop <- function(x, years){
  x %>%
    paste0("./data/derived_data/recons/all_",.,".rds") %>%
    readr::read_rds() %$%
    Z %>%
    magrittr::extract2(stringr::str_c("X",years)) %>%
    as("SpatialPixelsDataFrame") %>%
    tibble::as_tibble() %>%
    tidyr::gather(Year, Niche, -x:-y) %>%
    dplyr::rename(Longitude = x,
                  Latitude = y) %>%
    dplyr::mutate(Year = stringr::str_remove(Year, "X") %>%
                    factor(levels = years) %>%
                    forcats::fct_relabel(function(x){stringr::str_c(x," cal. BP")}),
                  Crop = x)
}

breaks <- seq(0, 100, 10)

pal <- (RColorBrewer::brewer.pal(10, "Spectral") %>%
          rev() %>%
          colorRampPalette(.))(length(breaks) - 1)

# A function to create a plot of years and crops
facet_niche <- function(crops, years){
  these_sites <- years %>%
    purrr::map(function(year){
      site_densities %>%
        dplyr::mutate(Year = year) %>%
        dplyr::filter(Density_Lower <= Year & Density_Upper >= Year,
                      Crop %in% crops) %>%
        dplyr::mutate(Longitude = sf::st_coordinates(geometry)[,1],
                      Latitude = sf::st_coordinates(geometry)[,2]) %>%
        dplyr::select(Site, Longitude, Latitude, Crop, Year)
    }) %>%
    do.call(rbind, .) %>%
    dplyr::mutate(Year = factor(Year, levels = years) %>%
                    forcats::fct_relabel(function(x){stringr::str_c(x," cal. BP")}),
                  Crop = dplyr::recode_factor(Crop,
                                              wheat = "Wheat",
                                              barley = "Barley",
                                              buckwheat = "Buckwheat",
                                              foxtail_millet = "Foxtail millet",
                                              broomcorn_millet = "Broomcorn millet",
                                              rice = "Rice"))

  these_rasters <- crops %>%
    purrr::map(tidyCrop, years = years) %>%
    dplyr::bind_rows() %>%
    dplyr::mutate(Crop = dplyr::recode_factor(Crop,
                                              wheat = "Wheat",
                                              barley = "Barley",
                                              buckwheat = "Buckwheat",
                                              foxtail_millet = "Foxtail millet",
                                              broomcorn_millet = "Broomcorn millet",
                                              rice = "Rice"),
                  Niche = Niche  %>%
                    cut(breaks = breaks,
                        include.lowest = TRUE)) 

  p <- these_rasters %>%
    ggplot2::ggplot() +
    ggplot2::geom_raster(mapping = ggplot2::aes(x = Longitude,
                                                y = Latitude,
                                                fill = Niche),
                         show.legend = TRUE) +
    ggplot2::geom_point(data = these_sites,
                        mapping = ggplot2::aes(x = Longitude,
                                               y = Latitude),
                        size = 1) +
    ggplot2::coord_quickmap() +
    ggplot2::facet_grid(Crop ~ Year,
                        switch = "y")

  return(p)
}

p <- facet_niche(crops = c("wheat", "barley", "buckwheat", "foxtail_millet", "broomcorn_millet", "rice"),
                 years = c(4030, 3550, 1690, 1010))

cairo_pdf("./figures/Figure_2_facet_niche.pdf", width = 7.2, height = 10.3)
print({
  p +
    ggplot2::scale_fill_manual(values = pal,
                               labels = c("0–10%",
                                          "10–20%",
                                          "20–30%",
                                          "30–40%",
                                          "40–50%",
                                          "50–60%",
                                          "60–70%",
                                          "70–80%",
                                          "80–90%",
                                          "90–100%"),
                               drop = FALSE,
                               name = "Probability of being in the thermal niche",
                               guide = ggplot2::guide_legend(
                                 direction = "horizontal",
                                 keyheight = ggplot2::unit(0.1, units = "in"),
                                 keywidth = ggplot2::unit(6/10, units = "in"),
                                 title.position = 'top',
                                 title.hjust = 0.5,
                                 label.hjust = 0,
                                 nrow = 1,
                                 byrow = T,
                                 label.position = "bottom"
                               )) +
    ggplot2::scale_y_continuous(position = "right") +
    ggplot2::theme_minimal(base_size = 7) +
    ggplot2::theme(plot.margin = ggplot2::margin(0,0,0,0, "in"),
                   strip.text.x = ggplot2::element_text(size = 8),
                   strip.text.y = ggplot2::element_text(size = 8),
                   legend.position = "bottom",
                   legend.margin = ggplot2::margin(0,0,0.1,0, "in"),
                   legend.title = ggplot2::element_text(face = "bold"))

})
dev.off()

message("Plotting of crop niche reconstructions complete: ", capture.output(Sys.time() - time_check))

Supplementary storage analysis {-}

storage <- readr::read_csv("../inst/storage.csv")

storage %>%
  readr::write_csv("./data/derived_data/Table_S3_storage.csv")

cairo_pdf("./figures/Figure_4_storage.pdf", width = 4.6, height = 3)
print({
  storage %>%
    ggplot2::ggplot(ggplot2::aes(y = YRS_Storage,
                                 x = Country)) +
    ggplot2::geom_boxplot(fill = "forestgreen",
                          varwidth = TRUE,
                          outlier.shape = NA) +
    ggplot2::xlab("Country") +
    ggplot2::ylab("Years of storage") +
    ggplot2::theme_minimal(base_size = 7) +
    ggplot2::scale_y_continuous(limits = c(0,5),
                                expand = c(0, 0, 0, 0))
})
dev.off()

Create submission directory {-}

unlink("./submission",
       recursive = TRUE, 
       force = TRUE)

dir.create("./submission",
           recursive = TRUE,
           showWarnings = FALSE)

# Copy figures
file.copy(from = "./figures/Figure_1_crop_map.pdf",
          to = "./submission/Figure_1_crop_map.pdf",
          overwrite = TRUE)
file.copy(from = "./figures/Figure_2_facet_niche.pdf",
          to = "./submission/Figure_2_facet_niche.pdf",
          overwrite = TRUE)
file.copy(from = "./figures/Figure_3_all_crossplot.pdf",
          to = "./submission/Figure_3_all_crossplot.pdf",
          overwrite = TRUE)
file.copy(from = "./figures/Figure_4_storage.pdf",
          to = "./submission/Figure_4_storage.pdf",
          overwrite = TRUE)
file.copy(from = "./figures/Figure_5_modern_crops.pdf",
          to = "./submission/Figure_5_modern_crops.pdf",
          overwrite = TRUE)
file.copy(from = "./figures/Figure_6_modern_crops_density.pdf",
          to = "./submission/Figure_6_modern_crops_density.pdf",
          overwrite = TRUE)

# Copy Tables
file.copy(from = "./data/derived_data/Table_S1_crops.csv",
          to = "./submission/Table_S1_crops.csv",
          overwrite = TRUE)
file.copy(from = "./data/derived_data/Table_S2_age_niche_estimates.csv",
          to = "./submission/Table_S2_age_niche_estimates.csv",
          overwrite = TRUE)
file.copy(from = "./data/derived_data/Table_S3_storage.csv",
          to = "./submission/Table_S3_storage.csv",
          overwrite = TRUE)

# Copy Data Files
file.copy(from = "./data/derived_data/Data_file_S1_sites_dates.xlsx",
          to = "./submission/Data_file_S1_sites_dates.xlsx",
          overwrite = TRUE)
file.copy(from = "./figures/Data_file_S2_all_crossplot.html",
          to = "./submission/Data_file_S2_all_crossplot.html",
          overwrite = TRUE)

# Copy Movies
file.copy(from = "./figures/crop_niches/all_wheat.mp4",
          to = "./submission/Movie_S1_wheat.mp4",
          overwrite = TRUE)
file.copy(from = "./figures/crop_niches/all_barley.mp4",
          to = "./submission/Movie_S2_barley.mp4",
          overwrite = TRUE)
file.copy(from = "./figures/crop_niches/all_broomcorn_millet.mp4",
          to = "./submission/Movie_S3_broomcorn_millet.mp4",
          overwrite = TRUE)
file.copy(from = "./figures/crop_niches/all_foxtail_millet.mp4",
          to = "./submission/Movie_S4_foxtail_millet.mp4",
          overwrite = TRUE)
file.copy(from = "./figures/crop_niches/all_buckwheat.mp4",
          to = "./submission/Movie_S5_buckwheat.mp4",
          overwrite = TRUE)
file.copy(from = "./figures/crop_niches/all_rice.mp4",
          to = "./submission/Movie_S6_rice.mp4",
          overwrite = TRUE)

# Write the compendium
system(paste0("cp -r ../ ",tempdir(),"/Data_file_S3_research_compendium/"))

system(paste("tar -zcf submission/Data_file_S3_research_compendium.tar.gz",
             "-C ",tempdir(),
             "--exclude='vignettes/data'",
             "--exclude='vignettes/figures'",
             "--exclude='vignettes/submission'",
             "--exclude='vignettes/guedesbocinsky2018_cache'",
             "--exclude='vignettes/zenodo'",
             "--exclude='.git'",
             "Data_file_S3_research_compendium"))

Create Zenodo archive {-}

unlink("./zenodo",
       recursive = TRUE, 
       force = TRUE)

dir.create("./zenodo",
           recursive = TRUE,
           showWarnings = FALSE)

system("tar -zcf ./zenodo/guedesbocinsky2018-1.0.0-output.tar.gz submission figures data")
pagebreak

References {-}

pagebreak

Colophon {-}

This report was generated on r Sys.time() using the following computational environment and dependencies:

# which R packages and versions?
devtools::session_info()

The current Git commit details are:

# what commit is this file at? You may need to change the path value
# if your Rmd is not in analysis/paper/
git2r::repository("..")


bocinsky/guedesbocinsky2018 documentation built on May 3, 2019, 8:59 p.m.