#' yield_to_gcam_basin
#'
#' Function that uses emulated yield data aggregated to GCAM basin level to create
#' GCAM region, basin, and irrigation level data for all GCAM commodities
#'
#' @param write_dir Default = "step3_yield_to_gcam_basin". Output Folder
#' @param emulated_basin_yield_dir Default = NULL
#' @param iso_GCAM_region_mapping Default = NULL
#' @param FAO_ag_mapping Default = NULL
#' @param iso_harvest_area_mapping Default = NULL
#' @param iso_GCAM_basin_mapping Default = NULL
#' @param esm_name Default = 'WRF'
#' @param scn_name Default = 'rcp8p5_hotter'
#' @param max_CCImult Default = 2.5 Upper limit on positive climate impacts (multiplier)
#' @param min_CCImult Default = 0.01 Lower limit on negative climate impacts (multiplier)
#' @param rolling_avg_years Default = 5 Set the number of years to define the range for rolling averages
#' @param maxHistYear Default = 2010 Historical year for which to apply rolling averages
#' @param minFutYear Default = 2015 Min future year for which to apply rolling averages
#' @param maxFutYear Default = 2100 Max future year for which to apply rolling averages
#' @param extrapolate_to Default = NULL. Note that this extrapolates to only one future year (eg, 2099 to 2100)
#' @keywords test
#' @return number
#' @importFrom rlang :=
#' @importFrom magrittr %>%
#' @export
#' @examples
#' \dontrun{
#' library(osiris)
#' osiris::yield_to_gcam_basin()
#' }
yield_to_gcam_basin <- function(write_dir = "step3_yield_to_gcam_basin",
emulated_basin_yield_dir = NULL,
iso_GCAM_region_mapping = NULL,
FAO_ag_mapping = NULL,
iso_harvest_area_mapping = NULL,
iso_GCAM_basin_mapping = NULL,
esm_name = "WRF",
scn_name = "rcp8p5_hotter",
max_CCImult = 2.5,
min_CCImult = 0.01,
rolling_avg_years = 5,
maxHistYear = 2010,
minFutYear = 2015,
maxFutYear = 2100,
extrapolate_to = NULL) {
#.........................
# Initialize
#.........................
rlang::inform("Starting yield_to_gcam_basin...")
# Check write dir
if(!dir.exists(write_dir)){dir.create(write_dir)}
# Initialize values
NULL -> AgMIPAbbrev -> C3avg_incl -> GCAM_commodity -> GCAM_region_ID -> GLU ->
GTAP_crop -> SAGE_crop -> UNIQUE_JOIN_FIELD -> base -> cm_crop -> cm_cropID ->
ctry_iso -> epic_crop -> gepic_crop -> glu_code -> image_crop -> impact ->
iso -> lpjguess_crop -> lpjml_crop -> maximp -> maxyr -> median -> outlier ->
pdssat_crop -> pegasus_crop -> rcp -> weight -> GCAM_basin_ID -> GLU_code ->
GLU_name -> HA -> ID -> ISO -> crop -> cropmodel -> gcm -> id -> irr ->
value -> year -> yield -> .
#.........................
# Custom functions
#.........................
# Backward rolling average
rollAvg <- function(x, n){
y <- stats::filter(x, rep(1/n, n), sides=1)
y
}
repeat_add_columns <- function(x, y) {
x %>%
dplyr::mutate(UNIQUE_JOIN_FIELD = 1) %>%
dplyr::full_join(dplyr::mutate(y, UNIQUE_JOIN_FIELD = 1), by = "UNIQUE_JOIN_FIELD") %>%
dplyr::select(-UNIQUE_JOIN_FIELD)
}
# Identify outliers.
# See: Davies, P.L. and Gather, U. (1993), "The identification of multiple outliers"
# (with discussion), J. Amer. Statist. Assoc., 88, 782-801.
is_outlier <- function(x, devs = 3.2) {
x <- stats::na.omit(x)
lims <- stats::median(x) + c(-1, 1) * devs * stats::mad(x, constant = 1)
# x < lims[1] | x > lims[2]
x > lims[2]
} # is_outlier
# Remove outliers
remove_outlier <- function( DF, col, devs = 3.2 ) {
FINAL_DF <- subset( DF, !is_outlier( DF[[col]]))
return( FINAL_DF )
}
#.........................
# Read in Mapping Data Files
#.........................
iso_GCAM_regID <- tibble::as_tibble(utils::read.csv(file=iso_GCAM_region_mapping,head = TRUE, comment.char = "#", sep = ",") )
FAO_ag_items_PRODSTAT <- tibble::as_tibble(utils::read.csv(file=FAO_ag_mapping,head = TRUE, sep = ",") )
L100.LDS_ag_HA_ha <- tibble::as_tibble(utils::read.csv(file=iso_harvest_area_mapping,head = TRUE, sep = ",", comment.char = "#") )
iso_GCAM_basinID <- tibble::as_tibble(utils::read.csv(file=iso_GCAM_basin_mapping,head = TRUE, sep = ",") )
# correct abbreviations
FAO_ag_items_PRODSTAT %>%
dplyr::mutate_if(is.factor, as.character) %>%
dplyr::select(GTAP_crop, GCAM_commodity, lpjml_crop) ->
crops_gtap_gcam_allCMs
FAO_ag_items_PRODSTAT %>%
dplyr::mutate_if(is.factor, as.character) %>%
dplyr::select(AgMIP_fullNames, C3avg_incl) %>%
dplyr::rename(cropname = AgMIP_fullNames)->
crops_masterlist_c3avgincl
# switch factors to characters for remaining inputs
iso_GCAM_regID %>%
dplyr::mutate_if(is.factor, as.character) ->
iso_GCAM_regID
L100.LDS_ag_HA_ha %>%
dplyr::mutate_if(is.factor, as.character) ->
L100.LDS_ag_HA_ha
iso_GCAM_basinID %>%
dplyr::mutate_if(is.factor, as.character) ->
iso_GCAM_basinID
# reading emulated basin yield files in a loop
emufiles.list <- list.files(path=emulated_basin_yield_dir, pattern=paste0(esm_name, "_", scn_name), full.names=TRUE, recursive=FALSE)
# emufiles.list <- sub( ".csv", "", emufiles.list )
emufiles <- list()
for( i in emufiles.list){ #[c(20, 37, 173, 255, 400, 593,672,731,788)]
index <- which( emufiles.list == i )
emufiles[[ index ]] <- utils::read.csv(file=paste0( i),head = TRUE, sep = ",")
}
#names( emufiles ) <- emufiles.list
emu_data1 <- do.call(rbind, emufiles)
tibble::as_tibble(emu_data1) %>%
dplyr::mutate_if(is.factor, as.character) ->
emu_data1
#.........................
# Extrapolate
#.........................
# Example of extrapolating from 2099 to 2100:
if (!is.null(extrapolate_to)) {
rlang::inform(paste0("Extrapolating basin yield data to ", extrapolate_to))
# Change "maize" to "corn"
emu_data1$crop[emu_data1$crop == 'maize'] <- 'corn'
# Capitalize first letter of crop column (not all capitalized before)
emu_data1$crop <- gsub("(?<!\\w)(.)","\\U\\1", emu_data1$crop, perl = TRUE)
# Store years as numeric
emu_data1$year <- as.numeric(emu_data1$year)
# Run extrapolation function based on Local Polynomial Regression Fitting and add future year
emu_data1 <- emu_data1 %>%
dplyr::group_by(crop, irr, id) %>%
dplyr::do(stats::loess( yield ~ year , control = stats::loess.control(surface = "direct"), data = .) %>%
stats::predict(., tibble::tibble(year = extrapolate_to)) %>%
tibble::tibble(year = extrapolate_to, yield = .)) %>%
dplyr::bind_rows(emu_data1) %>%
tidyr::fill(gcm, cropmodel, HA, rcp, .direction = "downup") %>%
dplyr::select(names(emu_data1)) %>%
dplyr::ungroup() %>%
dplyr::arrange(crop, irr, id, year) %>%
dplyr::ungroup()
# Store years as numeric
emu_data1$yield <- as.numeric(emu_data1$yield)
}
# emu_data1 %>%
# dplyr::filter(cropmodel != "lpj-guess") ->
# emu_data1
# emu_data1 %>%
# dplyr::filter(rcp == "rcp8p5") ->
# emu_data1
#.........................
# Perform computations
#.........................
rlang::inform("Performing computations")
emu_data1 %>%
dplyr::arrange(rcp, gcm, cropmodel, id, crop, irr, year) %>%
# Drop inland water and any rows where the area weight is less than the exogenous floor
dplyr::rename(ID = id,
weight = HA) %>%
# dplyr::filter(weight >= weight_floor_ha) %>%
dplyr::filter( ID != 0) ->
emu_yield_HA_AllYears_glu_irr_isicrop
#Create data frames with the appropriate averages
# For the base years, calculate the average
# For the future years, calculate rolling averages
# get base average for each unique ID from historical years
emu_yield_HA_AllYears_glu_irr_isicrop %>%
dplyr::filter(year <= 2010,
year >= 1980) %>%
dplyr::group_by(ID, weight,rcp, gcm, cropmodel,crop, irr) %>%
dplyr::mutate(base = mean(yield)) %>%
dplyr::ungroup() %>%
dplyr::select(-year, -yield) %>%
dplyr::distinct() ->
emu_yield_HA_base_glu_irr_isicrop
# rolling average of future years; append column of historical years base average for each unique id
# Then calculate impacts
emu_yield_HA_AllYears_glu_irr_isicrop %>%
dplyr::filter(year > maxHistYear) %>%
dplyr::group_by(ID, weight,rcp, gcm, cropmodel,crop, irr) %>%
dplyr::mutate(yield = as.numeric(rollAvg(yield, n = rolling_avg_years ) ) ) %>%
dplyr::ungroup() %>%
# append base values
dplyr::left_join(emu_yield_HA_base_glu_irr_isicrop,
by = c("ID", "weight","rcp", "gcm", "cropmodel","crop", "irr")) %>%
# "dropping irrelevant columns" - mostly done already, just restrict down to every 5 years in future
dplyr::filter(year %in% seq(minFutYear, maxFutYear, 5)) %>%
# calculate the multipliers/impacts by doing Value/base for each year
dplyr::group_by(ID, weight, rcp, gcm, cropmodel,crop, irr) %>%
dplyr::mutate(impact = yield/base,
base = base/base) %>%
dplyr::ungroup() %>%
dplyr::select(rcp, gcm, cropmodel, ID, crop, irr, weight, base, year, impact) ->
emu_impacts_AllYears_base_glu_irr_isicrop
rm(emu_yield_HA_base_glu_irr_isicrop)
rm(emu_yield_HA_AllYears_glu_irr_isicrop)
# #2b. Calculate the C3 average in emulated basin yield and append this to the table with emulated basin yield crops differentiated
# printlog( "Calculating average C3 crop impacts to be applied to C3 crops not included in the emulated basin yield data set")
# printlog( "NOTE: Using area-weighted impacts to avoid bias of computing averages from crops with different base yields" )
# # NOTE: This step works for LPJmL's crop list. not sure how to modify for crop models with different crop lists
### just need a master column for the full range of crops
# #Aggregate crops with weighted average
emu_impacts_AllYears_base_glu_irr_isicrop %>%
dplyr::left_join(crops_masterlist_c3avgincl, by = c("crop" = "cropname")) %>%
dplyr::filter(C3avg_incl == 1) %>%
dplyr::select(-C3avg_incl) %>%
# dplyr::filter(crop %in% FAO_ag_items_PRODSTAT$LPJmL_crop[FAO_ag_items_PRODSTAT$C3avg_include == 1]) %>%
dplyr::group_by(ID, rcp, gcm, cropmodel, irr, year) %>%
dplyr::summarise(impact = stats::weighted.mean(impact, weight),
base = stats::weighted.mean(base, weight)) %>%
dplyr::ungroup() %>%
dplyr::mutate(crop="C3avg") %>%
dplyr::bind_rows(emu_impacts_AllYears_base_glu_irr_isicrop,.) %>%
# add in iso info for each basin so that we can use it to go from basin -> basin + region id
dplyr::left_join(dplyr::select(iso_GCAM_basinID, GCAM_basin_ID, ISO), by = c("ID" = "GCAM_basin_ID")) %>%
dplyr::mutate(ISO = tolower(as.character((ISO)))) %>%
dplyr::rename(iso = ISO) %>%
# add region id
dplyr::left_join(dplyr::select(iso_GCAM_regID, iso, GCAM_region_ID), by ="iso") %>%
# replace Soy with soy
dplyr::mutate(crop = dplyr::case_when(crop == "Soy" ~ "soy", TRUE ~ crop)) ->
emu_impacts_AllYears_base_glu_irr_isicrop_c3avg
# Join crop model and gcam commodity identifying information to the LDS harvested area set (which is HA by glu and gtap crop)
L100.LDS_ag_HA_ha$glu_code <- sprintf("GLU%03d",L100.LDS_ag_HA_ha$glu_code) # modify glu_code format
L100.LDS_ag_HA_ha %>%
dplyr::rename(HA = value,
GTAP_crop = SAGE_crop,
GLU = glu_code,
iso = ctry_iso) %>%
dplyr::left_join(crops_gtap_gcam_allCMs, by = "GTAP_crop") %>%
stats::na.omit() %>%
dplyr::filter(HA != 0) %>%
dplyr::mutate(ID = as.numeric(substr(GLU, 4,6))) %>%
# repeat for all model run info combos
repeat_add_columns(tibble::tibble(cropmodel = unique(emu_impacts_AllYears_base_glu_irr_isicrop_c3avg$cropmodel))) %>%
# dplyr::rename(lpjml_crop = LPJmL_crop,
# gepic_crop = GEPIC_crop) %>%
tidyr::gather(cm_cropID, cm_crop, -iso, -GLU, -GTAP_crop, -HA, -ID, -cropmodel) %>% #, -GCAM_commodity) %>% # may need to kill GCAM commodity here and join separately in next pipeline to avoid errors
#dplyr::filter(paste0(cropmodel, "_crop") == cm_cropID) %>%
dplyr::select(-cm_cropID) ->
LDS_HA_iso_glu_gtap_cm_cmcrop
# join the HA and use to aggregate to GCAM commodity for each region-glu-irr
emu_impacts_AllYears_base_glu_irr_isicrop_c3avg %>%
dplyr::left_join(LDS_HA_iso_glu_gtap_cm_cmcrop, by = c("iso", "ID", "cropmodel", "crop" = "cm_crop")) %>%
dplyr::select(-weight, -ID, -iso) %>% # drop mirca harvested area weights from grid processing and other unneeded info
stats::na.omit() %>%
dplyr::left_join(dplyr::select(FAO_ag_items_PRODSTAT, GTAP_crop, GCAM_commodity), by = "GTAP_crop") %>%
dplyr::group_by(rcp, gcm, cropmodel, irr, GCAM_region_ID, GLU, GCAM_commodity, year) %>%
dplyr::summarise(impact = stats::weighted.mean(impact, HA),
base = stats::weighted.mean(base, HA)) %>%
dplyr::ungroup() %>%
dplyr::mutate(impact = dplyr::if_else(impact < min_CCImult, min_CCImult, impact)) ->
ag_impacts_rcp_gcm_gcm_R_GLU_C_IRR_allyears
# Remove FodderGrass for consistency across crop models (only epic and lpjguess cover it)
ag_impacts_rcp_gcm_gcm_R_GLU_C_IRR_allyears %>%
dplyr::filter(GCAM_commodity != "FodderGrass") ->
ag_impacts_rcp_gcm_gcm_R_GLU_C_IRR_allyears
# some labeling for ag
ag_impacts_rcp_gcm_gcm_R_GLU_C_IRR_allyears %>%
dplyr::left_join(dplyr::select(iso_GCAM_basinID, GLU_code, GLU_name),
by = c("GLU" = "GLU_code")) ->
ag_impacts_rcp_gcm_gcm_R_GLU_C_IRR_allyears
# # If not removing outliers
ag_impacts_rcp_gcm_gcm_R_GLU_C_IRR_allyears ->
ag_impacts_rcp_gcm_gcm_R_GLU_C_IRR_allyears1
# if removing outliers
# identify outliers for ag:
ag_impacts_rcp_gcm_gcm_R_GLU_C_IRR_allyears %>%
dplyr::group_by(rcp, gcm, cropmodel, GCAM_commodity, irr, GLU) %>%
dplyr::mutate(outlier = is_outlier(impact)) %>%
dplyr::ungroup() ->
ag_tmp
# Get last non-outlier year and impact for each crop-irr-basin to replace outlier years in the crop-irr-basin
ag_tmp %>%
dplyr::filter(outlier == FALSE) %>%
dplyr::group_by(rcp, gcm, cropmodel, GCAM_region_ID, GLU, GLU_name, GCAM_commodity, irr) %>%
dplyr::summarize(maximp = max(impact)) %>%
dplyr::ungroup() ->
ag_tmp1
# Join that to the table of outliers, and replace the outlier years
ag_tmp %>%
dplyr::left_join(ag_tmp1, by = c("rcp", "gcm", "cropmodel", "GCAM_region_ID", "GLU", "GLU_name", "GCAM_commodity", "irr")) %>%
dplyr::mutate(impact = dplyr::if_else(outlier == TRUE, maximp, impact)) %>%
dplyr::mutate(impact = dplyr::if_else(impact > max_CCImult, max_CCImult, impact)) %>%
dplyr::select(-outlier, -maximp) ->
ag_impacts_rcp_gcm_gcm_R_GLU_C_IRR_allyears1
rm(ag_tmp)
rm(ag_tmp1)
# #Bioenergy climate change impacts: use the median of the other crops
# again update to keep irrigation and no AEZ info; also add the GCAM_commodity label here, not done in original
ag_impacts_rcp_gcm_gcm_R_GLU_C_IRR_allyears1 %>%
dplyr::ungroup() %>%
dplyr::select(-GCAM_commodity) %>%
dplyr::group_by(rcp, gcm, cropmodel, irr, GLU, GLU_name, GCAM_region_ID, year) %>%
dplyr::summarise_if(is.numeric, stats::median) %>%
dplyr::mutate(GCAM_commodity = "biomass") ->
bio_impacts_rcp_gcm_gcm_R_GLU_C_IRR_allyears
bio_impacts_rcp_gcm_gcm_R_GLU_C_IRR_allyears ->
bio_impacts_rcp_gcm_gcm_R_GLU_C_IRR_allyears1
# write
utils::write.csv(ag_impacts_rcp_gcm_gcm_R_GLU_C_IRR_allyears1, paste0(write_dir, "/ag_impacts_", esm_name, "_", scn_name, "_rcp_gcm_gcm_R_GLU_C_IRR_allyears_RA",rolling_avg_years, "_gridcull_allyroutlier.csv"), row.names=FALSE)
utils::write.csv(bio_impacts_rcp_gcm_gcm_R_GLU_C_IRR_allyears1, paste0(write_dir, "/bio_impacts_", esm_name, "_", scn_name, "_rcp_gcm_gcm_R_GLU_C_IRR_allyears_RA",rolling_avg_years, "_gridcull_allyroutlier.csv"), row.names=FALSE)
rlang::inform("yield_to_gcam_basin complete.")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.