osiris: Process climate impacts on agricultural yields for GCAM

Documented in create_AgProdChange_xml

#' create_AgProdChange_xml
#'
#' Function that takes the defined impacts and reference AgProdChange files then
#' updates and saves the new AgProdChange csv and xml files.
#'
#' @param write_dir Default = "step4_create_AgProdChange_xml". Output Folder
#' @param esm_name Default = NULL
#' @param scn_name Default = NULL
#' @param ssp Default = NULL
#' @param ag_irr_ref Default = NULL
#' @param bio_irr_ref Default = NULL
#' @param ag_impacts Default = NULL
#' @param bio_impacts Default = NULL
#' @param GCAM_region_mapping Default = NULL
#' @param timestep Default = 5
#' @param maxHistYear Default = 2010 Historical year
#' @param minFutYear Default = 2015 Min future year
#' @param appliedto Default = "full". This applies impacts to all regions. Other
#' options are "domestic" and "international".
#' @keywords test
#' @return number
#' @importFrom rlang :=
#' @importFrom magrittr %>%
#' @importFrom foreach %do%
#' @export
#' @examples
#' \dontrun{
#' library(osiris)
#' osiris::create_AgProdChange_xml()
#' }

create_AgProdChange_xml <- function(write_dir = "step4_create_AgProdChange_xml",
                                    esm_name = NULL,
                                    scn_name = NULL,
                                    ssp = NULL,
                                    ag_irr_ref = NULL,
                                    bio_irr_ref = NULL,
                                    ag_impacts = NULL,
                                    bio_impacts = NULL,
                                    GCAM_region_mapping = NULL,
                                    timestep = 5,
                                    maxHistYear = 2010,
                                    minFutYear = 2015,
                                    appliedto = "full") {



  #.........................
  # Initialize
  #.........................

  rlang::inform("Starting create_AgProdChange_xml...")

  # Check write dir
  if(!dir.exists(write_dir)){dir.create(write_dir)}

  # Initialize values
  NULL -> AgProdChange -> AgProductionTechnology -> GCAM_region_ID ->
    GLU -> GLU_name -> base.x -> base.y -> crop -> cropmodel -> f -> impact ->
    irr -> irrig -> previmpact -> prevyear -> region -> scenID -> tech -> tmp ->
    xml_needed_header -> year -> .


  #.........................
  # Custom functions
  #.........................

  # some header info for making csv to xml easier; will be unnecessary when gcamdata fixed for windows
  # https://stackoverflow.com/questions/12381117/add-header-to-file-created-by-write-csv
  write.table_with_header <- function(x, file, header, ...){
    cat(header, sep='\n',  file = file)
    utils::write.table(x, file, append = T, ...)
  }

  #.........................
  # Read in Data Files
  #.........................

  # gcam region names to replace gcam region id
  GCAM_region_names <- utils::read.csv(file = GCAM_region_mapping, head = TRUE, sep = ",", comment.char = "#")

  # read in original AgProdChange files generated by GCAM data system (zchunk_L2052.ag_prodchange_cost_irr_mgmt.R):

  ## reference and SSP2 use the same APC files:
  ## L2052.AgProdChange_ag_irr_ref.csv
  ## L2052.AgProdChange_bio_irr_ref.csv

  ## SSP1 and SSP5 use the same APC files:
  ## L2052.AgProdChange_irr_high.csv
  ## L2052.AgProdChange_bio_irr_ref.csv

  ## SSP3:
  ## L2052.AgProdChange_irr_low.csv
  ## L2052.AgProdChange_bio_irr_ref.csv

  ## SSP4:
  ## L2052.AgProdChange_irr_ssp4.csv
  ## L2052.AgProdChange_bio_irr_ref.csv

  AgProdChange_ag_irr_ref <- utils::read.csv(file= ag_irr_ref,
                                             head = TRUE, sep = ",", comment.char = '#', stringsAsFactors = F)
  AgProdChange_bio_irr_ref <- utils::read.csv(file= bio_irr_ref,
                                              head = TRUE, sep = ",", comment.char = '#', stringsAsFactors = F)

  # read in the impact files generated by the yield_to_gcam_basin function
  ag_impacts_rcp_gcm_gcm_R_GLU_C_IRR_allyears  <- tibble::as_tibble(utils::read.csv(file=ag_impacts, head = TRUE, sep = ","))
  bio_impacts_rcp_gcm_gcm_R_GLU_C_IRR_allyears <- tibble::as_tibble(utils::read.csv(file=bio_impacts, head = TRUE, sep = ","))

  #.........................
  # Perform computations
  #.........................

  # Because new APC depends on impacts in year t and year t-1, get that set up
  # so for each year, there is the impact and the previous year's impact:

  ag_impacts_rcp_gcm_gcm_R_GLU_C_IRR_allyears %>%
    dplyr::mutate(joinyear = year + timestep) %>%
    dplyr::rename(prevyear = year,
                  previmpact = impact) ->
    ag_impacts_prev

  ag_impacts_prev %>%
    dplyr::mutate(prevyear = maxHistYear,
                  joinyear = minFutYear,
                  previmpact = 1) %>%
    dplyr::distinct() %>%
    dplyr::bind_rows(ag_impacts_prev) ->
    ag_impacts_prev

  ag_impacts_rcp_gcm_gcm_R_GLU_C_IRR_allyears %>%
    dplyr::left_join(ag_impacts_prev,
                     by = c("rcp", "gcm", "cropmodel", "GCAM_region_ID", "GLU", "GLU_name",
                            "GCAM_commodity", "irr", "year" = "joinyear")) %>%
    dplyr::select(-base.x, -base.y, -prevyear) ->
    ag_impacts_rcp_gcm_gcm_R_GLU_C_IRR_allyears

  rm(ag_impacts_prev)


  bio_impacts_rcp_gcm_gcm_R_GLU_C_IRR_allyears %>%
    dplyr::mutate(joinyear = year + timestep) %>%
    dplyr::rename(prevyear = year,
                  previmpact = impact) ->
    bio_impacts_prev

  bio_impacts_prev %>%
    dplyr::mutate(prevyear = maxHistYear,
                  joinyear = minFutYear,
                  previmpact = 1) %>%
    dplyr::distinct() %>%
    dplyr::bind_rows(bio_impacts_prev) ->
    bio_impacts_prev

  bio_impacts_rcp_gcm_gcm_R_GLU_C_IRR_allyears %>%
    dplyr::left_join(bio_impacts_prev,
                     by = c("rcp", "gcm", "cropmodel", "GCAM_region_ID", "GLU", "GLU_name",
                            "GCAM_commodity", "irr", "year" = "joinyear")) %>%
    dplyr::select(-base.x, -base.y, -prevyear) ->
    bio_impacts_rcp_gcm_gcm_R_GLU_C_IRR_allyears

  rm(bio_impacts_prev)


  #bring in gcam region names and use to replace gcam region id in above 2
  ag_impacts_rcp_gcm_gcm_R_GLU_C_IRR_allyears %>%
    tibble::as_tibble() %>%
    dplyr::left_join(GCAM_region_names, by = c("GCAM_region_ID")) %>%
    dplyr::ungroup() %>%
    dplyr::select(-GCAM_region_ID) ->
    ag_impacts_rcp_gcm_gcm_R_GLU_C_IRR_allyears

  bio_impacts_rcp_gcm_gcm_R_GLU_C_IRR_allyears %>%
    tibble::as_tibble() %>%
    dplyr::left_join(GCAM_region_names, by = c("GCAM_region_ID")) %>%
    dplyr::ungroup() %>%
    dplyr::select(-GCAM_region_ID) ->
    bio_impacts_rcp_gcm_gcm_R_GLU_C_IRR_allyears


  # pull out basin names from AgSupplySubsector in above two files:
  AgProdChange_ag_irr_ref %>%
    tibble::as_tibble() %>%
    dplyr::mutate_if(is.factor, as.character) %>%
    dplyr::mutate(tmp = AgProductionTechnology) %>%
    tidyr::separate(tmp, c("crop", "GLU_name", "irrig", "tech"), sep = "_") %>%
    dplyr::select(-crop) ->
    AgProdChange_ag_irr_ref

  AgProdChange_bio_irr_ref %>%
    tibble::as_tibble() %>%
    dplyr::mutate_if(is.factor, as.character) %>%
    dplyr::mutate(tmp = AgProductionTechnology) %>%
    tidyr::separate(tmp, c("crop", "GLU_name", "irrig", "tech"), sep = "_") %>%
    dplyr::select(-crop) ->
    AgProdChange_bio_irr_ref


  # Actually incorporate impacts for each BASIN
  # work with irrigated and rainfed separately.

  # irrigated:
  L2052.ag_IRR <- subset(AgProdChange_ag_irr_ref, grepl(".*_IRR*",AgProductionTechnology))

  # pull off irrigated and complete so have full crop-region combo:
  # making sure every commodity-rcp-gcm-cm-irr-year is represented in each region.
  # fill in impact of 1 for anything pasting
  ag_impacts_rcp_gcm_gcm_R_GLU_C_IRR_allyears %>%
    dplyr::filter(irr == "IRR")  %>%
    dplyr::select(-GLU, -irr, -region) %>%
    dplyr::mutate_if(is.factor, as.character) %>%
    tidyr::complete(GLU_name = unique(L2052.ag_IRR$GLU_name), #L2052.ag_IRR[, c("region", "GLU_name")]),
                    GCAM_commodity = unique(L2052.ag_IRR$AgSupplySector), rcp, gcm, cropmodel,  year,
                    fill = list(impact = 1, previmpact = 1)) ->
    L115.ag_IRR_byBasinOnly


  L2052.ag_IRR %>%
    dplyr::left_join(L115.ag_IRR_byBasinOnly, by = c("GLU_name", "AgSupplySector" = "GCAM_commodity", "year") ) ->
    L2052.ag_IRR_CCI


  # If domestic (ie USA) then remove all other iso data.
  # If international (ie non_USA) then remove USA data.
  if(appliedto == "domestic"){
    L2052.ag_IRR_CCI %>%
      dplyr::mutate(impact = dplyr::if_else(region == 'USA', impact, NA_real_),
                    previmpact = dplyr::if_else(region == 'USA', previmpact, NA_real_)) %>%
      stats::na.omit() ->
      L2052.ag_IRR_CCI
  } else   if(appliedto == "international"){
    L2052.ag_IRR_CCI %>%
      dplyr::mutate(impact = dplyr::if_else(region != 'USA', impact, NA_real_),
                    previmpact = dplyr::if_else(region != 'USA', previmpact, NA_real_)) %>%
      stats::na.omit() ->
      L2052.ag_IRR_CCI
  }

  L2052.ag_IRR_CCI %>%
    dplyr::mutate(AgProdChange = ((impact / previmpact) ^ (1/timestep)) * (1 + AgProdChange) - 1) %>%
    dplyr::select(-GLU_name, -irrig, -tech, -impact, -previmpact) ->
    L2052.ag_IRR_CCI

  rm(L115.ag_IRR_byBasinOnly)


  L2052.bio_IRR <- subset(AgProdChange_bio_irr_ref, grepl(".*_IRR*",AgProductionTechnology))

  #pull off irrigated and complete so have full crop-region combo:
  #making sure every commodity-rcp-gcm-cm-irr-year is represented in each region. fill in impact of 1 for anything pasting
  bio_impacts_rcp_gcm_gcm_R_GLU_C_IRR_allyears %>%
    dplyr::filter(irr == "IRR")  %>%
    dplyr::select(-GLU, -irr, -region) %>%
    dplyr::mutate_if(is.factor, as.character) %>%
    tidyr::complete(GLU_name = unique(L2052.bio_IRR$GLU_name),
                    GCAM_commodity = unique(L2052.bio_IRR$AgSupplySector), rcp, gcm, cropmodel, year,
                    fill = list(impact = 1, previmpact = 1)) ->
    L115.bio_IRR_byBasinOnly


  L2052.bio_IRR %>%
    dplyr::left_join(L115.bio_IRR_byBasinOnly, by = c("GLU_name", "AgSupplySector" = "GCAM_commodity", "year") ) ->
    L2052.bio_IRR_CCI


  if(appliedto == "domestic"){
    L2052.bio_IRR_CCI %>%
      dplyr::mutate(impact = dplyr::if_else(region == 'USA', impact, NA_real_),
                    previmpact = dplyr::if_else(region == 'USA', previmpact, NA_real_)) %>%
      stats::na.omit() ->
      L2052.bio_IRR_CCI
  } else if(appliedto == "international"){
    L2052.bio_IRR_CCI %>%
      dplyr::mutate(impact = dplyr::if_else(region != 'USA', impact, NA_real_),
                    previmpact = dplyr::if_else(region != 'USA', previmpact, NA_real_)) %>%
      stats::na.omit() ->
      L2052.bio_IRR_CCI
  }

  L2052.bio_IRR_CCI %>%
    dplyr::mutate(AgProdChange = ((impact / previmpact) ^ (1/timestep)) * (1 + AgProdChange) - 1) %>%
    dplyr::select(-GLU_name, -irrig, -tech, -impact, -previmpact) ->
    L2052.bio_IRR_CCI


  rm(L115.bio_IRR_byBasinOnly)


  # rainfed:
  L2052.ag_RFD <- subset(AgProdChange_ag_irr_ref, grepl(".*_RFD*",AgProductionTechnology))

  #pull off irrigated and complete so have full crop-region combo:
  #making sure every commodity-rcp-gcm-cm-irr-year is represented in each region. fill in impact of 1 for anything pasting
  ag_impacts_rcp_gcm_gcm_R_GLU_C_IRR_allyears %>%
    dplyr::filter(irr == "RFD") %>%
    dplyr::select(-GLU, -irr, -region) %>%
    dplyr::mutate_if(is.factor, as.character) %>%
    tidyr::complete(GLU_name = unique(L2052.ag_RFD$GLU_name), #L2052.ag_IRR[, c("region", "GLU_name")]),
                    GCAM_commodity = unique(L2052.ag_RFD$AgSupplySector), rcp, gcm, cropmodel,  year,
                    fill = list(impact = 1, previmpact = 1)) ->
    L115.ag_RFD_byBasinOnly

  L2052.ag_RFD %>%
    dplyr::left_join(L115.ag_RFD_byBasinOnly, by = c("GLU_name", "AgSupplySector" = "GCAM_commodity", "year") ) ->
    L2052.ag_RFD_CCI


  if(appliedto == 'domestic'){
    L2052.ag_RFD_CCI %>%
      dplyr::mutate(impact = dplyr::if_else(region == 'USA', impact, NA_real_),
                    previmpact = dplyr::if_else(region == 'USA', previmpact, NA_real_)) %>%
      stats::na.omit() ->
      L2052.ag_RFD_CCI
  } else if(appliedto == 'international'){
    L2052.ag_RFD_CCI %>%
      dplyr::mutate(impact = dplyr::if_else(region != 'USA', impact, NA_real_),
                    previmpact = dplyr::if_else(region != 'USA', previmpact, NA_real_)) %>%
      stats::na.omit() ->
      L2052.ag_RFD_CCI
  }

  L2052.ag_RFD_CCI %>%
    dplyr::mutate(AgProdChange = ((impact / previmpact) ^ (1/timestep)) * (1 + AgProdChange) - 1) %>%
    dplyr::select(-GLU_name, -irrig, -tech, -impact, -previmpact) ->
    L2052.ag_RFD_CCI

  rm(L115.ag_RFD_byBasinOnly)


  L2052.bio_RFD <- subset(AgProdChange_bio_irr_ref, grepl(".*_RFD*",AgProductionTechnology))

  #pull off irrigated and complete so have full crop-region combo:
  #making sure every commodity-rcp-gcm-cm-irr-year is represented in each region. fill in impact of 1 for anything pasting
  bio_impacts_rcp_gcm_gcm_R_GLU_C_IRR_allyears %>%
    dplyr::filter(irr == "RFD") %>%
    dplyr::select(-GLU, -irr, -region) %>%
    dplyr::mutate_if(is.factor, as.character) %>%
    tidyr::complete(GLU_name = unique(L2052.bio_RFD$GLU_name), #L2052.ag_IRR[, c("region", "GLU_name")]),
                    GCAM_commodity = unique(L2052.bio_RFD$AgSupplySector), rcp, gcm, cropmodel,  year,
                    fill = list(impact = 1, previmpact = 1)) ->
    L115.bio_RFD_byBasinOnly

  L2052.bio_RFD %>%
    dplyr::left_join(L115.bio_RFD_byBasinOnly, by = c("GLU_name", "AgSupplySector" = "GCAM_commodity", "year") ) ->
    L2052.bio_RFD_CCI


  if(appliedto == "domestic"){
    L2052.bio_RFD_CCI %>%
      dplyr::mutate(impact = dplyr::if_else(region == 'USA', impact, NA_real_),
                    previmpact = dplyr::if_else(region == 'USA', previmpact, NA_real_)) %>%
      stats::na.omit() ->
      L2052.bio_RFD_CCI
  } else if(appliedto == "international"){
    L2052.bio_RFD_CCI %>%
      dplyr::mutate(impact = dplyr::if_else(region != 'USA', impact, NA_real_),
                    previmpact = dplyr::if_else(region != 'USA', previmpact, NA_real_)) %>%
      stats::na.omit() ->
      L2052.bio_RFD_CCI
  }

  L2052.bio_RFD_CCI %>%
    dplyr::mutate(AgProdChange = ((impact / previmpact) ^ (1/timestep)) * (1 + AgProdChange) - 1) %>%
    dplyr::select(-GLU_name, -irrig, -tech, -impact, -previmpact) ->
    L2052.bio_RFD_CCI

  rm(L115.bio_RFD_byBasinOnly)


  #bring together
  L2052.CCI <- dplyr::bind_rows(L2052.ag_IRR_CCI, L2052.bio_IRR_CCI, L2052.ag_RFD_CCI, L2052.bio_RFD_CCI)
  rm(L2052.ag_IRR_CCI)
  rm(L2052.bio_IRR_CCI)
  rm(L2052.ag_RFD_CCI)
  rm(L2052.bio_RFD_CCI)


  #pull off by scenario and save:
  L115.all_rcp_gcm_cm <- unique( L2052.CCI[ , c("rcp", "gcm", "cropmodel") ] )
  L115.all_rcp_gcm_cm$scenID <- 1:nrow( L115.all_rcp_gcm_cm )


  #add scenario id scenID to CCI table
  L2052.CCI %>%
    dplyr::left_join(L115.all_rcp_gcm_cm, by=c("rcp", "gcm", "cropmodel") ) %>%
    dplyr::mutate(AgProdChange = round(AgProdChange, 6)) ->
    L2052.CCI


  #loop over IDS and save as own csv table
  options("gcamdata.have_java"=TRUE)

  for(i in unique(L2052.CCI$scenID)){
    L2052.CCI %>% dplyr::filter(scenID==i) -> A
    rcp <- paste(unique(A$rcp))
    gcm <- paste(unique(A$gcm))
    cm <- paste(unique(A$cropmodel))

    A %>%
      dplyr::select(-rcp, -gcm, -cropmodel, -scenID) ->
      A

    write.table_with_header(A, paste0(write_dir, "/ag_prodchange_", scn_name, "_", ssp, "_", esm_name, "_", cm, ".csv"),
                            paste(c("INPUT_TABLE,,,,,",  "Variable ID,,,,,", "AgProdChange,,,,,", ",,,,,")),
                            sep = ",", quote = FALSE, row.names=FALSE)


    # # What SHOULD work to write xml in same loop; doesn't work on windows yet
    # options("gcamdata.use_java"=TRUE)
    # gcamdata::create_xml(paste0(datapath, "/ag_prodchange_", scn_name, "_", ssp, "_", esm_name, "_", cm, "1",xml_end)) %>%
    #   gcamdata::add_xml_data(A, "AgProdChange") %>%
    #   gcamdata::run_xml_conversion()

  }

  # write out xml file
  options("gcamdata.use_java"=TRUE)

  filelist <- list.files(path=write_dir, pattern = paste0(scn_name, "_", ssp, "_", esm_name), full.names=TRUE, recursive=FALSE)
  filelist <- filelist[!(grepl(".xml", filelist, fixed=TRUE))]

  invisible(foreach::foreach(f = filelist) %do% {
    filename1 <- substr(f, (nchar(write_dir)+2), (nchar(f)-4))

    tibble::as_tibble(utils::read.csv(f, skip = 4, stringsAsFactors = F)) ->
      x

    gcamdata::create_xml(paste0(write_dir, "/", filename1, ".xml")) %>%
      gcamdata::add_xml_data(x, "AgProdChange") %>%
      gcamdata::run_xml_conversion()

  })

  rlang::inform("create_AgProdChange_xml complete.")
}