#' grid_to_basin_yield
#'
#' Function that uses emulators to create gridded yield files then aggregates to
#' gcam basin level via MIRCA harvested areas.
#'
#' @param carbon Default = NULL
#' @param weight_floor_ha Default = 100 Floor on area weights, in hectares. Below this climate impacts will be ignored. These are more likely than others to be problematic. 1 hectare = 0.01 km^2 = 1e-5 thou km^2, GCAM land units
#' @param emulator_dir Default = NULL
#' @param input_dir Default = NULL
#' @param area_dir Default = NULL
#' @param basin_grid Default = NULL
#' @param basin_id Default = NULL
#' @param region_id Default = NULL. Filters to specified GCAM region (1-32), otherwise no filter
#' @param gridded_yield_dir Default = NULL.
#' @param write_dir Default = "step2_grid_to_basin_yield". Output Folder
#' @param wheat_area = NULL
#' @param crops Default = c("Corn", "Rice", "Soy", "Spring Wheat", "Winter Wheat")
#' @param esm_name Default = 'WRF'
#' @param cm_name Default = 'LPJmL'
#' @param scn_name Default = 'rcp8p5_hotter'
#' @param N Default = 200. Assuming nothing is nitrogen limited and apply across grids
#' @keywords test
#' @return number
#' @importFrom rlang :=
#' @importFrom magrittr %>%
#' @export
#' @examples
#' \dontrun{
#' library(osiris)
#' osiris::grid_to_basin_yield()
#' }
grid_to_basin_yield <- function(carbon = NULL,
weight_floor_ha = 100,
emulator_dir = NULL,
input_dir = NULL,
area_dir = NULL,
basin_grid = NULL,
basin_id = NULL,
region_id = NULL,
gridded_yield_dir = NULL,
write_dir = "step2_grid_to_basin_yield",
wheat_area = NULL,
crops = c("Corn", "Rice", "Soy", "Spring Wheat", "Winter Wheat"),
esm_name = 'WRF',
cm_name = 'LPJmL',
scn_name = 'rcp8p5_hotter',
N = 200) {
#.........................
# Initialize
#.........................
rlang::inform(paste0("Starting grid_to_basin_yield for ", cm_name, "_",
esm_name, "_", scn_name, ": ", paste(crops, collapse = ", ")))
# Check write dir
if(!dir.exists(write_dir)){dir.create(write_dir)}
# Make a directory for gridded_yield if not there
if(!dir.exists(gridded_yield_dir)){dir.create(gridded_yield_dir)}
# Initialize values
NULL -> year -> value -> vtag -> units -> ctag -> type -> Scenario ->
model -> ID -> long -> lati -> x -> y -> countryID -> protected -> flowD ->
elevation -> elevD -> eleD_min -> eleD_max -> regID -> inGrandELEC ->
basinID -> GCAM_basin_ID -> gcammaptools_name -> GCAM_basin_name ->
Basin_name -> GLU_code -> GLU_name -> ISO -> ISO_NUM -> Country_name ->
lon -> lat -> latgrid -> longrid -> crop -> irr -> deltaT -> deltaP ->
crop_lon -> crop_lat -> C -> yield -> gcm -> cropmodel -> HA -> param_sum ->
orig_lon -> coarse_lon -> coarse_lat -> id -> swh_ir -> wwh_ir -> swh_rf ->
wwh_rf -> .
# Check that the crops in the input argument are valid
crops <- unique(crops)
if(!all(crops %in% c("Corn", "Rice", "Soy", "Spring Wheat", "Winter Wheat"))) {
stop("The crops that were defined are either misspelled or not included in this package.
The crops currently included are: Corn, Rice, Soy, Spring Wheat, Winter Wheat")
}
#.........................
# Custom functions
#.........................
approx_fun <- function(year, value, rule = 1) {
if(rule == 1 | rule == 2 ) {
tryCatch(stats::approx(as.vector(year), value, rule = rule, xout = year)$y,
error=function(e) NA)
} else {
stop("Not implemented yet!")
}
}
# This matches yield data to harvested area data
# ... both of which come in different files.
match_crop <- function(CROP){
croplist <- c("crop01" = "Wheat",
"crop02" = "Corn",
"crop03" = "Rice",
"crop08" = "Soy",
"crop11" = "cas",
"crop25" = "mgr",
"crop06" = "mil",
"crop16" = "nut",
"crop17" = "pea",
# "crop17" = "ben", # EPIC ben = Drybeans is equivalent to LPJmL pea = field peas; both receive HA for MIRCA crop 17
"crop15" = "rap",
"crop13" = "sgb",
"crop12" = "sug",
"crop09" = "sun",
"crop04" = "bar",
"crop07" = "sor",
"crop21" = "cot")
if(CROP %in% names(croplist)) {
croplist[CROP]
}
} # match_crop
# Take ISIMIP irrtype name and match to MIRCA irrtype name
match_irrtype <- function(IRRTYPE){
irrtypelist <- c("rfc" = "rf",
"irc" = "ir")
if(IRRTYPE %in% names(irrtypelist)) {
irrtypelist[IRRTYPE]
} else {
stop("Unknown irrtype: ", IRRTYPE)
}
} # match_irrtype
## TODO when climate data on finer grid than crop emulation (0.5deg):
## probably will want to use class::knn1 to figure out which crop grid every climate grid goes in,
## then just apply that crop grid's coefficients to the climate grid. So basically switch
## the roles of input_grid and grid in the call to the function I'm about to define :
## TODO: move this function outside of the loop, I just put it here so it was clearer what
## arguments to switch.
## TODO: make sure the orientations for lon/lat are same across all data sets, also there's
## probably some weird matches being returned by knn1 around where lon (or lat) = 0 -
## the matched lat (or lon) may be randomly assigned the plus or minus side
## latitude (or lon) matched in.
## NOTE: there's also probably a better way to do this
grid_matching <- function(finer_grid, coarser_grid){
return(do.call(rbind, apply(finer_grid, MARGIN = 1, function(row){
match_id <- as.numeric(class::knn1(coarser_grid, c(row[1], row[2]),1:nrow(coarser_grid) ))
match <- coarser_grid[match_id,]
return(data.frame(lon = as.numeric(row[1]),
lat = as.numeric(row[2]),
coarse_lon = match$lon,
coarse_lat = match$lat))
}))
)
}
# This may take a while since it's evaluating a polynomial at every grid
# point in every year
eval_yield <- function(inputs, params, matched_grids, irrig=F){
# Because my climate data (inputs) Is on a coarser grid than
# my crop data (params), I have to do an apply over the crop
# rows and join in the climate data that corresponds.
## TODO you may need to do the split on climate data instead of crop data and
## reverse their roles in the below code
params %>%
dplyr::mutate(split_id = paste0(lon, '~', lat)) ->
params
params_list <- split(params, f=params$split_id)
# For irrigated crops, yield is calculated by applying the 19 non-NA variables
# to the 19 terms in GMD paper eqn 1 that don't have W (precipitation) terms
# in them.
if(irrig == T){
yields <- do.call(rbind, lapply(params_list, function(param_row){
# param_row <- params_list[[1]]
matched_grids %>%
dplyr::filter(crop_lon == param_row$lon,
crop_lat == param_row$lat) ->
matched_point
inputs %>%
dplyr::filter(lon == matched_point$clim_lon,
lat == matched_point$clim_lat ) %>%
dplyr::mutate(crop_lon = matched_point$crop_lon,
crop_lat = matched_point$crop_lat,
yield = param_row$`1` +
param_row$`2`*C +
param_row$`3`*deltaT +
param_row$`4`*N +
param_row$`5`*C^2 +
param_row$`6`*C*deltaT +
param_row$`7`*C*N +
param_row$`8`*deltaT^2 +
param_row$`9`*deltaT*N +
param_row$`10`*N^2 +
param_row$`11`*C^3 +
param_row$`12`*C^2*deltaT +
param_row$`13`*C^2*N +
param_row$`14`*C*deltaT^2 +
param_row$`15`*C*deltaT*N +
param_row$`16`*C*N^2 +
param_row$`17`*deltaT^3 +
param_row$`18`*deltaT^2*N +
param_row$`19`*deltaT*N^2
) ->
yield
return(yield)
}))
} else if (irrig == F) {
yields <- do.call(rbind, lapply(params_list, function(param_row){
# param_row <- params_list[[1]]
matched_grids %>%
dplyr::filter(crop_lon == param_row$lon,
crop_lat == param_row$lat) ->
matched_point
inputs %>%
dplyr::filter(lon == matched_point$clim_lon,
lat == matched_point$clim_lat ) %>%
dplyr::mutate(crop_lon = matched_point$crop_lon,
crop_lat = matched_point$crop_lat,
yield = param_row$`1` +
param_row$`2`*C +
param_row$`3`*deltaT +
param_row$`4`*deltaP +
param_row$`5`*N +
param_row$`6`*C^2 +
param_row$`7`*C*deltaT +
param_row$`8`*C*deltaP +
param_row$`9`*C*N +
param_row$`10`*deltaT^2 +
param_row$`11`*deltaT*deltaP +
param_row$`12`*deltaT*N +
param_row$`13`*deltaP^2 +
param_row$`14`*deltaP*N +
param_row$`15`*N^2 +
param_row$`16`*C^3 +
param_row$`17`*C^2*deltaT +
param_row$`18`*C^2*deltaP +
param_row$`19`*C^2*N +
param_row$`20`*C*deltaT^2 +
param_row$`21`*C*deltaT*deltaP +
param_row$`22`*C*deltaT*N +
param_row$`23`*C*deltaP^2 +
param_row$`24`*C*deltaP*N +
param_row$`25`*C*N^2 +
param_row$`26`*deltaT^3 +
param_row$`27`*deltaT^2*deltaP +
param_row$`28`*deltaT^2*N +
param_row$`29`*deltaT*deltaP^2 +
param_row$`30`*deltaT*deltaP*N +
param_row$`31`*deltaT*N^2 +
param_row$`32`*deltaP^3 +
param_row$`33`*deltaP^2*N +
param_row$`34`*deltaP*N^2
) ->
yield
return(yield)
}))
}
row.names(yields) <- NULL
return(yields %>%
dplyr::select(crop, irr, crop_lon, crop_lat, year, yield, deltaT, deltaP))
}
## NOTE to aggregate from gridded yields to basin yields, we will have
## to use the MIRCA harvested area data. Which is only on a half
## degree grid.
## For the IM3 experiment where you have finer grid in the US in
## weather data, I think what you'll want to do is aggregate
## the yields in those 12km2 grid cells to half degree grid using
## the land area in each 12km2 grid cell as the weight. That will
## result in you having half degree gridded yields in all grid cells,
## but the finer scale climate data will have an area weighted effect.
## Then you'll do the aggregation with MIRCA harvested area data below:
aggregate_halfdeg_yield2basin <- function(halfdeg_yield){
if(unique(halfdeg_yield$crop) %in% c('Spring Wheat', 'Winter Wheat')){
area_crop_name <- 'Wheat'
}else{
area_crop_name <- unique(halfdeg_yield$crop)
}
if(unique(halfdeg_yield$irr) == 'IRR'){
area_irr_label <- 'ir'
}else{
area_irr_label <- 'rf'
}
# get corresponding area
area.grid <- get(paste0("area.grid.", area_crop_name, ".", area_irr_label))
# Join in the harvested area and year+yield data for each grid cell
# in a basin.
# Then use together to perform correctly weighted yield aggregation,
# keeping weights = HA
basinGrid %>%
dplyr::rename(lon = long, lat=lati) %>%
# area
dplyr::left_join(area.grid, by = c("lon", "lat")) %>%
# yield and year
dplyr::left_join(halfdeg_yield, by = c("lon", "lat")) %>%
stats::na.omit() %>%
# aggregate to basin:
dplyr::group_by(gcm, cropmodel, crop, irr, basinID, year) %>%
dplyr::summarise(yield = stats::weighted.mean(yield, w = HA),
HA = sum(HA)) %>%
dplyr::ungroup() %>%
# calculate basin level yield
dplyr::rename(id = basinID) %>%
dplyr::mutate(yield = dplyr::if_else(HA == 0, 0, yield),
rcp = scn_name) ->
yield_basin
return(yield_basin)
}
#.........................
# Read in Area Files
#.........................
# NOTE: mask applied to growing season info in step0 already included some small area mask
# NOTE: This also means the climate data I have has been masked to land only grid cells -
# so some longitudes just don't show up in the climate data because they're oceans.
# I don't know how much that might cause an issue in the code.
# emulator_dir sourced from https://zenodo.org/record/3592453#.Ygqzhu7MJ7M
# too large for github.
## TODO download for self
emulatorlist <- list.files(path=emulator_dir, full.names=TRUE, recursive=FALSE)
emulatorlist <- emulatorlist[grepl('A0', emulatorlist)] # only keep non-adaptation scenarios
inputlist <- list.files(path=input_dir, pattern = paste0(esm_name, "_", scn_name), full.names=TRUE, recursive=FALSE)
areafilelist <- list.files(path=area_dir, full.names=TRUE, recursive=FALSE)
# there's only 4 crops in the ggcmi phase2 emulators. Subset the area
# files to these to save on time and memory:
areafilelist <- areafilelist[grepl("crop01", areafilelist) |
grepl("crop02", areafilelist) |
grepl("crop03", areafilelist) |
grepl("crop08", areafilelist)]
# CO2
carbon <- utils::read.csv(paste0(carbon), stringsAsFactors = F) %>%
dplyr::mutate(value = dplyr::if_else(value > 810, 810, value))
#.........................
# Maps
#.........................
tibble::as_tibble(utils::read.csv(paste0(basin_grid), stringsAsFactors = F)) %>%
dplyr::select(long, lati, basinID, regID) %>%
dplyr::filter(basinID != 999) ->
basinGrid
# Filter region ID to GCAM region (if applicable)
if (!is.null(region_id)) {
basinGrid <- dplyr::filter(basinGrid, regID == region_id) %>%
dplyr::select(-regID)
} else {
basinGrid <- dplyr::select(basinGrid, -regID)
}
basinIDs <- tibble::as_tibble(utils::read.csv(paste0(basin_id), stringsAsFactors = F))
#.........................
# Process all harvested area file
#.........................
invisible(for(a in areafilelist){
# area .asc
areaFname <- a
# crop:
area.crop <- match_crop(paste0("crop", strsplit((strsplit(areaFname[length(areaFname)], "crop")[[1]][2]), "_")[[1]][1]))
# irrigation:
area.irr <- match_irrtype(strsplit((strsplit(areaFname[length(areaFname)], "_crop")[[1]][1]), "harvested_")[[1]][2])
# file:
try(area <- raster::raster(areaFname), silent=TRUE)
# coordinates:
area.coord <- raster::coordinates(area)
# value:
area.vals <- raster::extract(area, raster::coordinates(area))
# gridded table to join to yield:
assign(paste0("area.grid.", area.crop, ".", area.irr),
area.coord %>%
cbind(area.vals) %>%
tibble::as_tibble() %>%
dplyr::rename(lon = x,
lat = y,
HA = area.vals) %>%
# cull any grid cells with very small HA
dplyr::mutate(HA = dplyr::if_else(HA < weight_floor_ha, 0, HA)))
rm(area)
rm(area.crop)
rm(area.irr)
})
# clean up unneeded file
rm(area.coord)
#.........................
# Yield
#.........................
for(crop in crops){
rlang::inform(paste0("Generating gridded yield for ", crop))
# yield emu netcdf
if(crop == "Corn"){
ncfname <- emulatorlist[grepl("maize", emulatorlist) & grepl(cm_name, emulatorlist)]
}else{
ncfname <- emulatorlist[grepl(sub(" ", "_", tolower(crop)), emulatorlist) & grepl(cm_name, emulatorlist)]
}
# get emulation parameters.
ncin <- ncdf4::nc_open(ncfname)
nc_lon <- ncdf4::ncvar_get(ncin,'lon')
nc_lat <- ncdf4::ncvar_get(ncin, 'lat')
grid <- expand.grid(list(lon=nc_lon,lat=nc_lat))
# pull and reshape rainfed params
rf_params_3d_array <- ncdf4::ncvar_get(ncin, 'K_rf')
indlon <- which(dim(rf_params_3d_array)==length(nc_lon))
indlat <- which(dim(rf_params_3d_array)==length(nc_lat))
indpoly <- which(dim(rf_params_3d_array)==ncin$dim$poly$len)
rf_params <-cbind(grid,
t(rbind(matrix(aperm(rf_params_3d_array, c(indpoly,indlon,indlat)),
ncin$dim$poly$len,length(nc_lat)*length(nc_lon))
))
)
rm(rf_params_3d_array)
# pull and reshape irrigated params
ir_params_3d_array <- ncdf4::ncvar_get(ncin, 'K_ir')
indlon <- which(dim(ir_params_3d_array)==length(nc_lon))
indlat <- which(dim(ir_params_3d_array)==length(nc_lat))
indpoly <- which(dim(ir_params_3d_array)==ncin$dim$poly$len)
ir_params <-cbind(grid,
t(rbind(matrix(aperm(ir_params_3d_array, c(indpoly,indlon,indlat)),
ncin$dim$poly$len,length(nc_lat)*length(nc_lon))
))
)
rm(ir_params_3d_array)
ncdf4::nc_close(ncin)
# The crop responses include every grid cells, even ones with 0 response.
# Drop the cells with 0 response to speed things up.
ir_params[is.na(ir_params)] <- 0
ir_params$param_sum <- rowSums(abs(ir_params[3:36]))
ir_params %>%
dplyr::filter(param_sum != 0 ) %>%
dplyr::select(-param_sum) ->
ir_params
rf_params$param_sum <- rowSums(abs(rf_params[3:36]))
rf_params %>%
dplyr::filter(param_sum != 0 ) %>%
dplyr::select(-param_sum) ->
rf_params
# read in climate data
rf_tp <- utils::read.csv(inputlist[grepl(tolower(crop), inputlist) & grepl('rfd', inputlist)], stringsAsFactors = F) %>%
dplyr::mutate(deltaP = dplyr::if_else(deltaP > 1.3, 1.3, deltaP))
ir_tp <- utils::read.csv(inputlist[grepl(tolower(crop), inputlist) & grepl('irr', inputlist)], stringsAsFactors = F) %>%
dplyr::mutate(deltaP = dplyr::if_else(deltaP > 1.3, 1.3, deltaP))
# Combine C, N, T and P data with ir and rfd params to get ir, rf yields in each
# grid cell in each year
# get the grid of the climate inputs
rf_tp %>%
tidyr::drop_na() %>%
dplyr::select(lon, lat) %>%
dplyr::distinct() %>%
dplyr::mutate(orig_lon = lon,
lon = dplyr::if_else(lon > 179.75, lon -360, lon)) ->
rfinput_grid
ir_tp %>%
tidyr::drop_na() %>%
dplyr::select(lon, lat) %>%
dplyr::distinct() %>%
dplyr::mutate(orig_lon = lon,
lon = dplyr::if_else(lon > 179.75, lon -360, lon)) ->
irinput_grid
# and the grid of the crop responses
ir_params %>%
dplyr::select(lon, lat) %>%
dplyr::distinct() ->
ircrop_grid
rf_params %>%
dplyr::select(lon, lat) %>%
dplyr::distinct() ->
rfcrop_grid
# now call the grid_matching function so that we can have
# the table of whch climate grids pair with which crop grids.
## TODO: this is where to switch which grid is finer and coarser:
ir_matched_grids <- grid_matching(finer_grid = irinput_grid%>%
dplyr::select(-orig_lon),
coarser_grid = ircrop_grid )%>%
dplyr::rename(crop_lon = lon, crop_lat = lat,
clim_lon = coarse_lon, clim_lat = coarse_lat)
rf_matched_grids <- grid_matching(finer_grid = rfinput_grid%>%
dplyr::select(-orig_lon),
coarser_grid = rfcrop_grid )%>%
dplyr::rename(crop_lon = lon, crop_lat = lat,
clim_lon = coarse_lon, clim_lat = coarse_lat)
# Ok now we have all the info we need for different grids.
# Make the main tables of inputs for ir, rf
ir_tp %>%
# TODO: update this with your specific info for nitrogen and CO2 in
# your scenario:
dplyr::mutate(N=N) %>%
dplyr::left_join(carbon %>%
dplyr::select(year, value), by = 'year') %>%
dplyr::rename(C = value) %>%
dplyr::select(-longrid, -latgrid) %>%
dplyr::mutate(orig_lon = lon,
lon = dplyr::if_else(lon > 179.75, lon -360, lon)) ->
ir_inputs
rf_tp %>%
tidyr::drop_na() %>%
# TODO: update this with your specific info for nitrogen and CO2 in
# your scenario:
dplyr::mutate(N=N) %>%
dplyr::left_join(carbon %>%
dplyr::select(year, value), by = 'year') %>%
dplyr::rename(C = value) %>%
dplyr::select(-longrid, -latgrid) %>%
dplyr::mutate(orig_lon = lon,
lon = dplyr::if_else(lon > 179.75, lon -360, lon)) ->
rf_inputs
# Filter the crop parameters to only include grid cells for which we have climate data,
# since we only want to estimate yield in the grid cells where we have the growing season
# info. This will also speed up the code.
ir_params_filtered <- dplyr::left_join(irinput_grid, ir_params, by = c("lat", "lon")) %>%
dplyr::select(-"orig_lon")
rf_params_filtered <- dplyr::left_join(rfinput_grid, rf_params, by = c("lat", "lon")) %>%
dplyr::select(-"orig_lon")
# Test the inputs
# rf_inputs %>%
# dplyr::select(lon, lat, crop, irr) %>%
# dplyr::distinct() %>%
# dplyr::mutate(deltaT=0,
# deltaP=1,
# N=200,
# C=360,
# year = 1985) -> # year shouldn't matter
# test_inputs1
#
# test_inputs <- test_inputs1[3:4,]
#
# test_yields <- eval_yield(inputs=test_inputs, params=rf_params, matched_grids = rf_matched_grids)
ir_yields <- eval_yield(inputs=ir_inputs, params=ir_params_filtered, matched_grids = ir_matched_grids, irrig=T) %>%
dplyr::mutate(gcm = esm_name,
cropmodel = cm_name) %>%
dplyr::rename(lon = crop_lon,
lat = crop_lat)
rf_yields <- eval_yield(inputs=rf_inputs, params=rf_params_filtered, matched_grids = rf_matched_grids, irrig=F) %>%
dplyr::mutate(gcm = esm_name,
cropmodel = cm_name) %>%
dplyr::rename(lon = crop_lon,
lat = crop_lat)
# Replace negative yields with zero
ir_yields$yield[ir_yields$yield < 0] <- 0
rf_yields$yield[rf_yields$yield < 0] <- 0
# Save gridded yield data (as well as deltaT and delatP) by crop and irr
ir_yields %>%
utils::write.csv(., paste0(gridded_yield_dir, '/', cm_name, "_", esm_name, '_', scn_name,'_', tolower(crop), '_irr_gridded_yield_deltaT_deltaP.csv'),
row.names = F)
rf_yields %>%
utils::write.csv(., paste0(gridded_yield_dir, '/', cm_name, "_", esm_name, '_', scn_name,'_', tolower(crop), '_rfd_gridded_yield_deltaT_deltaP.csv'),
row.names = F)
}
# So now we have a data frame of yields for each grid cell in each year, which
# we load from file. This helps to deal with the spring and winter wheat aggregation.
# We deal with wheat separately first. Then the rest of the crops are done in a loop.
if (any(grepl("Wheat", crops))) {
crop <- "Wheat"
rlang::inform(paste0("Generating basin yield for ", crop))
# Read in spring/winter wheat area masks so that we can the area weighted yield for
# winter/spring wheat to get total production by grid cell, then dividing that by
# the MIRCA harvested area to get yield again.
ncin <- ncdf4::nc_open(wheat_area)
nc_lon <- ncdf4::ncvar_get(ncin,'lon')
nc_lat <- ncdf4::ncvar_get(ncin, 'lat')
grid <- expand.grid(list(lon=nc_lon,lat=nc_lat))
units <- unique(c(ncin$var$wwh_rf_area$units,
ncin$var$wwh_ir_area$units,
ncin$var$swh_rf_area$units,
ncin$var$swh_ir_area$units))
if (length(units) >1){
print(paste('something is wrong with', wheat_area, '; too many units'))
}
print(c(ncin$var$wwh_rf_area$longname,
ncin$var$wwh_ir_area$longname,
ncin$var$swh_rf_area$longname,
ncin$var$swh_ir_area$longname))
wheat_areas_holder <- data.frame()
for (varname in c('wwh_rf_area', 'wwh_ir_area', 'swh_rf_area', 'swh_ir_area')){
areadata <- ncdf4::ncvar_get(ncin, varname)
indlon <- which(dim(areadata)==length(nc_lon))
indlat <- which(dim(areadata)==length(nc_lat))
area_long <-matrix(aperm(areadata, c(indlon,indlat)),
length(nc_lat)*length(nc_lon))
area_vals <-cbind(grid, area_long) %>%
rename(HA = area_long) %>%
na.omit %>%
mutate(varname = varname) %>%
tidyr::separate(varname, into = c('crop', 'irr', 'area'), sep = '_') %>%
mutate(crop = dplyr::if_else(crop == 'wwh', 'Winter Wheat', 'Spring Wheat'),
irr = dplyr::if_else(irr == 'ir', 'IRR', 'RFD')) %>%
select(-area)
area_vals %>%
bind_rows(wheat_areas_holder, .) ->
wheat_areas_holder
}
ncdf4::nc_close(ncin)
rm(areadata)
rm(area_long)
rm(area_vals)
rm(grid)
rm(nc_lon)
rm(nc_lat)
rm(indlon)
rm(indlat)
#QC: Compare to MIRCA total
# Not sure need to bring over to OSIRIS
# MIRCA2000 has irr and rfd harvested area values for 'wheat'. If the total
# across winter and spring wheat areas for each of irr, rfd in the agmip
# netcdf agrees with the MIRCA total to at least one decimal place, happy with that.
wheat_areas_holder %>%
dplyr::group_by(lon, lat, irr) %>%
dplyr::summarise(HA = sum(HA, na.rm = T)) %>%
dplyr::ungroup ->
wheat_total_areas_byirr
area.grid.Wheat.ir %>%
dplyr::left_join(wheat_total_areas_byirr %>%
dplyr::filter(irr == 'IRR') %>%
dplyr::rename(newHA = HA), by = c('lon', 'lat')) %>%
dplyr::filter(HA > 0 ) ->
irr_compare
print(paste('winter/spring wheat irr HA processed correctly if following is 0:',
length(which(abs(irr_compare$HA - irr_compare$newHA) > 1e-1)) ))
area.grid.Wheat.rf %>%
dplyr::left_join(wheat_total_areas_byirr %>%
dplyr::filter(irr == 'RFD') %>%
dplyr::rename(newHA = HA), by = c('lon', 'lat')) %>%
dplyr::filter(HA > 0 ) ->
rfd_compare
print(paste('winter/spring wheat rfd HA processed correctly if following is 0:',
length(which(abs(rfd_compare$HA - rfd_compare$newHA) > 1e-1)) ) )
# Using winter/spring Wheat HA's to get to basin level yields from gridded.
griddedlist <- list.files(path=gridded_yield_dir, pattern = paste0(esm_name, "_", scn_name), full.names=TRUE, recursive=FALSE)
# Read in spring and winter wheat gridded yield outputs
ir_yield_swheat <- utils::read.csv(griddedlist[grepl('spring wheat', griddedlist) & grepl('irr', griddedlist)],
stringsAsFactors = F) %>%
dplyr::select(-c(deltaT, deltaP)) %>%
dplyr::rename(swheat_yield = yield) %>%
dplyr::left_join(wheat_areas_holder, by = c('crop', 'irr', 'lon', 'lat')) %>%
tidyr::replace_na(list(HA = 0)) %>%
dplyr::rename(swheat_HA = HA) %>%
dplyr::mutate(swheat_prod = swheat_yield * swheat_HA) %>%
dplyr::select(-swheat_yield)
rf_yield_swheat <- utils::read.csv(griddedlist[grepl('spring wheat', griddedlist) & grepl('rfd', griddedlist)], stringsAsFactors = F)%>%
dplyr::select(-c(deltaT, deltaP)) %>%
dplyr::rename(swheat_yield = yield) %>%
dplyr::left_join(wheat_areas_holder, by = c('crop', 'irr', 'lon', 'lat')) %>%
tidyr::replace_na(list(HA = 0)) %>%
dplyr::rename(swheat_HA = HA) %>%
dplyr::mutate(swheat_prod = swheat_yield * swheat_HA) %>%
dplyr::select(-swheat_yield)
ir_yield_wwheat <- utils::read.csv(griddedlist[grepl('winter wheat', griddedlist) & grepl('irr', griddedlist)], stringsAsFactors = F)%>%
dplyr::select(-c(deltaT, deltaP)) %>%
dplyr::rename(wwheat_yield = yield) %>%
dplyr::left_join(wheat_areas_holder, by = c('crop', 'irr', 'lon', 'lat')) %>%
tidyr::replace_na(list(HA = 0)) %>%
dplyr::rename(wwheat_HA = HA) %>%
dplyr::mutate(wwheat_prod = wwheat_yield * wwheat_HA) %>%
dplyr::select(-wwheat_yield)
rf_yield_wwheat <- utils::read.csv(griddedlist[grepl('winter wheat', griddedlist) & grepl('rfd', griddedlist)], stringsAsFactors = F)%>%
dplyr::select(-c(deltaT, deltaP)) %>%
dplyr::rename(wwheat_yield = yield) %>%
dplyr::left_join(wheat_areas_holder, by = c('crop', 'irr', 'lon', 'lat')) %>%
tidyr::replace_na(list(HA = 0))%>%
dplyr::rename(wwheat_HA = HA) %>%
dplyr::mutate(wwheat_prod = wwheat_yield * wwheat_HA) %>%
dplyr::select(-wwheat_yield)
# Combine spring and winter wheat yields in each grid cell for each year using
# HA weights:
ir_yield_swheat %>%
dplyr::select(-crop) %>%
dplyr::full_join(ir_yield_wwheat %>% dplyr::select(-crop), by = c( "irr", "lon", "lat", "year", "gcm", "cropmodel")) %>%
tidyr::replace_na(list(swheat_HA = 0, swheat_prod=0, wwheat_HA = 0, wwheat_prod = 0)) %>%
dplyr::group_by(gcm, cropmodel, irr, lon, lat, year) %>%
dplyr::summarize(total_prod = swheat_prod + wwheat_prod,
total_HA = swheat_HA + wwheat_HA) %>%
dplyr::ungroup %>%
dplyr::mutate(yield = dplyr::if_else(total_HA == 0, 0, total_prod/total_HA)) %>%
dplyr::mutate(crop = 'Wheat') ->
ir_yield_wheat
rf_yield_swheat %>%
dplyr::select(-crop) %>%
dplyr::full_join(rf_yield_wwheat %>%
dplyr::select(-crop), by = c( "irr", "lon", "lat", "year", "gcm", "cropmodel")) %>%
tidyr::replace_na(list(swheat_HA = 0, swheat_prod=0, wwheat_HA = 0, wwheat_prod = 0)) ->
x
x%>%
dplyr::group_by(gcm, cropmodel, irr, lon, lat, year) %>%
dplyr::summarize(total_prod = swheat_prod + wwheat_prod,
total_HA = swheat_HA + wwheat_HA) %>%
dplyr::ungroup %>%
dplyr::mutate(yield = dplyr::if_else(total_HA == 0, 0, total_prod/total_HA)) %>%
dplyr::mutate(crop = 'Wheat') ->
rf_yield_wheat
# Check that ir and rf summed correctly
# Honestly if they agree to like first decimal place (1e-1), that's probably fine
if(abs(sum(ir_yield_swheat$swheat_HA, ir_yield_wwheat$wwheat_HA, na.rm = TRUE) - sum(ir_yield_wheat$total_HA, na.rm = TRUE)) < 1e-3 &
abs(sum(ir_yield_swheat$swheat_prod, ir_yield_wwheat$wwheat_prod, na.rm = TRUE) - sum(ir_yield_wheat$total_prod, na.rm = TRUE)) < 1e-3) {
rlang::inform("Sum of individual irrigated spring and winter wheat yields correctly sum to total irrigated wheat yield")
} else{rlang::inform("Error: Sum of individual irrigated spring and winter wheat yields do not sum to total irrigated wheat yield")}
if(abs(sum(rf_yield_swheat$swheat_HA, rf_yield_wwheat$wwheat_HA, na.rm = TRUE) - sum(rf_yield_wheat$total_HA, na.rm = TRUE)) < 1e-3 &
abs(sum(rf_yield_swheat$swheat_prod, rf_yield_wwheat$wwheat_prod, na.rm = TRUE) - sum(rf_yield_wheat$total_prod, na.rm = TRUE)) < 1e-3) {
rlang::inform("Sum of individual rainfed spring and winter wheat yields correctly sum to total rainfed wheat yield")
} else{rlang::inform("Error: Sum of individual rainfed spring and winter wheat yields do not sum to total rainfed wheat yield")}
# Generate basin yield
ir_yields_basin <- aggregate_halfdeg_yield2basin(ir_yield_wheat %>%
dplyr::select(-c(total_prod, total_HA)))
rf_yields_basin <- aggregate_halfdeg_yield2basin(rf_yield_wheat %>%
dplyr::select(-c(total_prod, total_HA)))
dplyr::bind_rows(rf_yields_basin,
ir_yields_basin) ->
yield.basin
years <- min(yield.basin$year):max(yield.basin$year)
# note that only 231 basins have data
# we need to create new rows in the data frame for missing basins
# (otherwise gcammapdata::plot_GCAM() will fail)
tibble::tibble(id = which(basinIDs$GCAM_basin_ID %in% yield.basin$id == F)) %>%
dplyr::mutate(year = years[1]) %>%
tidyr::complete(id, year = years) %>%
dplyr::mutate(yield = 0,
HA = 0,
cropmodel = cm_name,
gcm = esm_name,
rcp = scn_name,
crop = crop) ->
tmp
dplyr::bind_rows(tmp %>% dplyr::mutate(irr = 'IRR'),
tmp %>% dplyr::mutate(irr = 'RFD')) ->
mb
rm(tmp)
# bind the missing basins
dplyr::bind_rows(yield.basin, mb) %>%
dplyr::ungroup() ->
yieldByBasin
utils::write.csv(yieldByBasin, paste0(write_dir, "/", cm_name, "_",
esm_name, "_", scn_name, "_", tolower(crop), "_",
min(years), "_", max(years),".csv"),
row.names = F)
}
# Run basin yield aggregation for remaining crops (not including wheat)
for(crop in crops[!grepl("Wheat", crops)]){
rlang::inform(paste0("Generating basin yield for ", crop))
# Read in gridded yield outputs by crop
ir_yields <- utils::read.csv(griddedlist[grepl(tolower(crop), griddedlist) & grepl('irr', griddedlist)], stringsAsFactors = F) %>%
dplyr::select(-c(deltaT, deltaP))
rf_yields <- utils::read.csv(griddedlist[grepl(tolower(crop), griddedlist) & grepl('rfd', griddedlist)], stringsAsFactors = F)%>%
dplyr::select(-c(deltaT, deltaP))
ir_yields_basin <- aggregate_halfdeg_yield2basin(ir_yields)
rf_yields_basin <- aggregate_halfdeg_yield2basin(rf_yields)
dplyr::bind_rows(rf_yields_basin,
ir_yields_basin) ->
yield.basin
years <- min(yield.basin$year):max(yield.basin$year)
# note that only 231 basins have data
# we need to create new rows in the data frame for missing basins
# (otherwise gcammapdata::plot_GCAM() will fail)
tibble::tibble(id = which(basinIDs$GCAM_basin_ID %in% yield.basin$id == F)) %>%
dplyr::mutate(year = years[1]) %>%
tidyr::complete(id, year = years) %>%
dplyr::mutate(yield = 0,
HA = 0,
cropmodel = cm_name,
gcm = esm_name,
rcp = scn_name,
crop = crop) ->
tmp
dplyr::bind_rows(tmp %>% dplyr::mutate(irr = 'IRR'),
tmp %>% dplyr::mutate(irr = 'RFD')) ->
mb
rm(tmp)
# bind the missing basins
dplyr::bind_rows(yield.basin, mb) %>%
dplyr::ungroup() ->
yieldByBasin
utils::write.csv(yieldByBasin, paste0(write_dir, "/", cm_name, "_",
esm_name, "_", scn_name, "_", tolower(crop), "_",
min(years), "_", max(years),".csv"),
row.names = F)
}
rlang::inform("grid_to_basin_yield complete.")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.