R/qchange_calcs_func.R

Defines functions get.quantile.changes

Documented in get.quantile.changes

#' Get changes in estimated quantiles
#'
#' \code{get.quantile.changes} calculates the change in the mean quantile by day
#' of year between a future period (mean across \code{future.years}) and a base
#' period (mean across \code{base.years}) by day of year for quantile estimates
#' generated by \code{\link{estimate.quantiles}} and merged by
#' \code{\link{combine.params}}). In words, for every estimated quantile and every
#' day of year, this code gives the difference between the average day of year
#' (i.e. Jan 1st) of that quantile in the \code{future.years} period vs. the
#' same in the \code{base.years} period.
#'
#' Duplicate locations are removed from the final output.
#'
#'
#' @section NetCDF output structure:
#'  If \code{export.nc=TRUE}, the quantile changes are saved to a NetCDF file.
#'  The main variable, "\code{d[defaults$filevar]}", is a \code{[loc x quantile
#'  x day of year]} array if \code{output.type=="linear"} and a 
#'  \code{[lon x lat x quantile x day of year]} array if \code{output.type=="grid"}, 
#'  with missing pixels filled with NAs. \code{lat}, \code{lon}, and 
#'  \code{q}/quantile are also saved. The day of year is just an index \code{1:365}. 
#'  A few extra global attributes are included, and an additional "comment" attribute c
#'  an be added to the NetCDF file using the \code{comment} parameter.
#'
#'  The NetCDF file is saved in \code{defaults$mod.data.dir} with the name:
#'  d[filevar]_day_[mod.name]_qchange_[base.year[1]-base.year[end]]_[future.year[1]-future.year[end]].nc
#'
#' @param defaults the output of \code{set.defaults}
#' @param export.nc whether to directly save the output in a NetCDF file
#' @param output.type if saving output in a NetCDF file, sets whether the variable
#'   is "linear" ([loc x quantile x day of year]) or "grid" (by default; 
#'   [lon x lat x quantile x day of year]; pixels without data will be NAs)
#' @param lon_vec,lat_vec if saving output in a NetCDF file as a "grid", 
#'   these will manually set the [lon] and [lat] vectors to use as a basis. 
#'   Especially useful if there are lat or lon gaps in the original data. 
#' @param comments if saving output in a NetCDF file, this inputs an extra global
#'   attribute, 'comments_2', with the contents of the variable.
#' @param base.years the base years to average over (i.e. \code{seq(1979,2010)})
#' @param future.years the future years to average over (i.e. \code{seq(2058,2099)})
#' @param load.fn the file containing the quantile fit coefficients. If left empty,
#'   it will look for the output file of \code{\link{combine.params}}, which concantenates the output of with data from
#'   \code{\link{estimate.quantiles}}.
#'
#' @importFrom tictoc tic toc
#' @importFrom ncdf4 ncdim_def ncvar_def nc_create ncvar_put ncatt_put nc_close
#' @importFrom abind abind
#' @importFrom splines ns
#' @importFrom pbs pbs


# (I want to add an 'export as grid' function....)

get.quantile.changes <- function(defaults,
                             export.nc=T,output.type="grid",
                             lon_vec=numeric(),lat_vec=numeric(),
                             comments=character(),
                             base.years,future.years,
                             load.fn=character()) {

  get.quantile.change <- function(process.inputs.tmp, year.i, year.f,
                                  defaults,
                                  bulk.x,tail.x,norm.x=numeric(),
                                  params,params.lats,params.lons) {
    print(paste0("Processing region ",process.inputs.tmp$reg,", ",length(process.inputs.tmp$lon)," pixel(s) with lat=",round(process.inputs.tmp$lat,2)))
    out.list <- tryCatch({
      ## Load normalizing basis function (the only one that's lat-dependent)
      # Get lats from the volcanic data to pick right basis file (if not already inputted)
      if (defaults$get.volc||length(norm.x)==0) {
          norm.x <- get.predictors(n_files=1, df.x=defaults$norm.x.df[1], df.t=defaults$norm.x.df[2], df.xt=defaults$norm.x.df[3],
            year.range=params[[1]]$year.range,get.volc=defaults$get.volc,lat=process.inputs.tmp$lat)
      }

      # Figure out which locations to process (this is to skip over lat-lon duplicates,
      # which might exist. Basically, for every location in the [process.inputs]
      # directory, we find the first [params] list element, by matching lat and lon)
      param.idxs <- unlist(lapply(1:length(process.inputs.tmp$lon),function(x) which(params.lats==process.inputs.tmp$lat&params.lons==process.inputs.tmp$lon[x])[1]));
      # Remove NULLs, which sometimes show up (possibly fixed)
      if (any(unlist(lapply(params[param.idxs],is.null)))) {
        print('NULLs Found')
        param.idxs <- param.idxs[!unlist(lapply(params[param.idxs],is.null))]
      }

      # Get the quantiles
      yqhats_real <- abind(lapply(params[param.idxs],make.quantile.surfaces,bulk.x=bulk.x,tail.x=tail.x,norm.x=norm.x),along=4)

      # Set future and present values to compare (R automatically "squeezes"
      # interior singleton dimensions, so to avoid the dimensions getting
      # messed up, the statements below are separated)
      if (length(year.f)==1) {
        # If just a single year, just get that year
        q.f <- yqhats_real[,year.f,,]
      } else if (dim(yqhats_real)[4]==1) {
        # If just a single pixel, get the mean across years for that pixel
        q.f <- colMeans(aperm(yqhats_real[,year.f,,],c(2,1,3)))
      } else {
        # If multiple pixels, get the mean across years for the multiple pixels
        q.f <- colMeans(aperm(yqhats_real[,year.f,,],c(2,1,3,4)))
      }

      if (length(year.i)==1) {
        # If just a single year, just get that year
        q.i <- yqhats_real[,year.i,,]
      } else if (dim(yqhats_real)[4]==1) {
        # If just a single pixel, get the mean across years for that pixel
        q.i <- colMeans(aperm(yqhats_real[,year.i,,],c(2,1,3)))
      } else {
        # If multiple pixels, get the mean across years for the multiple pixels
        q.i <- colMeans(aperm(yqhats_real[,year.i,,],c(2,1,3,4)))
      }

      # Get quantile change
      qchg <- q.f - q.i
      attr(qchg,"dimnames") <- NULL

      # Return a list of the location and the quantile change for the pixels in
      # this latitude-by-region subset
      return(list(lat=unlist(lapply(params[param.idxs],function(x) x$lat)),
                  lon=unlist(lapply(params[param.idxs],function(x) x$lon)),
                  qchg=qchg))
    }, error=function(e) {
      warning(paste0("issue with ",process.inputs.tmp$reg,", lat=",process.inputs.tmp$lat))
      # Return a list of the location and the quantile change for the pixels in
      # this latitude-by-region subset
      return(list(lat=unlist(lapply(params[param.idxs],function(x) x$lat)),
                  lon=unlist(lapply(params[param.idxs],function(x) x$lon)),
                  qchg=array(NA,c(365,length(params[[param.idxs[1]]]$q_all),length(process.inputs.tmp$lon)))))
    })

    return(out.list)
  }

  # Load fit params and lat-reg-subsets
  if (length(load.fn)==0) {
    load(paste0(defaults$mod.data.dir,"params/",defaults$filevar,"_day_",defaults$mod.name,"_quantfit_params_",paste0(defaults$mod.year.range,collapse="-"),"_alllocs.RData"))
  } else {
    load(load.fn)
  }
  load(paste0(defaults$mod.data.dir,"process_inputs.RData"))

  # Get basis functions for bulk and tail fits (normalization fit is done by latitude band)
  # These are just for one file/run, since we're evaluating, not fitting (so our desired output is just one "file")
  bulk.x <- get.predictors(n_files=1, df.x=defaults$bulk.x.df[1], df.t=defaults$bulk.x.df[2], df.xt=defaults$bulk.x.df[3], year.range=params[[1]]$year.range)
  tail.x <- get.predictors(n_files=1, df.x=defaults$tail.x.df[1], df.t=defaults$tail.x.df[2], df.xt=defaults$tail.x.df[3], year.range=params[[1]]$year.range)
  # If not using the volcanic data, load the normalization parameters, since these are no longer latitude-dependent
  if (!defaults$get.volc) {
    norm.x <- get.predictors(n_files=1, df.x=defaults$norm.x.df[1], df.t=defaults$norm.x.df[2], df.xt=defaults$norm.x.df[3],year.range=params[[1]]$year.range)
  }

  # Get global location indices for each lat-reg subset
  # - for every lat-lon-byreg subset in *process_inputs*, the code finds the corresponding
  # lat-lon pair in the params.lons/params.lats vectors below, representing the lat-lon pairs
  # of the calculated estimated quantiles. If there are duplicates in the *process_inputs*
  # subsets, then the resultant quantile_changes list may include more points than the
  # params list.
  params.lons <- unlist(lapply(params,function(x) if (length(x$lon)>0) {x$lon} else {NaN}))
  params.lats <- unlist(lapply(params,function(x) if (length(x$lon)>0) {x$lat} else {NaN}))

  # Calculate change in quantiles
  tic("Quantile changes calculated!")
  quantile_changes <- lapply(process.inputs,get.quantile.change,
                            year.i=base.years-defaults$base.year.range[1]+1, year.f=future.years-defaults$base.year.range[1]+1,
                            defaults=defaults,
                            bulk.x=bulk.x,tail.x=tail.x,
                            params=params,params.lats=params.lats,params.lons=params.lons)
  toc()

  # Get lat/lon from the qchg processing
  lat <- unlist(lapply(quantile_changes,function(x) x$lat))
  lon <- unlist(lapply(quantile_changes,function(x) x$lon))

  # Change resulting list into a [loc x quantile x year] array
  quantile_changes <- aperm(abind(lapply(quantile_changes,function(x) x$qchg),along=3),c(3,2,1))

  # Remove duplicates?
  dup.idxs <- duplicated(cbind(lat,lon))
  quantile_changes <- quantile_changes[!dup.idxs,,]
  lat <- lat[!dup.idxs]
  lon <- lon[!dup.idxs]

  # Get all quantiles
  q_all = params[[1]]$q_all


  if (export.nc) {
    # Set export filename
    fn <- paste0(c(paste0(defaults$mod.data.dir,"d",defaults$filevar),
               "day",defaults$mod.name,
               "qchange",
               paste0(min(future.years),"-",max(future.years)),
               paste0(min(base.years),"-",max(base.years))),collapse="_")

    if (output.type=="linear") {
      # Dimensions
      latdim <- ncdim_def("lat","deg",lat)
      londim <- ncdim_def("lon","deg",lon)
      locdim <- ncdim_def("loc","idx",1:length(lon),longname="location index")
      qdim <- ncdim_def("quantile","q",q_all)
      timedim <- ncdim_def("time","days",1:365,calendar="noleap")

      # Variables
      qchange <- ncvar_def(paste0("d",defaults$filevar),"K",list(locdim,qdim,timedim),NaN,longname="Change in Near-Surface Air Temperature in doy quantile",prec="double")
      latv <- ncvar_def("lat","deg",locdim,NaN,longname="latitude",prec="double")
      lonv <- ncvar_def("lon","deg",locdim,NaN,longname="longitude",prec="double")
      qs <- ncvar_def("quantile","q",qdim,NaN,longname="quantile",prec="double")

      # Create netcdf file
      ncout <- nc_create(paste0(fn,".nc"),list(latv,lonv,qchange))
      # add global attributes
      ncatt_put(ncout,0,"variable_short","taschg")
      ncatt_put(ncout,0,"variable_long","change in temperature in doy quantile")
      ncatt_put(ncout,0,"frequency","day")
      ncatt_put(ncout,0,"range",paste0(min(future.years),"-",max(future.years))," (mean) - ",paste0(min(base.years),"-",max(base.years))," (mean)")
      ncatt_put(ncout,0,"model_id",defaults$mod.name)
      ncatt_put(ncout,0,"comments",paste0("Difference in quantiles; day-of-year mean across ",paste0(min(future.years),"-",max(future.years)),
        " vs.  day-of-year mean across ",paste0(min(base.years),"-",max(base.years))))
      if (length(comments)>0){
        ncatt_put(ncout,0,"comments_2",comments)
      }
      ncatt_put(ncout,0,"attribution","This file was created using the 'quantile mapping' code package by Kevin Schwarzwald based on methodology and code by Matz Haugen and Haugen et al. (2018).")

      # Add variables to netcdf file
      ncvar_put(ncout,latv,lat)
      ncvar_put(ncout,lonv,lon)
      ncvar_put(ncout,qchange,quantile_changes)

    } else if (output.type=="grid") {
      # Get the grid vectors (ignores gaps - so if there are pixels with lat = 1 2 5 6, 
      # the vector will be c(1 2 5 6))
      if (length(lon_vec)==0) {lon_vec <- sort(unique(lon))}
      if (length(lat_vec)==0) {lat_vec <- sort(unique(lat))}
    

      if (max(diff(lat_vec))/median(unique(diff(lat_vec)))>2 || max(diff(lon_vec))/median(unique(diff(lon_vec)))>2) {
        warning("grid may be discontinuous (not every lat or lon value is sequential); suggest inputting target grid using the lon_vec and lat_vec options")
      }

      # Assign the values from the original [Raw] vector to the grid, by longitude band
      qchange_out <- array(data=NA,c(length(lon_vec),length(lat_vec),dim(quantile_changes)[2:3]))
      for (lon_idx in 1:length(lon_vec)) {
        match.idx <- match(lat_vec,lat[which(lon==lon_vec[lon_idx])])
        # This includes support for the ordering in lat_vec being different than in lat
        qchange_out[lon_idx,which(!is.na(match.idx)),,] <- quantile_changes[which(lon==lon_vec[lon_idx])[match.idx[!is.na(match.idx)]],,]
      }
      quantile_changes <- qchange_out; rm(qchange_out)

      # Dimensions
      latdim <- ncdim_def("lat","deg",lat_vec)
      londim <- ncdim_def("lon","deg",lon_vec)
      qdim <- ncdim_def("quantile","q",q_all)
      timedim <- ncdim_def("time","days",1:365,calendar="noleap")

      # Variables
      qchange <- ncvar_def(paste0("d",defaults$filevar),"K",list(londim,latdim,qdim,timedim),NaN,longname="Change in Near-Surface Air Temperature in doy quantile",prec="double")
      
      # Create netcdf file
      ncout <- nc_create(paste0(fn,".nc"),list(qchange))
      # add global attributes
      ncatt_put(ncout,0,"variable_short","taschg")
      ncatt_put(ncout,0,"variable_long","change in temperature in doy quantile")
      ncatt_put(ncout,0,"frequency","day")
      ncatt_put(ncout,0,"range",paste0(min(future.years),"-",max(future.years)," (mean) - ",paste0(min(base.years),"-",max(base.years)," (mean)")))
      ncatt_put(ncout,0,"model_id",defaults$mod.name)
      ncatt_put(ncout,0,"comments",paste0("Difference in quantiles; day-of-year mean across ",paste0(min(future.years),"-",max(future.years)),
        " vs.  day-of-year mean across ",paste0(min(base.years),"-",max(base.years))))
      if (length(comments)>0){
        ncatt_put(ncout,0,"comments_2",comments)
      }
      ncatt_put(ncout,0,"attribution","This file was created using the 'quantile mapping' code package by Kevin Schwarzwald based on methodology and code by Matz Haugen and Haugen et al. (2018).")

      # Add variables to netcdf file
      ncvar_put(ncout,qchange,quantile_changes)
    }
    # close the file, writing data to disk
    nc_close(ncout)
  }


  return(list(qchange=quantile_changes,qs=q_all,lat=lat,lon=lon))
}
ks905383/quantproj documentation built on Nov. 1, 2020, 9:12 p.m.