R/createResiduals.R

Defines functions createResiduals

Documented in createResiduals

# ####
#' Calculate GAM residuals 
#'
#' Use GAM analysis to compute residuals. Relies on mgcv::gam to perform general
#' additive model.
#' 
#' @param df data frame
#' @param dep variable
#' @param residualModel which gam formula is used to compute. 
#' Default = 'doy_flw_sal'
#' @param analySpec analytical specifications
#' @param gamTable gam table setting (set to FALSE to turn off table output)
#' Default = FALSE
#' @param gamPlot gam plot setting (set to FALSE to turn off plotting)
#' Default = FALSE
#' @param flow.detrended data generated by detrended.flow.  Default = NA
#' @param salinity.detrended data generated by detrended.flow.  Default = NA
#' @param width  width of png figure (inches). Default = 10
#' @param height height of png figure (inches). Default = 3.5
#' @param folder_r folder to store residual plots 
#' @param ProjRoot Root folder for project.
#'
#' @return Returns df with appended column of data 
#' 
#' @export
createResiduals <- function(df
                            , dep
                            , residualModel = 'doy_flw_sal'
                            , analySpec = analySpec
                            , gamTable = FALSE
                            , gamPlot = FALSE
                            , flow.detrended = NA
                            , salinity.detrended = NA
                            , width  = 10
                            , height = 3.5
                            , folder_r = 'pltResiduals'
                            , ProjRoot) {
  
# ----- Change history -------------------------------------------- ####
# 08Aug2018: JBH: updated to accomodate gam formula without cyear and by
#                 default push out plots to png files  
# 03Aug2018: JBH: updated to skip over cases (i.e., specific stations) where
#                 there were insufficient data to run specified gam formula
# 01Aug2018: JBH: migrated model loading to new function, loadModels. Added in
#                 a "fall back" option to use gam2 or gam4 if user had selected
#                 gam3 or gam5 and the model did not run (expected due to 
#                 intervention)
# 30Jul2018: JBH: function renamed to createResiduals; extended to allow for
#                 any gam0-gam5 model
# 12Mar2019: EWL: Add library references to functions.  
#                 Add ProjRoot as input parameter.
# 25Feb2022: EWL: Update for CRAN and trim to 80 char lines
#                 r-devel-linux-x86_64-debian-gcc, r-devel-windows-x86_64
#                   Found if() conditions comparing class() to string:
#  File 'baytrends/R/createResiduals.R': if (class(residualModel) == "list") ...
#  File 'baytrends/R/createResiduals.R': if (class(residualModel) == "list") ...
#  instead of class() use typeof().  is() not quite right.
# 05 May2022: EWL:  More CRAN check failures for if(class()) string comparisons.
#  if (class(residualModel) == "character" && (residualModel 
#           %in% c("gam0", "gam1", "gam2", "gam3", "gam4", "gam5"))) ...
#  if (class(residualModel) == "character" && (residualModel 
#           %in% c("doy", "doy_flw_sal", "doy_flw_sal_int"))) ...
# This is the last of the changes
  
  # create graph folder if not existing
  if (gamPlot) dir.create(file.path(ProjRoot, folder_r), showWarnings = FALSE)
  
  # error trap: all variables present?
  {
    vars <- c(dep, "year", "doy", "dyear", "month", "nummon") 
    if (sum(vars %in% names(df)) <6) {
      warning(paste(" Variables not found: ", paste(vars[!vars %in% names(df)]
                                                    , collapse = ', ')))
      return(NA)
    }
  }

  # error trap: is  variable of qw type
  if (!(typeof(df[, dep]) %in% c('numeric','Surv'))) {
  # if (!class(df[, dep]) == 'qw') {
    warning(paste0("Variable not numeric or Surv class: ", dep))
    return(NA)
  }
  
  # evaluate gam model choice
  if (typeof(residualModel) == "list" ) {
    print(paste0("User specified model for computing residuals"))
    analySpec$gamModels <- residualModel

    # set up computing residuals model for standard gam models ####
  } else if (typeof(residualModel) == "character" &&
             (residualModel %in% c('gam0'
                                   , 'gam1'
                                   , 'gam2'
                                   , 'gam3'
                                   , 'gam4'
                                   , 'gam5'))) {
    analySpec$gamModels   <- loadModels(residualModel)
    
    # set up computing residuals model ####
  } else if (typeof(residualModel) == "character" &&
             (residualModel %in% c('doy', 'doy_flw_sal', 'doy_flw_sal_int'))) {
    analySpec$gamModels   <- loadModelsResid(residualModel)
    
  } else {
    # not a good model specified
    warning(paste0("Valid model for computing residuals is not selected: "
                   , residualModel))
    return(NA)
  }
  
  # relabel if user provided model
  if (typeof(residualModel) == "list" ) {
    residualModel <- 'usr_spec'
  }

  # print selected gam model ####
  if(gamTable | !gamPlot==FALSE) {  
    # only show header if tables or figures are outputted
    .H4(paste("Model: ",analySpec$gamModels[[1]]$model))
  }
  
  print(analySpec$gamModels[[1]]$model)
  
  # down select station list and layer list ####
  analySpec$stationList <- analySpec$stationList[analySpec$stationList$stations 
                                                 %in% unique(df$station),]
  analySpec$layerList <- analySpec$layerList[analySpec$layerList$layers
                                             %in% unique(df$layer),]

  # apply gam and compute residuals ####  
  stations  <- analySpec$stationList$stations
  layers    <- analySpec$layerList$layers
  
  for (layer in layers) { 
    for (stat in stations) { 
      # layer=layers[1]; stat=stations[1]
      # apply specified model
      .H4(paste("Processing: ",stat,"/",layer))
      if (exists("gamResult")) rm(gamResult)
      gamModel.reset <- FALSE
      gamResult <- gamTest(df
                           , dep
                           , stat
                           , layer
                           , analySpec
                           , gamTable = gamTable
                           , gamPlot = gamPlot
                           , gamDiffModel = NA
                           , flow.detrended = flow.detrended
                           , salinity.detrended = salinity.detrended)
      
      # if model didn't run and model was gam3 or gam5 or doy_flw_sal_int, then 
      # drop back to gam 2 or gam4 or doy_flw_sal and retry
      if (!is.na(gamResult[1])) { 
        if (is.null(gamResult$stat.gam.result) & residualModel %in% 
                                        c('gam3', 'gam5', 'doy_flw_sal_int')) {
          .H4(paste("Processing: "
                    ,stat
                    ,"/"
                    ,layer
                    , ' -- dropping intervention term'))
          gamModel.reset     <- TRUE
          gamModel.reset.org <- analySpec$gamModels
          if (residualModel == 'gam3') analySpec$gamModels <- loadModels('gam2')
          if (residualModel == 'gam5') analySpec$gamModels <- loadModels('gam4')
          if (residualModel == 'doy_flw_sal_int') analySpec$gamModels <- 
                                                       loadModels('doy_flw_sal')
          gamResult <- gamTest(df, dep, stat, layer, analySpec
                               , gamTable = gamTable
                               , gamPlot = gamPlot
                               , gamDiffModel = NA
                               , flow.detrended = flow.detrended
                               , salinity.detrended = salinity.detrended) 
        }
      }

      if(gamPlot & !is.na(gamResult[1])) {
        grDevices::dev.copy(device = grDevices::png
                 , filename = file.path(folder_r, paste0(stat
                                                         , "~"
                                                         , dep
                                                         , "~"
                                                         , layer
                                                         , "~"
                                                         , residualModel
                                                         , '.png'))
                 , width = width
                 , height = height
                 , units = 'in'
                 , res = 600) 
        grDevices::dev.off()
      }
      
      # collect computed residuals
      if (!is.na(gamResult[1])) { 
        residuals <- eval(parse(text = paste0("gamResult$gamOutput"
                                              , analySpec$gamModels[[1]]$option
                                              ,"$gamRslt$residuals")))
        dep.res0 <- cbind(gamResult$data[,c("station","date","layer")]
                          ,residuals)
        if (!exists("dep.res1")) {
          dep.res1 <- dep.res0
        } else { 
          dep.res1 <- rbind(dep.res1, dep.res0)
        }
      }   
      
      # reset gamModel if necessary
      if (gamModel.reset) analySpec$gamModels   <- gamModel.reset.org
      
    } # end stat loop
  } # end layer loop

  # rename field residuals
  names(dep.res1) <- c("station"
                       , "date"
                       , "layer"
                       , paste0(dep, "_res.", residualModel))
  
  # merge residuals back to overall data frame ####  
  df <- merge(df, dep.res1, by = c("station", "layer", "date"), all.x = TRUE)
  
  return(df)
}

Try the baytrends package in your browser

Any scripts or data that you put into this service are public.

baytrends documentation built on May 31, 2023, 8:38 p.m.