R/autowin.R

Defines functions autowin

Documented in autowin

#'Test for auto-correlation in climate.
#'
#'Tests the correlation between the climate in a specified climate window and 
#'other fitted climate windows.
#'@param reference Reference climate data to be compared. Generated by functions
#'  \code{\link{singlewin}} or \code{\link{slidingwin}}.
#'@param xvar The climate variable of interest. Please specify the parent 
#'  environment and variable name (e.g. Climate$Temp).
#'@param cdate The climate date variable (dd/mm/yyyy). Please specify the parent
#'  environment and variable name (e.g. Climate$Date).
#'@param bdate The biological date variable (dd/mm/yyyy). Please specify the 
#'  parent environment and variable name (e.g. Biol$Date).
#'@param baseline The baseline model used to fit climate windows. These will be
#'  correlated with the reference climate window.
#'@param range Two values signifying respectively the furthest and closest number 
#'  of time intervals (set by cinterval) back from the cutoff date or biological record to include 
#'  in the climate window search.
#'@param stat The aggregate statistic used to analyse the climate data. Can 
#'  currently use basic R statistics (e.g. mean, min), as well as slope. 
#'  Additional aggregate statistics can be created using the format function(x) 
#'  (...). See parameter FUN in \code{\link{apply}} for more detail.
#'@param func The function used to fit the climate variable. Can be linear 
#'  ("lin"), quadratic ("quad"), cubic ("cub"), inverse ("inv") or log ("log").
#'  Not required when a variable is provided for parameter 'centre'.
#'@param type "absolute" or "relative", whether you wish the climate window to be relative
#'  (e.g. the number of days before each biological record is measured) or absolute
#'  (e.g. number of days before a set point in time).
#'@param refday If type is "absolute", the day and month respectively of the 
#'  year from which the absolute window analysis will start.
#'@param cmissing cmissing Determines what should be done if there are 
#'  missing climate data. Three approaches are possible:
#'   - FALSE; the function will not run if missing climate data is encountered.
#'   An object 'missing' will be returned containing the dates of missing climate.
#'   - "method1"; missing climate data will be replaced with the mean climate
#'   of the preceding and following 2 days.
#'   - "method2"; missing climate data will be replaced with the mean climate
#'   of all records on the same date.
#'@param cinterval The resolution at which climate window analysis will be 
#'  conducted. May be days ("day"), weeks ("week"), or months ("month"). Note the units 
#'  of parameter 'range' will differ with the choice of cinterval.
#'@param upper Cut-off value used to determine growing degree days or positive 
#'  climate thresholds (depending on parameter thresh). Note that when values
#'  of lower and upper are both provided, autowin will instead calculate an 
#'  optimal climate zone.
#'@param lower Cut-off value used to determine chill days or negative 
#'  climate thresholds (determined by parameter thresh). Note that when values
#'  of lower and upper are both provided, autowin will instead calculate an 
#'  optimal climate zone.
#'@param binary TRUE or FALSE. Determines whether to use values of upper and
#'  lower to calculate binary climate data (binary = TRUE), or to use for
#'  growing degree days (binary = FALSE).
#'@param centre A list item containing:
#'  1. The variable used for mean centring (e.g. Year, Site, Individual). 
#'  Please specify the parent environment and variable name (e.g. Biol$Year).
#'  2. Whether the model should include both within-group means and variance ("both"),
#'  only within-group means ("mean"), or only within-group variance ("dev").
#'@param cohort A variable used to group biological records that occur in the same biological
#'  season but cover multiple years (e.g. southern hemisphere breeding season). By default,
#'  autowin will use year (extracted from parameter bdate) as the cohort variable. 
#'  The cohort variable should be in the same dataset as the variable bdate.
#'@param spatial A list item containing:
#'  1. A factor that defines which spatial group (i.e. population) each biological
#'  record is taken from. The length of this factor should correspond to the length 
#'  of the biological dataset.
#'  2. A factor that defines which spatial group (i.e. population) climate data
#'  corresponds to. The length of this factor should correspond to the length of
#'  the climate dataset.
#'@param cutoff.day,cutoff.month Redundant parameters. Now replaced by refday.
#'@param furthest,closest Redundant parameters. Now replaced by range.
#'@param thresh Redundant parameter. Now replaced by binary.
#'@return Will return a data frame showing the correlation between the climate 
#'  in each fitted window and the chosen reference window.
#'@author Liam D. Bailey and Martijn van de Pol
#' @examples
#' 
#'#Simple test example
#'#Create data from a subset of our test dataset
#'#Just use two years
#'biol_data <- Mass[1:2, ]
#'clim_data <- MassClimate[grep(pattern = "1979|1986", x = MassClimate$Date), ]
#'
#'single <- singlewin(xvar = list(Temp = clim_data$Temp),
#'                    cdate = clim_data$Date, 
#'                    bdate = biol_data$Date, 
#'                    baseline = lm(Mass ~ 1, data = biol_data),
#'                    range = c(1, 0), 
#'                    type = "relative", stat = "mean", 
#'                    func = c("lin"), cmissing = FALSE, cinterval = "day")
#'                    
#' auto <- autowin(reference = single,
#'                 xvar  = list(Temp = clim_data$Temp),
#'                 cdate = clim_data$Date, bdate = biol_data$Date,
#'                 baseline = lm(Mass ~ 1, data = biol_data), range = c(1, 0), 
#'                 stat = "mean", func = "lin", 
#'                 type = "relative",
#'                 cmissing = FALSE, cinterval = "day")
#'
#' \dontrun{
#' 
#' # Full example
#' # Test for auto-correlation using 'Mass' and 'MassClimate' data frames
#' 
#' data(Mass)
#' data(MassClimate)
#' 
#' # Fit a single climate window using the datasets Mass and MassClimate.
#' 
#' single <- singlewin(xvar = list(Temp = MassClimate$Temp), 
#'                     cdate = MassClimate$Date, bdate = Mass$Date,
#'                     baseline = lm(Mass ~ 1, data = Mass), 
#'                     range = c(72, 15), 
#'                     stat = "mean", func = "lin", type = "absolute", 
#'                     refday = c(20, 5), 
#'                     cmissing = FALSE, cinterval = "day")            
#' 
#' # Test the autocorrelation between the climate in this single window and other climate windows.
#' 
#' auto <- autowin(reference = single,
#'                 xvar  = list(Temp = MassClimate$Temp), cdate = MassClimate$Date, bdate = Mass$Date,
#'                 baseline = lm(Mass ~ 1, data = Mass), range = c(365, 0), 
#'                 stat = "mean", func = "lin", 
#'                 type = "absolute", refday = c(20, 5),
#'                 cmissing = FALSE, cinterval = "day")
#'                 
#' # View the output
#' head(auto)
#' 
#' # Plot the output
#' plotcor(auto, type = "A")                                   
#'}
#'        
#'@export

autowin <- function(reference, xvar, cdate, bdate, baseline, range, stat, func, type, refday, 
                    cmissing = FALSE, cinterval = "day", upper = NA,
                    lower = NA, binary = FALSE, centre = list(NULL, "both"), 
                    cohort = NULL, spatial = NULL, cutoff.day = NULL, cutoff.month = NULL,
                    furthest = NULL, closest = NULL, thresh = NULL){
  
  #Check date formats
  if(all(is.na(as.Date(cdate, format = "%d/%m/%Y")))){
    
    stop("cdate is not in the correct format. Please provide date data in dd/mm/yyyy.")
    
  }
  
  if(all(is.na(as.Date(bdate, format = "%d/%m/%Y")))){
    
    stop("bdate is not in the correct format. Please provide date data in dd/mm/yyyy.")
    
  }
  
  thresholdQ <- "N"
  
  if((!is.na(upper) || !is.na(lower)) && (cinterval == "week" || cinterval == "month")){
    
    thresholdQ <- readline("You specified a climate threshold using upper and/or lower and are working at a weekly or monthly scale. 
Do you want to apply this threshold before calculating weekly/monthly means (i.e. calculate thresholds for each day)? Y/N")
    
    thresholdQ <- toupper(thresholdQ)
    
    if(thresholdQ != "Y" & thresholdQ != "N"){
      
      thresholdQ <- readline("Please specify yes (Y) or no (N)")
      
    }
    
  }
  
  if(is.null(cohort) == TRUE){
    cohort = lubridate::year(as.Date(bdate, format = "%d/%m/%Y")) 
  }
  
  WindowOpen  <- reference$Dataset$WindowOpen[1]
  WindowClose <- reference$Dataset$WindowClose[1]
  reference   <- reference$BestModelData$climate
  
  if(is.null(thresh) == FALSE){
    stop("Parameter 'thresh' is now redundant. Please use parameter 'binary' instead.")
  }
  
  if(type == "variable" || type == "fixed"){
    stop("Parameter 'type' now uses levels 'relative' and 'absolute' rather than 'variable' and 'fixed'.")
  }
  
  if(is.null(cutoff.day) == FALSE & is.null(cutoff.month) == FALSE){
    stop("cutoff.day and cutoff.month are now redundant. Please use parameter 'refday' instead.")
  }
  
  if(is.null(furthest) == FALSE & is.null(closest) == FALSE){
    stop("furthest and closest are now redundant. Please use parameter 'range' instead.")
  }
  
  xvar = xvar[[1]]

  message("Initialising, please wait...")
  
  if (stat == "slope" & func == "log" || stat == "slope" & func == "inv"){
    stop("stat = slope cannot be used with func = log or inv as negative values may be present.")
  }
  
  if (cinterval == "day"){
    if ((min(as.Date(bdate, format = "%d/%m/%Y")) - range[1]) < min(as.Date(cdate, format = "%d/%m/%Y"))){
      stop("You do not have enough climate data to search that far back. Please adjust the value of range or add additional climate data.")
     }
  }
  
  if (cinterval == "week"){
    if ((min(as.Date(bdate, format = "%d/%m/%Y")) - lubridate::weeks(range[1])) < min(as.Date(cdate, format = "%d/%m/%Y"))){
      stop("You do not have enough climate data to search that far back. Please adjust the value of range or add additional climate data.")
    }
  }
  
  if (cinterval == "month"){
    if ((min(as.Date(bdate, format = "%d/%m/%Y")) - months(range[1])) < min(as.Date(cdate, format = "%d/%m/%Y"))){
      stop("You do not have enough climate data to search that far back. Please adjust the value of range or add additional climate data.")
    }
  }
  
  duration   <- (range[1] - range[2]) + 1
  maxmodno   <- (duration * (duration + 1)) / 2 
  cont       <- convertdate(bdate = bdate, cdate = cdate, xvar = xvar, 
                            cinterval = cinterval, type = type, 
                            refday = refday, cohort = cohort, spatial = spatial,
                            thresholdQ = thresholdQ)
  modno      <- 1
  modlist    <- list()
  cmatrix    <- matrix(ncol = (duration), nrow = length(bdate))
  climate1   <- matrix(ncol = 1, nrow = length(bdate), 1)
  
  if(cinterval == "day" || (!is.na(thresholdQ) && thresholdQ == "N")){ #If dealing with daily data OR user chose to apply threshold later...
    
    if(is.null(spatial) == FALSE){ #...and spatial information is provided...
      
      if (is.na(upper) == FALSE && is.na(lower) == TRUE){ #...and an upper bound is provided...
        if (binary == TRUE){ #...and we want data to be binary (i.e. it's above the value or it's not)
          cont$xvar$Clim <- ifelse (cont$xvar$Clim > upper, 1, 0) #Then turn climate data into binary data.
        } else { #Otherwise, if binary is not true, simply make all data below the upper limit into 0.
          cont$xvar$Clim <- ifelse (cont$xvar$Clim > upper, cont$xvar$Clim, 0)
        }
      }
      
      if (is.na(lower) == FALSE && is.na(upper) == TRUE){ #If a lower limit has been provided, do the same.
        if (binary == TRUE){
          cont$xvar$Clim <- ifelse (cont$xvar$Clim < lower, 1, 0)
        } else {
          cont$xvar$Clim <- ifelse (cont$xvar$Clim < lower, cont$xvar$Clim, 0)
        }
      }
      
      if (is.na(lower) == FALSE && is.na(upper) == FALSE){ #If both an upper and lower limit are provided, do the same.
        if (binary == TRUE){
          cont$xvar$Clim <- ifelse (cont$xvar$Clim > lower && cont$xvar$Clim < upper, 1, 0)
        } else {
          cont$xvar$Clim <- ifelse (cont$xvar$Clim > lower && cont$xvar$Clim < upper, cont$xvar$Clim - lower, 0)
        } 
      }
      
    } else { #Do the same with non-spatial data (syntax is just a bit different, but method is the same.)
      
      if (is.na(upper) == FALSE && is.na(lower) == TRUE){
        if (binary == TRUE){
          cont$xvar <- ifelse (cont$xvar > upper, 1, 0)
        } else {
          cont$xvar <- ifelse (cont$xvar > upper, cont$xvar, 0)
        }
      }
      
      if (is.na(lower) == FALSE && is.na(upper) == TRUE){
        if (binary == TRUE){
          cont$xvar <- ifelse (cont$xvar < lower, 1, 0)
        } else {
          cont$xvar <- ifelse (cont$xvar < lower, cont$xvar, 0)
        }
      }
      
      if (is.na(lower) == FALSE && is.na(upper) == FALSE){
        if (binary == TRUE){
          cont$xvar <- ifelse (cont$xvar > lower & cont$xvar < upper, 1, 0)
        } else {
          cont$xvar <- ifelse (cont$xvar > lower & cont$xvar < upper, cont$xvar - lower, 0)
        } 
      } 
      
    }
    
  }
  
  # Create a matrix with the climate data from closest to furthest days
  # back from each biological record
  if(is.null(spatial) == FALSE){
    for (i in 1:length(bdate)){
      cmatrix[i, ] <- cont$xvar[which(cont$cintno$spatial %in% cont$bintno$spatial[i] & cont$cintno$Date %in% (cont$bintno$Date[i] - c(range[2]:range[1]))), 1]   #Create a matrix which contains the climate data from furthest to furthest from each biological record#    
    }
  } else {
    for (i in 1:length(bdate)){
      cmatrix[i, ] <- cont$xvar[which(cont$cintno %in% (cont$bintno[i] - c(range[2]:range[1])))]   #Create a matrix which contains the climate data from furthest to furthest from each biological record#    
    } 
  }
  cmatrix <- as.matrix(cmatrix[, c(ncol(cmatrix):1)])
  
  if (cmissing == FALSE && length(which(is.na(cmatrix))) > 0){
    if(is.null(spatial) == FALSE){
      if (cinterval == "day"){
        .GlobalEnv$missing <- as.Date(cont$cintno$Date[is.na(cont$xvar$Clim)], origin = min(as.Date(cdate, format = "%d/%m/%Y")) - 1)
      }
      if (cinterval == "month"){
        .GlobalEnv$missing <- c(paste("Month:", month(as.Date(cont$cintno$Date[is.na(cont$xvar$Clim)], origin = min(as.Date(cdate, format = "%d/%m/%Y")) - 1)),
                                      "Year:", year(as.Date(cont$cintno$Date[is.na(cont$xvar$Clim)], origin = min(as.Date(cdate, format = "%d/%m/%Y")) - 1))))
      }
      if (cinterval == "week"){
        .GlobalEnv$missing <- c(paste("Week:", month(as.Date(cont$cintno$Date[is.na(cont$xvar$Clim)], origin = min(as.Date(cdate, format = "%d/%m/%Y")) - 1)),
                                      "Year:", year(as.Date(cont$cintno$Date[is.na(cont$xvar$Clim)], origin = min(as.Date(cdate, format = "%d/%m/%Y")) - 1))))
      }
      stop(c("Climate data should not contain NA values: ", length(.GlobalEnv$missing),
             " NA value(s) found. Please add missing climate data or set cmissing=TRUE.
           See object missing for all missing climate data"))
    } else {
      if (cinterval == "day"){
        .GlobalEnv$missing <- as.Date(cont$cintno[is.na(cont$xvar)], origin = min(as.Date(cdate, format = "%d/%m/%Y")) - 1)
      }
      if (cinterval == "month"){
        .GlobalEnv$missing <- c(paste("Month:", month(as.Date(cont$cintno[is.na(cont$xvar)], origin = min(as.Date(cdate, format = "%d/%m/%Y")) - 1)),
                                      "Year:", year(as.Date(cont$cintno[is.na(cont$xvar)], origin = min(as.Date(cdate, format = "%d/%m/%Y")) - 1))))
      }
      if (cinterval == "week"){
        .GlobalEnv$missing <- c(paste("Week:", month(as.Date(cont$cintno[is.na(cont$xvar)], origin = min(as.Date(cdate, format = "%d/%m/%Y")) - 1)),
                                      "Year:", year(as.Date(cont$cintno[is.na(cont$xvar)], origin = min(as.Date(cdate, format = "%d/%m/%Y")) - 1))))
      }
      stop(c("Climate data should not contain NA values: ", length(.GlobalEnv$missing),
             " NA value(s) found. Please add missing climate data or set cmissing=TRUE.
           See object missing for all missing climate data"))
    }
  }
  
  #If we expect NAs and choose a method to deal with them...
  if (cmissing != FALSE && any(is.na(cmatrix))){
    
    message("Missing climate data detected. Please wait while NAs are replaced.")
    
    for(i in which(is.na(cmatrix))){
      
      #Determine the column and row location...
      if(i %% nrow(cmatrix) == 0){
        
        col <- i/nrow(cmatrix)
        row <- nrow(cmatrix)
        
      } else {
        
        col <- i%/%nrow(cmatrix) + 1
        row <- i %% nrow(cmatrix)
        
      }
      
      
      #If we are using method1
      if(cmissing == "method1"){
        
        #If we are using a daily interval
        if(cinterval == "day"){
          
          #For the original cdate data extract date information.
          cdate_new <- data.frame(Date = as.Date(cdate, format = "%d/%m/%Y"))
          
          #Extract the original biological date
          bioldate <- as.Date(bdate[row], format = "%d/%m/%Y")
          
          #Determine from this on which date data is missing
          missingdate <- bioldate - (col + range[2] - 1)
          
          #If we have spatial replication
          if(is.null(spatial) == FALSE){
            
            cdate_new$spatial <- spatial[[2]]
            
            siteID <- spatial[[1]][row]
            
            cmatrix[row, col] <- mean(xvar[which(cdate_new$Date %in% c(missingdate - (1:2), missingdate + (1:2)) & cdate_new$spatial %in% siteID)], na.rm = T)
            
          } else {
            
            cmatrix[row, col] <- mean(xvar[which(cdate_new$Date %in% c(missingdate - (1:2), missingdate + (1:2)))], na.rm = T)
            
          }
          
        } else if(cinterval == "week" || cinterval == "month"){
          
          if(is.null(spatial) == FALSE){
            
            #Extract the climate week numbers
            cdate_new <- data.frame(Date = cont$cintno$Date,
                                    spatial = cont$cintno$spatial)
            
            #Extract the biological week number that is missing
            bioldate <- cont$bintno$Date[row]
            
            #Determine from this on which week data is missing
            missingdate <- bioldate - (col + range[2] - 1)
            
            siteID <- spatial[[1]][row]
            
            cmatrix[row, col] <- mean(cont$xvar$Clim[which(cdate_new$Date %in% c(missingdate - (1:2), missingdate + (1:2)) & cdate_new$spatial %in% siteID)], na.rm = T)
            
          } else {
            
            #Extract the climate week numbers
            cdate_new <- data.frame(Date = cont$cintno)
            
            #Extract the biological week number that is missing
            bioldate <- cont$bintno[row]
            
            #Determine from this on which week data is missing
            missingdate <- bioldate - (col + range[2] - 1)
            
            cmatrix[row, col] <- mean(cont$xvar[which(cdate_new$Date %in% c(missingdate - (1:2), missingdate + (1:2)))], na.rm = T)
            
          }
          
        }
        
        #If the record is still an NA, there must be too many NAs. Give an error.
        if(is.na(cmatrix[row, col])){
          
          stop("Too many consecutive NAs present in the data. Consider using method2 or manually replacing NAs.")
          
        }
        
      } else if(cmissing == "method2"){
        
        if(cinterval == "day"){
          
          #For the original cdate data, determine the year, month, week and day.
          cdate_new <- data.frame(Date = as.Date(cdate, format = "%d/%m/%Y"),
                                  Month = lubridate::month(as.Date(cdate, format = "%d/%m/%Y")),
                                  Day   = lubridate::day(as.Date(cdate, format = "%d/%m/%Y")))
          
          #Extract the original biological date
          bioldate <- as.Date(bdate[row], format = "%d/%m/%Y")
          
          #Determine from this on which date data is missing
          missingdate <- bioldate - (col + range[2] - 1)
          
          missingdate <- data.frame(Date  = missingdate,
                                    Month = lubridate::month(missingdate),
                                    Day   = lubridate::day(missingdate))
          
          if(is.null(spatial) == FALSE){
            
            cdate_new$spatial <- spatial[[2]]
            
            siteID <- spatial[[1]][row]
            
            cmatrix[row, col] <- mean(xvar[which(cdate_new$Month %in% missingdate$Month & cdate_new$Day %in% missingdate$Day & cdate_new$spatial %in% siteID)], na.rm = T)
            
          } else {
            
            cmatrix[row, col] <- mean(xvar[which(cdate_new$Month %in% missingdate$Month & cdate_new$Day %in% missingdate$Day)], na.rm = T)
            
          }
          
        } else if(cinterval == "week" || cinterval == "month"){
          
          if(is.null(spatial) == FALSE){
            
            #Extract the climate week numbers
            cdate_new <- data.frame(Date = cont$cintno$Date,
                                    spatial = cont$cintno$spatial)
            
            #Extract the biological week number that is missing
            bioldate <- cont$bintno$Date[row]
            
            #Determine from this on which week data is missing
            missingdate <- bioldate - (col + range[2] - 1)
            
            #Convert all dates back into year specific values
            if(cinterval == "week"){
              
              cdate_new$Date <- cdate_new$Date - (floor(cdate_new$Date/52) * 52)
              cdate_new$Date <- ifelse(cdate_new$Date == 0, 52, cdate_new$Date)
              
              missingdate <- missingdate - (floor(missingdate/52) * 52)
              missingdate <- ifelse(missingdate == 0, 52, missingdate)
              
            } else {
              
              cdate_new$Date <- cdate_new$Date - (floor(cdate_new$Date/12) * 12)
              cdate_new$Date <- ifelse(cdate_new$Date == 0, 12, cdate_new$Date)
              
              missingdate <- missingdate - (floor(missingdate/12) * 12)
              missingdate <- ifelse(missingdate == 0, 12, missingdate)
              
            }
            
            siteID <- spatial[[1]][row]
            
            cmatrix[row, col] <- mean(cont$xvar$Clim[which(cdate_new$Date %in% missingdate & cdate_new$spatial %in% siteID)], na.rm = T)
            
          } else {
            
            #Extract the climate week numbers
            cdate_new <- data.frame(Date = cont$cintno)
            
            #Extract the biological week number that is missing
            bioldate <- cont$bintno[row]
            
            #Determine from this on which week data is missing
            missingdate <- bioldate - (col + range[2] - 1)
            
            #Convert all dates back into year specific values
            if(cinterval == "week"){
              
              cdate_new$Date <- cdate_new$Date - (floor(cdate_new$Date/52) * 52)
              cdate_new$Date <- ifelse(cdate_new$Date == 0, 52, cdate_new$Date)
              
              missingdate <- missingdate - (floor(missingdate/52) * 52)
              missingdate <- ifelse(missingdate == 0, 52, missingdate)
              
            } else {
              
              cdate_new$Date <- cdate_new$Date - (floor(cdate_new$Date/12) * 12)
              cdate_new$Date <- ifelse(cdate_new$Date == 0, 12, cdate_new$Date)
              
              missingdate <- missingdate - (floor(missingdate/12) * 12)
              missingdate <- ifelse(missingdate == 0, 12, missingdate)
              
            }
            
            cmatrix[row, col] <- mean(cont$xvar[which(cdate_new$Date %in% missingdate)], na.rm = T)
            
          }
        }
        
        if(is.na(cmatrix[row, col])){
          
          stop("There is not enough data to replace missing values using method2. Consider dealing with NA values manually")
          
        }
        
      } else {
        
        stop("cmissing should be method1, method2 or FALSE")
        
      }
    }
  } 

  modeldat           <- model.frame(baseline)
  modeldat$yvar      <- modeldat[, 1]
  modeldat$climate   <- seq(1, nrow(modeldat), 1)
  
  if (is.null(weights(baseline)) == FALSE){
    if (class(baseline)[1] == "glm" & sum(weights(baseline)) == nrow(model.frame(baseline)) || attr(class(baseline), "package") == "lme4" & sum(weights(baseline)) == nrow(model.frame(baseline))){
    } else {
      modeldat$modweights <- weights(baseline)
      baseline <- update(baseline, .~., weights = modeldat$modweights, data = modeldat)
    }
  }
  
  #If using a mixed model, ensure that maximum likelihood is specified (because we are comparing models with different fixed effects)
  if(!is.null(attr(class(baseline), "package")) && attr(class(baseline), "package") == "lme4" && class(baseline)[1] == "lmerMod" && baseline@resp$REML == 1){
    
    message("Linear mixed effects models are run in climwin using maximum likelihood. Baseline model has been changed to use maximum likelihood.")
    
    baseline <- update(baseline, yvar ~., data = modeldat, REML = F)
    
  }
  
  if(attr(baseline, "class")[1] == "lme" && baseline$method == "REML"){
    
    message("Linear mixed effects models are run in climwin using maximum likelihood. Baseline model has been changed to use maximum likelihood.")
    
    baseline <- update(baseline, yvar ~., data = modeldat, method = "ML")
    
  }
  
  if (func == "lin"){
    modeloutput <- update(baseline, .~. + climate, data = modeldat)
  } else if (func == "quad") {
    modeloutput <- update(baseline, .~. + climate + I(climate ^ 2), data = modeldat)
  } else if (func == "cub") {
    modeloutput <- update(baseline, .~. + climate + I(climate ^ 2) + I(climate ^ 3), data = modeldat)
  } else if (func == "log") {
    modeloutput <- update(baseline, .~. + log(climate), data = modeldat)
  } else if (func == "inv") {
    modeloutput <- update (baseline, .~. + I(climate ^ -1), data = modeldat)
  } else {
    stop("Define func")
  }
  
  pb <- txtProgressBar(min = 0, max = maxmodno, style = 3, char = "|")
  
  for (m in range[2]:range[1]){
    for (n in 1:duration){
      if ( (m - n) >= (range[2] - 1)){  # do not use windows that overshoot the closest possible day in window   
        if (stat != "slope" || n > 1){
          windowopen  <- m - range[2] + 1
          windowclose <- windowopen-n + 1
          if (stat == "slope"){ 
            time       <- seq(1, n, 1)
            climate1 <- apply(cmatrix[, windowclose:windowopen], 1, FUN = function(x) coef(lm(x ~ time))[2])
          } else { 
            if (n == 1){
              climate1 <- cmatrix[, windowclose:windowopen]
            } else {
              climate1 <- apply(cmatrix[, windowclose:windowopen], 1, FUN = stat)
            }
          }
          
          modeloutput <- cor(climate1, reference)
          
          modlist$cor[modno]         <- modeloutput
          modlist$WindowOpen[modno]  <- m
          modlist$WindowClose[modno] <- m - n + 1
          modno                        <- modno + 1
        }
      }
    }  
    #Fill progress bar
    setTxtProgressBar(pb, modno - 1)
  }
  
  modlist$Furthest        <- range[1]
  modlist$Closest         <- range[2]
  modlist$Statistics      <- stat
  modlist$Functions       <- type
  modlist$BestWindowOpen  <- WindowOpen
  modlist$BestWindowClose <- WindowClose
  
  if (type == TRUE){
    modlist$Reference.day   <- refday[1]
    modlist$Reference.month <- refday[2] 
  }
  
  local <- as.data.frame(modlist)
  return(local)
}
LiamDBailey/climwin documentation built on July 8, 2022, 8:26 p.m.