# R/Crossvalidate.R

#' @title Process module: Crossvalidate
#'
#' @description Assign each record to one of \code{k} crossvalidation folds.
#'   If the data are presence/absence (values are exactly 0 and 1, with both
#'   present), fold labels are drawn separately for presences and absences;
#'   otherwise fold labels are drawn for all records at once. Labels are
#'   sampled with replacement, so fold sizes are only approximately equal.
#'
#' @param .data \strong{Internal parameter, do not use in the workflow function}. \code{.data} is a list of a data frame and a raster object returned from occurrence modules and covariate modules respectively. \code{.data} is passed automatically in workflow from the occurrence and covariate modules to the process module(s) and should not be passed by the user.
#'
#' @param k Positive integer number of folds to split the data into. Default is 5.
#'   An error is raised if \code{k} is not a single positive whole number.
#'
#' @param seed Numeric used with \code{\link[base]{set.seed}}, or \code{NULL}
#'   (the default) to leave the random number generator untouched.
#'
#' @return A list with two elements: \code{df}, a data frame with columns
#'   \code{value}, \code{type}, \code{fold}, \code{longitude},
#'   \code{latitude} and one column per layer of the raster (rows containing
#'   \code{NA} are removed), carrying a \code{covCols} attribute with the
#'   covariate column names; and \code{ras}, the raster that was passed in.
#'
#' @author ZOON Developers, \email{zoonproject@@gmail.com}
#' @section Version: 1.0
#' @section Date submitted: 2015-11-13
#' @section Data type: presence-only, presence/absence, abundance, proportion
#'
#' @name Crossvalidate
#' @family process
Crossvalidate <-
function (.data, k = 5, seed = NULL) {

  # Validate k before doing any work: 1:k silently counts backwards for
  # k < 1, which would produce nonsensical fold labels such as 0.
  if (!is.numeric(k) || length(k) != 1 || !is.finite(k) ||
      k < 1 || k != round(k)) {
    stop("'k' must be a single positive whole number")
  }

  occurrence <- .data$df
  ras <- .data$ras

  # Keep any custom attributes of the occurrence data so they can be
  # re-attached to the output data frame
  att_names <- names(attributes(occurrence))
  Atts <- attributes(occurrence)[!att_names %in% c('names', 'class', 'row.names')]

  # isTRUE() guards against NA in the value column, which would otherwise
  # make the condition NA and abort with an unhelpful error
  if (isTRUE(all(occurrence$value == 1))) {
    message('You currently only have presence points. Unless you are using presence only modelling, create some pseudoabsence points before this module.')
  }

  # set seed if specified
  if (!is.null(seed)) {
    if (!is.numeric(seed)) {
      stop("'seed' must be numeric or NULL")
    }
    set.seed(seed)
  }

  fold_labels <- seq_len(k)
  n_obs <- NROW(occurrence)

  # Presence/absence data: both 0 and 1 occur and nothing else does
  is_pa <- all(c(0, 1) %in% occurrence$value) &&
    all(occurrence$value %in% c(0, 1))

  if (is_pa) {
    # draw folds separately for presences and absences (presences first)
    is_presence <- occurrence$value == 1
    fold <- rep(NA, n_obs)
    fold[is_presence] <- sample(fold_labels, sum(is_presence), replace = TRUE)
    fold[!is_presence] <- sample(fold_labels, sum(!is_presence), replace = TRUE)
  } else {
    # presence only, abundance etc.: draw folds for all records at once
    fold <- sample(fold_labels, n_obs, replace = TRUE)
  }

  # extract covariates at the occurrence coordinates
  coords <- cbind(occurrence$lon,
                  occurrence$lat)
  covs <- as.matrix(extract(ras, coords))

  # combine with the occurrence data
  df <- data.frame(value = occurrence$value,
                   type = occurrence$type,
                   fold = fold,
                   longitude = occurrence$lon,
                   latitude = occurrence$lat,
                   covs)

  # The first five columns are fixed; everything after them is a covariate.
  # seq_len() keeps this index empty (rather than c(6, 5)) if there are none.
  n_fixed <- 5
  cov_idx <- n_fixed + seq_len(ncol(df) - n_fixed)
  names(df)[cov_idx] <- names(ras)

  attributes(df) <- c(attributes(df), Atts)
  attr(df, 'covCols') <- names(ras)

  # drop records with a missing value in any column
  df <- na.omit(df)

  return(list(df = df, ras = ras))

}
# zoonproject/modules documentation built on May 4, 2019, 11:25 p.m.