R/create_env_set.R

Defines functions create_env_set

Documented in create_env_set

################################################################################
### create_env_set
### 01/2022

#' Generate a set of environmental variable(s) that are similar to each other
#'  by a random amount
#'
#' @description Generates a set of environmental variables (can be just 1
#'  variable) that are related but have some random variation between them. The
#'  first variable is generated using unconditional Gaussian simulation using a
#'  variogram with the specified model and psill parameter, using
#'  \code{\link[gstat]{vgm}} from the \code{gstat} package, plus a random range
#'  value extracted using the specified distribution. New conditional
#'  Gaussian simulations are then made for subsequent variables using a random
#'  subsample of cells from the first variable plus a new random range value.
#'
#' @details The model and psill are kept constant across all variables, but a
#'  new range value is drawn from the specified distribution each time. The
#'  distribution is specified using a user-generated function. The user can
#'  either input the name of the function or define the function as the
#'  argument.
#'
#'  Subsequent variables are generated based on a random sample of the 1st
#'  variable. The number of cells selected is defined by \code{propSamp}; the
#'  larger the value the more similar the new variable will be to the original.
#'  The psill and model type of the variogram is kept constant across all new
#'  variables, although the psill can be altered using the parameter
#'  \code{dep1}, which is multiplied by the psill of the first variable (must
#'  be >0 and <2; default = 1).
#'
#' @author Charlie Marsh (charlie.marsh@@mailbox.org) & Yoni Gavish, based on
#'  the original code from
#'  http://santiago.begueria.es/2010/10/generating-spatially-correlated-random-fields-with-r/
#'
#' @param cellDims vector containing variable dimensions (n cell width, n cell
#'  height)
#' @param nPerSet the total number of environmental variables to generate
#' @param model model for variogram; default = "Sph" (see
#'  \code{\link[gstat]{vgm}} for options)
#' @param psill (partial) sill of the variogram model; default = 1.5 (see
#'  \code{\link[gstat]{vgm}} for options)
#' @param rangeFun name of function, or specified function, to generate random
#'  number for the range parameter of the variogram model. E.g. function()
#'  exp(runif(1, 1, 6))
#' @param propSamp proportion of cells to sample in each submodel (default =
#'  0.25)
#' @param dep1 multiplied by the psill in submodels (default = 1)
#'
#' @return A raster stack of environmental variables. For each variable all
#'  values are standardised between 0 and 1.
#'
#' @references Variations on this method have been used to generate virtual
#'  species in:
#'
#' Gavish, Y., Marsh, C.J., Kuemmerlen, M., Stoll, S., Haase, P., Kunin, W.E.,
#'  2017. Accounting for biotic interactions through alpha-diversity
#'  constraints in stacked species distribution models. Methods in Ecology and
#'  Evolution 8, 1092–1102. https://doi.org/10.1111/2041-210X.12731
#'
#' Marsh, C.J., Gavish, Y., Kunin, W.E., Brummitt, N.A., 2019. Mind the gap:
#'  Can downscaling Area of Occupancy overcome sampling gaps when assessing
#'  IUCN Red List status? Diversity and Distributions 25, 1832–1845.
#'  https://doi.org/10.1111/ddi.12983
#'
#' @examples
#'
#' ### generate a set of 5 related variables
#'
#' # the sampling distribution for range can either be a saved function
#' vrangeFun <- function() exp(runif(1, 1, 6))
#'
#' envSet <- create_env_set(cellDims = c(100, 100),
#'                          nPerSet  = 5,
#'                          model    = "Sph",
#'                          psill    = 1.5,
#'                          dep1     = 1,
#'                          rangeFun = vrangeFun,
#'                          propSamp = 0.25)
#'
#' # or we can define the function within the argument itself
#' envSet <- create_env_set(cellDims = c(100, 100),
#'                          nPerSet  = 5,
#'                          model    = "Sph",
#'                          psill    = 1.5,
#'                          dep1     = 1,
#'                          rangeFun = function() 100 + runif(1, -100, 100),
#'                          propSamp = 0.05)
#'
#' # the output is a raster stack which can be plotted
#' plot(envSet)
#'
################################################################################

#' @export
#' @importFrom raster raster stack
#' @importFrom sp "gridded<-"
#' @importFrom stats predict
create_env_set <- function(cellDims = c(100, 100),
                           nPerSet  = 5,
                           model    = "Sph",
                           psill    = 1.5,
                           rangeFun = "vrangeFun",
                           propSamp = 0.25,
                           dep1     = 1) {
  ### Build a raster stack of `nPerSet` related environmental variables.
  ###  - the first layer comes from create_env()
  ###  - each further layer comes from create_env_samp() applied to the first
  ###  - every layer is rescaled to [0, 1] and converted to a raster
  ### The layers of the returned stack are named var_1, var_2, ..., var_<nPerSet>.

  ### check nPerSet before doing any simulation work: a value < 1, NA, or a
  ### non-whole number would otherwise only surface later as an obscure error
  ### (e.g. "object 'envSet' not found") after the base variable has been built
  if (!is.numeric(nPerSet) || length(nPerSet) != 1L ||
      !is.finite(nPerSet) || nPerSet < 1 || nPerSet != round(nPerSet)) {
    stop("nPerSet must be a single whole number >= 1", call. = FALSE)
  }

  ### helper: standardise one simulated variable between 0 and 1, grid it on
  ### its x/y columns and convert it to a raster (reads columns sim1, x and y)
  standardise_to_raster <- function(env) {
    simRange <- range(env$sim1)
    env$sim1 <- (env$sim1 - simRange[1]) / (simRange[2] - simRange[1])
    gridded(env) <- ~x + y
    raster(env)
  }

  ### generate base variable
  envBase <- create_env(cellDims = cellDims,
                        model    = model,
                        psill    = psill,
                        rangeFun = rangeFun)

  ### generate the remaining variables from the base variable; the list is
  ### preallocated and seq_len(nPerSet)[-1] is empty when nPerSet is 1, so the
  ### single-variable case needs no separate branch
  envSet <- vector("list", nPerSet)
  envSet[[1]] <- envBase
  for (i in seq_len(nPerSet)[-1]) {
    envSet[[i]] <- create_env_samp(envBase,
                                   model    = model,
                                   psill    = psill,
                                   dep1     = dep1,
                                   rangeFun = rangeFun,
                                   propSamp = propSamp)
  }

  ### standardise and convert every variable to a raster
  envSet <- lapply(envSet, standardise_to_raster)

  ### stack and rename
  envSet <- stack(envSet)
  names(envSet) <- paste0("var_", seq_len(nPerSet))
  envSet
}
charliem2003/sdmProfiling documentation built on June 13, 2022, 4:43 a.m.