R/todo/makeVirtualSpecies.R

Defines functions makeVirtualSpecies

#' Generates virtual species
#'
#' @description Generates virtual species from virtual ecological niches. The niche definition can be either provided by the user, or generated randomly.
#'
#' @usage makeVirtualSpecies(
#' variables,
#' niche.parameters = NULL,
#' seed = NULL,
#' species.type = "multiplicative",
#' max.n = NULL
#' )
#'
#' @param variables A raster brick or stack with three or more environmental variables.
#' @param niche.parameters A named list containing the parameters of the gaussian functions representing the niche of the species. Each slot name must be a layer name of the brick/stack \code{variables}. The slot must contain a vector with two numbers. The first represents the mean of a normal distributiĆ³n, and the second represents the standard deviation of the same normal distribution. These numbers must be in the same units of the \code{variables} layer with the same name of the slot. If this argument is set to \code{NULL}, then a random set of niche parameters is generated by the function based on an uncorrelated set of predictive variables.
#' @param seed Integer determining a random seed. Only relevant when \code{niche.parameters = NULL}. Added to allow reproducibility in the generation of random virtual species.
#' @param species.type String, one of "additive" or "multiplicative". This is an argument of the function \code{\link[virtualspecies]{generateSpFromFun}}, which is used internally. It defines how the overall environmental suitability is computed from the response functions of each predictor. These are summed when \code{species.type = "additive"}, and multiplied when \code{species.type = "multiplicative"}, which is the default behavior.
#' @param max.n Numeric integer, maximum number of presence records to extract if possible, otherwise the function returns as many presence records as possible.
#'
#' @return A named list with 5 slots:
#' \itemize{
#'  \item \emph{niche.dimensions}: Names of the variables used to define the niche of the virtual species.
#'  \item \emph{niche.parameters}: Either the same \code{niche.parameters} used as input, or the niche definition generated randomly by the function if \code{niche.parameters = NULL}.
#'  \item \emph{niche.plot}: A ggplot with the density plots of the presence records (represented in blue) and the background (represented in yellow), and a habitat suitability map with presence records.
#'  \item \emph{suitability.raster}: A habitat suitability raster generated from the niche parameters by the function \code{\link[virtualspecies]{generateSpFromFun}}.
#'  \item \emph{observed.presence}: A dataframe with the coordinates x and y (in the same coordinate reference system as \code{variables}) of the selected presences.
#' }
#'
#' @details This function relies on the functions \code{\link[virtualspecies]{generateSpFromFun}}, \code{\link[virtualspecies]{convertToPA}} ,\code{\link[virtualspecies]{sampleOccurrences}} of the \emph{virtualspecies} package, so please, cite Leroy et al. (2016) if you find this function useful.
#'
#' Note that for simplicity, the settings of the functions mentioned above have been simplified. Particularly, the function \code{\link[virtualspecies]{convertToPA}} is configured with the parameters \code{PA.method = "probability"}, \code{prob.method = "linear"}, \code{a = 1}, and \code{b = 0}. This setting is the safest one when \code{niche.parameters} is not provided, and therefore randomized virtual taxa are being generated.
#'
#' The function \code{\link[virtualspecies]{sampleOccurrences}} is configured as follows: \code{type = "presence only"}; \code{correct.by.suitability = TRUE}.
#'
#' Please, check the help files of these three functions to better understand how they work!
#'
#' @examples
#'\dontrun{
#' data("europe2000")
#'
#'#niche parameters list
#'niche.parameters <- list(
#'  bio12 = c(500, 250),
#'  bio5 = c(240, 50),
#'  bio6 = c(10, 30),
#'  human_footprint = c(0, 30),
#'  topo_slope = c(0, 2),
#'  landcover_veg_herb = c(100, 35)
#')
#'
#'#making the virtual species
#'vs <- makeVirtualSpecies(
#'  variables = europe2000,
#'  niche.parameters = NULL,
#'  max.n = 200,
#'  species.type = "multiplicative",
#'  seed = NULL
#')
#'}
#'
#' @author Blas Benito <blasbenito@gmail.com>. The functions \code{\link[virtualspecies]{generateSpFromFun}}, \code{\link[virtualspecies]{convertToPA}}, and \code{\link[virtualspecies]{sampleOccurrences}} are authored by Boris Leroy <leroy.boris@gmail.com>, with help from C. N. Meynard, C. Bellard & F. Courchamp
#' @references Leroy, B., Meynard, C.N., Bellard, C. and Courchamp, F. (2016), virtualspecies, an R package to generate virtual species distributions. Ecography, 39: 599-607. doi:10.1111/ecog.01388
#' @export
makeVirtualSpecies <- function(variables, niche.parameters = NULL, seed = NULL, species.type = "multiplicative", max.n = NULL){

  #setting random seed
  if(is.null(seed) == FALSE){
    set.seed(seed)
  }

  #variables brick to dataframe
  variables.df <-
    variables %>%
    raster::as.data.frame() %>%
    na.omit()

  #if niche.parameters is null, creates a random definition of niche parameters
  if(is.null(niche.parameters) == TRUE){

    #gets random niche dimensions
    variable.names <- variables@data@names
    if(length(variable.names) > 2){
      niche.dimensions <- sample(
        x = variable.names,
        sample(x = 2:floor(length(variable.names)/2))
        )
    } else {
      niche.dimensions <- variable.names
    }

    #applies automatic vif to niche dimensions to keep uncorrelated ones
    niche.dimensions <- s_vif_auto(training.df = variables.df[, niche.dimensions])

    #list to store niche parameters
    niche.parameters <- list()

    #iterates through niche dimensions
    for(i in niche.dimensions){

      #computes random mean and sd for the niche function
      #mean is selected between quantiles 0.1 and 0.9 of the range of each niche dimension
      #sd takes a value between the 0.5 and 0.05 of the range of the variable.
      niche.parameters[[i]] <-
        c(
          sample(x = seq(quantile(variables.df[,i], 0.1), quantile(variables.df[,i], 0.9), length.out = 1000), size = 1),
          (max(variables.df[,i]) - min(variables.df[,i])) / sample(x = seq(2, 20, by = 0.1), size = 1)
        )
    }

  }#end of random species

  #getting niche dimensions
  niche.dimensions <- names(niche.parameters)

  #keeping niche dimensions only
  variables.df <- variables.df[, niche.dimensions]

  #applies niche parameters to the values of the niche dimensions
  #to generate response curves
  variables.df.nrow <- nrow(variables.df)
  response.curves <- list()
  for(i in niche.dimensions){
    response.curves[[i]] <- rnorm(
      variables.df.nrow,
      mean = niche.parameters[[i]][1],
      sd = niche.parameters[[i]][2]
    )
  }

  #from list to data frame
  response.curves <-
    data.frame(do.call("cbind", response.curves)) %>%
    tidyr::pivot_longer(
      cols = niche.dimensions,
      names_to = "variable",
      values_to = "value"
    ) %>%
    data.frame()

  #variables.df to long
  variables.df.long <-
    variables.df %>%
    tidyr::pivot_longer(
      cols = niche.dimensions,
      names_to = "variable",
      values_to = "value"
    ) %>%
    data.frame()

  #plots density of the variables
  plot.use.availability <- ggplot(
    data = variables.df.long,
    aes(
      x = value,
      group = variable
    )
  ) +
    geom_density(
      fill = viridis::viridis(1, begin = 0.99),
      alpha = 1,
      size = 0
    ) +
    facet_wrap("variable", scales = "free") +
    geom_density(
      data = response.curves,
      aes(
        x = value,
        group = variable
      ),
      fill = viridis::viridis(1, begin = 0.3),
      alpha = 0.5,
      size = 0
    )

  #generates the functions definitions for virtualspecies::generateSpFromFun
  args <- list()
  for(i in 1:length(niche.parameters)){
    args[[i]] <- c(
      fun = "dnorm",
      mean = niche.parameters[[i]][1],
      sd = niche.parameters[[i]][2]
      )
  }
  names(args) <- names(niche.parameters)

  #generating the details file for virtualspecies::generateSpFromFun
  details <- list()
  for (i in names(args)){
    details[[i]]$fun <- args[[i]]["fun"]
    args[[i]] <- args[[i]][-(names(args[[i]]) %in% "fun")]
    details[[i]]$args <- as.list(args[[i]])
    details[[i]]$args <- sapply(details[[i]]$args, as.numeric)
  }

  #generating niche map from details
  virtual.species.temp <- virtualspecies::generateSpFromFun(
    raster.stack = variables[[niche.dimensions]],
    parameters = details,
    rescale = TRUE,
    species.type = species.type
  )

  #raster to dataframe for ggplotting
  niche.map.df <-
    raster::as.data.frame(virtual.species.temp$suitab.raster, xy = TRUE) %>%
    na.omit()

  #converts distribution to presence-absence
  virtual.species.temp <- virtualspecies::convertToPA(
    x = virtual.species.temp,
    PA.method = "probability",
    prob.method = "linear",
    a = 1,
    b = 0,
    plot = FALSE
  )

  #checking that there are enough presences
  presence.cells <-
    raster::as.data.frame(virtual.species.temp$pa.raster, xy = TRUE) %>%
    na.omit() %>%
    dplyr::filter(layer == TRUE) %>%
    nrow()

  #adjusting n
  if(is.null(max.n) == FALSE){
    if(max.n > presence.cells){max.n <- presence.cells}
  } else {max.n <- presence.cells}

  #sampling occurrences
  xy <- virtualspecies::sampleOccurrences(
    virtual.species.temp,
    n = max.n,
    type = "presence only",
    correct.by.suitability = TRUE,
    plot = FALSE
  )$sample.points[c("x", "y")]

  #plot niche map
  plot.niche.map <- ggplot() +
    geom_tile(
      data = niche.map.df,
      aes(
        x = x,
        y = y,
        fill = layer
        )
      ) +
    viridis::scale_fill_viridis(direction = -1) +
    labs(fill = "Suitability") +
    xlab("Longitude") +
    ylab("Latitude") +
    geom_point(
      data = xy,
      aes(
        x = x,
        y = y
        ),
      shape = 1
      )

  #multipanel plot
  plot.multipanel <- cowplot::plot_grid(
    plot.use.availability,
    plot.niche.map,
    ncol = 1,
    axis = "l",
    align = "hv")
  print(plot.multipanel)

  #output object
  virtual.species <- list()
  virtual.species$niche.dimensions <- names(niche.parameters)
  virtual.species$niche.parameters <- niche.parameters
  virtual.species$niche.plot <- plot.multipanel
  virtual.species$suitability.raster <- virtual.species.temp$suitab.raster
  virtual.species$observed.presence <- xy

  return(virtual.species)

}
BlasBenito/sdmflow documentation built on April 10, 2020, 2:31 a.m.