R/plotting.R
In ENMeval: Automated Tuning and Evaluations of Ecological Niche Models

Documented in evalplot.envSim.hist evalplot.envSim.map evalplot.grps evalplot.nulls evalplot.stats plot.sim.dataPrep

#' @title Partition group plots
#' @description Plot occurrence partition groups over an environmental predictor raster.
#' @param e ENMevaluation object
#' @param envs SpatRaster: environmental predictor variable used to build the models in "e"
#' @param pts matrix / data frame: coordinates for occurrence or background data
#' @param pts.grp numeric vector: partition groups corresponding to data in "pts"
#' @param ref.data character: plot occurrences ("occs") or background ("bg"), with default "occs"
#' @param pts.size numeric: custom point size for ggplot
#' @param return.tbl boolean: if TRUE, return the data frames used to make the ggplot instead of the plot itself
#' @details This function serves as a quick way to visualize occurrence or background partitions over the extent of an environmental predictor raster.
#' It can be run with an existing ENMevaluation object, or alternatively with occurrence or background coordinates and the corresponding partitions.
#' 
#' @examples
#' \dontrun{
#' library(terra)
#' library(ENMeval)
#' occs <- read.csv(file.path(system.file(package="predicts"), 
#'                            "/ex/bradypus.csv"))[,2:3]
#' envs <- rast(list.files(path=paste(system.file(package="predicts"), 
#'                                    "/ex", sep=""), pattern="tif$", full.names=TRUE))
#' bg <- as.data.frame(predicts::backgroundSample(envs, n = 10000))
#' names(bg) <- names(occs)
#' 
#' parts <- get.block(occs, bg, orientation = "lat_lon")
#' 
#' # now, plot the partition groups for occurrence and background points
#' evalplot.grps(envs = envs, pts = occs, pts.grp = parts$occs.grp)
#' evalplot.grps(envs = envs, pts = bg, pts.grp = parts$bg.grp)
#' 
#' # you can also plot with an ENMevaluation object
#' ps <- list(orientation = "lat_lon")
#' e <- ENMevaluate(occs, envs, bg, 
#'                  tune.args = list(fc = c("L","LQ"), rm = 1:3), 
#'                  partitions = "block", partition.settings = ps, 
#'                  algorithm = "maxnet", categoricals = "biome", 
#'                  parallel = TRUE)
#' 
#' evalplot.grps(e = e, envs = envs, ref.data = "occs")
#' evalplot.grps(e = e, envs = envs, ref.data = "bg")
#' }
#' 
#' @export

evalplot.grps <- function(e = NULL, envs, pts = NULL, pts.grp = NULL, ref.data = "occs", pts.size = 1.5, return.tbl = FALSE) {
  if(!is.null(e)) {
    pts.plot <- switch(ref.data, occs = cbind(e@occs, partition = e@occs.grp),
                       bg = cbind(e@bg, partition = e@bg.grp))  
    if(e@partition.method == "testing") {
      pts.plot <- pts.plot |> dplyr::mutate(partition = as.numeric(as.character(partition))) |> 
        dplyr::bind_rows(e@occs.testing |> dplyr::mutate(partition = 1)) |>
        dplyr::mutate(partition = factor(partition))
    } 
    names(pts.plot)[1:2] <- c("longitude", "latitude")
  }else{
    if(!is.null(pts) & !is.null(pts.grp)) {
      # make sure pts is a data frame with the right column names
      pts <- as.data.frame(pts)
      names(pts) <- c("longitude", "latitude")
      pts.plot <- cbind(pts, partition = factor(pts.grp))
    }else{
      stop("If inputting point data instead of an ENMevaluation object, make sure to also input the partition groups (pts.grp).")
    }
  }
  
  grp.n <- length(unique(pts.plot$partition))
  if(grp.n > 9) {
    theme.custom <- ggplot2::guides(color = "none")
    pt.cols <- rainbow(grp.n)
  }else{
    theme.custom <- NULL
    pt.cols <- RColorBrewer::brewer.pal(9, "Set1")
  }
  
  if(terra::nlyr(envs) > 1) {
    message("Plotting first raster in stack...")
    envs <- envs[[1]]
  }
  envs.df <- terra::as.data.frame(envs, xy = TRUE, na.rm = FALSE)
  names(envs.df)[3] <- "value"
  g <- ggplot2::ggplot() + ggplot2::geom_raster(data = envs.df, ggplot2::aes(x = x, y = y, fill = value)) +
    ggplot2::geom_point(data = pts.plot, ggplot2::aes(x = longitude, y = latitude, color = partition), size = pts.size) +
    ggplot2::scale_color_manual(values = pt.cols) +
    ggplot2::scale_fill_distiller(palette = "Greys", na.value = "white") + ggplot2::theme_classic() + 
    ggplot2::coord_equal() + theme.custom
  
  if(return.tbl == TRUE) {
    return(list(envs.df = tibble::as_tibble(envs.df), pts.plot = tibble::as_tibble(pts.plot)))
  }else{
    return(g)  
  }
}


#' Internal plotting function
#'
#' This function preps data for plotting.
#'
#' @examples \dontrun{
#' plot.sim.dataPrep()
#' }
#' @keywords internal
plot.sim.dataPrep <- function(e, envs, occs.z, bg.z, occs.grp, bg.grp, ref.data, occs.testing.z, quiet) {
  
  if(!is.null(e) & any(!is.null(occs.z), !is.null(bg.z), !is.null(occs.grp), !is.null(bg.grp))) {
    stop("* If inputting an ENMevaluation object, leave occs.z, bg.z, occs.grp, and bg.grp NULL. These are read from the object.")
  }
  
  if(is.null(envs)) {
    if(is.null(e) & any(is.null(occs.z), is.null(bg.z), is.null(occs.grp), is.null(bg.grp))) {
      stop("* If inputting occurrence and background data instead of an ENMevaluation object, please input occs.z, bg.z, occs.grp, and bg.grp.")
      if(!quiet) message("* Similarity values calculated by contrasting occurrences with background.")
    }
  }else{
    if(is.null(e)) {
      if(ref.data == "occs") {
        if(any(is.null(occs.z), is.null(occs.grp))) {stop("* If inputting occurrence data instead of an ENMevaluation object, please input occs.z and occs.grp.")}
      }else if (ref.data == "bg") {
        if(any(is.null(bg.z), is.null(bg.grp))) {stop("* If inputting background data instead of an ENMevaluation object, please input bg.z and bg.grp.")}
      }
    }
    if(!quiet) message("* Similarity values calculated by contrasting occurrences with all cell values in raster extent.")
  }
  
  # assign variables from ENMevaluation object
  if(!is.null(e)) {
    occs.z <- e@occs
    bg.z <- e@bg
    occs.grp <- as.numeric(as.character(e@occs.grp))
    bg.grp <- as.numeric(as.character(e@bg.grp))
  }else{
    occs.grp <- as.numeric(as.character(occs.grp))
    bg.grp <- as.numeric(as.character(bg.grp))
  }
  
  if(ref.data == "bg" & length(unique(bg.grp)) == 1) stop('If background is not partitioned (non-spatial), do not assign ref.data to "bg".')
  
  if(any(is.null(occs.z), is.null(occs.grp))) {
    pts.plot <- bg.z |> dplyr::mutate(type = rep(0, nrow(bg.z)), partition = factor(bg.grp))  
  }else if(any(is.null(bg.z), is.null(bg.grp))) {
    pts.plot <- occs.z |> dplyr::mutate(type = rep(1, nrow(occs.z)), partition = factor(occs.grp))  
  }else{
    pts.plot <- rbind(occs.z, bg.z) |> as.data.frame() |>
      dplyr::mutate(type = c(rep(1, nrow(occs.z)), rep(0, nrow(bg.z))), partition = factor(c(occs.grp, bg.grp)))
  }
  names(pts.plot)[1:2] <- c("longitude","latitude")
  
  # find factor rasters or columns and identify them as categoricals
  if(!is.null(envs)) {
    categoricals <- unique(names(envs)[which(terra::is.factor(envs))])
    if(length(categoricals) == 0) categoricals <- NULL
  }else{
    categoricals <- unique(names(occs.z)[which(sapply(occs.z, is.factor))])
    if(length(categoricals) == 0) categoricals <- NULL
  }
  
  # remove categorical variables for plotting
  if(!is.null(categoricals)) {
    for(i in 1:length(categoricals)) {
      if(!quiet) message(paste0("* Ignoring categorical variable ", categoricals[i], "..."))
      pts.plot[, categoricals[i]] <- NULL
    }
  }
  
  if(all(unique(pts.plot$partition) == 0)) {
    if(ref.data == "bg") stop('If using fully withheld testing data, input ref.data as "occs".')
    if(is.null(e) & is.null(occs.testing.z)) stop("If using fully withheld testing data, input either an ENMevaluation object or occs.testing.z.")
    if(!is.null(e)) occs.testing.z <- e@occs.testing
    names(occs.testing.z)[1:2] <- c("longitude","latitude")
    if(!is.null(categoricals)) occs.testing.z[[categoricals]] <- NULL
    occs.testing.z <- occs.testing.z |> dplyr::mutate(type = 1, partition = 2)
    pts.plot$partition <- as.numeric(as.character(pts.plot$partition))
    pts.plot[pts.plot$type == 1, "partition"] <- 1
    pts.plot <- dplyr::bind_rows(pts.plot, occs.testing.z) |> dplyr::mutate(partition = factor(partition))
  }
  
  return(pts.plot)
}

#' @title Similarity histogram plots for partition groups
#' @description Plots environmental similarity of reference partitions (occurrences or 
#' background) to remaining data (occurrence and background for all other partitions). This 
#' function does not use raster data, and thus only calculates similarity values for data used 
#' in model training. Further, this function does not calculate similarity for categorical 
#' variables.
#' @details When fully withheld testing groups are used, make sure to input either an 
#' ENMevaluation object or the argument occs.testing.z. In the resulting plot, partition 1 
#' refers to the training data, while partition 2 refers to the fully withheld testing group.
#' @param e ENMevaluation object
#' @param occs.z data frame: longitude, latitude, and environmental predictor variable values for occurrence records, in that order (optional);
#' the first two columns must be named "longitude" and "latitude"
#' @param occs.grp numeric vector: partition groups for occurrence records (optional)
#' @param bg.z data frame: longitude, latitude, and environmental predictor variable values for background records, in that order (optional);
#' the first two columns must be named "longitude" and "latitude"
#' @param bg.grp numeric vector: partition groups for background records (optional)
#' @param ref.data character: the reference to calculate MESS based on occurrences ("occs") or background ("bg"), with default "occs"
#' these must be specified as this function was intended for use with continuous data only; these must be specified when inputting tabular data instead of an ENMevaluation object 
#' @param envs.vars character vector: names of a predictor variable to plot similarities for; if left NULL, calculations are done
#' with respect to all variables (optional) 
#' @param occs.testing.z data frame: longitude, latitude, and environmental predictor variable values for fully withheld testing records, 
#' in that order; this is for use only with the "testing" partition option when an ENMevaluation object is not input (optional)
#' @param hist.bins numeric: number of histogram bins for histogram plots; default is 30
#' @param return.tbl boolean: if TRUE, return the data frames of similarity values used to make the ggplot instead of the plot itself
#' @param quiet boolean: if TRUE, silence all function messages (but not errors)
#' @details Histograms are plotted showing the environmental similarity estimates for each 
#' partition group in relation to the others. The similarity between environmental values associated with the 
#' validation occurrence or background records per partition group and those associated with 
#' the remaining data (training occurrences and background) are calculated with the MESS algorithm, and the minimum similarity 
#' per grid cell is returned. Higher negative values indicate a greater environmental difference between the validation occurrences and the study extent, and higher 
#' positive values indicate greater similarity. This function uses the `mess()` function 
#' from the package `predicts`. Please see the below reference for details on MESS.
#' @return A ggplot of environmental similarities between the occurrence or background data 
#' for each partition and the rest of the data (all other occurrences and background data).
#' @references 
#' Baumgartner J, Wilson P (2021). _rmaxent: Tools for working with Maxent in R_. R package version 0.8.5.9000, <URL: https://github.com/johnbaums/rmaxent>.
#' Elith, J., Kearney, M., and Phillips, S. (2010) The art of modelling range-shifting species. \emph{Methods in Ecology and Evolution}, \bold{1}: 330-342. \doi{doi:10.1111/j.2041-210X.2010.00036.x}
#' 
#' @examples
#' \dontrun{
#' # first, let's tune some models
#' occs <- read.csv(file.path(system.file(package="predicts"), 
#' "/ex/bradypus.csv"))[,2:3]
#' envs <- rast(list.files(path=paste(system.file(package="predicts"), 
#' "/ex", sep=""), pattern="tif$", full.names=TRUE))
#' bg <- as.data.frame(predicts::backgroundSample(envs, n = 10000))
#' names(bg) <- names(occs)
#'  
#' ps <- list(orientation = "lat_lat")
#' 
#' e <- ENMevaluate(occs, envs, bg, 
#'                tune.args = list(fc = c("L","LQ","LQH"), rm = 1:5), 
#'                partitions = "block", partition.settings = ps, 
#'                algorithm = "maxnet", categoricals = "biome", 
#'                parallel = TRUE)
#' 
#' # now, plot the environmental similarity of each partition to the others               
#' evalplot.envSim.hist(e)
#' }
#' 
#' @export

evalplot.envSim.hist <- function(e = NULL, occs.z = NULL, bg.z = NULL, occs.grp = NULL, 
                                 bg.grp = NULL, ref.data = "occs", 
                                 envs.vars = NULL, occs.testing.z = NULL,
                                 hist.bins = 30, return.tbl = FALSE, quiet = FALSE) {
  
  pts.plot <- plot.sim.dataPrep(e, envs = NULL, occs.z, bg.z, occs.grp, bg.grp, ref.data, occs.testing.z, quiet)
  
  envs.names <- pts.plot |> dplyr::select(-longitude, -latitude, -partition, -type) |> names()
  
  if(!is.null(envs.vars)) {
    if(!quiet) message(paste0("* Similarity values calculated based only on ", paste(envs.vars, collapse = ", "), "."))
    envs.rem <- envs.names[-which(envs.names %in% envs.vars)]
    pts.plot <- pts.plot |> dplyr::select(-dplyr::all_of(envs.rem))
  }
  
  test.sim <- list()
  nk <- length(unique(pts.plot$partition[pts.plot$partition != 0]))
  if(nk == sum(pts.plot$type)) {
    stop("This plotting function is not available for jackknife (leave-one-out) partitions.")
  }
  
  for(k in 1:nk) {
    test.z <- pts.plot |> dplyr::filter(partition == k) |> dplyr::select(-longitude, -latitude, -partition)
    if(ref.data == "occs") {
      test.z <- test.z |> dplyr::filter(type == 1) |> dplyr::select(-type)
    }else if (ref.data == "bg") {
      test.z <- test.z |> dplyr::filter(type == 0) |> dplyr::select(-type)
    }
    train.z <- pts.plot |> dplyr::filter(partition != k) |> dplyr::select(-longitude, -latitude, -partition, -type)
    
    sim <- tryCatch({
      predicts::mess(train.z, test.z)
    }, error = function(cond) {
      message('Error: there may be at least one categorical variable in the predictor data that is not attributed as a factor. Please convert these variable(s) to factor.')
      # Choose a return value in case of error
      return(NULL)
    })
    test.sim[[k]] <- data.frame(partition = k, sim)
    names(test.sim[[k]])[2] <- "mess"  
  }
  
  if(nk > 9) {
    theme.custom <- ggplot2::guides(color = "none")
    pt.cols <- rainbow(nk)
  }else{
    theme.custom <- NULL
    pt.cols <- RColorBrewer::brewer.pal(nk, "Set1")
  }
  
  plot.df <- dplyr::bind_rows(test.sim)
  plot.df$partition <- factor(plot.df$partition)
  
  plot.text <- paste("\n(Values represent environmental similarity between", 
                     switch(ref.data, occs = "occurrence", bg = "background"),
                     "partitions and all other background partitions.)")
  
  g <- ggplot2::ggplot(plot.df, ggplot2::aes(x = mess, fill = partition)) + 
    ggplot2::geom_histogram(bins = hist.bins) +
    ggplot2::facet_grid(ggplot2::vars(partition)) + 
    ggplot2::scale_fill_manual(values = pt.cols) +
    ggplot2::theme_classic() +
    ggplot2::geom_vline(xintercept = 0) +
    ggplot2::ggtitle(paste("Multivariate environmental similarity", plot.text, collapse = "\n")) +
    ggplot2::theme(strip.background = ggplot2::element_blank(), strip.text.y = ggplot2::element_blank())  
  
  if(return.tbl == TRUE) {
    return(tibble::as_tibble(plot.df))
  }else{
    return(g)  
  }
}

#' @title Similarity maps for partition groups
#' @description Maps environmental similarity of reference partitions (occurrences or 
#' background) to all cells with values in the predictor variable rasters. This function uses 
#' raster data, and thus cannot map similarity values using only tables of environmental values f
#' or occurrences or background. Further, this function does not calculate similarity for 
#' categorical variables.
#' @details When fully withheld testing groups are used, make sure to input either an ENMevaluation 
#' object or the argument occs.testing.z. In the resulting plot, partition 1 refers to the training data,
#' while partition 2 refers to the fully withheld testing group.
#' @param e ENMevaluation object (optional) 
#' @param envs SpatRaster: environmental predictor variables used to build the models in "e"; categorical variables will be 
#' removed before internally as they cannot be used to calculate MESS
#' @param occs.z data frame: longitude, latitude, and environmental predictor variable values for occurrence records, in that order (optional);
#' the first two columns must be named "longitude" and "latitude"
#' @param occs.grp numeric vector: partition groups for occurrence records (optional)
#' @param bg.z data frame: longitude, latitude, and environmental predictor variable values for background records, in that order (optional);
#' the first two columns must be named "longitude" and "latitude"
#' @param bg.grp numeric vector: partition groups for background records (optional)
#' @param ref.data character: the reference to calculate MESS based on occurrences ("occs") or background ("bg"), with default "occs"
#' @param envs.vars character vector: names of a predictor variable to plot similarities for; if left NULL, calculations are done
#' with respect to all variables (optional) 
#' @param bb.buf numeric: distance used to buffer (extend) the mapping extent in map units; for latitude/longitude, this is in degrees (optional)
#' @param occs.testing.z data frame: longitude, latitude, and environmental predictor variable values for fully withheld testing records, 
#' in that order; this is for use only with the "testing" partition option when an ENMevaluation object is not input (optional)
#' @param plot.bg.pts boolean: if TRUE, plot background points when using ref.data = "bg"
#' @param sim.palette character: RColorBrewer palette name to use for plotting discrete variables; if NULL, default is "Set1"
#' @param pts.size numeric: custom point size for ggplot
#' @param gradient.colors character vector: colors used for ggplot2::scale_fill_gradient2
#' @param na.color character: color used for NA values
#' @param return.tbl boolean: if TRUE, return the data frames of similarity values used to make the ggplot instead of the plot itself
#' @param return.ras boolean: if TRUE, return the SpatRaster of similarity values used to make the ggplot instead of the plot itself
#' @param quiet boolean: if TRUE, silence all function messages (but not errors)
#' @details Rasters are plotted showing the environmental similarity estimates for each 
#' partition group in relation to the others. The similarity between environmental values associated with the 
#' validation occurrence or background records per partition group and those associated with 
#' the entire study extent (specified by the extent of the input SpatRaster "envs") are 
#' calculated with the MESS algorithm, and the minimum similarity per grid cell is returned. Higher 
#' negative values indicate greater environmental difference between the validation occurrences 
#' and the study extent, and higher positive values indicate greater similarity. This function 
#' uses the `mess()` function from the package `predicts` to calculate the similarities. Please see the below 
#' reference for details on MESS. 
#' @return A ggplot of environmental similarities between the occurrence or background data 
#' for each partition and all predictor variable values in the extent.
#' @references 
#' Baumgartner J, Wilson P (2021). _rmaxent: Tools for working with Maxent in R_. R package version 0.8.5.9000, <URL: https://github.com/johnbaums/rmaxent>.
#' Elith, J., Kearney, M., and Phillips, S. (2010) The art of modelling range-shifting species. \emph{Methods in Ecology and Evolution}, \bold{1}: 330-342. \doi{doi:10.1111/j.2041-210X.2010.00036.x}
#' 
#' @examples
#' \dontrun{
#' library(terra)
#' library(ENMeval)
#' 
#' # first, let's tune some models
#' occs <- read.csv(file.path(system.file(package="predicts"), 
#' "/ex/bradypus.csv"))[,2:3]
#' envs <- rast(list.files(path=paste(system.file(package="predicts"), 
#' "/ex", sep=""), pattern="tif$", full.names=TRUE))
#' bg <- as.data.frame(predicts::backgroundSample(envs, n = 10000))
#' names(bg) <- names(occs)
#'  
#' ps <- list(orientation = "lat_lat")
#' 
#' e <- ENMevaluate(occs, envs, bg, 
#'                tune.args = list(fc = c("L","LQ","LQH"), rm = 1:5), 
#'                partitions = "block", partition.settings = ps, 
#'                algorithm = "maxnet", categoricals = "biome", 
#'                parallel = TRUE)
#' 
#' # now, plot the environmental similarity of each partition to the others               
#' evalplot.envSim.map(e, envs)
#' }
#' 
#' @export

evalplot.envSim.map <- function(e = NULL, envs, occs.z = NULL, bg.z = NULL, occs.grp = NULL, 
                                bg.grp = NULL, ref.data = "occs", 
                                envs.vars = NULL, bb.buf = 0, occs.testing.z = NULL,
                                plot.bg.pts = FALSE, sim.palette = NULL, 
                                pts.size = 1.5, gradient.colors = c("red","white","blue"), na.color = "gray",
                                return.tbl = FALSE, return.ras = FALSE, quiet = FALSE) {
  
  if(return.tbl == TRUE & return.ras == TRUE) {
    stop("*Error: please select only one of return.tbl or return.ras.")
  }
  
  if(is.null(e) & (ref.data == "occs" & any(is.null(occs.z), is.null(occs.grp)))) {
    stop("* Error: If using occurrences as reference group, ensure you input occs.z and occs.grp") 
  }
  if(is.null(e) & (ref.data == "bg" & any(is.null(bg.z), is.null(bg.grp)))) {
    stop("* Error: If using background as reference group, ensure you input bg.z and bg.grp") 
  }
  
  if(!is.numeric(bb.buf)) stop("Please ensure bb.buf is a number.")
  
  pts.plot <- plot.sim.dataPrep(e, envs, occs.z, bg.z, occs.grp, bg.grp, ref.data, occs.testing.z, quiet)
  
  categoricals <- unique(names(envs)[which(terra::is.factor(envs))])
  if(length(categoricals) != 0 & !is.null(envs)) {
    envs <- terra::subset(envs, -which(names(envs) %in% categoricals))
  }
  
  if(!is.null(envs.vars)) {
    if(!quiet) message(paste0("* Similarity values calculated based only on ", 
                              paste(envs.vars, collapse = ", "), "."))
    envs.names <- names(envs)
    envs.rem <- envs.names[-which(envs.names %in% envs.vars)]
    pts.plot <- pts.plot |> dplyr::select(-dplyr::all_of(envs.rem))
    envs <- envs[[envs.vars]]
  }
  
  if(ref.data == "occs") {
    pts.plot <- pts.plot |> dplyr::filter(type == 1) |> dplyr::select(-type)
  }else if (ref.data == "bg") {
    pts.plot <- pts.plot |> dplyr::filter(type == 0) |> dplyr::select(-type)
  }
  
  ras.sim <- list()
  nk <- length(unique(pts.plot$partition))
  if(nk == sum(pts.plot$type)) {
    stop("This plotting function is not available for jackknife (leave-one-out) partitions.")
  }
  
  for(k in 1:nk) {
    message("Calculating MESS for partition ", k, "...")
    test.z <- pts.plot |> dplyr::filter(partition == k) |> 
      dplyr::select(-longitude, -latitude, -partition)
    
    sim <- tryCatch({
      predicts::mess(envs, test.z)
    }, error = function(cond) {
      message('Error: there may be at least one categorical variable in the predictor data that is not attributed as a factor. Please convert these variable(s) to factor.')
      # Choose a return value in case of error
      return(NULL)
    })
    names(sim) <- paste0("partition", k)
    
    ras.sim[[k]] <- sim
  }
  
  rs.sim <- terra::rast(ras.sim)
  plot.df <- terra::as.data.frame(rs.sim, xy = TRUE, na.rm = FALSE) |>
    tidyr::pivot_longer(cols = 3:dplyr::last_col(), names_to = "ras", values_to = "mess")
  # add buffer
  plot.df <- plot.df |> dplyr::filter(x > min(pts.plot$longitude) - bb.buf, 
                                      x < max(pts.plot$longitude) + bb.buf,
                                      y > min(pts.plot$latitude) - bb.buf, 
                                      y < max(pts.plot$latitude) + bb.buf)
  
  title <- "Multivariate environmental similarity" 
  
  plot.text <- paste("\n(Values represent environmental similarity between", 
                     switch(ref.data, occs = "occurrence", bg = "background"),
                     "partitions and all raster cells with values.)")
  
  g <- ggplot2::ggplot() +
    ggplot2::geom_raster(data = plot.df, ggplot2::aes_string(x = "x", y = "y", fill = "mess"))
  if(ref.data == "bg" & plot.bg.pts == FALSE) {
    g <- g + ggplot2::facet_wrap(ggplot2::vars(ras), ncol = 2) +
      ggplot2::ggtitle(paste(title, plot.text, collapse = "\n")) +
      ggplot2::theme_classic()
  }else{
    g <- g + ggplot2::geom_point(data = pts.plot, ggplot2::aes(x = longitude, y = latitude, shape = partition), color = "black", size = pts.size) +
      ggplot2::facet_wrap(ggplot2::vars(ras), ncol = 2) +
      ggplot2::ggtitle(paste(title, plot.text, collapse = "\n")) +
      ggplot2::theme_classic()
  }
  g <- g + ggplot2::scale_fill_gradient2(low = gradient.colors[1], mid = gradient.colors[2], high = gradient.colors[3], na.value = na.color)
  
  if(return.tbl == TRUE) {
    return(tibble::as_tibble(plot.df))
  }else if(return.ras == TRUE) {
    return(rs.sim)
  }else{
    return(g)  
  }
}


#' @title ENMevaluation statistics plot
#' @description Plot evaluation statistics over tuning parameter ranges to visualize differences in performance
#' @param e ENMevaluation object
#' @param stats character vector: names of statistics from results table to be plotted; if more than
#' one statistic is specified, the plot will be faceted
#' @param x.var character: variable to be plotted on x-axis
#' @param color.var character: variable used to assign symbology colors 
#' @param dodge numeric: dodge width for points and lines; this improves visibility when there is high overlap (optional)
#' @param error.bars boolean: if TRUE, plot error bars
#' @param facet.labels character vector: custom names for the metric facets
#' @param metric.levels character vector: custom factor levels for metrics; this controls the order that metric statistics are plotted
#' @param return.tbl boolean: if TRUE, return the data frames of results used to make the ggplot instead of the plot itself
#' @details In this plot, the x-axis represents a tuning parameter range, while the y-axis represents the average of a statistic over all partitions.
#' Different colors represent the categories or values of another tuning parameter. 
#' Error bars represent the standard deviation of a statistic around the mean. 
#' Currently, this function can only plot two tuning parameters at a time.
#' @return A ggplot of evaluation statistics. 
#' 
#' @examples
#' \dontrun{
#' # first, let's tune some models
#' occs <- read.csv(file.path(system.file(package="predicts"), 
#' "/ex/bradypus.csv"))[,2:3]
#' envs <- rast(list.files(path=paste(system.file(package="predicts"), 
#' "/ex", sep=""), pattern="tif$", full.names=TRUE))
#' bg <- as.data.frame(predicts::backgroundSample(envs, n = 10000))
#' names(bg) <- names(occs)
#'  
#' ps <- list(orientation = "lat_lat")

#' e <- ENMevaluate(occs, envs, bg, 
#'                tune.args = list(fc = c("L","LQ","LQH"), rm = 1:5), 
#'                partitions = "block", partition.settings = ps, 
#'                algorithm = "maxnet", categoricals = "biome", 
#'                parallel = TRUE)
#'                
#' evalplot.stats(e, c("cbi.val", "or.mtp"), x.var = "rm", color.var = "fc", 
#'              error.bars = FALSE)
#' }
#' @export

evalplot.stats <- function(e, stats, x.var, color.var, dodge = NULL, error.bars = TRUE, facet.labels = NULL, metric.levels = NULL, return.tbl = FALSE) {
  exp <- paste(paste0("*", stats), collapse = "|")
  res <- e@results |> 
    tidyr::pivot_longer(cols = auc.train:ncoef, names_to = "metric", values_to = "value") |>
    dplyr::filter(grepl(exp, metric))
  avgs <- res |> 
    dplyr::filter(grepl("avg", metric)) |>
    dplyr::rename(avg = value) |>
    dplyr::mutate(metric = gsub(".avg", "", metric))
  sds <- res |> 
    dplyr::filter(grepl("sd", metric)) |>
    dplyr::rename(sd = value) |>
    dplyr::mutate(metric = gsub(".sd", "", metric))
  join.names <- names(avgs)
  join.names <- join.names[join.names != "avg"]
  res.avgs <- dplyr::left_join(avgs, sds, by = join.names) |>
    dplyr::mutate(lower = avg - sd, upper = avg + sd,
                  metric = factor(metric, levels = stats))
  res.avgs[[x.var]] <- factor(res.avgs[[x.var]])
  res.avgs[[color.var]] <- factor(res.avgs[[color.var]])
  if(!is.null(facet.labels)) labeller <- ggplot2::as_labeller(facet.labels) else labeller <- NULL
  if(!is.null(metric.levels)) res$metric <- factor(res$metric, levels = metric.levels)
  if(!is.null(metric.levels)) res.avgs$metric <- factor(res.avgs$metric, levels = metric.levels)
  
  if(nrow(res.avgs) > 0) {
    if(is.null(dodge)) dodge <- 0.1
    g <- ggplot2::ggplot(res.avgs, ggplot2::aes_string(x = x.var, y = "avg", color = color.var, group = color.var)) + 
      ggplot2::geom_point(position=ggplot2::position_dodge(width=dodge)) + 
      ggplot2::geom_line(position=ggplot2::position_dodge(width=dodge)) +
      ggplot2::theme_bw()
    if(length(stats) > 1) {
      if(!is.null(labeller)) {
        g <- g + ggplot2::facet_wrap(ggplot2::vars(metric), scales = "free_y", nrow = length(stats), labeller = labeller)
      }else{
        g <- g + ggplot2::facet_wrap(ggplot2::vars(metric), scales = "free_y", nrow = length(stats))  
      }
    }
    if(error.bars == TRUE) {
      g <- g + ggplot2::geom_errorbar(ggplot2::aes(ymin = lower, ymax = upper), width = 0.5, 
                                      position = ggplot2::position_dodge(width=dodge))
    }
    g
    if(return.tbl == TRUE) {
      return(tibble::as_tibble(res.avgs))
    }else{
      return(g)  
    }
  }else{
    if(is.null(dodge)) dodge <- 0
    g <- ggplot2::ggplot(res, ggplot2::aes_string(x = x.var, y = "value", color = color.var, group = color.var)) + 
      ggplot2::geom_point(position=ggplot2::position_dodge(width=dodge)) + 
      ggplot2::geom_line(position=ggplot2::position_dodge(width=dodge)) + 
      ggplot2::theme_bw()
    if(!is.null(labeller)) {
      g <- g + ggplot2::facet_wrap(ggplot2::vars(metric), scales = "free_y", nrow = length(stats), labeller = labeller)
    }else{
      g <- g + ggplot2::facet_wrap(ggplot2::vars(metric), scales = "free_y", nrow = length(stats))
    }
    g
    if(return.tbl == TRUE) {
      return(tibble::as_tibble(res))
    }else{
      return(g)  
    }
  }
}

#' @title ENMnulls statistics plot
#' @description Plot histogram of evaluation statistics for null ENM simulations
#' @param e.null ENMnull object
#' @param stats character vector: metrics from results table to be plotted; examples are
#' "auc.val" or "or.10p"; if more than one statistic is specified, the histogram plot will be faceted
#' @param plot.type character: either "violin" or "histogram"
#' @param facet.labels named list: custom names for the metric facets, in the form list(old_name = "new_name", ...)
#' @param metric.levels character vector: custom factor levels for metrics; this controls the order that metric statistics are plotted
#' @param return.tbl boolean: if TRUE, return the data frames of null results used to make the ggplot instead of the plot itself
#' @details There are two variations for this plot, but both show null quantiles (0.01, 0.05, 0.5, 0.95, 0.99). 
#' For violin plots, the null distribution is displayed as a vertical shape (i.e., the violin) with horizontal lines showing 
#' the quantiles and the empirical value is plotted as a red point along the vertical axis. 
#' For histogram plots, the null distribution is displayed as a histogram with vertical lines showing the quantiles 
#' and the empirical value as a vertical red line on the distribution.
#' 
#' @examples
#' \dontrun{
#' # first, let's tune some models
#' occs <- read.csv(file.path(system.file(package="predicts"), 
#'                            "/ex/bradypus.csv"))[,2:3]
#' envs <- rast(list.files(path=paste(system.file(package="predicts"), 
#'                                    "/ex", sep=""), pattern="tif$", full.names=TRUE))
#' bg <- as.data.frame(predicts::backgroundSample(envs, n = 10000))
#' names(bg) <- names(occs)
#' 
#' ps <- list(orientation = "lat_lat")
#' 
#' e <- ENMevaluate(occs, envs, bg, 
#'                  tune.args = list(fc = c("L","LQ","LQH"), rm = 2:4), partitions = "block", 
#'                  partition.settings = ps, algorithm = "maxnet", categoricals = "biome",
#'                  parallel = TRUE)
#' 
#' d <- eval.results(e)
#' 
#' # here, we will choose an optimal model based on validation CBI, but you can
#' # choose yourself what evaluation statistics to use
#' opt <- d |> filter(cbi.val.avg == max(cbi.val.avg))
#' 
#' # now we can run our null models 
#' # NOTE: you should use at least 100 iterations in practice -- this is just an
#' # example
#' nulls <- ENMnulls(e, mod.settings = list(fc = opt$fc, rm = opt$rm), no.iter = 10)
#' 
#' # let's plot the null model results
#' evalplot.nulls(nulls, stats = c("cbi.val", "auc.val"), plot.type = "violin")
#' }
#' 
#' @return A ggplot of null model statistics. 
#' @export

evalplot.nulls <- function(e.null, stats, plot.type, facet.labels = NULL, metric.levels = NULL, return.tbl = FALSE) {
  exp <- paste(paste0("*", stats), collapse = "|")
  null.res <- e.null@null.results |> 
    tidyr::pivot_longer(cols = auc.train:ncoef, names_to = "metric", values_to = "value") |>
    dplyr::filter(grepl(exp, metric)) |>
    dplyr::select(metric, value)
  null.avgs <- null.res |> 
    dplyr::filter(grepl("avg", metric) | metric %in% stats) |>
    dplyr::rename(avg = value) |>
    dplyr::mutate(metric = gsub(".avg", "", metric)) |>
    dplyr::mutate(metric = factor(metric, levels = stats))
  if(!is.null(metric.levels)) null.avgs$metric <- factor(null.avgs$metric, levels = metric.levels)
  # null.sds <- null.res |> 
  #   dplyr::filter(grepl("sd", metric)) |>
  #   dplyr::rename(sd = value) |>
  #   dplyr::mutate(metric = gsub(".sd", "", metric))
  # null.res.avgs <- dplyr::bind_cols(null.avgs, null.sds |> dplyr::select(sd))
  
  emp.res <- e.null@null.emp.results |> 
    dplyr::slice(1) |>
    tidyr::pivot_longer(cols = stats, names_to = "metric", values_to = "value") |>
    dplyr::select(statistic, metric, value) |>
    tidyr::pivot_wider(names_from = statistic, values_from = value) |>
    dplyr::rename(avg = emp.mean) |>
    dplyr::mutate(metric = factor(metric, levels = stats))
  
  if(!is.null(facet.labels)) labeller <- ggplot2::as_labeller(facet.labels) else labeller <- NULL
  
  if(plot.type == "violin") {
    g <- ggplot2::ggplot(null.avgs, ggplot2::aes(x = metric, y = avg)) + 
      ggplot2::geom_violin(draw_quantiles = c(0.01, 0.05, 0.5, 0.95, 0.99)) +
      ggplot2::geom_point(data = emp.res, ggplot2::aes(y = avg), color = "red") +
      ggplot2::theme_bw()  
  }else if(plot.type == "histogram") {
    stats.all <- rbind(null.avgs, emp.res)
    vlines <- null.avgs |> dplyr::group_by(metric) |> 
      dplyr::summarize(`0.01 quantile` = quantile(avg, 0.01),
                       `0.05 quantile` = quantile(avg, 0.05),
                       `0.50 quantile` = quantile(avg, 0.5),
                       `0.95 quantile` = quantile(avg, 0.95),
                       `0.99 quantile` = quantile(avg, 0.99)) |>
      tidyr::pivot_longer(cols = `0.01 quantile`:`0.99 quantile`, names_to = "quantile", values_to = "value")
    vlines <- rbind(vlines, emp.res |> dplyr::mutate(quantile = "empirical value") |> dplyr::rename(value = avg))
    g <- ggplot2::ggplot(mapping = ggplot2::aes(x = avg)) + 
      ggplot2::geom_histogram(data = null.avgs, fill = "gray80") +
      ggplot2::geom_vline(data = vlines, ggplot2::aes(xintercept = value, color = quantile, linetype = quantile, size = quantile)) +
      ggplot2::scale_color_manual(values = c(`0.01 quantile` = "purple", 
                                             `0.05 quantile` = "blue",
                                             `0.50 quantile` = "blue",
                                             `0.95 quantile` = "blue",
                                             `0.99 quantile` = "purple",
                                             `empirical value` = "red")) +
      ggplot2::scale_linetype_manual(values = c(`0.01 quantile` = "dotted", 
                                                `0.05 quantile` = "dashed",
                                                `0.50 quantile` = "solid",
                                                `0.95 quantile` = "dashed", 
                                                `0.99 quantile` = "dotted",
                                                `empirical value` = "solid")) +
      ggplot2::scale_size_manual(values = c(`0.01 quantile` = 1, 
                                            `0.05 quantile` = 1,
                                            `0.50 quantile` = 1,
                                            `0.95 quantile` = 1, 
                                            `0.99 quantile` = 1,
                                            `empirical value` = 0.5)) +
      ggplot2::theme_bw() +
      ggplot2::theme(legend.title=ggplot2::element_blank(), 
                     legend.position="bottom")
    if(!is.null(labeller)) {
      g <- g + ggplot2::facet_wrap(ggplot2::vars(metric), scales = "free_x", ncol = 1, labeller = labeller)
    }else{
      g <- g + ggplot2::facet_wrap(ggplot2::vars(metric), scales = "free_x", ncol = 1)
    }
  }
  if(return.tbl == TRUE) {
    return(list(null.avgs = tibble::as_tibble(null.avgs), empirical.results = tibble::as_tibble(emp.res)))
  }else{
    return(g)  
  }
}
Any scripts or data that you put into this service are public.
ENMeval documentation built on June 8, 2025, 10:22 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
ENMeval
Automated Tuning and Evaluations of Ecological Niche Models

R/plotting.R
In ENMeval: Automated Tuning and Evaluations of Ecological Niche Models

Defines functions evalplot.nulls evalplot.stats evalplot.envSim.map evalplot.envSim.hist plot.sim.dataPrep evalplot.grps

Documented in evalplot.envSim.hist evalplot.envSim.map evalplot.grps evalplot.nulls evalplot.stats plot.sim.dataPrep

Try the ENMeval package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ENMeval Automated Tuning and Evaluations of Ecological Niche Models

R/plotting.R In ENMeval: Automated Tuning and Evaluations of Ecological Niche Models

Defines functions evalplot.nulls evalplot.stats evalplot.envSim.map evalplot.envSim.hist plot.sim.dataPrep evalplot.grps

Documented in evalplot.envSim.hist evalplot.envSim.map evalplot.grps evalplot.nulls evalplot.stats plot.sim.dataPrep

Try the ENMeval package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ENMeval
Automated Tuning and Evaluations of Ecological Niche Models

R/plotting.R
In ENMeval: Automated Tuning and Evaluations of Ecological Niche Models