R/plotting.R

Defines functions evalplot.nulls evalplot.stats evalplot.envSim.map evalplot.envSim.hist plot.sim.dataPrep evalplot.grps

Documented in evalplot.envSim.hist evalplot.envSim.map evalplot.grps evalplot.nulls evalplot.stats plot.sim.dataPrep

#' @title Partition group plots
#' @description Plot occurrence partition groups over an environmental predictor raster.
#' @param e ENMevaluation object
#' @param envs SpatRaster: environmental predictor variable used to build the models in "e"
#' @param pts matrix / data frame: coordinates for occurrence or background data
#' @param pts.grp numeric vector: partition groups corresponding to data in "pts"
#' @param ref.data character: plot occurrences ("occs") or background ("bg"), with default "occs"
#' @param pts.size numeric: custom point size for ggplot
#' @param return.tbl boolean: if TRUE, return the data frames used to make the ggplot instead of the plot itself
#' @details This function serves as a quick way to visualize occurrence or background partitions over the extent of an environmental predictor raster.
#' It can be run with an existing ENMevaluation object, or alternatively with occurrence or background coordinates and the corresponding partitions.
#' 
#' @examples
#' \dontrun{
#' library(terra)
#' library(ENMeval)
#' occs <- read.csv(file.path(system.file(package="predicts"), 
#'                            "/ex/bradypus.csv"))[,2:3]
#' envs <- rast(list.files(path=paste(system.file(package="predicts"), 
#'                                    "/ex", sep=""), pattern="tif$", full.names=TRUE))
#' bg <- as.data.frame(predicts::backgroundSample(envs, n = 10000))
#' names(bg) <- names(occs)
#' 
#' parts <- get.block(occs, bg, orientation = "lat_lon")
#' 
#' # now, plot the partition groups for occurrence and background points
#' evalplot.grps(envs = envs, pts = occs, pts.grp = parts$occs.grp)
#' evalplot.grps(envs = envs, pts = bg, pts.grp = parts$bg.grp)
#' 
#' # you can also plot with an ENMevaluation object
#' ps <- list(orientation = "lat_lon")
#' e <- ENMevaluate(occs, envs, bg, 
#'                  tune.args = list(fc = c("L","LQ"), rm = 1:3), 
#'                  partitions = "block", partition.settings = ps, 
#'                  algorithm = "maxnet", categoricals = "biome", 
#'                  parallel = TRUE)
#' 
#' evalplot.grps(e = e, envs = envs, ref.data = "occs")
#' evalplot.grps(e = e, envs = envs, ref.data = "bg")
#' }
#' 
#' @export

evalplot.grps <- function(e = NULL, envs, pts = NULL, pts.grp = NULL, ref.data = "occs", pts.size = 1.5, return.tbl = FALSE) {
  if(!is.null(e)) {
    pts.plot <- switch(ref.data, occs = cbind(e@occs, partition = e@occs.grp),
                       bg = cbind(e@bg, partition = e@bg.grp))  
    if(e@partition.method == "testing") {
      pts.plot <- pts.plot |> dplyr::mutate(partition = as.numeric(as.character(partition))) |> 
        dplyr::bind_rows(e@occs.testing |> dplyr::mutate(partition = 1)) |>
        dplyr::mutate(partition = factor(partition))
    } 
    names(pts.plot)[1:2] <- c("longitude", "latitude")
  }else{
    if(!is.null(pts) & !is.null(pts.grp)) {
      # make sure pts is a data frame with the right column names
      pts <- as.data.frame(pts)
      names(pts) <- c("longitude", "latitude")
      pts.plot <- cbind(pts, partition = factor(pts.grp))
    }else{
      stop("If inputting point data instead of an ENMevaluation object, make sure to also input the partition groups (pts.grp).")
    }
  }
  
  grp.n <- length(unique(pts.plot$partition))
  if(grp.n > 9) {
    theme.custom <- ggplot2::guides(color = "none")
    pt.cols <- rainbow(grp.n)
  }else{
    theme.custom <- NULL
    pt.cols <- RColorBrewer::brewer.pal(9, "Set1")
  }
  
  if(terra::nlyr(envs) > 1) {
    message("Plotting first raster in stack...")
    envs <- envs[[1]]
  }
  envs.df <- terra::as.data.frame(envs, xy = TRUE, na.rm = FALSE)
  names(envs.df)[3] <- "value"
  g <- ggplot2::ggplot() + ggplot2::geom_raster(data = envs.df, ggplot2::aes(x = x, y = y, fill = value)) +
    ggplot2::geom_point(data = pts.plot, ggplot2::aes(x = longitude, y = latitude, color = partition), size = pts.size) +
    ggplot2::scale_color_manual(values = pt.cols) +
    ggplot2::scale_fill_distiller(palette = "Greys", na.value = "white") + ggplot2::theme_classic() + 
    ggplot2::coord_equal() + theme.custom
  
  if(return.tbl == TRUE) {
    return(list(envs.df = tibble::as_tibble(envs.df), pts.plot = tibble::as_tibble(pts.plot)))
  }else{
    return(g)  
  }
}


#' Internal plotting function
#'
#' This function preps data for plotting.
#'
#' @examples \dontrun{
#' plot.sim.dataPrep()
#' }
#' @keywords internal
plot.sim.dataPrep <- function(e, envs, occs.z, bg.z, occs.grp, bg.grp, ref.data, occs.testing.z, quiet) {
  
  if(!is.null(e) & any(!is.null(occs.z), !is.null(bg.z), !is.null(occs.grp), !is.null(bg.grp))) {
    stop("* If inputting an ENMevaluation object, leave occs.z, bg.z, occs.grp, and bg.grp NULL. These are read from the object.")
  }
  
  if(is.null(envs)) {
    if(is.null(e) & any(is.null(occs.z), is.null(bg.z), is.null(occs.grp), is.null(bg.grp))) {
      stop("* If inputting occurrence and background data instead of an ENMevaluation object, please input occs.z, bg.z, occs.grp, and bg.grp.")
      if(!quiet) message("* Similarity values calculated by contrasting occurrences with background.")
    }
  }else{
    if(is.null(e)) {
      if(ref.data == "occs") {
        if(any(is.null(occs.z), is.null(occs.grp))) {stop("* If inputting occurrence data instead of an ENMevaluation object, please input occs.z and occs.grp.")}
      }else if (ref.data == "bg") {
        if(any(is.null(bg.z), is.null(bg.grp))) {stop("* If inputting background data instead of an ENMevaluation object, please input bg.z and bg.grp.")}
      }
    }
    if(!quiet) message("* Similarity values calculated by contrasting occurrences with all cell values in raster extent.")
  }
  
  # assign variables from ENMevaluation object
  if(!is.null(e)) {
    occs.z <- e@occs
    bg.z <- e@bg
    occs.grp <- as.numeric(as.character(e@occs.grp))
    bg.grp <- as.numeric(as.character(e@bg.grp))
  }else{
    occs.grp <- as.numeric(as.character(occs.grp))
    bg.grp <- as.numeric(as.character(bg.grp))
  }
  
  if(ref.data == "bg" & length(unique(bg.grp)) == 1) stop('If background is not partitioned (non-spatial), do not assign ref.data to "bg".')
  
  if(any(is.null(occs.z), is.null(occs.grp))) {
    pts.plot <- bg.z |> dplyr::mutate(type = rep(0, nrow(bg.z)), partition = factor(bg.grp))  
  }else if(any(is.null(bg.z), is.null(bg.grp))) {
    pts.plot <- occs.z |> dplyr::mutate(type = rep(1, nrow(occs.z)), partition = factor(occs.grp))  
  }else{
    pts.plot <- rbind(occs.z, bg.z) |> as.data.frame() |>
      dplyr::mutate(type = c(rep(1, nrow(occs.z)), rep(0, nrow(bg.z))), partition = factor(c(occs.grp, bg.grp)))
  }
  names(pts.plot)[1:2] <- c("longitude","latitude")
  
  # find factor rasters or columns and identify them as categoricals
  if(!is.null(envs)) {
    categoricals <- unique(names(envs)[which(terra::is.factor(envs))])
    if(length(categoricals) == 0) categoricals <- NULL
  }else{
    categoricals <- unique(names(occs.z)[which(sapply(occs.z, is.factor))])
    if(length(categoricals) == 0) categoricals <- NULL
  }
  
  # remove categorical variables for plotting
  if(!is.null(categoricals)) {
    for(i in 1:length(categoricals)) {
      if(!quiet) message(paste0("* Ignoring categorical variable ", categoricals[i], "..."))
      pts.plot[, categoricals[i]] <- NULL
    }
  }
  
  if(all(unique(pts.plot$partition) == 0)) {
    if(ref.data == "bg") stop('If using fully withheld testing data, input ref.data as "occs".')
    if(is.null(e) & is.null(occs.testing.z)) stop("If using fully withheld testing data, input either an ENMevaluation object or occs.testing.z.")
    if(!is.null(e)) occs.testing.z <- e@occs.testing
    names(occs.testing.z)[1:2] <- c("longitude","latitude")
    if(!is.null(categoricals)) occs.testing.z[[categoricals]] <- NULL
    occs.testing.z <- occs.testing.z |> dplyr::mutate(type = 1, partition = 2)
    pts.plot$partition <- as.numeric(as.character(pts.plot$partition))
    pts.plot[pts.plot$type == 1, "partition"] <- 1
    pts.plot <- dplyr::bind_rows(pts.plot, occs.testing.z) |> dplyr::mutate(partition = factor(partition))
  }
  
  return(pts.plot)
}

#' @title Similarity histogram plots for partition groups
#' @description Plots environmental similarity of reference partitions (occurrences or 
#' background) to remaining data (occurrence and background for all other partitions). This 
#' function does not use raster data, and thus only calculates similarity values for data used 
#' in model training. Further, this function does not calculate similarity for categorical 
#' variables.
#' @details When fully withheld testing groups are used, make sure to input either an 
#' ENMevaluation object or the argument occs.testing.z. In the resulting plot, partition 1 
#' refers to the training data, while partition 2 refers to the fully withheld testing group.
#' @param e ENMevaluation object
#' @param occs.z data frame: longitude, latitude, and environmental predictor variable values for occurrence records, in that order (optional);
#' the first two columns must be named "longitude" and "latitude"
#' @param occs.grp numeric vector: partition groups for occurrence records (optional)
#' @param bg.z data frame: longitude, latitude, and environmental predictor variable values for background records, in that order (optional);
#' the first two columns must be named "longitude" and "latitude"
#' @param bg.grp numeric vector: partition groups for background records (optional)
#' @param ref.data character: the reference to calculate MESS based on occurrences ("occs") or background ("bg"), with default "occs"
#' these must be specified as this function was intended for use with continuous data only; these must be specified when inputting tabular data instead of an ENMevaluation object 
#' @param envs.vars character vector: names of a predictor variable to plot similarities for; if left NULL, calculations are done
#' with respect to all variables (optional) 
#' @param occs.testing.z data frame: longitude, latitude, and environmental predictor variable values for fully withheld testing records, 
#' in that order; this is for use only with the "testing" partition option when an ENMevaluation object is not input (optional)
#' @param hist.bins numeric: number of histogram bins for histogram plots; default is 30
#' @param return.tbl boolean: if TRUE, return the data frames of similarity values used to make the ggplot instead of the plot itself
#' @param quiet boolean: if TRUE, silence all function messages (but not errors)
#' @details Histograms are plotted showing the environmental similarity estimates for each 
#' partition group in relation to the others. The similarity between environmental values associated with the 
#' validation occurrence or background records per partition group and those associated with 
#' the remaining data (training occurrences and background) are calculated with the MESS algorithm, and the minimum similarity 
#' per grid cell is returned. Higher negative values indicate a greater environmental difference between the validation occurrences and the study extent, and higher 
#' positive values indicate greater similarity. This function uses the `mess()` function 
#' from the package `predicts`. Please see the below reference for details on MESS.
#' @return A ggplot of environmental similarities between the occurrence or background data 
#' for each partition and the rest of the data (all other occurrences and background data).
#' @references 
#' Baumgartner J, Wilson P (2021). _rmaxent: Tools for working with Maxent in R_. R package version 0.8.5.9000, <URL: https://github.com/johnbaums/rmaxent>.
#' Elith, J., Kearney, M., and Phillips, S. (2010) The art of modelling range-shifting species. \emph{Methods in Ecology and Evolution}, \bold{1}: 330-342. \doi{doi:10.1111/j.2041-210X.2010.00036.x}
#' 
#' @examples
#' \dontrun{
#' # first, let's tune some models
#' occs <- read.csv(file.path(system.file(package="predicts"), 
#' "/ex/bradypus.csv"))[,2:3]
#' envs <- rast(list.files(path=paste(system.file(package="predicts"), 
#' "/ex", sep=""), pattern="tif$", full.names=TRUE))
#' bg <- as.data.frame(predicts::backgroundSample(envs, n = 10000))
#' names(bg) <- names(occs)
#'  
#' ps <- list(orientation = "lat_lat")
#' 
#' e <- ENMevaluate(occs, envs, bg, 
#'                tune.args = list(fc = c("L","LQ","LQH"), rm = 1:5), 
#'                partitions = "block", partition.settings = ps, 
#'                algorithm = "maxnet", categoricals = "biome", 
#'                parallel = TRUE)
#' 
#' # now, plot the environmental similarity of each partition to the others               
#' evalplot.envSim.hist(e)
#' }
#' 
#' @export

evalplot.envSim.hist <- function(e = NULL, occs.z = NULL, bg.z = NULL, occs.grp = NULL, 
                                 bg.grp = NULL, ref.data = "occs", 
                                 envs.vars = NULL, occs.testing.z = NULL,
                                 hist.bins = 30, return.tbl = FALSE, quiet = FALSE) {
  
  pts.plot <- plot.sim.dataPrep(e, envs = NULL, occs.z, bg.z, occs.grp, bg.grp, ref.data, occs.testing.z, quiet)
  
  envs.names <- pts.plot |> dplyr::select(-longitude, -latitude, -partition, -type) |> names()
  
  if(!is.null(envs.vars)) {
    if(!quiet) message(paste0("* Similarity values calculated based only on ", paste(envs.vars, collapse = ", "), "."))
    envs.rem <- envs.names[-which(envs.names %in% envs.vars)]
    pts.plot <- pts.plot |> dplyr::select(-dplyr::all_of(envs.rem))
  }
  
  test.sim <- list()
  nk <- length(unique(pts.plot$partition[pts.plot$partition != 0]))
  if(nk == sum(pts.plot$type)) {
    stop("This plotting function is not available for jackknife (leave-one-out) partitions.")
  }
  
  for(k in 1:nk) {
    test.z <- pts.plot |> dplyr::filter(partition == k) |> dplyr::select(-longitude, -latitude, -partition)
    if(ref.data == "occs") {
      test.z <- test.z |> dplyr::filter(type == 1) |> dplyr::select(-type)
    }else if (ref.data == "bg") {
      test.z <- test.z |> dplyr::filter(type == 0) |> dplyr::select(-type)
    }
    train.z <- pts.plot |> dplyr::filter(partition != k) |> dplyr::select(-longitude, -latitude, -partition, -type)
    
    sim <- tryCatch({
      predicts::mess(train.z, test.z)
    }, error = function(cond) {
      message('Error: there may be at least one categorical variable in the predictor data that is not attributed as a factor. Please convert these variable(s) to factor.')
      # Choose a return value in case of error
      return(NULL)
    })
    test.sim[[k]] <- data.frame(partition = k, sim)
    names(test.sim[[k]])[2] <- "mess"  
  }
  
  if(nk > 9) {
    theme.custom <- ggplot2::guides(color = "none")
    pt.cols <- rainbow(nk)
  }else{
    theme.custom <- NULL
    pt.cols <- RColorBrewer::brewer.pal(nk, "Set1")
  }
  
  plot.df <- dplyr::bind_rows(test.sim)
  plot.df$partition <- factor(plot.df$partition)
  
  plot.text <- paste("\n(Values represent environmental similarity between", 
                     switch(ref.data, occs = "occurrence", bg = "background"),
                     "partitions and all other background partitions.)")
  
  g <- ggplot2::ggplot(plot.df, ggplot2::aes(x = mess, fill = partition)) + 
    ggplot2::geom_histogram(bins = hist.bins) +
    ggplot2::facet_grid(ggplot2::vars(partition)) + 
    ggplot2::scale_fill_manual(values = pt.cols) +
    ggplot2::theme_classic() +
    ggplot2::geom_vline(xintercept = 0) +
    ggplot2::ggtitle(paste("Multivariate environmental similarity", plot.text, collapse = "\n")) +
    ggplot2::theme(strip.background = ggplot2::element_blank(), strip.text.y = ggplot2::element_blank())  
  
  if(return.tbl == TRUE) {
    return(tibble::as_tibble(plot.df))
  }else{
    return(g)  
  }
}

#' @title Similarity maps for partition groups
#' @description Maps environmental similarity of reference partitions (occurrences or 
#' background) to all cells with values in the predictor variable rasters. This function uses 
#' raster data, and thus cannot map similarity values using only tables of environmental values f
#' or occurrences or background. Further, this function does not calculate similarity for 
#' categorical variables.
#' @details When fully withheld testing groups are used, make sure to input either an ENMevaluation 
#' object or the argument occs.testing.z. In the resulting plot, partition 1 refers to the training data,
#' while partition 2 refers to the fully withheld testing group.
#' @param e ENMevaluation object (optional) 
#' @param envs SpatRaster: environmental predictor variables used to build the models in "e"; categorical variables will be 
#' removed before internally as they cannot be used to calculate MESS
#' @param occs.z data frame: longitude, latitude, and environmental predictor variable values for occurrence records, in that order (optional);
#' the first two columns must be named "longitude" and "latitude"
#' @param occs.grp numeric vector: partition groups for occurrence records (optional)
#' @param bg.z data frame: longitude, latitude, and environmental predictor variable values for background records, in that order (optional);
#' the first two columns must be named "longitude" and "latitude"
#' @param bg.grp numeric vector: partition groups for background records (optional)
#' @param ref.data character: the reference to calculate MESS based on occurrences ("occs") or background ("bg"), with default "occs"
#' @param envs.vars character vector: names of a predictor variable to plot similarities for; if left NULL, calculations are done
#' with respect to all variables (optional) 
#' @param bb.buf numeric: distance used to buffer (extend) the mapping extent in map units; for latitude/longitude, this is in degrees (optional)
#' @param occs.testing.z data frame: longitude, latitude, and environmental predictor variable values for fully withheld testing records, 
#' in that order; this is for use only with the "testing" partition option when an ENMevaluation object is not input (optional)
#' @param plot.bg.pts boolean: if TRUE, plot background points when using ref.data = "bg"
#' @param sim.palette character: RColorBrewer palette name to use for plotting discrete variables; if NULL, default is "Set1"
#' @param pts.size numeric: custom point size for ggplot
#' @param gradient.colors character vector: colors used for ggplot2::scale_fill_gradient2
#' @param na.color character: color used for NA values
#' @param return.tbl boolean: if TRUE, return the data frames of similarity values used to make the ggplot instead of the plot itself
#' @param return.ras boolean: if TRUE, return the SpatRaster of similarity values used to make the ggplot instead of the plot itself
#' @param quiet boolean: if TRUE, silence all function messages (but not errors)
#' @details Rasters are plotted showing the environmental similarity estimates for each 
#' partition group in relation to the others. The similarity between environmental values associated with the 
#' validation occurrence or background records per partition group and those associated with 
#' the entire study extent (specified by the extent of the input SpatRaster "envs") are 
#' calculated with the MESS algorithm, and the minimum similarity per grid cell is returned. Higher 
#' negative values indicate greater environmental difference between the validation occurrences 
#' and the study extent, and higher positive values indicate greater similarity. This function 
#' uses the `mess()` function from the package `predicts` to calculate the similarities. Please see the below 
#' reference for details on MESS. 
#' @return A ggplot of environmental similarities between the occurrence or background data 
#' for each partition and all predictor variable values in the extent.
#' @references 
#' Baumgartner J, Wilson P (2021). _rmaxent: Tools for working with Maxent in R_. R package version 0.8.5.9000, <URL: https://github.com/johnbaums/rmaxent>.
#' Elith, J., Kearney, M., and Phillips, S. (2010) The art of modelling range-shifting species. \emph{Methods in Ecology and Evolution}, \bold{1}: 330-342. \doi{doi:10.1111/j.2041-210X.2010.00036.x}
#' 
#' @examples
#' \dontrun{
#' library(terra)
#' library(ENMeval)
#' 
#' # first, let's tune some models
#' occs <- read.csv(file.path(system.file(package="predicts"), 
#' "/ex/bradypus.csv"))[,2:3]
#' envs <- rast(list.files(path=paste(system.file(package="predicts"), 
#' "/ex", sep=""), pattern="tif$", full.names=TRUE))
#' bg <- as.data.frame(predicts::backgroundSample(envs, n = 10000))
#' names(bg) <- names(occs)
#'  
#' ps <- list(orientation = "lat_lat")
#' 
#' e <- ENMevaluate(occs, envs, bg, 
#'                tune.args = list(fc = c("L","LQ","LQH"), rm = 1:5), 
#'                partitions = "block", partition.settings = ps, 
#'                algorithm = "maxnet", categoricals = "biome", 
#'                parallel = TRUE)
#' 
#' # now, plot the environmental similarity of each partition to the others               
#' evalplot.envSim.map(e, envs)
#' }
#' 
#' @export

evalplot.envSim.map <- function(e = NULL, envs, occs.z = NULL, bg.z = NULL, occs.grp = NULL, 
                                bg.grp = NULL, ref.data = "occs", 
                                envs.vars = NULL, bb.buf = 0, occs.testing.z = NULL,
                                plot.bg.pts = FALSE, sim.palette = NULL, 
                                pts.size = 1.5, gradient.colors = c("red","white","blue"), na.color = "gray",
                                return.tbl = FALSE, return.ras = FALSE, quiet = FALSE) {
  
  if(return.tbl == TRUE & return.ras == TRUE) {
    stop("*Error: please select only one of return.tbl or return.ras.")
  }
  
  if(is.null(e) & (ref.data == "occs" & any(is.null(occs.z), is.null(occs.grp)))) {
    stop("* Error: If using occurrences as reference group, ensure you input occs.z and occs.grp") 
  }
  if(is.null(e) & (ref.data == "bg" & any(is.null(bg.z), is.null(bg.grp)))) {
    stop("* Error: If using background as reference group, ensure you input bg.z and bg.grp") 
  }
  
  if(!is.numeric(bb.buf)) stop("Please ensure bb.buf is a number.")
  
  pts.plot <- plot.sim.dataPrep(e, envs, occs.z, bg.z, occs.grp, bg.grp, ref.data, occs.testing.z, quiet)
  
  categoricals <- unique(names(envs)[which(terra::is.factor(envs))])
  if(length(categoricals) != 0 & !is.null(envs)) {
    envs <- terra::subset(envs, -which(names(envs) %in% categoricals))
  }
  
  if(!is.null(envs.vars)) {
    if(!quiet) message(paste0("* Similarity values calculated based only on ", 
                              paste(envs.vars, collapse = ", "), "."))
    envs.names <- names(envs)
    envs.rem <- envs.names[-which(envs.names %in% envs.vars)]
    pts.plot <- pts.plot |> dplyr::select(-dplyr::all_of(envs.rem))
    envs <- envs[[envs.vars]]
  }
  
  if(ref.data == "occs") {
    pts.plot <- pts.plot |> dplyr::filter(type == 1) |> dplyr::select(-type)
  }else if (ref.data == "bg") {
    pts.plot <- pts.plot |> dplyr::filter(type == 0) |> dplyr::select(-type)
  }
  
  ras.sim <- list()
  nk <- length(unique(pts.plot$partition))
  if(nk == sum(pts.plot$type)) {
    stop("This plotting function is not available for jackknife (leave-one-out) partitions.")
  }
  
  for(k in 1:nk) {
    message("Calculating MESS for partition ", k, "...")
    test.z <- pts.plot |> dplyr::filter(partition == k) |> 
      dplyr::select(-longitude, -latitude, -partition)
    
    sim <- tryCatch({
      predicts::mess(envs, test.z)
    }, error = function(cond) {
      message('Error: there may be at least one categorical variable in the predictor data that is not attributed as a factor. Please convert these variable(s) to factor.')
      # Choose a return value in case of error
      return(NULL)
    })
    names(sim) <- paste0("partition", k)
    
    ras.sim[[k]] <- sim
  }
  
  rs.sim <- terra::rast(ras.sim)
  plot.df <- terra::as.data.frame(rs.sim, xy = TRUE, na.rm = FALSE) |>
    tidyr::pivot_longer(cols = 3:dplyr::last_col(), names_to = "ras", values_to = "mess")
  # add buffer
  plot.df <- plot.df |> dplyr::filter(x > min(pts.plot$longitude) - bb.buf, 
                                      x < max(pts.plot$longitude) + bb.buf,
                                      y > min(pts.plot$latitude) - bb.buf, 
                                      y < max(pts.plot$latitude) + bb.buf)
  
  title <- "Multivariate environmental similarity" 
  
  plot.text <- paste("\n(Values represent environmental similarity between", 
                     switch(ref.data, occs = "occurrence", bg = "background"),
                     "partitions and all raster cells with values.)")
  
  g <- ggplot2::ggplot() +
    ggplot2::geom_raster(data = plot.df, ggplot2::aes_string(x = "x", y = "y", fill = "mess"))
  if(ref.data == "bg" & plot.bg.pts == FALSE) {
    g <- g + ggplot2::facet_wrap(ggplot2::vars(ras), ncol = 2) +
      ggplot2::ggtitle(paste(title, plot.text, collapse = "\n")) +
      ggplot2::theme_classic()
  }else{
    g <- g + ggplot2::geom_point(data = pts.plot, ggplot2::aes(x = longitude, y = latitude, shape = partition), color = "black", size = pts.size) +
      ggplot2::facet_wrap(ggplot2::vars(ras), ncol = 2) +
      ggplot2::ggtitle(paste(title, plot.text, collapse = "\n")) +
      ggplot2::theme_classic()
  }
  g <- g + ggplot2::scale_fill_gradient2(low = gradient.colors[1], mid = gradient.colors[2], high = gradient.colors[3], na.value = na.color)
  
  if(return.tbl == TRUE) {
    return(tibble::as_tibble(plot.df))
  }else if(return.ras == TRUE) {
    return(rs.sim)
  }else{
    return(g)  
  }
}


#' @title ENMevaluation statistics plot
#' @description Plot evaluation statistics over tuning parameter ranges to visualize differences in performance
#' @param e ENMevaluation object
#' @param stats character vector: names of statistics from results table to be plotted; if more than
#' one statistic is specified, the plot will be faceted
#' @param x.var character: variable to be plotted on x-axis
#' @param color.var character: variable used to assign symbology colors 
#' @param dodge numeric: dodge width for points and lines; this improves visibility when there is high overlap (optional)
#' @param error.bars boolean: if TRUE, plot error bars
#' @param facet.labels character vector: custom names for the metric facets
#' @param metric.levels character vector: custom factor levels for metrics; this controls the order that metric statistics are plotted
#' @param return.tbl boolean: if TRUE, return the data frames of results used to make the ggplot instead of the plot itself
#' @details In this plot, the x-axis represents a tuning parameter range, while the y-axis represents the average of a statistic over all partitions.
#' Different colors represent the categories or values of another tuning parameter. 
#' Error bars represent the standard deviation of a statistic around the mean. 
#' Currently, this function can only plot two tuning parameters at a time.
#' @return A ggplot of evaluation statistics. 
#' 
#' @examples
#' \dontrun{
#' # first, let's tune some models
#' occs <- read.csv(file.path(system.file(package="predicts"), 
#' "/ex/bradypus.csv"))[,2:3]
#' envs <- rast(list.files(path=paste(system.file(package="predicts"), 
#' "/ex", sep=""), pattern="tif$", full.names=TRUE))
#' bg <- as.data.frame(predicts::backgroundSample(envs, n = 10000))
#' names(bg) <- names(occs)
#'  
#' ps <- list(orientation = "lat_lat")

#' e <- ENMevaluate(occs, envs, bg, 
#'                tune.args = list(fc = c("L","LQ","LQH"), rm = 1:5), 
#'                partitions = "block", partition.settings = ps, 
#'                algorithm = "maxnet", categoricals = "biome", 
#'                parallel = TRUE)
#'                
#' evalplot.stats(e, c("cbi.val", "or.mtp"), x.var = "rm", color.var = "fc", 
#'              error.bars = FALSE)
#' }
#' @export

evalplot.stats <- function(e, stats, x.var, color.var, dodge = NULL, error.bars = TRUE, facet.labels = NULL, metric.levels = NULL, return.tbl = FALSE) {
  exp <- paste(paste0("*", stats), collapse = "|")
  res <- e@results |> 
    tidyr::pivot_longer(cols = auc.train:ncoef, names_to = "metric", values_to = "value") |>
    dplyr::filter(grepl(exp, metric))
  avgs <- res |> 
    dplyr::filter(grepl("avg", metric)) |>
    dplyr::rename(avg = value) |>
    dplyr::mutate(metric = gsub(".avg", "", metric))
  sds <- res |> 
    dplyr::filter(grepl("sd", metric)) |>
    dplyr::rename(sd = value) |>
    dplyr::mutate(metric = gsub(".sd", "", metric))
  join.names <- names(avgs)
  join.names <- join.names[join.names != "avg"]
  res.avgs <- dplyr::left_join(avgs, sds, by = join.names) |>
    dplyr::mutate(lower = avg - sd, upper = avg + sd,
                  metric = factor(metric, levels = stats))
  res.avgs[[x.var]] <- factor(res.avgs[[x.var]])
  res.avgs[[color.var]] <- factor(res.avgs[[color.var]])
  if(!is.null(facet.labels)) labeller <- ggplot2::as_labeller(facet.labels) else labeller <- NULL
  if(!is.null(metric.levels)) res$metric <- factor(res$metric, levels = metric.levels)
  if(!is.null(metric.levels)) res.avgs$metric <- factor(res.avgs$metric, levels = metric.levels)
  
  if(nrow(res.avgs) > 0) {
    if(is.null(dodge)) dodge <- 0.1
    g <- ggplot2::ggplot(res.avgs, ggplot2::aes_string(x = x.var, y = "avg", color = color.var, group = color.var)) + 
      ggplot2::geom_point(position=ggplot2::position_dodge(width=dodge)) + 
      ggplot2::geom_line(position=ggplot2::position_dodge(width=dodge)) +
      ggplot2::theme_bw()
    if(length(stats) > 1) {
      if(!is.null(labeller)) {
        g <- g + ggplot2::facet_wrap(ggplot2::vars(metric), scales = "free_y", nrow = length(stats), labeller = labeller)
      }else{
        g <- g + ggplot2::facet_wrap(ggplot2::vars(metric), scales = "free_y", nrow = length(stats))  
      }
    }
    if(error.bars == TRUE) {
      g <- g + ggplot2::geom_errorbar(ggplot2::aes(ymin = lower, ymax = upper), width = 0.5, 
                                      position = ggplot2::position_dodge(width=dodge))
    }
    g
    if(return.tbl == TRUE) {
      return(tibble::as_tibble(res.avgs))
    }else{
      return(g)  
    }
  }else{
    if(is.null(dodge)) dodge <- 0
    g <- ggplot2::ggplot(res, ggplot2::aes_string(x = x.var, y = "value", color = color.var, group = color.var)) + 
      ggplot2::geom_point(position=ggplot2::position_dodge(width=dodge)) + 
      ggplot2::geom_line(position=ggplot2::position_dodge(width=dodge)) + 
      ggplot2::theme_bw()
    if(!is.null(labeller)) {
      g <- g + ggplot2::facet_wrap(ggplot2::vars(metric), scales = "free_y", nrow = length(stats), labeller = labeller)
    }else{
      g <- g + ggplot2::facet_wrap(ggplot2::vars(metric), scales = "free_y", nrow = length(stats))
    }
    g
    if(return.tbl == TRUE) {
      return(tibble::as_tibble(res))
    }else{
      return(g)  
    }
  }
}

#' @title ENMnulls statistics plot
#' @description Plot histogram of evaluation statistics for null ENM simulations
#' @param e.null ENMnull object
#' @param stats character vector: metrics from results table to be plotted; examples are
#' "auc.val" or "or.10p"; if more than one statistic is specified, the histogram plot will be faceted
#' @param plot.type character: either "violin" or "histogram"
#' @param facet.labels named list: custom names for the metric facets, in the form list(old_name = "new_name", ...)
#' @param metric.levels character vector: custom factor levels for metrics; this controls the order that metric statistics are plotted
#' @param return.tbl boolean: if TRUE, return the data frames of null results used to make the ggplot instead of the plot itself
#' @details There are two variations for this plot, but both show null quantiles (0.01, 0.05, 0.5, 0.95, 0.99). 
#' For violin plots, the null distribution is displayed as a vertical shape (i.e., the violin) with horizontal lines showing 
#' the quantiles and the empirical value is plotted as a red point along the vertical axis. 
#' For histogram plots, the null distribution is displayed as a histogram with vertical lines showing the quantiles 
#' and the empirical value as a vertical red line on the distribution.
#' 
#' @examples
#' \dontrun{
#' # first, let's tune some models
#' occs <- read.csv(file.path(system.file(package="predicts"), 
#'                            "/ex/bradypus.csv"))[,2:3]
#' envs <- rast(list.files(path=paste(system.file(package="predicts"), 
#'                                    "/ex", sep=""), pattern="tif$", full.names=TRUE))
#' bg <- as.data.frame(predicts::backgroundSample(envs, n = 10000))
#' names(bg) <- names(occs)
#' 
#' ps <- list(orientation = "lat_lat")
#' 
#' e <- ENMevaluate(occs, envs, bg, 
#'                  tune.args = list(fc = c("L","LQ","LQH"), rm = 2:4), partitions = "block", 
#'                  partition.settings = ps, algorithm = "maxnet", categoricals = "biome",
#'                  parallel = TRUE)
#' 
#' d <- eval.results(e)
#' 
#' # here, we will choose an optimal model based on validation CBI, but you can
#' # choose yourself what evaluation statistics to use
#' opt <- d |> filter(cbi.val.avg == max(cbi.val.avg))
#' 
#' # now we can run our null models 
#' # NOTE: you should use at least 100 iterations in practice -- this is just an
#' # example
#' nulls <- ENMnulls(e, mod.settings = list(fc = opt$fc, rm = opt$rm), no.iter = 10)
#' 
#' # let's plot the null model results
#' evalplot.nulls(nulls, stats = c("cbi.val", "auc.val"), plot.type = "violin")
#' }
#' 
#' @return A ggplot of null model statistics. 
#' @export

evalplot.nulls <- function(e.null, stats, plot.type, facet.labels = NULL, metric.levels = NULL, return.tbl = FALSE) {
  exp <- paste(paste0("*", stats), collapse = "|")
  null.res <- e.null@null.results |> 
    tidyr::pivot_longer(cols = auc.train:ncoef, names_to = "metric", values_to = "value") |>
    dplyr::filter(grepl(exp, metric)) |>
    dplyr::select(metric, value)
  null.avgs <- null.res |> 
    dplyr::filter(grepl("avg", metric) | metric %in% stats) |>
    dplyr::rename(avg = value) |>
    dplyr::mutate(metric = gsub(".avg", "", metric)) |>
    dplyr::mutate(metric = factor(metric, levels = stats))
  if(!is.null(metric.levels)) null.avgs$metric <- factor(null.avgs$metric, levels = metric.levels)
  # null.sds <- null.res |> 
  #   dplyr::filter(grepl("sd", metric)) |>
  #   dplyr::rename(sd = value) |>
  #   dplyr::mutate(metric = gsub(".sd", "", metric))
  # null.res.avgs <- dplyr::bind_cols(null.avgs, null.sds |> dplyr::select(sd))
  
  emp.res <- e.null@null.emp.results |> 
    dplyr::slice(1) |>
    tidyr::pivot_longer(cols = stats, names_to = "metric", values_to = "value") |>
    dplyr::select(statistic, metric, value) |>
    tidyr::pivot_wider(names_from = statistic, values_from = value) |>
    dplyr::rename(avg = emp.mean) |>
    dplyr::mutate(metric = factor(metric, levels = stats))
  
  if(!is.null(facet.labels)) labeller <- ggplot2::as_labeller(facet.labels) else labeller <- NULL
  
  if(plot.type == "violin") {
    g <- ggplot2::ggplot(null.avgs, ggplot2::aes(x = metric, y = avg)) + 
      ggplot2::geom_violin(draw_quantiles = c(0.01, 0.05, 0.5, 0.95, 0.99)) +
      ggplot2::geom_point(data = emp.res, ggplot2::aes(y = avg), color = "red") +
      ggplot2::theme_bw()  
  }else if(plot.type == "histogram") {
    stats.all <- rbind(null.avgs, emp.res)
    vlines <- null.avgs |> dplyr::group_by(metric) |> 
      dplyr::summarize(`0.01 quantile` = quantile(avg, 0.01),
                       `0.05 quantile` = quantile(avg, 0.05),
                       `0.50 quantile` = quantile(avg, 0.5),
                       `0.95 quantile` = quantile(avg, 0.95),
                       `0.99 quantile` = quantile(avg, 0.99)) |>
      tidyr::pivot_longer(cols = `0.01 quantile`:`0.99 quantile`, names_to = "quantile", values_to = "value")
    vlines <- rbind(vlines, emp.res |> dplyr::mutate(quantile = "empirical value") |> dplyr::rename(value = avg))
    g <- ggplot2::ggplot(mapping = ggplot2::aes(x = avg)) + 
      ggplot2::geom_histogram(data = null.avgs, fill = "gray80") +
      ggplot2::geom_vline(data = vlines, ggplot2::aes(xintercept = value, color = quantile, linetype = quantile, size = quantile)) +
      ggplot2::scale_color_manual(values = c(`0.01 quantile` = "purple", 
                                             `0.05 quantile` = "blue",
                                             `0.50 quantile` = "blue",
                                             `0.95 quantile` = "blue",
                                             `0.99 quantile` = "purple",
                                             `empirical value` = "red")) +
      ggplot2::scale_linetype_manual(values = c(`0.01 quantile` = "dotted", 
                                                `0.05 quantile` = "dashed",
                                                `0.50 quantile` = "solid",
                                                `0.95 quantile` = "dashed", 
                                                `0.99 quantile` = "dotted",
                                                `empirical value` = "solid")) +
      ggplot2::scale_size_manual(values = c(`0.01 quantile` = 1, 
                                            `0.05 quantile` = 1,
                                            `0.50 quantile` = 1,
                                            `0.95 quantile` = 1, 
                                            `0.99 quantile` = 1,
                                            `empirical value` = 0.5)) +
      ggplot2::theme_bw() +
      ggplot2::theme(legend.title=ggplot2::element_blank(), 
                     legend.position="bottom")
    if(!is.null(labeller)) {
      g <- g + ggplot2::facet_wrap(ggplot2::vars(metric), scales = "free_x", ncol = 1, labeller = labeller)
    }else{
      g <- g + ggplot2::facet_wrap(ggplot2::vars(metric), scales = "free_x", ncol = 1)
    }
  }
  if(return.tbl == TRUE) {
    return(list(null.avgs = tibble::as_tibble(null.avgs), empirical.results = tibble::as_tibble(emp.res)))
  }else{
    return(g)  
  }
}
jamiemkass/ENMeval documentation built on Jan. 19, 2025, 4:33 a.m.