ellipsenm: Ecological Niche's Characterizations Using Ellipsoids

Documented in partial_roc

#' Partial ROC calculation for ecological niche models
#'
#' @description partial_roc applies partial ROC tests to model predictions.
#'
#' @param prediction RasterLayer or numeric vector of ecological niche model
#' predictions to be evaluated. If RasterLayer, layer of predicted suitability.
#' If numeric vector, predicted suitability values.
#' @param test_data matrix, data.frame, or numeric vector containing coordinates
#' of occurrences to test model predictions to be evaluated. If matrix or
#' data.frame, columns must include longitude and latitude to extract values of
#' raster prediction. If numeric, values of suitability in such occurrences. If
#' a matrix or a data.frame is provided, \code{prediction} must be a RasterLayer.
#' @param longitude (character) name of the column with longitude data.
#' @param latitude (character) name of the column with latitude data.
#' @param error (numeric) value from 0 to 100 to represent the percentage of
#' potential error (E) that the data could have due to any source of uncertainty.
#' Default = 5.
#' @param iterations (numeric) number of bootstrap iterations to be performed;
#' default = 500.
#' @param percentage (numeric) percentage of testing data to be used in each
#' bootstrapped process for calculating the partial ROC. Default = 50.
#'
#' @return A list with the summary of the results and a data.frame containing
#' the AUC values and AUC ratios calculated for all iterations.
#'
#' @usage
#' partial_roc(prediction, test_data, longitude, latitude, error = 5,
#'             iterations = 500, percentage = 50)
#'
#' @details Partial ROC is calculated following Peterson et al. (2008;
#' \url{http://dx.doi.org/10.1016/j.ecolmodel.2007.11.008}).
#'
#' @importFrom purrr map_df
#' @useDynLib ellipsenm
#' @export
#'
#' @examples
#' occurrences <- read.csv(system.file("extdata", "occurrences.csv",
#'                         package = "ellipsenm"))
#' prediction <- raster::raster(system.file("extdata", "prediction.tif",
#'                                       package = "ellipsenm"))
#'
#' p_roc <- partial_roc(prediction = prediction, test_data = occurrences,
#'                      longitude = "Longitude", latitude = "Latitude")

partial_roc <- function(prediction, test_data, longitude, latitude, error = 5,
                        iterations = 500, percentage = 50) {

  # -----------
  # detecting potential errors, other potential problems tested in code
  if (missing(prediction)) {
    stop("Argument 'prediction' is necessary to perform the analysis.")
  }
  if (missing(test_data)) {
    stop("Argument 'test_data' is necessary to perform the analysis.")
  }
  c_pred <- class(prediction)[1]
  if (!c_pred %in% c("RasterLayer", "numeric")) {
    stop("'prediction' must be of class RasterLayer or numeric.")
  }
  c_tdat <- class(test_data)[1]
  if (!c_tdat %in% c("matrix", "data.frame", "numeric")) {
    stop("'test_data' must be of class matrix, data.frame, or numeric.")
  }
  if (c_pred == "numeric" & c_tdat != "numeric") {
    stop("'test_data' must be of class numeric if prediction is a numeric vector.")
  }
  if (c_tdat != "numeric") {
    if (missing(longitude)) {
      stop("Argument 'longitude' is not defined.")
    }
    if (missing(latitude)) {
      stop("Argument 'latitude' is not defined.")
    }
  }

  # -----------
  # package and function needed
  suppressPackageStartupMessages(library(dplyr))

  calc_aucDF <- function(big_classpixels, fractional_area, test_data, n_data,
                         n_samp, error_sens) {
    rowsID <- sample(x = n_data, size = n_samp, replace = TRUE)
    test_data1 <- test_data[rowsID]
    omssion_matrix <- big_classpixels > test_data1
    sensibility <- 1 - colSums(omssion_matrix) / n_samp
    xyTable <- data.frame(fractional_area, sensibility)
    less_ID <- which(xyTable$sensibility <= error_sens)
    xyTable <- xyTable[-less_ID, ]
    xyTable <- xyTable[order(xyTable$fractional_area, decreasing = F), ]
    auc_pmodel <- ellipsenm:::trap_roc(xyTable$fractional_area, xyTable$sensibility)
    auc_prand <- ellipsenm:::trap_roc(xyTable$fractional_area, xyTable$fractional_area)
    auc_ratio <- auc_pmodel / auc_prand
    auc_table <- data.frame(auc_pmodel, auc_prand, auc_ratio = auc_ratio )
    return(auc_table)
  }

  # -----------
  # preparing data
  if (c_pred == "RasterLayer") {prediction <- raster::setMinMax(prediction)}
  min_pred <- ifelse(c_pred == "numeric", min(prediction, na.rm = TRUE),
                     prediction@data@min)
  max_pred <- ifelse(c_pred == "numeric", max(prediction, na.rm = TRUE),
                     prediction@data@max)

  prediction <- round((prediction / max_pred) * 1000)
  if (c_pred == "RasterLayer") {
    if (c_tdat != "numeric") {
      test_data <- na.omit(raster::extract(prediction,
                                           test_data[, c(longitude, latitude)]))
    } else {
      test_data <- round((test_data / max_pred) * 1000)
    }
    classpixels <- data.frame(raster::freq(prediction, useNA = "no"))
  } else {
    test_data <- round((test_data / max_pred) * 1000)
    vals <- na.omit(unique(prediction))
    classpixels <- data.frame(value = vals, count = c(table(prediction)),
                              row.names = 1:length(vals))
  }

  # -----------
  # analysis
  if(min_pred == max_pred){
    warning("\nprediction has no variability, pROC will return NA.\n")

    p_roc <- rep(NA, 3)
    names(p_roc) <- c(paste0("Mean_AUC_ratio_at_", error, "%"), "pval_pROC",
                      "Valid_iterations")

    auc_ratios <- rep(NA, 3)
    names(auc_ratios) <- c("Prediction_partial_AUC", "Random_curve_partial_AUC",
                           "AUC_ratio")

    p_roc_res <- list(pROC_summary = p_roc, pROC_results = auc_ratios)

    return(p_roc_res)
  }else {
    classpixels <- classpixels %>%
      dplyr::mutate_(value = ~rev(value),
                     count = ~rev(count),
                     totpixperclass = ~cumsum(count),
                     percentpixels = ~totpixperclass/sum(count)) %>%
      dplyr::arrange(value)


    error_sens <- 1 - (error / 100)
    prediction_errors <- classpixels[, "value"]
    fractional_area <- classpixels[, "percentpixels"]
    n_data <- length(test_data)
    n_samp <- ceiling((percentage / 100) * n_data)

    big_classpixels <- matrix(rep(prediction_errors, each = n_samp),
                              ncol = length(prediction_errors))

    partial_AUC <- 1:iterations %>%
      purrr::map_df(~calc_aucDF(big_classpixels, fractional_area, test_data,
                                n_data, n_samp, error_sens))

    naID <- !is.na(partial_AUC$auc_ratio)
    nona_valproc <- partial_AUC$auc_ratio[naID]
    mauc <- mean(nona_valproc)
    proc <- sum(nona_valproc <= 1) / length(nona_valproc)
    valid_iter <- length(nona_valproc)

    p_roc <- c(mauc, proc, valid_iter)
    names(p_roc) <- c(paste0("Mean_AUC_ratio_at_", error, "%"), "pval_pROC",
                      "Valid_iterations")

    auc_ratios <- partial_AUC
    names(auc_ratios) <- c("Prediction_partial_AUC", "Random_curve_partial_AUC",
                           "AUC_ratio")

    p_roc_res <- list(pROC_summary = p_roc, pROC_results = auc_ratios)

    return(p_roc_res)
  }
}