R/PartialROC.R
In tenm: Temporal Ecological Niche Models

Documented in pROC

#' Partial ROC calculation for Niche Models
#'
#' @description Apply partial ROC tests to continuous niche models.
#' @param continuous_mod A SpatRaster or numeric vector of the ecological
#' niche model to be evaluated. If a numeric vector is provided, it should
#' contain the values of the predicted suitability.
#' @param test_data A numerical matrix, data.frame, or numeric vector:
#'   - If data.frame or matrix, it should contain coordinates of the
#'     occurrences used to test the ecological niche model.
#'     Columns must be: longitude and latitude.
#'   - If numeric vector, it should contain the values of the predicted
#'     suitability.
#' @param E_percent Numeric value from 0 to 100 used as the threshold (E)
#' for partial ROC calculations. Default is 5.
#' @param boost_percent Numeric value from 0 to 100 representing the
#' percentage of testing data to use for bootstrap iterations in partial ROC.
#'  Default is 50.
#' @param n_iter Number of bootstrap iterations to perform for partial ROC
#' calculations. Default is 1000.
#' @param rseed Logical. Whether or not to set a random seed for
#' reproducibility. Default is \code{FALSE}.
#' @param sub_sample Logical. Indicates whether to use a subsample of
#'  the test data. Recommended for large datasets.
#' @param sub_sample_size Size of the subsample to use for computing pROC
#' values when sub_sample is \code{TRUE}.
#' @return A list of two elements:
#' - "pROC_summary": a data.frame containing the mean
#'   AUC value, AUC ratio calculated for each iteration and the p-value of the
#'   test.
#' - "pROC_results": a data.frame with four columns containing the AUC
#'   (auc_model), partial AUC (auc_pmodel), partial AUC of the random model
#'   (auc_prand) and the AUC ratio (auc_ratio) for each iteration.

#' @details Partial ROC is calculated following Peterson et al.
#' (2008; \doi{10.1016/j.ecolmodel.2007.11.008}).
#' This function is a modification of the PartialROC function, available
#' at \url{https://github.com/narayanibarve/ENMGadgets}.
#' @references Peterson, A.T. et al. (2008) Rethinking receiver operating
#' characteristic analysis applications in ecological niche modeling.
#' Ecol. Modell., 213, 63–72. \doi{10.1016/j.ecolmodel.2007.11.008}
#' @importFrom purrr map_df
#' @import future
#' @examples
#' data(abronia)
#' suit_1970_2000 <- terra::rast(system.file("extdata/suit_1970_2000.tif",
#'                                           package = "tenm"))
#' print(suit_1970_2000)
#' proc_test <- tenm::pROC(continuous_mod = suit_1970_2000,
#'                         test_data = abronia[,c("decimalLongitude",
#'                                                "decimalLatitude")],
#'                         n_iter = 500, E_percent=5,
#'                         boost_percent=50)
#' print(proc_test$pROC_summary)
#'
#' @export

pROC <- function(continuous_mod,test_data,
                 n_iter=1000,E_percent=5,
                 boost_percent=50,
                 rseed=FALSE,
                 sub_sample=TRUE,sub_sample_size=1000){
  if (methods::is(continuous_mod,"SpatRaster")) {
    ck1 <- paste0("continuous_mod@",names(attributes(continuous_mod))[1])
    ck2 <- paste0(ck1,"@.xData$")
    ck_min <- eval(parse(text=paste0(ck2,"range_min")))
    ck_max <- eval(parse(text=paste0(ck2,"range_max")))
    if (ck_min == ck_max) {
      stop("\nModel with no variability.\n")
    }
    if (is.data.frame(test_data) || is.matrix(test_data)) {
      test_data <- stats::na.omit(terra::extract(continuous_mod,
                                                  test_data))[,-1]

    }
    vals <- continuous_mod[!is.na(continuous_mod[])]
  }
  if(is.numeric(continuous_mod)){
    vals <- continuous_mod
    if (!is.numeric(test_data))
      stop("If continuous_mod is of class numeric,
           test_data must be numeric...")
  }
  if(sub_sample){
    nvals <- length(vals)
    if(sub_sample_size> nvals) sub_sample_size <- nvals
    vals <- base::sample(vals,size = sub_sample_size)
  }
  ndigits <- proc_precision(mod_vals = vals,
                            test_data = test_data)

  tomult <- as.numeric(paste0("1e+",ndigits))
  test_value <- test_data*tomult
  test_value <- round(as.vector(test_value))

  vals2 <- round(vals*tomult)
  classpixels <- as.data.frame(base::table(vals2),
                               stringsAsFactors = F)
  names(classpixels) <- c("value",
                          "count")
  classpixels$value <- as.numeric(classpixels$value)
  classpixels <- data.frame(stats::na.omit(classpixels))
  value <- count <- totpixperclass <- NULL
  classpixels <- classpixels |>
    dplyr::mutate(value  = rev(value),
                  count = rev(count),
                  totpixperclass = cumsum(count),
                  percentpixels = totpixperclass/sum(count)) |>
    dplyr::arrange(value)

  #if(nrow(classpixels)>1500){
  #  classpixels <- classpixels |>
  #    dplyr::sample_n(1500) |> dplyr::arrange(value)
  #}

  error_sens <- 1 - (E_percent/100)
  models_thresholds <- classpixels[, "value"]
  fractional_area <- classpixels[, "percentpixels"]
  n_data <- length(test_value)
  n_samp <- ceiling((boost_percent/100) * (n_data))

  big_classpixels <- matrix(rep(models_thresholds,
                                each = n_samp),
                            ncol = length(models_thresholds))


  calc_aucDF <- function(big_classpixels,
                         fractional_area,
                         test_value,
                         n_data, n_samp,
                         error_sens,rseed=NULL) {
    if(is.numeric(rseed)) set.seed(rseed)
    trapz_roc <- function(x,y){
      size_x <- length(x)
      xp <- c(x, x[size_x:1])
      yp <- c(numeric(size_x), y[size_x:1])
      nda <- 2 * size_x
      p1 <- sum(xp[1:(nda - 1)] * yp[2:nda]) + xp[nda] * yp[1]
      p2 <- sum(xp[2:nda] * yp[1:(nda - 1)]) + xp[1] * yp[nda]
      return(0.5 * (p1 - p2))
    }

    rowsID <- sample(x = n_data,
                     size = n_samp,
                     replace = TRUE)

    test_value1 <- test_value[rowsID]
    omssion_matrix <- big_classpixels > test_value1
    sensibility <- 1 - colSums(omssion_matrix)/n_samp
    xyTable <- data.frame(fractional_area, sensibility)
    xyTable <- rbind(xyTable,c(0,0))
    xyTable <- xyTable[order(xyTable$fractional_area,
                             decreasing = F),]
    auc_model <- try(trapz_roc(xyTable$fractional_area,
                               xyTable$sensibility),silent = TRUE)
    if(methods::is(auc_model,"try-error")) auc_model <- NA


    if(error_sens>0){
      less_ID <- which(xyTable$sensibility <= error_sens)
      xyTable <- xyTable[-less_ID, ]
      auc_pmodel <- try(trapz_roc(xyTable$fractional_area,
                                  xyTable$sensibility),silent = TRUE)

      if(methods::is(auc_pmodel,"try-error")) auc_pmodel <- NA

      auc_prand <- try(trapz_roc(xyTable$fractional_area,
                                 xyTable$fractional_area),silent = TRUE)
      if(methods::is(auc_prand,"try-error")) auc_prand <- NA

    }
    else{
      auc_pmodel <- auc_model
      auc_prand <- 0.5

    }



    auc_ratio <- auc_pmodel/auc_prand
    auc_table <- data.frame(auc_model,
                            auc_pmodel,
                            auc_prand,
                            auc_ratio)
    return(auc_table)
  }
  partial_AUC <- seq_len(n_iter) |>
    purrr::map_df(function(i){
      proc <- calc_aucDF(big_classpixels,
                         fractional_area,
                         test_value,
                         n_data,
                         n_samp,
                         error_sens,rseed = i)
    })

  mauc <-  mean(partial_AUC$auc_model, na.rm = TRUE)
  maucp <- mean(partial_AUC$auc_ratio, na.rm = TRUE)
  proc <- sum(partial_AUC$auc_ratio <= 1, na.rm = TRUE)/
    length(partial_AUC$auc_ratio[!is.na(partial_AUC$auc_ratio)])

  p_roc <- c(mauc,maucp, proc)
  names(p_roc) <- c("Mean_AUC",
                    paste("Mean_pAUC_ratio_at_",
                          E_percent,
                          "%", sep = ""),
                    "P_value")
  p_roc_res <- list(pROC_summary = p_roc,
                    pROC_results = partial_AUC)
  return(p_roc_res)
}


proc_precision <- function(mod_vals,test_data){

  min_vals <- min(mod_vals,na.rm = TRUE)

  percentil_test <- unique(sort(stats::na.omit(test_data)))[2]


  #percentil_test <- stats::quantile(test_data,
  #                                  probs=0.1)
  partition_flag <- mean(c(min_vals,
                           percentil_test))
  fflag <- stringr::str_detect(partition_flag, "e")
  if (length(fflag)>0L && fflag) {
    ndigits <- stringr::str_split(partition_flag, "e-")[[1]]
    ndigits <- as.numeric(ndigits)[2] #- 1
  }
  else {
    med <- stringr::str_extract_all(partition_flag, pattern = "[0-9]|[.]")
    med <- unlist(med)
    med <- med[-(1:which(med == "."))]
    med1 <- which(med != 0)
    ndigits <- ifelse(med1[1] <= 2, 3, 4)
  }
  return(ndigits)
}