# R/summary_ssp.R
# In SSP: Simulated Sampling Procedure for Community Ecology
#
# Documented in summary_ssp

#' Summary of MultSE for Each Sampling Effort in Simulated Data Sets
#'
#' Computes the average MultSE (pseudo-multivariate standard error) for each sampling effort across simulated datasets,
#' and estimates associated variation and rate of change.
#'
#' @param results A matrix generated by \code{\link{sampsd}} containing MultSE values for each simulation and sampling configuration.
#' @param multi.site Logical. Indicates whether multiple sites were simulated.
#'
#' @details
#' For each sampling effort in each simulated data set, the average MultSE is computed (Anderson & Santana-Garcon, 2015).
#' The function then calculates the overall mean and associated lower and upper quantiles of these averages.
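#' To evaluate how precision improves with effort, the average MultSE values are expressed as a percentage of a reference value:
#' the maximum average within each scale when \code{multi.site = TRUE}, or the average at the lowest sampling effort when \code{multi.site = FALSE} (typically also the maximum).
#' A numerical forward finite difference derivative of these relativized values is then calculated to approximate the rate of change.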
#'
#' This output is used to support the identification of optimal and redundant sampling efforts based on precision gain.
#'
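#' @return A data frame with one row per sampling effort and the columns \code{samples} (sampling effort), \code{mean}, \code{upper} and \code{lower} (mean and 0.975/0.025 quantiles of the average MultSE), \code{rel} (relativized mean, in percent), \code{der} (absolute value of the estimated derivative) and \code{cum} (rounded cumulative sum of the derivative). When \code{multi.site = TRUE}, an additional column \code{sv} identifies the scale (\code{"sites"} or \code{"samples"}).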
#'
#' @note This data frame can be used to plot MultSE versus sampling effort and to apply cutoff rules using \code{\link{ioptimum}}.
#'
#' @references
#' Anderson, M. J., & Santana-Garcon, J. (2015). Measures of precision for dissimilarity-based multivariate analysis of ecological communities. Ecology Letters, 18(1), 66–73.
#'
#' Guerra-Castro, E.J., Cajas, J.C., Simões, N., Cruz-Motta, J.J., & Mascaró, M. (2021). SSP: an R package to estimate sampling effort in studies of ecological communities. Ecography 44(4), 561-573. doi: \doi{10.1111/ecog.05284}
#'
#' @seealso \code{\link{sampsd}}, \code{\link{ioptimum}}
#'
#' @examples
#' ## Single site example
#' data(micromollusk)
#' par.mic <- assempar(data = micromollusk, type = "P/A", Sest.method = "average")
#' sim.mic <- simdata(par.mic, cases = 3, N = 10, sites = 1)
#' sam.mic <- sampsd(dat.sim = sim.mic, Par = par.mic, transformation = "P/A",
#'                   method = "jaccard", n = 10, m = 1, k = 3)
#' summ.mic <- summary_ssp(results = sam.mic, multi.site = FALSE)
#'
#' ## Multiple site example
#' data(sponges)
#' par.spo <- assempar(data = sponges, type = "counts", Sest.method = "average")
#' sim.spo <- simdata(par.spo, cases = 3, N = 20, sites = 3)
#' sam.spo <- sampsd(dat.sim = sim.spo, Par = par.spo, transformation = "square root",
#'                   method = "bray", n = 10, m = 3, k = 3)
#' summ.spo <- summary_ssp(results = sam.spo, multi.site = TRUE)
#'
#' @importFrom stats aggregate quantile
#'
#' @export
summary_ssp <- function(results, multi.site) {
  # Summarises the MultSE values in `results` for every sampling effort.
  #
  # results:    output of sampsd(); it must provide the columns `dat.sim`,
  #             `n` and `mSE` (single site) or `dat.sim`, `m`, `n`,
  #             `MSE.sites` and `MSE.n` (multiple sites).
  # multi.site: single value coercible to TRUE/FALSE selecting the layout.
  #
  # Returns a data frame with the columns `samples`, `mean`, `upper`, `lower`,
  # `rel`, `der` and `cum`; the multi-site layout also carries `sv`
  # ("sites" or "samples") between `lower` and `rel`.

  # Resolve the flag once so that NA or a vector fails with a clear message
  # instead of a cryptic `if` error or a silent NULL return value.
  multi <- as.logical(multi.site)
  if (length(multi) != 1L || is.na(multi)) {
    stop("'multi.site' must be a single TRUE or FALSE value", call. = FALSE)
  }

  lower <- function(x) {
    quantile(x, 0.025)
  }
  upper <- function(x) {
    quantile(x, 0.975)
  }

  # Average MultSE per simulated data set and effort (`per_sim` formula), then
  # the mean and the 0.975/0.025 quantiles of those averages per effort
  # (`per_effort` formula).
  summarise_effort <- function(per_sim, per_effort) {
    sim_avg <- aggregate(per_sim, data = results, FUN = mean)
    avg <- aggregate(per_effort, data = sim_avg, FUN = mean)
    upp <- aggregate(per_effort, data = sim_avg, FUN = upper)
    low <- aggregate(per_effort, data = sim_avg, FUN = lower)
    out <- cbind(avg, upp[, 2], low[, 2])
    colnames(out) <- c("samples", "mean", "upper", "lower")
    out
  }

  # Append `rel` (mean as a percentage of `reference`), `der` (absolute decline
  # of `rel` per unit of effort, 3 decimals) and `cum` (rounded running sum of
  # the derivative). The first effort has no predecessor, hence NA. The
  # vectorised differences also cover a table with a single effort level,
  # where an index loop over 1:(nrow - 1) would run over c(1, 0).
  add_rates <- function(tab, reference, cum_from_rounded) {
    tab$rel <- (tab$mean / reference) * 100
    last <- nrow(tab)
    decline <- tab$rel[-last] - tab$rel[-1L]
    step <- tab$samples[-1L] - tab$samples[-last]
    raw <- c(NA_real_, decline / step)
    tab$der <- round(abs(raw), 3)
    # The single-site layout accumulates the rounded absolute derivative,
    # the multi-site layout the signed, unrounded one.
    increments <- if (cum_from_rounded) tab$der else raw
    tab$cum <- round(c(NA_real_, cumsum(increments[-1L])), 0)
    tab
  }

  if (multi) {
    # Mean and 95% quantile interval of the MultSE at the scale of sites
    sites <- summarise_effort(MSE.sites ~ dat.sim * m, MSE.sites ~ m)
    sites$sv <- rep("sites", nrow(sites))

    # Mean and 95% quantile interval of the MultSE at the scale of samples
    samp <- summarise_effort(MSE.n ~ dat.sim * n, MSE.n ~ n)
    samp$sv <- rep("samples", nrow(samp))

    # Each scale is relativized to its own maximum average MultSE
    sites <- add_rates(sites, max(sites$mean), cum_from_rounded = FALSE)
    samp <- add_rates(samp, max(samp$mean), cum_from_rounded = FALSE)
    rbind(sites, samp)
  } else {
    # Mean and 95% quantile interval of the MultSE
    xx <- summarise_effort(mSE ~ dat.sim * n, mSE ~ n)

    # Relativized to the average MultSE at the lowest sampling effort
    add_rates(xx, xx$mean[1], cum_from_rounded = TRUE)
  }
}


####