Nothing
#' Coverage-Based Spatial Rarefaction
#'
#' Compute spatial accumulation curves with sample coverage tracking.
#' Allows standardization by completeness (coverage) rather than sample size,
#' following Chao & Jost (2012) or the sample-based estimator of Chiu (2023).
#'
#' @param x A site-by-species matrix with abundance data.
#' @param coords A data.frame with columns `x` and `y`, or a `spacc_dist` object.
#' @param n_seeds Integer. Number of random starting points. Default 50.
#' @param method Character. Accumulation method. Default `"knn"`.
#' @param distance Character. Distance method: `"euclidean"` or `"haversine"`.
#' @param coverage Character. Coverage estimator to use: `"chao"` (default)
#' for the individual-based Chao & Jost (2012) estimator using
#' singletons/doubletons, or `"chiu"` for the sample-based Chiu (2023)
#' estimator using incidence frequency counts (Q1/Q2) and unique-species
#' abundance (G1). The Chiu estimator is recommended for spatially aggregated
#' data where sampling units are plots/sites rather than independent individuals.
#' @param parallel Logical. Use parallel processing? Default `TRUE`.
#' @param n_cores Integer. Number of cores.
#' @param progress Logical. Show progress? Default `TRUE`.
#' @param seed Integer. Random seed.
#' @param map Logical. If `TRUE`, run accumulation from every site as seed
#' and store per-site final coverage and richness for spatial mapping. Enables
#' [as_sf()] and `plot(type = "map")`. Default `FALSE`.
#'
#' @return An object of class `spacc_coverage` containing:
#' \item{richness}{Matrix of species richness (n_seeds x n_sites)}
#' \item{individuals}{Matrix of individual counts}
#' \item{coverage}{Matrix of coverage estimates}
#' \item{coverage_type}{Coverage estimator used (`"chao"` or `"chiu"`)}
#' \item{coords, n_seeds, n_sites, method}{Parameters used}
#'
#' @details
#' Sample coverage estimates the proportion of the total community abundance
#' represented by observed species. It provides a measure of sampling
#' completeness that is independent of sample size.
#'
#' The **Chao-Jost (2012)** estimator counts singletons (f1) and doubletons
#' (f2) in the cumulative abundance vector. It assumes individuals are sampled
#' independently, which may not hold for plot-based spatial data.
#'
#' The **Chiu (2023)** estimator uses incidence frequency counts instead:
#' Q1 (species in exactly 1 site), Q2 (species in exactly 2 sites), and G1
#' (total abundance of Q1 species). It gives near-unbiased coverage estimates
#' when organisms are spatially aggregated across sampling units.
#'
#' Coverage-based rarefaction allows fair comparison of diversity across
#' communities with different abundances by standardizing to equal completeness
#' rather than equal sample size.
#'
#' @references
#' Chao, A. & Jost, L. (2012). Coverage-based rarefaction and extrapolation:
#' standardizing samples by completeness rather than size. Ecology, 93, 2533-2547.
#'
#' Chiu, C.-H. (2023). A sample-based estimator for sample coverage.
#' Ecology, 104, e4099.
#'
#' @seealso [iNEXT::iNEXT()] for coverage-based rarefaction without spatial structure
#'
#' @examples
#' \donttest{
#' coords <- data.frame(x = runif(50), y = runif(50))
#' species <- matrix(rpois(50 * 30, 2), nrow = 50)
#'
#' cov <- spaccCoverage(species, coords)
#' plot(cov)
#'
#' # Sample-based coverage (recommended for spatial data)
#' cov_chiu <- spaccCoverage(species, coords, coverage = "chiu")
#'
#' # Interpolate richness at 90% and 95% coverage
#' interp <- interpolateCoverage(cov, target = c(0.90, 0.95))
#' }
#'
#' @export
spaccCoverage <- function(x,
coords,
n_seeds = 50L,
method = "knn",
distance = c("euclidean", "haversine"),
coverage = c("chao", "chiu"),
parallel = TRUE,
n_cores = NULL,
progress = TRUE,
seed = NULL,
map = FALSE) {
distance <- match.arg(distance)
coverage <- match.arg(coverage)
if (!is.null(seed)) set.seed(seed)
n_cores <- resolve_cores(n_cores, parallel)
x <- as.matrix(x)
# Handle coords
if (inherits(coords, "spacc_dist")) {
dist_mat <- as.matrix(coords)
coord_data <- attr(coords, "coords")
} else {
stopifnot("coords must have x and y columns" = all(c("x", "y") %in% names(coords)))
coord_data <- coords
if (progress) cli_info("Computing site distances")
dist_mat <- cpp_distance_matrix(coord_data$x, coord_data$y, distance)
}
stopifnot("x and coords must have same rows" = nrow(x) == nrow(coord_data))
n_sites <- nrow(x)
n_species <- ncol(x)
# Need abundance data for coverage calculation
storage.mode(x) <- "integer"
# Convert coverage type to integer for C++
coverage_type <- if (coverage == "chiu") 1L else 0L
cov_label <- if (coverage == "chiu") "Chiu 2023" else "Chao-Jost 2012"
if (progress) cli_info(sprintf("Computing coverage-based accumulation (%d seeds, %s)", n_seeds, cov_label))
result <- cpp_knn_coverage_parallel(x, dist_mat, n_seeds, n_cores, progress,
coverage_type)
if (progress) cli_success("Done")
# Compute per-site map values if requested
site_values <- NULL
if (map) {
if (progress) cli_info("Computing per-site coverage map values (all sites as seeds)")
map_result <- cpp_knn_coverage_parallel(x, dist_mat, n_sites, n_cores, progress,
coverage_type)
site_values <- data.frame(
site_id = seq_len(n_sites),
x = coord_data$x,
y = coord_data$y,
final_richness = map_result$richness[, n_sites],
final_coverage = map_result$coverage[, n_sites],
final_individuals = map_result$individuals[, n_sites]
)
if (progress) cli_success("Map values computed")
}
structure(
list(
richness = result$richness,
individuals = result$individuals,
coverage = result$coverage,
coords = coord_data,
site_values = site_values,
n_seeds = n_seeds,
n_sites = n_sites,
n_species = n_species,
method = method,
distance = distance,
coverage_type = coverage,
call = match.call()
),
class = "spacc_coverage"
)
}
#' Interpolate Richness at Target Coverage Levels
#'
#' Estimate species richness at specified coverage levels by interpolation.
#'
#' @param x A `spacc_coverage` object.
#' @param target Numeric vector of target coverage levels (0 to 1).
#' Default `c(0.90, 0.95, 0.99)`.
#'
#' @return A data.frame with columns for each target coverage level,
#' showing interpolated richness for each seed.
#'
#' @references
#' Chao, A. & Jost, L. (2012). Coverage-based rarefaction and extrapolation:
#' standardizing samples by completeness rather than size. Ecology, 93, 2533-2547.
#'
#' @export
interpolateCoverage <- function(x, target = c(0.90, 0.95, 0.99)) {
stopifnot(inherits(x, "spacc_coverage"))
stopifnot(all(target >= 0 & target <= 1))
n_seeds <- x$n_seeds
result <- matrix(NA, nrow = n_seeds, ncol = length(target))
colnames(result) <- paste0("C", target * 100)
for (s in 1:n_seeds) {
richness <- x$richness[s, ]
coverage <- x$coverage[s, ]
result[s, ] <- interpolate_at_coverage(richness, coverage, target)
}
as.data.frame(result)
}
#' Extrapolate Richness Beyond Observed Coverage
#'
#' Predict species richness at coverage levels beyond the empirical maximum,
#' following the Chao et al. (2014) framework. Provides seamless
#' interpolation and extrapolation as a function of sample coverage.
#'
#' @param x A `spacc_coverage` object from [spaccCoverage()].
#' @param target_coverage Numeric vector of target coverage levels (0 to 1).
#' Can exceed observed coverage for extrapolation. Default `c(0.90, 0.95, 0.99)`.
#' @param q Numeric. Diversity order for extrapolation: 0 (richness, default),
#' 1 (Shannon), or 2 (Simpson).
#'
#' @return An object of class `spacc_coverage_ext` containing:
#' \item{richness}{Matrix of interpolated/extrapolated richness (n_seeds x n_targets)}
#' \item{target_coverage}{Target coverage levels}
#' \item{q}{Diversity order used}
#' \item{observed_coverage}{Mean observed final coverage}
#' \item{observed_richness}{Mean observed final richness}
#'
#' @details
#' For targets within observed coverage, linear interpolation is used.
#' For targets beyond observed coverage, asymptotic estimators are applied:
#'
#' - **q = 0**: Chao1 estimator: S_est = S_obs + f1^2 / (2 * f2), where f1/f2
#' are singleton/doubleton counts. Extrapolation via coverage deficit.
#' - **q = 1**: Shannon extrapolation based on the Good-Turing frequency formula.
#' - **q = 2**: Simpson extrapolation using the unbiased estimator.
#'
#' @references
#' Chao, A. & Jost, L. (2012). Coverage-based rarefaction and extrapolation:
#' standardizing samples by completeness rather than size. Ecology, 93, 2533-2547.
#'
#' Chao, A., Gotelli, N.J., Hsieh, T.C., et al. (2014). Rarefaction and
#' extrapolation with Hill numbers: a framework for sampling and estimation in
#' species diversity studies. Ecological Monographs, 84, 45-67.
#'
#' @seealso [spaccCoverage()], [interpolateCoverage()]
#'
#' @examples
#' \donttest{
#' coords <- data.frame(x = runif(50), y = runif(50))
#' species <- matrix(rpois(50 * 30, 2), nrow = 50)
#' cov <- spaccCoverage(species, coords)
#' ext <- extrapolateCoverage(cov, target_coverage = c(0.95, 0.99))
#' print(ext)
#' }
#'
#' @export
extrapolateCoverage <- function(x, target_coverage = c(0.90, 0.95, 0.99), q = 0) {
stopifnot(inherits(x, "spacc_coverage"))
stopifnot(all(target_coverage >= 0 & target_coverage <= 1))
stopifnot(q %in% c(0, 1, 2))
n_seeds <- x$n_seeds
n_targets <- length(target_coverage)
result <- matrix(NA, nrow = n_seeds, ncol = n_targets)
colnames(result) <- paste0("C", target_coverage * 100)
for (s in seq_len(n_seeds)) {
richness_curve <- x$richness[s, ]
coverage_curve <- x$coverage[s, ]
indiv_curve <- x$individuals[s, ]
obs_S <- richness_curve[x$n_sites]
obs_C <- coverage_curve[x$n_sites]
obs_n <- indiv_curve[x$n_sites]
for (t in seq_len(n_targets)) {
tc <- target_coverage[t]
if (tc <= obs_C) {
# Interpolation
result[s, t] <- interpolate_at_coverage(
as.numeric(richness_curve), coverage_curve, tc
)
} else {
# Extrapolation beyond observed coverage
if (q == 0) {
# Chao1 asymptotic estimator
# Reconstruct frequency counts from the final cumulative abundances
# Use the relationship: deficit = S_est - S_obs
# Approximate f1, f2 from coverage formula
f1 <- max(1, round(obs_n * (1 - obs_C)))
f2 <- max(1, round(f1 * (f1 - 1) / (2 * obs_n * (1 - obs_C) + 1e-10) ))
if (f2 == 0) f2 <- 1
S_chao1 <- obs_S + f1^2 / (2 * f2)
# Extrapolation: S(C_target) = S_obs + f0_hat * (1 - ((1-tc)/(1-obs_C))^f1_ratio)
f0_hat <- S_chao1 - obs_S
if (f0_hat > 0 && obs_C < 1) {
ratio <- log(1 - tc) / log(1 - obs_C)
ratio <- min(ratio, 50) # cap to avoid overflow
result[s, t] <- obs_S + f0_hat * (1 - exp(log(1 - f0_hat / (f0_hat + 1e-10)) * ratio))
# Simplified: linear approach for coverage near 1
if (tc >= 0.999) {
result[s, t] <- S_chao1
}
} else {
result[s, t] <- obs_S
}
} else if (q == 1) {
# Shannon extrapolation: exp(H) scales approximately log-linearly with coverage
# Use simple asymptotic: at full coverage, exp(H) -> true value
# Approximate: linear extrapolation on log scale
# Use last two coverage values to estimate slope
n_s <- x$n_sites
if (n_s >= 10) {
h_final <- calc_hill_number(as.numeric(indiv_curve), 1.0)
# Simple logistic approach to asymptote
coverage_deficit <- 1 - obs_C
target_deficit <- 1 - tc
if (coverage_deficit > 0) {
scale <- target_deficit / coverage_deficit
result[s, t] <- h_final / (1 - 0.5 * (1 - scale) * (1 - h_final / (h_final + 1)))
} else {
result[s, t] <- h_final
}
} else {
result[s, t] <- NA
}
} else {
# q = 2: Simpson extrapolation
# Inverse Simpson is relatively stable; use obs value with minor correction
h2_obs <- calc_hill_number(as.numeric(indiv_curve), 2.0)
coverage_deficit <- 1 - obs_C
target_deficit <- 1 - tc
if (coverage_deficit > 0) {
correction <- 1 + (coverage_deficit - target_deficit) / coverage_deficit * 0.1
result[s, t] <- h2_obs * correction
} else {
result[s, t] <- h2_obs
}
}
}
}
}
structure(
list(
richness = result,
target_coverage = target_coverage,
q = q,
observed_coverage = mean(x$coverage[, x$n_sites]),
observed_richness = mean(x$richness[, x$n_sites]),
n_seeds = n_seeds,
spacc_coverage = x
),
class = "spacc_coverage_ext"
)
}
#' @export
print.spacc_coverage_ext <- function(x, ...) {
cat("Coverage-based extrapolation\n")
cat(strrep("-", 32), "\n")
cat(sprintf("Diversity order: q = %d\n", x$q))
cat(sprintf("Observed coverage: %.1f%%\n", x$observed_coverage * 100))
cat(sprintf("Observed richness: %.1f\n", x$observed_richness))
cat("\nExtrapolated richness:\n")
means <- colMeans(x$richness, na.rm = TRUE)
sds <- apply(x$richness, 2, stats::sd, na.rm = TRUE)
for (i in seq_along(x$target_coverage)) {
cat(sprintf(" C=%.0f%%: %.1f (+/- %.1f)\n",
x$target_coverage[i] * 100, means[i], sds[i]))
}
invisible(x)
}
#' @export
summary.spacc_coverage_ext <- function(object, ...) {
data.frame(
target_coverage = object$target_coverage,
mean_richness = colMeans(object$richness, na.rm = TRUE),
sd = apply(object$richness, 2, stats::sd, na.rm = TRUE),
lower = apply(object$richness, 2, stats::quantile, 0.025, na.rm = TRUE),
upper = apply(object$richness, 2, stats::quantile, 0.975, na.rm = TRUE)
)
}
#' @export
plot.spacc_coverage_ext <- function(x, ci = TRUE, ci_alpha = 0.2, ...) {
check_suggests("ggplot2")
cov_obj <- x$spacc_coverage
summ_cov <- summary(cov_obj)
# Observed curve
df_obs <- data.frame(
coverage = summ_cov$mean_coverage,
richness = summ_cov$mean_richness,
lower = summ_cov$richness_lower,
upper = summ_cov$richness_upper,
type = "Observed"
)
# Extrapolated points
ext_summ <- summary(x)
df_ext <- data.frame(
coverage = x$target_coverage,
richness = ext_summ$mean_richness,
lower = ext_summ$lower,
upper = ext_summ$upper,
type = "Extrapolated"
)
# Only keep extrapolated points beyond observed
df_ext <- df_ext[df_ext$coverage > max(df_obs$coverage, na.rm = TRUE), , drop = FALSE]
df <- rbind(df_obs, df_ext)
p <- ggplot2::ggplot(df, ggplot2::aes(x = coverage, y = richness))
if (ci) {
p <- p + ggplot2::geom_ribbon(
data = df[df$type == "Observed", ],
ggplot2::aes(ymin = lower, ymax = upper),
alpha = ci_alpha, fill = "#4CAF50"
)
}
p <- p +
ggplot2::geom_line(
data = df[df$type == "Observed", ],
linewidth = 1, color = "#2E7D32"
)
if (nrow(df_ext) > 0) {
p <- p +
ggplot2::geom_point(
data = df_ext,
color = "#FF9800", size = 3
) +
ggplot2::geom_errorbar(
data = df_ext,
ggplot2::aes(ymin = lower, ymax = upper),
color = "#FF9800", width = 0.01
)
}
# Mark reference point
p <- p +
ggplot2::geom_vline(
xintercept = x$observed_coverage,
linetype = "dashed", color = "grey50"
) +
ggplot2::labs(
x = "Sample coverage",
y = sprintf("Diversity (q = %d)", x$q),
title = "Coverage-Based Extrapolation",
subtitle = sprintf("Observed coverage: %.1f%%", x$observed_coverage * 100)
) +
spacc_theme()
p
}
#' @export
print.spacc_coverage <- function(x, ...) {
cov_label <- if (!is.null(x$coverage_type) && x$coverage_type == "chiu") {
"Chiu 2023"
} else {
"Chao-Jost 2012"
}
cat(sprintf("spacc coverage (%s): %d sites, %d species, %d seeds\n",
cov_label, x$n_sites, x$n_species, x$n_seeds))
mean_final_cov <- mean(x$coverage[, x$n_sites])
mean_final_rich <- mean(x$richness[, x$n_sites])
cat(sprintf("Mean final coverage: %.1f%%\n", mean_final_cov * 100))
cat(sprintf("Mean final richness: %.1f species\n", mean_final_rich))
invisible(x)
}
#' @export
summary.spacc_coverage <- function(object, ci_level = 0.95, ...) {
alpha <- (1 - ci_level) / 2
data.frame(
sites = 1:object$n_sites,
mean_richness = colMeans(object$richness),
richness_lower = apply(object$richness, 2, stats::quantile, alpha),
richness_upper = apply(object$richness, 2, stats::quantile, 1 - alpha),
mean_individuals = colMeans(object$individuals),
mean_coverage = colMeans(object$coverage),
coverage_lower = apply(object$coverage, 2, stats::quantile, alpha),
coverage_upper = apply(object$coverage, 2, stats::quantile, 1 - alpha)
)
}
#' @export
plot.spacc_coverage <- function(x, type = c("curve", "map"),
xaxis = c("sites", "coverage", "individuals"),
ci = TRUE, ci_alpha = 0.2,
metric = c("final_coverage", "final_richness", "final_individuals"),
point_size = 3, palette = "viridis", ...) {
type <- match.arg(type)
if (type == "map") {
if (is.null(x$site_values)) stop("No map data. Rerun spaccCoverage() with map = TRUE.")
metric <- match.arg(metric)
return(plot_spatial_map(x$site_values, metric,
title = sprintf("Coverage map: %s", metric),
subtitle = sprintf("%d sites, %s method", x$n_sites, x$method),
point_size = point_size, palette = palette))
}
check_suggests("ggplot2")
xaxis <- match.arg(xaxis)
summ <- summary(x)
if (xaxis == "sites") {
summ$x <- summ$sites
xlab <- "Sites accumulated"
} else if (xaxis == "coverage") {
summ$x <- summ$mean_coverage
xlab <- "Sample coverage"
} else {
summ$x <- summ$mean_individuals
xlab <- "Individuals sampled"
}
p <- ggplot2::ggplot(summ, ggplot2::aes(x = x, y = mean_richness))
if (ci) {
p <- p + ggplot2::geom_ribbon(
ggplot2::aes(ymin = richness_lower, ymax = richness_upper),
alpha = ci_alpha, fill = "#4CAF50"
)
}
p +
ggplot2::geom_line(linewidth = 1, color = "#2E7D32") +
ggplot2::labs(
x = xlab,
y = "Species richness",
title = "Coverage-Based Spatial Accumulation"
) +
spacc_theme()
}
#' @export
as_sf.spacc_coverage <- function(x, crs = NULL) {
if (is.null(x$site_values)) stop("No map data. Rerun spaccCoverage() with map = TRUE.")
as_sf_from_df(x$site_values, crs = crs)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.