Nothing
#' Standardized Effect Size (SES) via Null Models
#'
#' Compute standardized effect sizes by comparing observed diversity metrics
#' against a null distribution from randomized communities. Supports multiple
#' null model algorithms and works with most spacc output classes.
#'
#' @param x A spacc output object. Supported classes: `spacc`, `spacc_hill`,
#' `spacc_phylo`, `spacc_func`, `spacc_beta`, `spacc_metrics`,
#' `spacc_alpha`, `spacc_partition`.
#' @param species A site-by-species matrix (required). The species matrix used
#' to produce `x`. Needed because spacc objects do not store the raw species
#' matrix.
#' @param coords Optional data.frame with columns `x` and `y`. Required if the
#' original analysis used coordinates. If `x` contains stored coordinates,
#' they will be used automatically.
#' @param metric Character or `NULL`. For multi-metric objects (e.g.,
#' `spacc_hill` with multiple q), specify which metric to extract. If `NULL`,
#' uses the first/default metric.
#' @param null_model Character. Null model algorithm:
#' - `"frequency"`: Shuffle species columns independently (maintains column
#' totals = species frequency)
#' - `"richness"`: Shuffle species rows independently (maintains row totals
#' = site richness)
#' - `"both"`: Independent swap algorithm maintaining both row and column
#' totals (Gotelli 2000)
#' - `"curveball"`: Curveball algorithm for efficient swap (Strona et al. 2014)
#' - `"torus"`: Toroidal shift preserving spatial autocorrelation. Shifts all
#' coordinates by a random offset and reassigns species to shifted sites.
#' Requires `coords`.
#' - `"spatial_swap"`: Independent swap restricted to spatially proximate site
#' pairs. Preserves both marginals while respecting spatial structure.
#' Requires `coords`.
#' @param n_perm Integer. Number of permutations. Default 999.
#' @param parallel Logical. Use parallel processing for the underlying analysis?
#' Default `TRUE`.
#' @param n_cores Integer. Number of cores. Default `NULL` (auto-detect).
#' @param progress Logical. Show progress? Default `TRUE`.
#' @param seed Integer. Random seed for reproducibility.
#'
#' @return An object of class `spacc_ses` containing:
#' \item{observed}{Numeric vector of observed metric values}
#' \item{null_mean}{Mean of null distribution}
#' \item{null_sd}{Standard deviation of null distribution}
#' \item{ses}{Standardized effect size: (observed - null_mean) / null_sd}
#' \item{p_value}{Two-tailed p-value}
#' \item{n_perm}{Number of permutations}
#' \item{null_model}{Null model algorithm used}
#' \item{metric}{Metric name}
#' \item{input_class}{Class of input object}
#'
#' @details
#' SES is computed as:
#' \deqn{SES = \frac{observed - \bar{null}}{sd_{null}}}
#'
#' A two-tailed p-value is calculated as the proportion of null values at
#' least as extreme as the observed value:
#' \deqn{p = \frac{2 \cdot \min(r, n_{perm} + 1 - r)}{n_{perm} + 1}}
#' where \eqn{r} is the rank of the observed value among null values.
#'
#' **Null model algorithms:**
#' - `"frequency"`: Tests whether species composition matters given observed
#' species frequencies
#' - `"richness"`: Tests whether species identity matters given observed site
#' richness
#' - `"both"`: Maintains both marginal totals; tests non-random species
#' co-occurrence patterns
#' - `"curveball"`: Efficient alternative to `"both"` with proven uniform
#' sampling properties
#'
#' @references
#' Gotelli, N.J. (2000). Null model analysis of species co-occurrence patterns.
#' Ecology, 81, 2606-2621.
#'
#' Strona, G., Nappo, D., Boccacci, F., Fattorini, S. & San-Miguel-Ayanz, J.
#' (2014). A fast and unbiased procedure to randomize ecological binary
#' matrices with fixed row and column totals. Nature Communications, 5, 4114.
#'
#' @seealso [spaccHill()], [spaccBeta()], [spaccMetrics()]
#'
#' @examples
#' \donttest{
#' coords <- data.frame(x = runif(20), y = runif(20))
#' species <- matrix(rbinom(20 * 15, 1, 0.3), nrow = 20)
#'
#' sac <- spacc(species, coords, n_seeds = 10)
#' result <- ses(sac, species, n_perm = 19)
#' print(result)
#' }
#'
#' @export
ses <- function(x, species,
coords = NULL,
metric = NULL,
null_model = c("frequency", "richness", "both", "curveball",
"torus", "spatial_swap"),
n_perm = 999L,
parallel = TRUE,
n_cores = NULL,
progress = TRUE,
seed = NULL) {
null_model <- match.arg(null_model)
if (!is.null(seed)) set.seed(seed)
species <- as.matrix(species)
input_class <- class(x)[1]
# Spatial null models require coords
spatial_nulls <- c("torus", "spatial_swap")
if (null_model %in% spatial_nulls && is.null(coords) && is.null(x$coords)) {
stop(sprintf("Null model '%s' requires coords.", null_model))
}
# Supported classes
supported <- c("spacc", "spacc_hill", "spacc_phylo", "spacc_func",
"spacc_beta", "spacc_metrics", "spacc_alpha", "spacc_partition")
if (!input_class %in% supported) {
stop(sprintf("ses() does not support class '%s'. Supported: %s",
input_class, paste(supported, collapse = ", ")))
}
# Get coords from object if not provided
if (is.null(coords) && !is.null(x$coords)) {
coords <- x$coords
}
# Extract observed values
observed <- extract_ses_metric(x, metric)
metric_name <- attr(observed, "metric_name")
if (progress) cli_info(sprintf("Running %d null model permutations (%s)", n_perm, null_model))
# Run permutations
null_values <- matrix(NA, nrow = n_perm, ncol = length(observed))
for (p in seq_len(n_perm)) {
# Randomize species matrix
null_species <- randomize_matrix(species, null_model, coords = coords)
# Re-run analysis with null species
null_result <- tryCatch(
rerun_analysis(x, null_species, coords, parallel, n_cores),
error = function(e) NULL
)
if (!is.null(null_result)) {
null_values[p, ] <- extract_ses_metric(null_result, metric)
}
}
# Remove failed permutations
valid <- !is.na(null_values[, 1])
null_values <- null_values[valid, , drop = FALSE]
n_valid <- nrow(null_values)
if (n_valid < 5) {
stop("Too few valid permutations. Check that the analysis can run on randomized data.")
}
# Compute SES
null_mean <- colMeans(null_values)
null_sd <- apply(null_values, 2, stats::sd)
ses_vals <- ifelse(null_sd > 0, (observed - null_mean) / null_sd, 0)
# Two-tailed p-value
p_value <- vapply(seq_along(observed), function(i) {
rank_obs <- sum(null_values[, i] <= observed[i]) + 1
2 * min(rank_obs, n_valid + 2 - rank_obs) / (n_valid + 1)
}, numeric(1))
if (progress) cli_success(sprintf("Done (%d valid permutations)", n_valid))
structure(
list(
observed = observed,
null_mean = null_mean,
null_sd = null_sd,
ses = ses_vals,
p_value = p_value,
n_perm = n_perm,
n_valid = n_valid,
null_model = null_model,
metric = metric_name,
input_class = input_class,
coords = coords,
call = match.call()
),
class = "spacc_ses"
)
}
# Extract the relevant metric from a spacc result object
extract_ses_metric <- function(x, metric = NULL) {
cls <- class(x)[1]
result <- switch(cls,
"spacc" = {
# Mean curve across seeds
vals <- colMeans(x$curves)
attr(vals, "metric_name") <- "richness"
vals
},
"spacc_hill" = {
# Pick first q or specified metric
q_idx <- if (!is.null(metric)) {
match(metric, paste0("q", x$q))
} else 1
if (is.na(q_idx)) q_idx <- 1
vals <- colMeans(x$curves[[q_idx]])
attr(vals, "metric_name") <- paste0("hill_q", x$q[q_idx])
vals
},
"spacc_phylo" = {
m_idx <- if (!is.null(metric)) match(metric, x$metric) else 1
if (is.na(m_idx)) m_idx <- 1
vals <- colMeans(x$curves[[m_idx]])
attr(vals, "metric_name") <- x$metric[m_idx]
vals
},
"spacc_func" = {
m_idx <- if (!is.null(metric)) match(metric, x$metric) else 1
if (is.na(m_idx)) m_idx <- 1
vals <- colMeans(x$curves[[m_idx]])
attr(vals, "metric_name") <- x$metric[m_idx]
vals
},
"spacc_beta" = {
vals <- colMeans(x$beta_total)
attr(vals, "metric_name") <- "beta_total"
vals
},
"spacc_metrics" = {
m_col <- if (!is.null(metric)) metric else x$metric_names[1]
vals <- x$metrics[[m_col]]
attr(vals, "metric_name") <- m_col
vals
},
"spacc_alpha" = {
q_col <- if (!is.null(metric)) metric else paste0("q", x$q[1])
vals <- x$values[[q_col]]
attr(vals, "metric_name") <- q_col
vals
},
"spacc_partition" = {
vals <- x$gamma
attr(vals, "metric_name") <- "gamma"
vals
},
stop(sprintf("Unsupported class: %s", cls))
)
result
}
# Re-run the same analysis with a new species matrix
rerun_analysis <- function(original, null_species, coords, parallel, n_cores) {
cls <- class(original)[1]
n_cores_val <- resolve_cores(n_cores, parallel)
switch(cls,
"spacc" = {
spacc(null_species, coords,
n_seeds = original$n_seeds,
method = original$method,
distance = original$distance,
parallel = parallel, n_cores = n_cores,
progress = FALSE)
},
"spacc_hill" = {
spaccHill(null_species, coords,
q = original$q,
n_seeds = original$n_seeds,
method = original$method,
distance = original$distance,
parallel = parallel, n_cores = n_cores,
progress = FALSE)
},
"spacc_beta" = {
spaccBeta(null_species, coords,
n_seeds = original$n_seeds,
method = original$method,
index = original$index,
parallel = parallel, n_cores = n_cores,
progress = FALSE)
},
"spacc_alpha" = {
alphaDiversity(null_species, q = original$q, coords = coords)
},
"spacc_partition" = {
diversityPartition(null_species, q = original$q, coords = coords)
},
"spacc_metrics" = {
spaccMetrics(null_species, coords,
metrics = original$metric_names,
method = original$method,
parallel = parallel, n_cores = n_cores,
progress = FALSE)
},
# For phylo/func we can't easily re-run without tree/traits
# Return NULL to skip
NULL
)
}
# Null model algorithms --------------------------------------------------
randomize_matrix <- function(m, method, coords = NULL) {
pa <- (m > 0) * 1L
switch(method,
"frequency" = {
# Shuffle each column independently
for (j in seq_len(ncol(pa))) {
pa[, j] <- sample(pa[, j])
}
pa
},
"richness" = {
# Shuffle each row independently
for (i in seq_len(nrow(pa))) {
pa[i, ] <- sample(pa[i, ])
}
pa
},
"both" = {
# Independent swap: maintain both marginals
independent_swap(pa, n_swaps = 5L * sum(pa))
},
"curveball" = {
curveball(pa)
},
"torus" = {
torus_shift(pa, coords)
},
"spatial_swap" = {
spatial_swap(pa, coords)
}
)
}
# Independent swap algorithm (Gotelli 2000)
independent_swap <- function(m, n_swaps) {
nr <- nrow(m)
nc <- ncol(m)
for (s in seq_len(n_swaps)) {
# Pick 2 random rows and 2 random columns
rows <- sample(nr, 2)
cols <- sample(nc, 2)
submat <- m[rows, cols]
# Check for 2x2 checkerboard pattern to swap
if ((submat[1, 1] == 1 && submat[2, 2] == 1 &&
submat[1, 2] == 0 && submat[2, 1] == 0) ||
(submat[1, 1] == 0 && submat[2, 2] == 0 &&
submat[1, 2] == 1 && submat[2, 1] == 1)) {
# Swap
m[rows[1], cols[1]] <- 1L - m[rows[1], cols[1]]
m[rows[2], cols[2]] <- 1L - m[rows[2], cols[2]]
m[rows[1], cols[2]] <- 1L - m[rows[1], cols[2]]
m[rows[2], cols[1]] <- 1L - m[rows[2], cols[1]]
}
}
m
}
# Curveball algorithm (Strona et al. 2014)
curveball <- function(m) {
nr <- nrow(m)
# Get column indices of 1s for each row
row_cols <- lapply(seq_len(nr), function(i) which(m[i, ] == 1L))
n_trades <- 5L * nr
for (t in seq_len(n_trades)) {
# Pick two random rows
ab <- sample(nr, 2)
a <- ab[1]
b <- ab[2]
# Find exclusive columns
only_a <- setdiff(row_cols[[a]], row_cols[[b]])
only_b <- setdiff(row_cols[[b]], row_cols[[a]])
n_a <- length(only_a)
n_b <- length(only_b)
n_trade <- min(n_a, n_b)
if (n_trade == 0) next
# Randomly select number and which columns to trade
# safe_sample avoids sample(x, n) interpreting scalar x as 1:x
n_swap <- sample.int(n_trade, 1)
swap_a <- if (n_a == 1) only_a else only_a[sample.int(n_a, n_swap)]
swap_b <- if (n_b == 1) only_b else only_b[sample.int(n_b, n_swap)]
# Trade: row a gives swap_a, receives swap_b; row b gives swap_b, receives swap_a
row_cols[[a]] <- sort(c(setdiff(row_cols[[a]], swap_a), swap_b))
row_cols[[b]] <- sort(c(setdiff(row_cols[[b]], swap_b), swap_a))
}
# Reconstruct matrix
result <- matrix(0L, nrow = nr, ncol = ncol(m))
for (i in seq_len(nr)) {
result[i, row_cols[[i]]] <- 1L
}
result
}
# Toroidal shift null model
# Shifts all coordinates by random offset, then reassigns species
# to shifted sites via nearest-neighbor mapping
torus_shift <- function(pa, coords) {
n <- nrow(pa)
x_range <- range(coords$x)
y_range <- range(coords$y)
x_width <- diff(x_range)
y_width <- diff(y_range)
# Random shift
dx <- stats::runif(1, 0, x_width)
dy <- stats::runif(1, 0, y_width)
# Shift coordinates with wrapping
new_x <- ((coords$x - x_range[1] + dx) %% x_width) + x_range[1]
new_y <- ((coords$y - y_range[1] + dy) %% y_width) + y_range[1]
# For each shifted site, find nearest original site
# Simple O(n^2) matching - fine for typical use
mapping <- integer(n)
for (i in seq_len(n)) {
dists <- (new_x[i] - coords$x)^2 + (new_y[i] - coords$y)^2
mapping[i] <- which.min(dists)
}
# Reassign rows
pa[mapping, , drop = FALSE]
}
# Spatially-constrained independent swap
# Like independent_swap but only swaps between spatially proximate sites
spatial_swap <- function(pa, coords, n_swaps = NULL) {
nr <- nrow(pa)
nc <- ncol(pa)
# Compute pairwise distances
dist_vec <- as.matrix(stats::dist(cbind(coords$x, coords$y)))
# Distance threshold: median pairwise distance
threshold <- stats::median(dist_vec[upper.tri(dist_vec)])
# Find valid pairs within threshold
valid_pairs <- which(dist_vec <= threshold & upper.tri(dist_vec), arr.ind = TRUE)
n_pairs <- nrow(valid_pairs)
if (n_pairs == 0) {
# Fallback to standard independent swap
return(independent_swap(pa, n_swaps = 5L * sum(pa)))
}
if (is.null(n_swaps)) n_swaps <- 5L * sum(pa)
for (s in seq_len(n_swaps)) {
# Pick a random proximate pair
pair_idx <- sample.int(n_pairs, 1)
rows <- valid_pairs[pair_idx, ]
cols <- sample(nc, 2)
submat <- pa[rows, cols]
# Check for checkerboard pattern to swap
if ((submat[1, 1] == 1 && submat[2, 2] == 1 &&
submat[1, 2] == 0 && submat[2, 1] == 0) ||
(submat[1, 1] == 0 && submat[2, 2] == 0 &&
submat[1, 2] == 1 && submat[2, 1] == 1)) {
pa[rows[1], cols[1]] <- 1L - pa[rows[1], cols[1]]
pa[rows[2], cols[2]] <- 1L - pa[rows[2], cols[2]]
pa[rows[1], cols[2]] <- 1L - pa[rows[1], cols[2]]
pa[rows[2], cols[1]] <- 1L - pa[rows[2], cols[1]]
}
}
pa
}
# S3 methods for spacc_ses -----------------------------------------------
#' @export
print.spacc_ses <- function(x, ...) {
cat(sprintf("Standardized Effect Size (SES)\n"))
cat(strrep("-", 36), "\n")
cat(sprintf("Input class: %s\n", x$input_class))
cat(sprintf("Metric: %s\n", x$metric))
cat(sprintf("Null model: %s (%d permutations)\n", x$null_model, x$n_valid))
cat(sprintf("SES range: [%.2f, %.2f]\n", min(x$ses), max(x$ses)))
cat(sprintf("Mean SES: %.2f\n", mean(x$ses)))
n_sig <- sum(x$p_value < 0.05)
cat(sprintf("Significant (p < 0.05): %d / %d values (%.0f%%)\n",
n_sig, length(x$p_value), 100 * n_sig / length(x$p_value)))
invisible(x)
}
#' @export
summary.spacc_ses <- function(object, ...) {
data.frame(
position = seq_along(object$observed),
observed = object$observed,
null_mean = object$null_mean,
null_sd = object$null_sd,
ses = object$ses,
p_value = object$p_value
)
}
#' @export
as.data.frame.spacc_ses <- function(x, row.names = NULL, optional = FALSE, ...) {
summary(x)
}
#' @export
as_sf.spacc_ses <- function(x, crs = NULL) {
if (is.null(x$coords)) stop("No coordinates available. ses() input had no coords.")
# Only meaningful for per-site input classes
per_site_classes <- c("spacc_metrics", "spacc_alpha", "spacc_partition")
if (!x$input_class %in% per_site_classes) {
stop(sprintf("as_sf() only meaningful for per-site SES (input class: %s). Supported: %s",
x$input_class, paste(per_site_classes, collapse = ", ")))
}
df <- data.frame(
x = x$coords$x,
y = x$coords$y,
ses = x$ses,
p_value = x$p_value,
observed = x$observed,
null_mean = x$null_mean
)
as_sf_from_df(df, crs = crs)
}
#' @export
plot.spacc_ses <- function(x, type = c("curve", "histogram", "map"),
significance = 1.96,
metric_map = c("ses", "p_value"),
point_size = 3, palette = "viridis", ...) {
type <- match.arg(type)
if (type == "map") {
if (is.null(x$coords)) stop("No coordinates available. ses() input had no coords.")
metric_map <- match.arg(metric_map)
vals <- if (metric_map == "ses") x$ses else x$p_value
df <- data.frame(x = x$coords$x, y = x$coords$y)
df[[metric_map]] <- vals
return(plot_spatial_map(df, metric_map,
title = sprintf("SES %s: %s", metric_map, x$metric),
subtitle = sprintf("%s null model, %d permutations",
x$null_model, x$n_valid),
point_size = point_size, palette = palette))
}
check_suggests("ggplot2")
if (type == "curve") {
df <- data.frame(
position = seq_along(x$ses),
ses = x$ses,
p_value = x$p_value
)
p <- ggplot2::ggplot(df, ggplot2::aes(x = .data[["position"]], y = .data[["ses"]])) +
ggplot2::geom_hline(yintercept = c(-significance, 0, significance),
linetype = c("dashed", "solid", "dashed"),
color = c("#C62828", "grey50", "#C62828")) +
ggplot2::geom_line(linewidth = 0.8, color = "#2E7D32") +
ggplot2::geom_point(
data = df[df$p_value < 0.05, ],
ggplot2::aes(x = .data[["position"]], y = .data[["ses"]]),
color = "#C62828", size = 1.5
) +
ggplot2::labs(
x = "Position",
y = "Standardized Effect Size",
title = sprintf("SES: %s (%s null model)", x$metric, x$null_model)
) +
spacc_theme()
return(p)
} else {
# Histogram of SES values
df <- data.frame(ses = x$ses)
ggplot2::ggplot(df, ggplot2::aes(x = .data[["ses"]])) +
ggplot2::geom_histogram(bins = 30, fill = "#4CAF50", color = "white", alpha = 0.8) +
ggplot2::geom_vline(xintercept = c(-significance, significance),
linetype = "dashed", color = "#C62828") +
ggplot2::geom_vline(xintercept = 0, color = "grey50") +
ggplot2::labs(
x = "Standardized Effect Size",
y = "Count",
title = sprintf("SES Distribution: %s", x$metric)
) +
spacc_theme()
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.