R/spacc.R

Defines functions resolve_cores .build_orders plot.spacc_decay print.spacc_decay distanceDecay plot.spacc_wavefront summary.spacc_wavefront print.spacc_wavefront wavefront spacc

Documented in distanceDecay spacc wavefront

#' Spatial Species Accumulation Curves
#'
#' Compute species accumulation curves using various spatial sampling methods
#' with C++ backend for performance.
#'
#' @param x A site-by-species matrix (rows = sites, cols = species) with
#'   presence/absence (0/1) or abundance data. Can also be a data.frame.
#' @param coords Site coordinates. Can be:
#'   - A data.frame with columns `x` and `y`
#'   - An `sf` object with point geometries (CRS is preserved)
#'   - A `spacc_dist` object from [distances()]
#' @param n_seeds Integer. Number of random starting points for uncertainty
#'   quantification. Default 50.
#' @param method Character. Accumulation method:
#'   - `"knn"`: k-Nearest Neighbor (always visit closest unvisited)
#'   - `"kncn"`: k-Nearest Centroid Neighbor (visit closest to centroid)
#'   - `"random"`: Random order (null model)
#'   - `"radius"`: Expand by distance from seed
#'   - `"gaussian"`: Probabilistic selection weighted by distance
#'   - `"cone"`: Directional expansion within angular constraint
#'   - `"collector"`: Sites in data order (no randomization, single curve)
#' @param order Optional user-defined accumulation order(s). When supplied,
#'   `method` is ignored and sites are accumulated in the given sequence(s)
#'   (no distance computation or seed sampling). Can be:
#'   - A numeric vector of length `nrow(x)`: a single ordering (one curve).
#'   - A list of such vectors, or a matrix with one ordering per row: each
#'     produces a curve, treated like seeds for uncertainty quantification.
#'   Each ordering must be a permutation of `seq_len(nrow(x))`. Cannot be
#'   combined with `support` or `time`. Default `NULL`.
#' @param distance Character. Distance method: `"euclidean"` or `"haversine"`.
#' @param support Optional. Spatial support for core/halo classification via
#'   [areaOfEffect::aoe()]. Can be:
#'   - `"auto"`: Auto-detect countries and run per-country accumulation,
#'     returning a grouped `spacc` object with one curve per country
#'   - Country name or ISO code: `"France"`, `"FR"`, `"FRA"`
#'   - Vector of countries: `c("France", "Germany")`
#'   - An `sf` polygon object
#'   - An `aoe_result` object (pre-computed)
#'   When provided, seeds are sampled only from "core" sites (inside support),
#'   while accumulation can expand into "halo" sites (buffer zone).
#' @param include_halo Logical. When `support` is provided, should halo sites
#'   be included in accumulation? Default `TRUE` (ecological boundary).
#'   Set to `FALSE` for political/hard boundary.
#' @param backend Character. Nearest-neighbor backend for `knn` and `kncn`:
#'   - `"auto"` (default): Uses exact (brute-force) for <=500 sites,
#'     spatial tree for >500 sites.
#'   - `"exact"`: Always use brute-force with precomputed distance matrix.
#'   - `"kdtree"`: Always use spatial tree. Uses k-d tree (nanoflann) for
#'     Euclidean distances and ball tree for haversine distances. Faster for
#'     large datasets, no distance matrix needed.
#' @param sigma Numeric. Bandwidth for Gaussian method. Default auto-calculated.
#' @param cone_width Numeric. Half-width in radians for cone method. Default pi/4.
#' @param parallel Logical. Use parallel processing? Default `TRUE`.
#' @param n_cores Integer. Number of cores. Default `NULL` uses `detectCores() - 1`.
#' @param progress Logical. Show progress bar? Default `TRUE`.
#' @param groups Optional. A factor, character, or integer vector of length
#'   `ncol(x)` assigning each species (column) to a group. When provided,
#'   separate accumulation curves are computed for each group using the
#'   **same spatial site ordering**, and a grouped `spacc` object is returned.
#'   Useful for comparing native vs alien species, families, or any
#'   categorical split. Default `NULL` (no grouping).
#' @param time Optional. Numeric vector of length `nrow(x)` giving a temporal
#'   coordinate for each site. When provided, a combined spatiotemporal distance
#'   matrix is computed as `w_space * d_spatial + w_time * d_temporal` and used
#'   for accumulation. Forces exact (brute-force) backend since spatial trees
#'   cannot handle composite distances. Only supported for methods that use a
#'   distance matrix: `"knn"`, `"radius"`, `"gaussian"`.
#' @param w_space Numeric. Weight for spatial distance when `time` is provided.
#'   Default 1.
#' @param w_time Numeric. Weight for temporal distance when `time` is provided.
#'   Default 1.
#' @param seed Integer. Random seed for reproducibility. Default `NULL`.
#'
#' @return When `groups = NULL`, an object of class `spacc` containing:
#'   \item{curves}{Matrix of cumulative species counts (n_seeds x n_sites)}
#'   \item{coords}{Original coordinates}
#'   \item{n_seeds}{Number of seeds used}
#'   \item{method}{Method used}
#'   \item{n_species}{Total species in dataset}
#'
#' @examples
#' \donttest{
#' coords <- data.frame(x = runif(50), y = runif(50))
#' species <- matrix(rbinom(50 * 30, 1, 0.3), nrow = 50)
#'
#' # Basic usage
#' sac <- spacc(species, coords)
#' plot(sac)
#'
#' # Different methods
#' sac_knn <- spacc(species, coords, method = "knn")
#' sac_rand <- spacc(species, coords, method = "random")
#' comp <- compare(sac_knn, sac_rand)
#' }
#'
#' @references
#' Arrhenius, O. (1921). Species and area. Journal of Ecology, 9, 95-99.
#'
#' Scheiner, S.M. (2003). Six types of species-area curves. Global Ecology
#' and Biogeography, 12, 441-447.
#'
#' Chiarucci, A., Bacaro, G., Scheiner, S.M. (2011). Old and new challenges
#' in using species diversity for assessing biodiversity. Philosophical
#' Transactions of the Royal Society B, 366, 2426-2437.
#'
#' @export
spacc <- function(x,
                  coords,
                  n_seeds = 50L,
                  method = c("knn", "kncn", "random", "radius", "gaussian", "cone", "collector"),
                  distance = c("euclidean", "haversine"),
                  backend = c("auto", "exact", "kdtree"),
                  support = NULL,
                  include_halo = TRUE,
                  sigma = NULL,
                  cone_width = pi / 4,
                  parallel = TRUE,
                  n_cores = NULL,
                  progress = TRUE,
                  groups = NULL,
                  time = NULL,
                  w_space = 1,
                  w_time = 1,
                  seed = NULL,
                  order = NULL) {

  method <- match.arg(method)
  distance <- match.arg(distance)
  backend <- match.arg(backend)

  # Set RNG seed if provided
  if (!is.null(seed)) set.seed(seed)

  # Resolve cores
  n_cores <- resolve_cores(n_cores, parallel)

  # Input validation
  x <- as.matrix(x)
  if (!is.numeric(x)) {
    stop("Species matrix must be numeric (presence/absence or abundance data)")
  }

  # Handle coords: data.frame, sf, or spacc_dist
  coords_crs <- NULL
  if (inherits(coords, "spacc_dist")) {
    dist_mat <- as.matrix(coords)
    coord_data <- attr(coords, "coords")
  } else if (inherits(coords, "sf")) {
    check_suggests("sf")
    coords_crs <- sf::st_crs(coords)
    coord_mat <- sf::st_coordinates(coords)
    coord_data <- data.frame(x = coord_mat[, 1], y = coord_mat[, 2])
    dist_mat <- NULL
  } else {
    stopifnot(
      "coords must have x and y columns" = all(c("x", "y") %in% names(coords))
    )
    coord_data <- coords
    dist_mat <- NULL
  }

  stopifnot(
    "x and coords must have same number of rows" = nrow(x) == nrow(coord_data),
    "n_seeds must be positive integer" = n_seeds > 0
  )

  if (!is.null(order)) {
    if (!is.null(support)) {
      stop("`order` cannot be combined with `support`; user-defined orderings index the full dataset.", call. = FALSE)
    }
    if (!is.null(time)) {
      stop("`order` cannot be combined with `time`; user-defined orderings already fix the accumulation sequence.", call. = FALSE)
    }
  }

  # Handle groups: split species by group and recurse
  if (!is.null(groups)) {
    groups <- as.character(groups)
    stopifnot(
      "groups must have length equal to ncol(x)" = length(groups) == ncol(x)
    )
    group_levels <- unique(groups)
    if (progress) cli_info(sprintf("Running grouped accumulation (%d groups: %s)",
                                    length(group_levels), paste(group_levels, collapse = ", ")))
    objects <- lapply(group_levels, function(g) {
      cols <- which(groups == g)
      spacc(x[, cols, drop = FALSE], coords,
            n_seeds = n_seeds, method = method, distance = distance,
            backend = backend, support = support, include_halo = include_halo,
            sigma = sigma, cone_width = cone_width,
            parallel = parallel, n_cores = n_cores,
            progress = FALSE, groups = NULL,
            time = time, w_space = w_space, w_time = w_time,
            seed = seed, order = order)
    })
    names(objects) <- group_levels

    # Check if recursive calls returned grouped objects (e.g., from support = "auto")
    # If so, flatten to compound names: "country.group"
    base <- objects[[1]]
    if (is_grouped(base)) {
      # Flatten nested grouped objects into compound names
      all_curves <- list()
      all_n_species <- list()
      for (gname in group_levels) {
        obj <- objects[[gname]]
        for (subname in obj$group_names) {
          compound <- paste(subname, gname, sep = ".")
          all_curves[[compound]] <- obj$curves[[subname]]
          all_n_species[[compound]] <- obj$n_species[[subname]]
        }
      }
      return(structure(
        list(
          curves = all_curves,
          group_names = names(all_curves),
          coords = base$coords,
          n_seeds = base$n_seeds,
          n_sites = base$n_sites,
          n_species = all_n_species,
          method = base$method,
          distance = base$distance,
          backend = base$backend,
          sigma = base$sigma,
          cone_width = base$cone_width,
          time = base$time,
          w_space = base$w_space,
          w_time = base$w_time,
          support = base$support,
          call = match.call()
        ),
        class = "spacc"
      ))
    }

    # Build grouped spacc directly (simple case: no nested grouping)
    return(structure(
      list(
        curves = stats::setNames(lapply(objects, `[[`, "curves"), group_levels),
        group_names = group_levels,
        coords = base$coords,
        n_seeds = base$n_seeds,
        n_sites = base$n_sites,
        n_species = stats::setNames(lapply(objects, `[[`, "n_species"), group_levels),
        method = base$method,
        distance = base$distance,
        backend = base$backend,
        sigma = base$sigma,
        cone_width = base$cone_width,
        time = base$time,
        w_space = base$w_space,
        w_time = base$w_time,
        support = base$support,
        call = match.call()
      ),
      class = "spacc"
    ))
  }

  # Handle support = "auto": split by country
  if (is.character(support) && length(support) == 1 && support == "auto") {
    check_suggests("sf")
    check_suggests("areaOfEffect")

    if (progress) cli_info("Auto-detecting countries from coordinates")

    coords_sf <- sf::st_as_sf(coord_data, coords = c("x", "y"), crs = 4326)
    aoe_result <- areaOfEffect::aoe(coords_sf)

    # Map support_id back to country names
    aoe_countries <- areaOfEffect::countries
    sid_to_name <- stats::setNames(aoe_countries$name, rownames(aoe_countries))
    unique_sids <- unique(aoe_result$support_id)
    country_names <- sid_to_name[unique_sids]

    if (progress) cli_info(sprintf("Found %d countries: %s",
      length(country_names), paste(country_names, collapse = ", ")))

    # Run spacc per country
    objects <- lapply(seq_along(unique_sids), function(i) {
      spacc(x, coords,
            n_seeds = n_seeds, method = method, distance = distance,
            backend = backend, support = country_names[i],
            include_halo = include_halo,
            sigma = sigma, cone_width = cone_width,
            parallel = parallel, n_cores = n_cores,
            progress = FALSE, groups = groups,
            time = time, w_space = w_space, w_time = w_time,
            seed = seed)
    })
    names(objects) <- country_names

    # Handle compound grouping (countries x species groups)
    has_groups <- !is.null(groups)
    if (has_groups) {
      # Flatten nested grouped objects into compound names
      all_curves <- list()
      all_n_species <- list()
      for (cname in country_names) {
        obj <- objects[[cname]]
        for (gname in obj$group_names) {
          compound <- paste(cname, gname, sep = ".")
          all_curves[[compound]] <- obj$curves[[gname]]
          all_n_species[[compound]] <- obj$n_species[[gname]]
        }
      }
      group_labels <- names(all_curves)
    } else {
      all_curves <- stats::setNames(lapply(objects, `[[`, "curves"), country_names)
      all_n_species <- stats::setNames(lapply(objects, `[[`, "n_species"), country_names)
      group_labels <- country_names
    }

    base <- objects[[1]]
    return(structure(
      list(
        curves = all_curves,
        group_names = group_labels,
        coords = coord_data,
        n_seeds = n_seeds,
        n_sites = nrow(x),
        n_species = all_n_species,
        method = base$method,
        distance = base$distance,
        backend = base$backend,
        sigma = base$sigma,
        cone_width = base$cone_width,
        time = time,
        w_space = if (!is.null(time)) w_space else NULL,
        w_time = if (!is.null(time)) w_time else NULL,
        support = list(auto = TRUE, countries = country_names, aoe_result = aoe_result),
        call = match.call()
      ),
      class = "spacc"
    ))
  }

  # Handle areaOfEffect support
  aoe_result <- NULL
  core_indices <- NULL
  original_indices <- seq_len(nrow(x))

  if (!is.null(support)) {
    check_suggests("sf")
    check_suggests("areaOfEffect")

    if (progress) cli_info("Processing spatial support via areaOfEffect")

    # Convert coords to sf, preserving CRS if provided
    # When using haversine distance, assume WGS84 (CRS 4326) if no CRS is set
    if (!is.null(coords_crs) && !is.na(coords_crs)) {
      coords_sf <- sf::st_as_sf(coord_data, coords = c("x", "y"), crs = coords_crs)
    } else if (distance == "haversine") {
      coords_sf <- sf::st_as_sf(coord_data, coords = c("x", "y"), crs = 4326)
    } else {
      coords_sf <- sf::st_as_sf(coord_data, coords = c("x", "y"))
    }

    # Get aoe classification
    if (inherits(support, "aoe_result")) {
      aoe_result <- support
    } else {
      aoe_result <- areaOfEffect::aoe(coords_sf, support = support)
    }

    # Get indices of core and halo sites
    core_mask <- aoe_result$aoe_class == "core"
    halo_mask <- aoe_result$aoe_class == "halo"

    # Map back to original indices (aoe may have pruned some points)
    point_ids <- as.integer(aoe_result$point_id)
    core_indices <- point_ids[core_mask]
    halo_indices <- point_ids[halo_mask]

    if (include_halo) {
      # Keep core + halo sites
      keep_indices <- c(core_indices, halo_indices)
    } else {
      # Keep core sites only (hard boundary)
      keep_indices <- core_indices
    }

    # Filter data to kept sites
    x <- x[keep_indices, , drop = FALSE]
    coord_data <- coord_data[keep_indices, , drop = FALSE]
    original_indices <- keep_indices

    # Update core_indices to be relative to filtered data
    core_indices <- which(keep_indices %in% core_indices)

    if (progress) {
      cli_info(sprintf("Support: %d core, %d halo sites (using %d total)",
                        sum(core_mask), sum(halo_mask), length(keep_indices)))
    }

    # Warn if core/halo are unbalanced
    n_core <- sum(core_mask)
    n_halo <- sum(halo_mask)
    if (n_core > 0 && n_halo > 0) {
      ratio <- max(n_core, n_halo) / min(n_core, n_halo)
      if (ratio > 3) {
        warning(sprintf(
          "Unbalanced core/halo: %d core vs %d halo (ratio %.1f:1). Consider adjusting support scale.",
          n_core, n_halo, ratio
        ), call. = FALSE)
      }
    } else if (n_core == 0) {
      stop("No core sites found. Check that your points intersect the support polygon.", call. = FALSE)
    } else if (n_halo == 0 && include_halo) {
      if (progress) cli_info("No halo sites found. All points are inside the support.")
    }
  }

  n_sites <- nrow(x)
  n_species_total <- ncol(x)

  # Handle spatiotemporal distance
  if (!is.null(time)) {
    stopifnot(
      "time must have length equal to nrow(x)" = length(time) == n_sites,
      "time must be numeric" = is.numeric(time),
      "w_space must be a positive number" = is.numeric(w_space) && length(w_space) == 1 && w_space > 0,
      "w_time must be a positive number" = is.numeric(w_time) && length(w_time) == 1 && w_time > 0
    )
    if (!method %in% c("knn", "radius", "gaussian")) {
      stop(sprintf("Spatiotemporal accumulation (time argument) is only supported for methods 'knn', 'radius', and 'gaussian', not '%s'.", method),
           call. = FALSE)
    }
    if (progress) cli_info("Computing spatiotemporal distance matrix")
    d_space <- cpp_distance_matrix(coord_data$x, coord_data$y, distance)
    d_time <- abs(outer(time, time, `-`))
    dist_mat <- w_space * d_space + w_time * d_time
  }

  # Convert to presence/absence if abundance
  species_pa <- (x > 0) * 1L
  storage.mode(species_pa) <- "integer"

  # Resolve backend for knn/kncn
  use_kdtree <- FALSE
  if (method %in% c("knn", "kncn")) {
    if (!is.null(time)) {
      # Spatiotemporal: force exact backend (kdtree can't handle composite distances)
      use_kdtree <- FALSE
    } else if (backend == "auto") {
      use_kdtree <- n_sites > 500L
    } else {
      use_kdtree <- backend == "kdtree"
    }
  }

  # Collector method: no simulation needed
  backend_label <- NA_character_
  if (!is.null(order)) {
    # User-defined accumulation order(s): no distances or seeds needed
    orders_mat <- .build_orders(order, n_sites)
    if (progress) cli_info(sprintf("Running user-defined accumulation (%d ordering%s)",
                                    nrow(orders_mat), if (nrow(orders_mat) == 1L) "" else "s"))
    curves <- cpp_order_parallel(species_pa, orders_mat, n_cores, progress)
    n_seeds <- nrow(orders_mat)
    method <- "user"
  } else if (method == "collector") {
    curve <- cpp_collector_single(species_pa)
    curves <- matrix(curve, nrow = 1)
    n_seeds <- 1L
  } else {
    # Compute distance matrix if needed (exact knn, radius, gaussian)
    needs_dist <- (!use_kdtree && method == "knn") || method %in% c("radius", "gaussian")
    if (is.null(dist_mat) && needs_dist) {
      if (progress) cli_info(sprintf("Computing distances (%d x %d)", n_sites, n_sites))
      dist_mat <- cpp_distance_matrix(coord_data$x, coord_data$y, distance)
    }

    # Auto-calculate sigma for Gaussian method
    if (method == "gaussian" && is.null(sigma)) {
      if (is.null(dist_mat)) {
        dist_mat <- cpp_distance_matrix(coord_data$x, coord_data$y, distance)
      }
      sigma <- stats::median(dist_mat[dist_mat > 0])
    }

    # Run accumulation curves
    backend_label <- if (method %in% c("knn", "kncn")) {
      if (use_kdtree) "kdtree" else "exact"
    } else {
      NA_character_
    }
    if (method %in% c("knn", "kncn")) {
      if (progress) cli_info(sprintf("Running %s accumulation (%d seeds, %d cores, %s backend)",
                                      method, n_seeds, n_cores, backend_label))
    } else {
      if (progress) cli_info(sprintf("Running %s accumulation (%d seeds, %d cores)", method, n_seeds, n_cores))
    }

    # Sample seeds (from core sites only if support provided)
    if (!is.null(core_indices) && length(core_indices) > 0) {
      seed_pool <- core_indices - 1L
      explicit_seeds <- sample(seed_pool, n_seeds, replace = TRUE)
    } else {
      explicit_seeds <- NULL
    }

    curves <- switch(method,
      knn = if (use_kdtree) {
        if (is.null(explicit_seeds)) {
          cpp_knn_kdtree_parallel(species_pa, coord_data$x, coord_data$y, n_seeds, n_cores, progress, distance)
        } else {
          cpp_knn_kdtree_parallel_seeds(species_pa, coord_data$x, coord_data$y, explicit_seeds, n_cores, progress, distance)
        }
      } else {
        if (is.null(explicit_seeds)) {
          cpp_knn_parallel(species_pa, dist_mat, n_seeds, n_cores, progress)
        } else {
          cpp_knn_parallel_seeds(species_pa, dist_mat, explicit_seeds, n_cores, progress)
        }
      },
      kncn = if (use_kdtree) {
        cpp_kncn_kdtree_parallel(species_pa, coord_data$x, coord_data$y, n_seeds, n_cores, progress, distance)
      } else {
        cpp_kncn_parallel(species_pa, coord_data$x, coord_data$y, n_seeds, n_cores, progress)
      },
      random = cpp_random_parallel(species_pa, n_seeds, n_cores, progress),
      radius = cpp_radius_parallel(species_pa, dist_mat, n_seeds, n_cores, progress),
      gaussian = cpp_gaussian_parallel(species_pa, dist_mat, n_seeds, sigma, n_cores, progress),
      cone = cpp_cone_parallel(species_pa, coord_data$x, coord_data$y, n_seeds, cone_width, n_cores, progress)
    )
  }

  if (progress) cli_success("Done")

  structure(
    list(
      curves = curves,
      coords = coord_data,
      n_seeds = n_seeds,
      n_sites = n_sites,
      n_species = n_species_total,
      method = method,
      distance = distance,
      backend = backend_label,
      sigma = sigma,
      cone_width = if (method == "cone") cone_width else NULL,
      time = time,
      w_space = if (!is.null(time)) w_space else NULL,
      w_time = if (!is.null(time)) w_time else NULL,
      support = if (!is.null(support)) list(
        aoe_result = aoe_result,
        include_halo = include_halo,
        n_core = length(core_indices),
        n_halo = n_sites - length(core_indices),
        original_indices = original_indices
      ) else NULL,
      call = match.call()
    ),
    class = "spacc"
  )
}


#' Wavefront Expansion Accumulation
#'
#' Accumulate species within an expanding radius from seed points.
#' Models invasion spread from introduction points.
#'
#' @param x A site-by-species matrix.
#' @param coords A data.frame with x and y columns, or a `spacc_dist` object.
#' @param n_seeds Integer. Number of random starting points.
#' @param r0 Numeric. Initial radius. Default 0.
#' @param dr Numeric. Radius increment per step. Default auto-calculated.
#' @param n_steps Integer. Number of expansion steps. Default 50.
#' @param distance Character. Distance method.
#' @param progress Logical. Show progress?
#' @param seed Integer. Random seed.
#'
#' @return An object of class `spacc_wavefront` containing:
#'   \item{curves}{Matrix of species counts (n_seeds x n_steps)}
#'   \item{radius}{Vector of radius values}
#'   \item{sites_included}{Matrix of sites included at each step}
#'
#' @references
#' Shigesada, N. & Kawasaki, K. (1997). Biological Invasions: Theory and
#' Practice. Oxford University Press.
#'
#' @examples
#' \donttest{
#' coords <- data.frame(x = runif(50), y = runif(50))
#' species <- matrix(rbinom(50 * 30, 1, 0.3), nrow = 50)
#' wf <- wavefront(species, coords, n_seeds = 20, n_steps = 50)
#' plot(wf)
#' }
#'
#' @export
wavefront <- function(x,
                      coords,
                      n_seeds = 50L,
                      r0 = 0,
                      dr = NULL,
                      n_steps = 50L,
                      distance = c("euclidean", "haversine"),
                      progress = TRUE,
                      seed = NULL) {

  distance <- match.arg(distance)

  if (!is.null(seed)) set.seed(seed)

  x <- as.matrix(x)

  # Handle coords
  if (inherits(coords, "spacc_dist")) {
    dist_mat <- as.matrix(coords)
    coord_data <- attr(coords, "coords")
  } else {
    stopifnot("coords must have x and y columns" = all(c("x", "y") %in% names(coords)))
    coord_data <- coords
    dist_mat <- cpp_distance_matrix(coord_data$x, coord_data$y, distance)
  }

  n_sites <- nrow(x)
  n_species_total <- ncol(x)

  # Auto-calculate dr if not provided
  if (is.null(dr)) {
    max_dist <- max(dist_mat)
    dr <- max_dist / n_steps
  }

  # Convert to presence/absence
  species_pa <- (x > 0) * 1L
  storage.mode(species_pa) <- "integer"

  if (progress) cli_info(sprintf("Running wavefront expansion (%d seeds, %d steps)", n_seeds, n_steps))

  result <- cpp_wavefront_parallel(species_pa, dist_mat, n_seeds, r0, dr, n_steps, 1L, progress)

  if (progress) cli_success("Done")

  structure(
    list(
      curves = result$curves,
      radius = result$radius,
      sites_included = result$sites_included,
      coords = coord_data,
      n_seeds = n_seeds,
      n_sites = n_sites,
      n_species = n_species_total,
      r0 = r0,
      dr = dr,
      n_steps = n_steps,
      distance = distance,
      call = match.call()
    ),
    class = "spacc_wavefront"
  )
}


#' @export
print.spacc_wavefront <- function(x, ...) {
  cat(sprintf("spacc wavefront: %d sites, %d species, %d seeds\n",
              x$n_sites, x$n_species, x$n_seeds))
  cat(sprintf("Radius: %.2f to %.2f (%d steps)\n",
              x$r0, max(x$radius), x$n_steps))
  invisible(x)
}


#' @export
summary.spacc_wavefront <- function(object, ci_level = 0.95, ...) {
  alpha <- (1 - ci_level) / 2

  mean_curve <- colMeans(object$curves)
  mean_sites <- colMeans(object$sites_included)
  lower <- apply(object$curves, 2, stats::quantile, probs = alpha)
  upper <- apply(object$curves, 2, stats::quantile, probs = 1 - alpha)

  data.frame(
    radius = object$radius,
    mean_species = mean_curve,
    lower = lower,
    upper = upper,
    mean_sites = mean_sites
  )
}


#' @export
plot.spacc_wavefront <- function(x, ci = TRUE, ci_alpha = 0.3,
                                  xaxis = c("radius", "sites"), ...) {
  check_suggests("ggplot2")

  xaxis <- match.arg(xaxis)
  summ <- summary(x)

  if (xaxis == "radius") {
    df <- data.frame(
      x = summ$radius,
      mean = summ$mean_species,
      lower = summ$lower,
      upper = summ$upper
    )
    xlab <- "Radius"
  } else {
    df <- data.frame(
      x = summ$mean_sites,
      mean = summ$mean_species,
      lower = summ$lower,
      upper = summ$upper
    )
    xlab <- "Sites included"
  }

  p <- ggplot2::ggplot(df, ggplot2::aes(x = x, y = mean))

  if (ci) {
    p <- p + ggplot2::geom_ribbon(
      ggplot2::aes(ymin = lower, ymax = upper),
      alpha = ci_alpha, fill = "#4CAF50"
    )
  }

  p +
    ggplot2::geom_line(linewidth = 1, color = "#2E7D32") +
    ggplot2::labs(
      x = xlab,
      y = "Cumulative species",
      title = "Wavefront Species Accumulation",
      subtitle = sprintf("%d seeds, r0=%.1f, dr=%.2f", x$n_seeds, x$r0, x$dr)
    ) +
    spacc_theme()
}


#' Distance-Decay Analysis
#'
#' Analyze how species richness changes with distance from focal points.
#'
#' @param x A site-by-species matrix.
#' @param coords A data.frame with x and y columns.
#' @param n_seeds Integer. Number of focal points.
#' @param breaks Numeric vector. Distance thresholds. Default auto-calculated.
#' @param distance Character. Distance method.
#' @param progress Logical. Show progress?
#' @param seed Integer. Random seed.
#'
#' @return An object of class `spacc_decay` with distance-species relationships.
#'
#' @references
#' Nekola, J.C. & White, P.S. (1999). The distance decay of similarity in
#' biogeography and ecology. Journal of Biogeography, 26, 867-878.
#'
#' Soininen, J., McDonald, R. & Hillebrand, H. (2007). The distance decay of
#' similarity in ecological communities. Ecography, 30, 3-12.
#'
#' @export
distanceDecay <- function(x,
                           coords,
                           n_seeds = 50L,
                           breaks = NULL,
                           distance = c("euclidean", "haversine"),
                           progress = TRUE,
                           seed = NULL) {

  distance <- match.arg(distance)

  if (!is.null(seed)) set.seed(seed)

  x <- as.matrix(x)
  stopifnot("coords must have x and y columns" = all(c("x", "y") %in% names(coords)))

  n_sites <- nrow(x)

  # Compute distance matrix
  dist_mat <- cpp_distance_matrix(coords$x, coords$y, distance)

  # Auto-calculate breaks if not provided
  if (is.null(breaks)) {
    max_dist <- max(dist_mat)
    breaks <- seq(0, max_dist, length.out = 51)[-1]  # exclude 0
  }

  species_pa <- (x > 0) * 1L
  storage.mode(species_pa) <- "integer"

  if (progress) cli_info(sprintf("Computing distance-decay (%d seeds, %d distances)", n_seeds, length(breaks)))

  curves <- cpp_distance_decay_parallel(species_pa, dist_mat, n_seeds, breaks, 1L, progress)

  if (progress) cli_success("Done")

  structure(
    list(
      curves = curves,
      breaks = breaks,
      n_seeds = n_seeds,
      n_sites = n_sites,
      n_species = ncol(x),
      distance = distance,
      call = match.call()
    ),
    class = "spacc_decay"
  )
}


#' @export
print.spacc_decay <- function(x, ...) {
  cat(sprintf("spacc distance-decay: %d seeds, %d distance breaks\n",
              x$n_seeds, length(x$breaks)))
  cat(sprintf("Distance range: %.2f to %.2f\n", min(x$breaks), max(x$breaks)))
  invisible(x)
}


#' @export
plot.spacc_decay <- function(x, ci = TRUE, ci_alpha = 0.3, ...) {
  check_suggests("ggplot2")

  mean_curve <- colMeans(x$curves)
  lower <- apply(x$curves, 2, stats::quantile, 0.025)
  upper <- apply(x$curves, 2, stats::quantile, 0.975)

  df <- data.frame(
    distance = x$breaks,
    mean = mean_curve,
    lower = lower,
    upper = upper
  )

  p <- ggplot2::ggplot(df, ggplot2::aes(x = distance, y = mean))

  if (ci) {
    p <- p + ggplot2::geom_ribbon(
      ggplot2::aes(ymin = lower, ymax = upper),
      alpha = ci_alpha, fill = "#4CAF50"
    )
  }

  p +
    ggplot2::geom_line(linewidth = 1, color = "#2E7D32") +
    ggplot2::labs(
      x = "Distance from focal point",
      y = "Cumulative species",
      title = "Distance-Decay Relationship",
      subtitle = sprintf("%d focal points", x$n_seeds)
    ) +
    spacc_theme()
}


#' Validate and normalise user-supplied accumulation orders
#'
#' Accepts a single ordering vector, a list of vectors, or a matrix (one
#' ordering per row) and returns a 0-based integer matrix suitable for
#' `cpp_order_parallel()`. Each ordering must be a permutation of
#' `seq_len(n_sites)`.
#' @noRd
.build_orders <- function(order, n_sites) {
  if (is.matrix(order)) {
    orders_mat <- order
  } else if (is.list(order)) {
    lens <- vapply(order, length, integer(1))
    if (any(lens != n_sites)) {
      stop(sprintf("Each ordering in `order` must have length %d (number of sites)", n_sites),
           call. = FALSE)
    }
    orders_mat <- do.call(rbind, order)
  } else {
    orders_mat <- matrix(order, nrow = 1)
  }

  if (ncol(orders_mat) != n_sites) {
    stop(sprintf("`order` must contain %d site indices per ordering, got %d",
                 n_sites, ncol(orders_mat)), call. = FALSE)
  }

  expected <- seq_len(n_sites)
  for (r in seq_len(nrow(orders_mat))) {
    o <- orders_mat[r, ]
    if (anyNA(o) || !setequal(o, expected)) {
      stop(sprintf("Ordering %d in `order` must be a permutation of 1:%d (each site exactly once)",
                   r, n_sites), call. = FALSE)
    }
  }

  orders_mat <- orders_mat - 1L  # convert to 0-based for C++
  storage.mode(orders_mat) <- "integer"
  orders_mat
}


#' Resolve number of cores
#' @noRd
resolve_cores <- function(n_cores, parallel) {
  if (!parallel) return(1L)
  if (is.null(n_cores)) {
    n_cores <- max(1L, parallel::detectCores() - 1L)
  } else {
    n_cores <- as.integer(n_cores)
    if (n_cores < 1L) {
      stop("n_cores must be a positive integer")
    }
  }
  # R CMD check sets _R_CHECK_LIMIT_CORES_ and permits at most two cores.
  # Cap the count and pin the RcppParallel thread pool so the C++ workers,
  # which draw from the global pool, honour the limit.
  if (nzchar(Sys.getenv("_R_CHECK_LIMIT_CORES_"))) {
    n_cores <- min(n_cores, 2L)
    RcppParallel::setThreadOptions(numThreads = n_cores)
  }
  n_cores
}

Try the spacc package in your browser

Any scripts or data that you put into this service are public.

spacc documentation built on June 20, 2026, 5:07 p.m.