R/region_skater.R
In spopt: Spatial Optimization for Regionalization, Facility Location, and Market Analysis

Documented in skater

#' SKATER Spatial Clustering
#'
#' Performs spatial clustering using the SKATER algorithm (Spatial 'K'luster
#' Analysis by Tree Edge Removal). The algorithm builds a minimum spanning
#' tree from the spatial contiguity graph, then iteratively removes edges
#' to create spatially contiguous clusters.
#'
#' @param data An sf object with polygon or point geometries.
#' @param attrs Character vector of column names to use for clustering
#'   (e.g., `c("var1", "var2")`). If NULL, uses all numeric columns.
#' @param n_regions Integer. Number of regions (clusters) to create.
#' @param weights Spatial weights specification. Can be:
#'   \itemize{
#'     \item `"queen"` (default): Polygons sharing any boundary point are neighbors
#'     \item `"rook"`: Polygons sharing an edge are neighbors
#'     \item An `nb` object from spdep or created with [sp_weights()]
#'     \item A list for other weight types: `list(type = "knn", k = 6)` for
#'       k-nearest neighbors, or `list(type = "distance", d = 5000)` for
#'       distance-based weights
#'   }
#' @param bridge_islands Logical. If TRUE, automatically connect disconnected
#'   components (e.g., islands) using nearest-neighbor edges. If FALSE (default),
#'   the function will error when the spatial weights graph is disconnected.
#' @param floor Optional. Column name specifying a floor constraint variable.
#' @param floor_value Numeric. Minimum sum of floor variable required per region.
#'   Only used if `floor` is specified.
#' @param scale Logical. If TRUE (default), standardize attributes before clustering.
#' @param seed Optional integer for reproducibility.
#' @param verbose Logical. Print progress messages.
#'
#' @return An sf object with a `.region` column containing cluster assignments.
#'   Metadata is stored in the "spopt" attribute.
#'
#' @examples
#' \donttest{
#' library(sf)
#' nc <- st_read(system.file("shape/nc.shp", package = "sf"))
#'
#' # Cluster into 5 regions based on SIDS rates
#' result <- skater(nc, attrs = c("SID74", "SID79"), n_regions = 5)
#'
#' # With floor constraint: each region must have at least 100,000 births
#' result <- skater(nc, attrs = c("SID74", "SID79"), n_regions = 5,
#'                  floor = "BIR74", floor_value = 100000)
#'
#' # View results
#' plot(result[".region"])
#' }
#'
#' @references
#' Assuncao, R. M., Neves, M. C., Camara, G., & Freitas, C. da C. (2006).
#' Efficient regionalization techniques for socio-economic geographical units
#' using minimum spanning trees. International Journal of Geographical
#' Information Science, 20(7), 797-811.
#'
#' @export
skater <- function(data,
                   attrs = NULL,
                   n_regions,
                   weights = "queen",
                   bridge_islands = FALSE,
                   floor = NULL,
                   floor_value = 0,
                   scale = TRUE,
                   seed = NULL,
                   verbose = FALSE) {
  # Input validation
  if (!inherits(data, "sf")) {
    stop("`data` must be an sf object", call. = FALSE)
  }

  if (!is.numeric(n_regions) || n_regions < 2) {
    stop("`n_regions` must be an integer >= 2", call. = FALSE)
  }

  # Determine which columns to check for NAs
  check_cols <- c(
    if (!is.null(attrs)) attrs else character(0),
    if (!is.null(floor)) floor else character(0)
  )

  # Validate data: remove empty geometries, check for NAs
  validated <- validate_regionalization_data(data, check_cols, call_name = "skater")
  data <- validated$data

  n <- nrow(data)
  if (n_regions >= n) {
    stop("`n_regions` must be less than number of observations", call. = FALSE)
  }

  # Extract attributes
  attr_matrix <- extract_attrs(data, attrs)

  if (scale) {
    attr_matrix <- scale(attr_matrix)
  }

  # Prepare spatial weights
  nb <- prepare_weights(data, weights, bridge_islands = bridge_islands, call_name = "skater")

  # Convert nb to sparse matrix indices
  adj <- nb_to_adj_indices(nb)

  # Handle floor constraint
  floor_var <- NULL
  if (!is.null(floor)) {
    if (!floor %in% names(data)) {
      stop(paste0("Floor variable '", floor, "' not found in data"), call. = FALSE)
    }
    floor_var <- as.numeric(data[[floor]])
  }

  # Call Rust implementation
  start_time <- Sys.time()

  labels <- rust_skater(
    attrs = attr_matrix,
    adj_i = adj$i,
    adj_j = adj$j,
    n_regions = as.integer(n_regions),
    floor_var = floor_var,
    floor_value = as.numeric(floor_value),
    seed = if (!is.null(seed)) as.integer(seed) else NULL
  )

end_time <- Sys.time()

  # Attach results to sf object
  result <- data
  result$.region <- as.character(labels)

  # Compute objective (total within-cluster SSD)
  objective <- compute_ssd(attr_matrix, labels)

  # Check if we achieved the requested number of regions
  actual_n_regions <- length(unique(labels))
  if (actual_n_regions < n_regions) {
    warning(
      "Could only create ", actual_n_regions, " regions (requested ", n_regions, "). ",
      if (!is.null(floor)) "Floor constraint may be too restrictive." else "Graph may be disconnected.",
      call. = FALSE
    )
  }

  # Attach metadata
  metadata <- list(
    algorithm = "skater",
    n_regions = actual_n_regions,
    objective = objective,
    floor = floor,
    floor_value = floor_value,
    solve_time = as.numeric(difftime(end_time, start_time, units = "secs")),
    scaled = scale
  )

  attach_spopt_metadata(result, metadata)
}

# Convert nb object to adjacency indices (internal)
nb_to_adj_indices <- function(nb) {
  n <- length(nb)
  i <- integer(0)
  j <- integer(0)

  for (idx in seq_len(n)) {
    neighbors <- nb[[idx]]
    if (length(neighbors) > 0 && neighbors[1] != 0) {
      i <- c(i, rep(idx - 1L, length(neighbors)))  # 0-based for Rust
      j <- c(j, neighbors - 1L)
    }
  }

  list(i = i, j = j)
}

# Compute total sum of squared deviations (internal)
compute_ssd <- function(attrs, labels) {
  unique_labels <- unique(labels)
  total_ssd <- 0

  for (lab in unique_labels) {
    cluster_idx <- which(labels == lab)
    if (length(cluster_idx) > 1) {
      cluster_attrs <- attrs[cluster_idx, , drop = FALSE]
      centroid <- colMeans(cluster_attrs)
      ssd <- sum(sweep(cluster_attrs, 2, centroid)^2)
      total_ssd <- total_ssd + ssd
    }
  }

  total_ssd
}

Any scripts or data that you put into this service are public.

spopt documentation built on April 22, 2026, 9:07 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

spopt
Spatial Optimization for Regionalization, Facility Location, and Market Analysis

R/region_skater.R
In spopt: Spatial Optimization for Regionalization, Facility Location, and Market Analysis

Defines functions compute_ssd nb_to_adj_indices skater

Documented in skater

Try the spopt package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

spopt Spatial Optimization for Regionalization, Facility Location, and Market Analysis

R/region_skater.R In spopt: Spatial Optimization for Regionalization, Facility Location, and Market Analysis

Defines functions compute_ssd nb_to_adj_indices skater

Documented in skater

Try the spopt package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

spopt
Spatial Optimization for Regionalization, Facility Location, and Market Analysis

R/region_skater.R
In spopt: Spatial Optimization for Regionalization, Facility Location, and Market Analysis