R/locate_mclp.R

Defines functions mclp

Documented in mclp

#' Maximum Coverage Location Problem (MCLP)
#'
#' Solves the Maximum Coverage Location Problem: maximize total weighted
#' demand covered by locating exactly p facilities.
#'
#' @param demand An sf object representing demand points.
#' @param facilities An sf object representing candidate facility locations.
#' @param service_radius Numeric. Maximum distance for coverage.
#' @param n_facilities Integer. Number of facilities to locate (p).
#' @param weight_col Character. Column name in `demand` containing demand weights.
#' @param cost_matrix Optional. Pre-computed distance matrix.
#' @param distance_metric Distance metric: "euclidean" (default) or "manhattan".
#' @param fixed_col Optional column name in `facilities` indicating which
#'   facilities are pre-selected. The column should be logical (`TRUE` for
#'   fixed) or character (`"required"`/`"candidate"`). Fixed facilities are
#'   always selected; the solver optimizes the remaining slots. Useful for
#'   expansion planning where some facilities already exist.
#' @param verbose Logical. Print solver progress.
#'
#' @return A list with two sf objects:
#'   \itemize{
#'     \item `$demand`: Original demand sf with `.covered` and `.facility` columns
#'     \item `$facilities`: Original facilities sf with `.selected` column
#'   }
#'   Metadata is stored in the "spopt" attribute.
#'
#' @details
#' The MCLP maximizes the total weighted demand covered by locating exactly p
#' facilities. Unlike [lscp()], which requires full coverage, MCLP accepts
#' partial coverage and optimizes for the best possible outcome given a fixed
#' budget (number of facilities).
#'
#' The integer programming formulation is:
#' \deqn{\max \sum_i w_i z_i}
#' Subject to:
#' \deqn{\sum_j y_j = p}
#' \deqn{z_i \leq \sum_j a_{ij} y_j \quad \forall i}
#' \deqn{y_j, z_i \in \{0,1\}}
#'
#' Where \eqn{w_i} is the weight (demand) at location i, \eqn{y_j = 1} if
#' facility j is selected, \eqn{z_i = 1} if demand i is covered, and
#' \eqn{a_{ij} = 1} if facility j can cover demand i.
#'
#' @section Use Cases:
#' MCLP is appropriate when you have a fixed budget or capacity constraint:
#' \itemize{
#'   \item **Healthcare access**: Locating p clinics to maximize the population
#'     within a 30-minute drive, given limited funding
#'   \item **Retail site selection**: Choosing p store locations to maximize
#'     the number of potential customers within a trade area
#'   \item **Emergency services**: Placing p ambulance stations to maximize
#'     the population reachable within an 8-minute response time
#'   \item **Conservation**: Selecting p reserve sites to maximize the number
#'     of species or habitat area protected
#'   \item **Telecommunications**: Locating p cell towers to maximize population
#'     coverage when full coverage is not economically feasible
#' }
#'
#' For situations where complete coverage is required, use [lscp()] to find
#' the minimum number of facilities needed.
#'
#' @examples
#' \donttest{
#' library(sf)
#'
#' # Create demand with weights
#' demand <- st_as_sf(data.frame(
#'   x = runif(50), y = runif(50), population = rpois(50, 100)
#' ), coords = c("x", "y"))
#' facilities <- st_as_sf(data.frame(x = runif(10), y = runif(10)), coords = c("x", "y"))
#'
#' # Maximize population coverage with 3 facilities
#' result <- mclp(demand, facilities, service_radius = 0.3,
#'                n_facilities = 3, weight_col = "population")
#'
#' attr(result, "spopt")$coverage_pct
#' }
#'
#' @references
#' Church, R., & ReVelle, C. (1974). The Maximal Covering Location Problem.
#' Papers in Regional Science, 32(1), 101-118. \doi{10.1007/BF01942293}
#'
#' @seealso [lscp()] for finding the minimum facilities needed for complete coverage
#'
#' @export
mclp <- function(demand,
                 facilities,
                 service_radius,
                 n_facilities,
                 weight_col,
                 cost_matrix = NULL,
                 distance_metric = "euclidean",
                 fixed_col = NULL,
                 verbose = FALSE) {
  # Input validation
  if (!inherits(demand, "sf")) {
    stop("`demand` must be an sf object", call. = FALSE)
  }
  if (!inherits(facilities, "sf")) {
    stop("`facilities` must be an sf object", call. = FALSE)
  }
  if (!weight_col %in% names(demand)) {
    stop(paste0("Weight column '", weight_col, "' not found in demand"), call. = FALSE)
  }

  weights <- as.numeric(demand[[weight_col]])
  if (any(is.na(weights))) {
    stop("Weight column contains NA values", call. = FALSE)
  }

  # Compute distance matrix if not provided
  if (is.null(cost_matrix)) {
    cost_matrix <- distance_matrix(demand, facilities, type = distance_metric)
  }

  # Validate cost matrix for NA/Inf values
  if (any(is.na(cost_matrix))) {
    n_na <- sum(is.na(cost_matrix))
    warning(sprintf(
      "cost_matrix contains %d NA values (unreachable points). Replacing with large value.",
      n_na
    ))
    max_cost <- max(cost_matrix, na.rm = TRUE)
    cost_matrix[is.na(cost_matrix)] <- max_cost * 100
  }
  if (any(is.infinite(cost_matrix))) {
    n_inf <- sum(is.infinite(cost_matrix))
    warning(sprintf(
      "cost_matrix contains %d Inf values. Replacing with large value.",
      n_inf
    ))
    finite_max <- max(cost_matrix[is.finite(cost_matrix)])
    cost_matrix[is.infinite(cost_matrix)] <- finite_max * 100
  }

  n_demand <- nrow(demand)
  n_fac <- nrow(facilities)

  if (n_facilities > n_fac) {
    stop("`n_facilities` cannot exceed number of candidate facilities", call. = FALSE)
  }

  fixed_facilities <- resolve_fixed_facilities(fixed_col, facilities, n_facilities)

  start_time <- Sys.time()

  result <- .solve_mclp(cost_matrix, weights, service_radius, n_facilities,
                        fixed_facilities)

  end_time <- Sys.time()

  # Build result sf objects
  demand_result <- demand
  facilities_result <- facilities

  selected_indices <- result$selected

  # Determine coverage
  covered <- rep(FALSE, n_demand)
  for (i in seq_len(n_demand)) {
    for (j in selected_indices) {
      if (cost_matrix[i, j] <= service_radius) {
        covered[i] <- TRUE
        break
      }
    }
  }

  demand_result$.covered <- covered
  demand_result$.facility <- NA_integer_

  for (i in seq_len(n_demand)) {
    if (covered[i]) {
      dists <- cost_matrix[i, selected_indices]
      demand_result$.facility[i] <- selected_indices[which.min(dists)]
    }
  }

  facilities_result$.selected <- seq_len(n_fac) %in% selected_indices
  facilities_result$.fixed <- if (!is.null(fixed_facilities)) seq_len(n_fac) %in% fixed_facilities else rep(FALSE, n_fac)
  facilities_result$.n_assigned <- 0L

  for (j in selected_indices) {
    facilities_result$.n_assigned[j] <- sum(demand_result$.facility == j, na.rm = TRUE)
  }

  output <- list(
    demand = demand_result,
    facilities = facilities_result
  )

  metadata <- list(
    algorithm = "mclp",
    n_selected = result$n_selected,
    objective = result$objective,
    service_radius = service_radius,
    n_facilities = n_facilities,
    covered_weight = result$covered_weight,
    total_weight = result$total_weight,
    coverage_pct = result$coverage_pct,
    n_fixed = length(fixed_facilities),
    fixed_col = fixed_col,
    fixed_facilities = fixed_facilities,
    solve_time = as.numeric(difftime(end_time, start_time, units = "secs"))
  )

  attr(output, "spopt") <- metadata
  class(output) <- c("spopt_mclp", "spopt_locate", "list")

  output
}

Try the spopt package in your browser

Any scripts or data that you put into this service are public.

spopt documentation built on April 22, 2026, 9:07 a.m.