Nothing
#' @title Build Spatial Weights Matrix
#'
#' @description This function constructs spatial weights matrices (\eqn{W}) for spatial modeling. It supports various methods including Contiguity, Distance-based, and Kernel-based weights, and provides a robust fallback mechanism to automatically connect isolated areas (islands).
#'
#' @details
#' The function supports the following spatial weight construction methods:
#' \itemize{
#' \item \strong{Contiguity:} Queen, Rook, and Bishop.
#' \item \strong{Distance-based:} K-Nearest Neighbors (KNN), Inverse Distance, and Exponential.
#' \item \strong{Kernel-based:} Uniform, Gaussian, Triangular, Epanechnikov, and Quartic.
#' }
#' For distance and kernel methods, if \code{lonlat = TRUE}, spherical (great-circle) distances are calculated. For the kernel method specifically, distances are internally converted to kilometers.
#'
#' @param data An \code{sf} object (polygons/points) or a standard data frame. If a standard data frame is provided, \code{coords} must be specified.
#' @param coords An \eqn{N \times 2} matrix of coordinates. Required if \code{data} is not an \code{sf} object.
#' @param method A string indicating the spatial weight construction method. Options are \code{"contiguity"}, \code{"distance"}, or \code{"kernel"}.
#' @param contiguity A string indicating the contiguity type. Options are \code{"queen"}, \code{"rook"}, or \code{"bishop"}.
#' @param distance A string indicating the distance-based type. Options are \code{"knn"}, \code{"inverse_distance"}, or \code{"exponential"}.
#' @param k An integer specifying the number of nearest neighbors for KNN methods. Default is \code{2}.
#' @param dmax A numeric specifying the maximum distance threshold for distance-based neighbors. The unit depends on \code{lonlat} (kilometers if \code{TRUE}, native coordinate units/meters if \code{FALSE}).
#' @param power A numeric specifying the decay power for inverse distance weights. Default is \code{1}.
#' @param alpha A numeric specifying the decay parameter for exponential distance weights. Default is \code{1}.
#' @param epsilon A small numeric value to prevent division by zero in inverse distance calculation. Default is \code{1e-12}.
#' @param kernel A string indicating the type of spatial kernel. Options are \code{"uniform"}, \code{"gaussian"}, \code{"triangular"}, \code{"epanechnikov"}, or \code{"quartic"}.
#' @param bandwidth A numeric specifying the bandwidth (\eqn{h}) for kernel weights. Required if \code{method = "kernel"}. The unit depends on \code{lonlat} (kilometers if \code{TRUE}, native coordinate units/meters if \code{FALSE}).
#' @param lonlat Logical; if \code{TRUE}, coordinates are treated as Longitude/Latitude, spherical distances are calculated, and limits are in \strong{kilometers (km)}. If \code{FALSE}, coordinates are assumed to be planar (e.g., UTM), Euclidean distances are calculated, and limits are in the native unit of the coordinates (usually \strong{meters}). Default is \code{TRUE}.
#' @param style A character string specifying the spatial weights coding scheme (\code{"W"} for row-standardized or \code{"B"} for binary). Default is \code{"W"}.
#' @param zero.policy Logical; if \code{TRUE}, areas with no neighbors are allowed to have zero-weight rows. Default is \code{TRUE}.
#' @param fallback A string indicating the fallback method for isolated areas (without neighbors) when using contiguity. Options are \code{"knn"}, \code{"distance"}, or \code{"none"}. Default is \code{"knn"}.
#' @param fallback_k An integer specifying the number of neighbors for the fallback method. Default is \code{2}.
#' @param fallback_dmax A numeric specifying the maximum distance for the fallback method.
#' @param output A string specifying the format of the output. Options are \code{"all"} (returns a comprehensive list), \code{"matrix"}, \code{"listw"}, or \code{"nb"}. Default is \code{"all"}.
#'
#' @return Depending on the \code{output} argument, this function returns:
#' \itemize{
#' \item \code{"matrix"}: An \eqn{N \times N} spatial weights matrix.
#' \item \code{"listw"}: A \code{listw} object compatible with \code{spdep} functions.
#' \item \code{"nb"}: An \code{nb} (neighborhood) object.
#' \item \code{"all"}: A list containing \code{W} (matrix), \code{listw}, \code{nb}, \code{info} (method details), and \code{diag} (diagnostic metrics for isolates and fallback).
#' }
#'
#' @examples
#' # Generate random Longitude and Latitude coordinates for 10 areas
#' set.seed(123)
#' lon <- runif(10, min = 100, max = 140)
#' lat <- runif(10, min = -10, max = 10)
#' coords <- cbind(lon, lat)
#'
#' # 1. Build KNN distance-based weights (k = 2) using spherical distance
#' W_knn <- build_w(
#' data = NULL,
#' coords = coords,
#' method = "distance",
#' distance = "knn",
#' k = 2,
#' lonlat = TRUE,
#' output = "matrix"
#' )
#'
#' # View the first few rows of the matrix
#' head(W_knn)
#'
#' # 2. Build Gaussian Kernel weights using 500 km bandwidth
#' W_kernel <- build_w(
#' data = NULL,
#' coords = coords,
#' method = "kernel",
#' kernel = "gaussian",
#' bandwidth = 500,
#' lonlat = TRUE,
#' output = "matrix"
#' )
#'
#' # View the first few rows of the matrix
#' head(W_kernel)
#'
#' @import sf
#' @import spdep
#' @importFrom stats na.omit
#'
#' @export build_w
build_w <- function(
data,
coords = NULL,
method = c("contiguity", "distance", "kernel"),
contiguity = c("queen", "rook", "bishop"),
distance = c("knn", "inverse_distance", "exponential"),
k = 2,
dmax = NULL,
power = 1,
alpha = 1,
epsilon = 1e-12,
kernel = c("uniform", "gaussian", "triangular", "epanechnikov", "quartic"),
bandwidth = NULL,
lonlat = TRUE,
style = "W",
zero.policy = TRUE,
fallback = c("knn", "distance", "none"),
fallback_k = 2,
fallback_dmax = NULL,
output = c("all", "matrix", "listw", "nb")
) {
method <- match.arg(method)
contiguity <- match.arg(contiguity)
distance <- match.arg(distance)
kernel <- match.arg(kernel)
fallback <- match.arg(fallback)
output <- match.arg(output)
get_coords <- function(data, coords) {
if (!is.null(coords)) {
cm <- as.matrix(coords)
if (!is.numeric(cm) || ncol(cm) != 2) stop("coords must be numeric with 2 columns.")
return(cm)
}
if (inherits(data, "sf")) {
suppressWarnings(cc <- sf::st_coordinates(sf::st_centroid(sf::st_geometry(data))))
if (ncol(cc) < 2) stop("Failed to extract centroid coordinates from sf.")
return(as.matrix(cc[, 1:2, drop = FALSE]))
}
stop("Provide `coords` when `data` is not an sf object.")
}
is_sf <- inherits(data, "sf")
if (method == "contiguity" && !is_sf) {
stop("method='contiguity' requires `data` as sf polygons.")
}
coords_mat <- get_coords(data, coords)
if (any(is.na(coords_mat))) {
stop("Some areas have empty geometries or NA coordinates (Ghost Regions). Please clean your spatial data first.")
}
n <- nrow(coords_mat)
if (n < 2) stop("Need at least 2 areas to build W.")
k <- min(k, n - 1)
fallback_k <- min(fallback_k, n - 1)
is_looks_like_degree <- all(coords_mat[,1] >= -180 & coords_mat[,1] <= 180) &&
all(coords_mat[,2] >= -90 & coords_mat[,2] <= 90)
if (is_looks_like_degree && !lonlat) {
warning("Coordinate data looks like Latitude/Longitude (degrees), but `lonlat = FALSE` is set. Calculating Euclidean distance on degree data can produce flawed spatial weights. Consider setting `lonlat = TRUE`.")
}
if (!is_looks_like_degree && lonlat) {
warning("Coordinate data does NOT look like Latitude/Longitude (values exceed -180/180 or -90/90), but `lonlat = TRUE` is set. Calculating spherical distance on planar/projected data can produce flawed spatial weights. Consider setting `lonlat = FALSE`.")
}
nb <- NULL
lw <- NULL
W <- NULL
diag <- list()
if (method == "contiguity") {
if (contiguity == "queen") {
nb_main <- spdep::poly2nb(data, queen = TRUE)
} else if (contiguity == "rook") {
nb_main <- spdep::poly2nb(data, queen = FALSE)
} else if (contiguity == "bishop") {
nb_q <- spdep::poly2nb(data, queen = TRUE)
nb_r <- spdep::poly2nb(data, queen = FALSE)
nb_main <- spdep::diffnb(nb_q, nb_r)
}
isolates <- which(spdep::card(nb_main) == 0)
diag$isolates_before <- isolates
diag$n_isolates_before <- length(isolates)
if (length(isolates) > 0 && fallback != "none") {
if (fallback == "knn") {
knn <- spdep::knearneigh(coords_mat, k = fallback_k, longlat = lonlat)
nb_fb <- spdep::knn2nb(knn)
} else {
if (is.null(fallback_dmax)) stop("fallback_dmax must be provided when fallback='distance'.")
nb_fb <- spdep::dnearneigh(coords_mat, 0, fallback_dmax, longlat = lonlat)
}
for (i in isolates) {
nb_main[[i]] <- nb_fb[[i]]
}
}
nb <- nb_main
diag$isolates_after <- which(spdep::card(nb) == 0)
diag$n_isolates_after <- length(diag$isolates_after)
lw <- spdep::nb2listw(nb, style = style, zero.policy = zero.policy)
W <- spdep::listw2mat(lw)
}
if (method == "distance") {
if (distance == "knn") {
if (!is.numeric(k) || k < 1) stop("k must be >= 1 for knn.")
knn <- spdep::knearneigh(coords_mat, k = k, longlat = lonlat)
nb <- spdep::knn2nb(knn)
diag$isolates_after <- which(spdep::card(nb) == 0)
diag$n_isolates_after <- length(diag$isolates_after)
lw <- spdep::nb2listw(nb, style = style, zero.policy = zero.policy)
W <- spdep::listw2mat(lw)
}
if (distance %in% c("inverse_distance", "exponential")) {
if (distance == "inverse_distance" && (!is.numeric(power) || power <= 0)) stop("power must be > 0.")
if (distance == "exponential" && (!is.numeric(alpha) || alpha <= 0)) stop("alpha must be > 0.")
if (!is.null(dmax)) {
nb <- spdep::dnearneigh(coords_mat, 0, dmax, longlat = lonlat)
if (any(spdep::card(nb) == 0) && !zero.policy) {
stop("Some areas have no neighbors with current dmax. Increase dmax or use k-based distance.")
}
} else {
if (!is.numeric(k) || k < 1) stop("k must be >= 1 when dmax is NULL.")
knn <- spdep::knearneigh(coords_mat, k = k, longlat = lonlat)
nb <- spdep::knn2nb(knn)
}
dist_list <- spdep::nbdists(nb, coords_mat, longlat = lonlat)
if (distance == "inverse_distance") {
glist <- lapply(dist_list, function(d) 1 / (pmax(d, epsilon)^power))
diag$invdist_power <- power
} else if (distance == "exponential") {
glist <- lapply(dist_list, function(d) exp(-alpha * d))
diag$exp_alpha <- alpha
}
lw <- spdep::nb2listw(nb, glist = glist, style = style, zero.policy = zero.policy)
W <- spdep::listw2mat(lw)
diag$used_dmax <- !is.null(dmax)
diag$dmax <- dmax
diag$k <- k
}
}
if (method == "kernel") {
if (is.null(bandwidth) || !is.numeric(bandwidth) || bandwidth <= 0) {
stop("bandwidth must be provided and > 0 for kernel weights.")
}
pts <- sf::st_as_sf(as.data.frame(coords_mat), coords = 1:2,
crs = ifelse(lonlat, 4326, NA))
dist_mat <- as.numeric(sf::st_distance(pts))
dist_mat <- matrix(dist_mat, n, n)
if (lonlat == TRUE) {
dist_mat <- dist_mat / 1000
}
Z <- dist_mat / bandwidth
W <- matrix(0, n, n)
if (kernel == "uniform") {
W[dist_mat > 0 & dist_mat < bandwidth] <- 0.5
}
else if (kernel == "triangular") {
idx <- which(dist_mat > 0 & dist_mat < bandwidth)
W[idx] <- 1 - abs(Z[idx])
}
else if (kernel == "epanechnikov") {
idx <- which(dist_mat > 0 & dist_mat < bandwidth)
W[idx] <- 0.75 * (1 - Z[idx]^2)
}
else if (kernel == "quartic") {
idx <- which(dist_mat > 0 & dist_mat < bandwidth)
W[idx] <- (15/16) * (1 - Z[idx]^2)^2
}
else if (kernel == "gaussian") {
W <- (1 / sqrt(2 * pi)) * exp(-(Z^2) / 2)
diag(W) <- 0 # No self-neighbors allowed
}
if (any(rowSums(W) == 0) && !zero.policy) {
stop("Some areas have no neighbors with current bandwidth. Increase bandwidth (h) or set zero.policy = TRUE.")
}
lw <- spdep::mat2listw(W, style = style, zero.policy = zero.policy)
W <- spdep::listw2mat(lw)
nb <- lw$neighbours
diag$kernel_bandwidth <- bandwidth
}
# Output
info <- list(
method = method,
contiguity = if (method == "contiguity") contiguity else NULL,
distance = if (method == "distance") distance else NULL,
kernel = if (method == "kernel") kernel else NULL,
style = style,
lonlat = lonlat,
fallback = if (method == "contiguity") fallback else NULL,
zero.policy= zero.policy
)
if (output == "matrix") return(W)
if (output == "listw") return(lw)
if (output == "nb") return(nb)
list(W = W, listw = lw, nb = nb, info = info, diag = diag)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.