R/neighborhood.R

Defines functions neighborhood_HT

Documented in neighborhood_HT

#' Homogeneous neighborhood selection
#'
#' Identifies homogeneous neighbors around a given grid point using a combination
#' of the Hosking-Wallis (1993) and Anderson-Darling (1987) tests for marginal homogeneity.
#'
#' @inheritParams fdata
#' @param s0 Numeric vector of length 2: the longitude and latitude of the target grid point.
#' @param miles Logical; whether to compute distance in miles (default: FALSE).
#' @param min.neigh Minimum number of neighbors to accept (default: 5).
#' @param max.neigh Maximum number of neighbors to test (default: 20).
#' @param pr Probability threshold for quantile filtering (e.g. 0.9).
#' @param alpha Significance level for homogeneity tests.
#' @param dmax Maximum distance (in km) to consider.
#' @param which.test Integer vector specifying which test(s) to run:
#'   \itemize{
#'     \item 1 = HW test (Hosking–Wallis)
#'     \item 2 = AD test (Anderson–Darling)
#'     \item c(1, 2) = both tests
#'   }
#'
#' @return A vector of station indices considered homogeneous with the grid point.
#'
#' @references
#' Castro-Camilo, D. and Huser, R. (2020). *JASA* 115, 1037–1054.
#' Hosking, J. and Wallis, J. (1993). *Water Resour. Res.* 29, 271–281.
#' Scholz, F.W. and Stephens, M.A. (1987). *JASA* 82, 918–924.
#'
#' @examples
#' \donttest{
#' neighborhood_HT(counterfactual, coord = LonLat, s0 = c(30, 39), which.test = c(1, 2))
#' }
#'
#' @seealso [fdata()]
#' @export
#' @importFrom fields rdist.earth
neighborhood_HT <- function(data, coord, s0, miles = FALSE, min.neigh = 5, max.neigh = 20, pr = 0.9, alpha = 0.05, dmax = 150, which.test = c(1, 2)) {

  # Input check
  assert_tabular(data, "data")
  assert_tabular(coord, "coord")
  assert_no_missing(coord, "coord")

  if (!is.numeric(s0) || length(s0) != 2 || any(!is.finite(s0))) {
    stop("`s0` must be a numeric vector of length 2 (lon, lat).", call. = FALSE)
  }
  if (!all(which.test %in% c(1, 2))) {
    stop("`which.test` must be 1, 2, or c(1, 2).", call. = FALSE)
  }
  if (dmax <= 0) stop("`dmax` must be a positive number.", call. = FALSE)

  # distance matrix
  dist.mat <- rdist.earth(coord, matrix(s0, ncol = 2), miles = miles)
  distances <- cbind(1:nrow(coord), dist.mat)
  distances <- distances[order(distances[, 2]), , drop = FALSE]
  distances <- distances[distances[, 2] <= dmax, , drop = FALSE]

  i <- min(nrow(distances), max.neigh)
  if (i == 1) {
    return(1)
  }
  if (nrow(distances) == 0) {
    return(0)
  }

  # convergence test
  repeat {
    cols <- distances[1:i, 1]
    dists <- distances[1:i, 2]
    eval.asses <- assess.hom(data, cols, pr = pr, alpha = alpha,
                             rm.zeros = T, which.test = which.test)
    if (eval.asses$code == 1) {
      n.neigh <- length(cols)
      break
    }
    else {
      i <- i - 1
    }
    if (i < 2)
      break
    if (exists("n.neigh")) {
      if (n.neigh < min.neigh) {
        cols <- 0
      }
      else {
        cols <- sort(distances[1:n.neigh, 1])
      }
    }
    else {
      cols <- 0
    }
  }
  return(cols)
}

Try the eFCM package in your browser

Any scripts or data that you put into this service are public.

eFCM documentation built on Sept. 9, 2025, 5:52 p.m.