Nothing
#' Apply an inclusion radius and exclusion buffer to indices
#'
#' @param data An object of class `sf` or `sfc`.
#' @param indices List of indices in each fold generated by `split_unnamed()`.
#' @param radius Numeric: points within this distance of the initially-selected
#' test points will be assigned to the assessment set. If `NULL`, no radius is
#' applied.
#' @param buffer Numeric: points within this distance of any point in the
#' test set (after `radius` is applied) will be assigned to neither the analysis
#' or assessment set. If `NULL`, no buffer is applied.
#'
#' @keywords internal
buffer_indices <- function(data, indices, radius, buffer, call = rlang::caller_env()) {
standard_checks(data, "Buffering", call)
n <- nrow(data)
# calling st_distance is a _huge_ performance hit, especially for big data,
# so we make a point of only doing it once.
#
# This winds up requiring all sorts of weird handler code,
# namely `row_ids_within_dist` and `which_within_dist`, in order to
# only calculate this matrix
#
# Using st_is_within_dist is not faster. Using st_intersects is not faster.
# I keep trying both of these, and have left this comment in vain hope it
# convinces me to stop.
distmat <- sf::st_distance(data)
# only run radius checks if radius is not NULL (to prevent NAs from >)
run_radius <- !is.null(radius)
if (run_radius && units::set_units(radius, NULL) > 0) {
# In case `radius` has no units, assume it's in the same units as `data`
if (!identical(sf::st_crs(data), sf::NA_crs_)) units(radius) <- units(distmat)
indices <- row_ids_within_dist(distmat, indices, radius)
}
# `buffer_indices` are _always_ needed
# so re-code a NULL buffer as a 0, which will buffer nothing
if (is.null(buffer)) buffer <- 0L
# In case `buffer` has no units, assume it's in the same units as `data`
if (!identical(sf::st_crs(data), sf::NA_crs_)) units(buffer) <- units(distmat)
buffer_indices <- row_ids_within_dist(distmat, indices, buffer)
purrr::map2(indices, buffer_indices, buffered_complement, n = n)
}
buffered_complement <- function(ind, buff_ind, n) {
list(
analysis = setdiff(1:n, c(ind, buff_ind)),
assessment = unique(ind)
)
}
row_ids_within_dist <- function(distmat, indices, dist) {
if (units::set_units(dist, NULL) > 0) {
# c++ won't implicitly cast, so do it in R
mode(distmat) <- "numeric"
dist <- as.numeric(dist)
purrr::map(
# indices is the output of split_unnamed
indices,
# which_within_dist returns a vector of row IDs in sequential order
#
# In order to visualize (eventually) which observations were originally
# chosen for the test set and which were inside `radius`,
# we want the new indices to be appended to the end of the original indices,
# not sorted in
#
# So here we append the new indices to the old and de-duplicate them
~ unique(c(.x, which_within_dist(distmat, as.numeric(.x), dist)))
)
} else {
# initialize to integer(0) in case buffer is <= 0:
lapply(seq_along(indices), function(x) integer(0))
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.