#' Spatially merge clusters
#'
#' `merge_clusters()` aggregates spatiotemporal clusters within a specified distance using the
#' Density-based Spatial Clustering of Applications with Noise (\code{\link[dbscan]{DBSCAN}}) algorithm.
#' After merging, the remaining clusters are not temporally unique.
#'
#' `merge_clusters()` spatially combines clusters based on the Euclidean distance between points. Because the Earth is sphere, the calculated
#' distances are not exact. See [here](http://wiki.gis.com/wiki/index.php/Decimal_degrees).
#'
#' @param df a data frame created by `circleclust()` with a `sp_temporal_cluster` column and datetime column.
#' @param dt_field POSIXct; name of datetime field.
#' @param radius numeric; distance threshold (meters) used to aggregate clusters.
#' @param minPts numeric; minimum number of points points required in each cluster.
#' @param borderPoints logical; should border points be assigned to clusters. Default = TRUE.
#' If FALSE, border points are considered noise.
#' @param keep_noise logical; should noise points be retained? Default = FALSE.
#' @param noise_threshold numeric; threshold value (%) to determine if noise points should be retained.
#' If the percentage of noise points is above this value, noise points are retained and column `noise` is
#' appended to the output data frame. Noise points are deleted otherwise. This argument is ignored if
#' 'keep_noise' is set to FALSE.
#' @return a data frame. The original spatiotemporal cluster values are retained
#' in a column called `sp_temporal_cluster`. New spatially merged cluster values are
#' listed under `spatial_cluster`.
#'
#' @export
#'
#' @examples
#' \dontrun{
#'
#' merge_clusters(
#' df, dt_field = NULL, radius = 100, minPts = 5, borderPoints = TRUE,
#' keep_noise = FALSE, noise_threshold = 1)
#' }
merge_clusters <- function(df, dt_field = NULL, radius = 100, minPts = 5,
borderPoints = TRUE, keep_noise = FALSE, noise_threshold = 1) {
if (!'sp_temporal_cluster' %in% names(df)) {
stop('Column `sp_temporal_cluster` is not in the input data frame. Did you use `circleclust()` to identify periods of stationary activity?',
call. = FALSE)
}
if (sum(is.na(df$sp_temporal_cluster)) == nrow(df)) {
stop('The input data frame does not contain periods of stationary activity/clustered coordinates.',
call. = FALSE)
}
if (is.null(dt_field)) {
stop("`dt_field` has not been assigned a value.", call. = FALSE)
} else if (!lubridate::is.POSIXct(df[[dt_field]])) {
c_dt_field <- class(df[[dt_field]])
stop(paste0("`dt_field` must be a datetime. `", {{ dt_field }}, "` is of class ", c_dt_field, "."),
call. = FALSE
)
}
d_xy <- df %>%
dplyr::filter(!is.na(lat) | !is.na(lon)) %>%
dplyr::filter(!is.na(sp_temporal_cluster))
d_xy_lonlat <- d_xy %>%
dplyr::select(lon, lat)
mdb <- dbscan::dbscan(d_xy_lonlat, eps = ((0.001/111)*radius), minPts = minPts, borderPoints = borderPoints)
d_xy$db_cluster <- mdb$cluster
d_mc <- suppressMessages(dplyr::left_join(df, d_xy))
d_mc <- d_mc %>%
dplyr::rename(spatial_cluster = db_cluster) %>%
dplyr::arrange(.[[dt_field]])
n_spatio <- length(table(d_mc$sp_temporal_cluster))
db_table <- table(d_xy$db_cluster)
noise_lgl <- '0' %in% names(db_table)
noise_rm <- names(db_table) != '0'
n_db <- length(db_table[noise_rm])
if (keep_noise == FALSE) {
d_mc <- d_mc %>%
dplyr::filter(!spatial_cluster == 0 | is.na(spatial_cluster))
}
if (n_spatio > n_db) {
message(cli::col_green(paste0('The number of spatiotemporal clusters was reduced from ', n_spatio, ' to ', n_db, ' spatial clusters.')))
if (noise_lgl) {
n_noise <- db_table[['0']]
message(cli::col_cyan(paste(n_noise,
'coordinates were classified as "noise."')))
} else {
message(cli::col_cyan('Noise points were not detected.'))
}
if (keep_noise == TRUE & noise_lgl) {
pct_noise <- round(n_noise/nrow(df)*100, digits = 3)
if (pct_noise < noise_threshold) {
message(cli::col_red(
paste0('The percentage of noise coordinates (', pct_noise, '%) was below the threshold value (',
noise_threshold, '%), and these observations were deleted.')))
d_mc <- d_mc %>%
dplyr::filter(!spatial_cluster == 0 | is.na(spatial_cluster))
} else {
mc_noise <- dplyr::mutate(d_mc,
noise = ifelse(spatial_cluster == 0, 1, 0),
noise_break = dplyr::if_else((dplyr::lag(noise) == 0 | is.na(dplyr::lag(noise))) & noise == 1, 1, 0, missing = 0)) %>%
dplyr::filter(noise == 1) %>%
dplyr::mutate(noise_grp = cumsum(noise_break),
nc = ifelse(noise == 1, noise_grp + n_db, spatial_cluster)) %>%
dplyr::select(-c(noise_break, noise_grp))
mc_noise <- mc_noise %>%
dplyr::mutate(spatial_cluster = dplyr::if_else(is.na(nc), as.numeric(spatial_cluster), nc)) %>%
dplyr::select(-nc)
d_mc <- dplyr::bind_rows(d_mc, mc_noise)
d_mc <- d_mc %>%
dplyr::filter(!spatial_cluster == 0 | is.na(spatial_cluster))
message(cli::col_cyan(paste0('Noise coordinates were retained and assigned to spatial cluster(s): ',
paste(unique(mc_noise$spatial_cluster), collapse = ' '), '.')))
}
}
} else if (max(df$sp_temporal_cluster, na.rm = TRUE) == 1) {
stop_quietly <- function() {
opt <- options(show.error.messages = FALSE)
on.exit(options(opt))
return(df)
stop()
}
warning('The input data frame only has 1 identified cluster. Execution halted--returning the input data frame.',
call. = FALSE)
stop_quietly()
} else {
message(cli::col_cyan('Clusters were not merged. Multiple clusters do not exist within the specified radius.'))
d_mc <- d_mc %>%
dplyr::select(-spatial_cluster)
}
d_mc %>%
dplyr::arrange(.[[dt_field]])
d_mc
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.