#' Fusion centroid
#'
#' \code{centroid_fusion} calculates the centroid (mean location) of each
#' timestep in fusion events. The function accepts an edge list of fusion events
#' identified by \code{fusion_id} from edge lists generated with
#' \code{edge_dist} and a \code{data.table} with relocation data appended with a
#' \code{timegroup} column from \code{group_times}. It is recommended to use the
#' argument \code{fillNA = FALSE} for \code{edge_dist} when using
#' \code{centroid_fusion} to avoid unnecessarily merging additional rows.
#' Relocation data should be in two columns representing the X and Y
#' coordinates.
#'
#' The \code{edges} and \code{DT} must be \code{data.table}. If your data is a
#' \code{data.frame}, you can convert it by reference using
#' \code{\link[data.table:setDT]{data.table::setDT}} or by reassigning using
#' \code{\link[data.table:data.table]{data.table::data.table}}.
#'
#' The \code{edges} and \code{DT} are internally merged in this function using
#' the columns \code{timegroup} (from \code{group_times}) and \code{ID1} and
#' \code{ID2} (in \code{edges}, from \code{dyad_id}) and \code{id} (in
#' \code{DT}). This function expects a \code{fusionID} present, generated with
#' the \code{fusion_id} function. The \code{timegroup} argument expects the
#' names of a column in \code{edges} which correspond to the timegroup column.
#' The \code{id}, \code{coords} and \code{timegroup} arguments expect the names
#' of a column in \code{DT} which correspond to the id, X and Y coordinates and
#' timegroup columns. The \code{na.rm} argument is passed to the \code{rowMeans}
#' function to control if NA values are removed before calculation.
#'
#' @param edges edge list generated generated by \code{edge_dist} or
#' \code{edge_nn}, with fusionID column generated by \code{fusion_id}
#' @param DT input data.table with timegroup column generated with
#' \code{group_times} matching the input data.table used to generate the edge
#' list with \code{edge_nn} or \code{edge_dist}
#' @inheritParams group_pts
#' @param na.rm if NAs should be removed in calculating mean location, see
#' \code{rowMeans}
#'
#' @return \code{centroid_fusion} returns the input \code{edges} appended with
#' centroid columns for the X and Y coordinate columns.
#'
#' These columns represents the centroid coordinate columns for each timestep
#' in a fusion event. The naming of these columns will correspond to the
#' provided coordinate column names prefixed with "centroid_".
#'
#' Note: due to the merge required within this function, the output needs to
#' be reassigned unlike some other \code{spatsoc} functions like
#' \code{fusion_id} and \code{group_pts}.
#'
#' A message is returned when centroid columns are already exists in the input
#' \code{edges}, because they will be overwritten.
#'
#' @export
#' @family Centroid functions
#' @seealso [fusion_id] [edge_dist] [group_pts]
#' @examples
#' # Load data.table
#' library(data.table)
#' \dontshow{data.table::setDTthreads(1)}
#'
#' # Read example data
#' DT <- fread(system.file("extdata", "DT.csv", package = "spatsoc"))
#'
#' # Cast the character column to POSIXct
#' DT[, datetime := as.POSIXct(datetime, tz = 'UTC')]
#'
#' # Temporal grouping
#' group_times(DT, datetime = 'datetime', threshold = '20 minutes')
#'
#' # Edge list generation
#' edges <- edge_dist(
#' DT,
#' threshold = 100,
#' id = 'ID',
#' coords = c('X', 'Y'),
#' timegroup = 'timegroup',
#' returnDist = TRUE,
#' fillNA = FALSE
#' )
#'
#' # Generate dyad id
#' dyad_id(edges, id1 = 'ID1', id2 = 'ID2')
#'
#' # Generate fusion id
#' fusion_id(edges, threshold = 100)
#'
#' # Calculate fusion centroid
#' centroids <- centroid_fusion(
#' edges,
#' DT,
#' id = 'ID',
#' coords = c('X', 'Y'),
#' timegroup = 'timegroup', na.rm = TRUE
#' )
#'
#' print(centroids)
centroid_fusion <- function(
edges = NULL,
DT = NULL,
id = NULL,
coords = NULL,
timegroup = 'timegroup',
na.rm = FALSE) {
if (is.null(DT)) {
stop('input DT required')
}
if (is.null(edges)) {
stop('input edges required')
}
if (is.null(id)) {
stop('id column name required')
}
if (length(coords) != 2) {
stop('coords requires a vector of column names for coordinates X and Y')
}
if (is.null(timegroup)) {
stop('timegroup column name required')
}
check_cols_edges <- c('fusionID', 'ID1', 'ID2', timegroup)
if (any(!(check_cols_edges %in% colnames(edges)))) {
stop(paste0(
as.character(paste(setdiff(
check_cols_edges,
colnames(edges)
), collapse = ', ')),
' field(s) provided are not present in input DT'
))
}
if (any(!(
c(id, coords, timegroup) %in% colnames(DT)
))) {
stop(paste0(
as.character(paste(setdiff(
c(id, coords, timegroup),
colnames(DT)
), collapse = ', ')),
' field(s) provided are not present in input DT'
))
}
if (any(!(DT[, vapply(.SD, is.numeric, TRUE), .SDcols = coords]))) {
stop('coords must be numeric')
}
if (is.null(na.rm)) {
stop('na.rm is required')
}
if (!is.logical(na.rm)) {
stop('na.rm should be a boolean (TRUE/FALSE), see ?mean')
}
xcol <- data.table::first(coords)
ycol <- data.table::last(coords)
out_xcol <- paste0('centroid_', gsub(' ', '', xcol))
out_ycol <- paste0('centroid_', gsub(' ', '', ycol))
id1_coords <- paste0('id1_', coords)
id2_coords <- paste0('id2_', coords)
m <- merge(edges,
DT[, .SD, .SDcols = c(coords, id, 'timegroup')],
by.x = c('ID1', timegroup),
by.y = c(id, timegroup),
all.x = TRUE,
sort = FALSE)
data.table::setnames(m, coords, id1_coords)
m <- merge(m,
DT[, .SD, .SDcols = c(coords, id, 'timegroup')],
by.x = c('ID2', timegroup),
by.y = c(id, timegroup),
all.x = TRUE,
sort = FALSE)
data.table::setnames(m, coords, id2_coords)
if (out_xcol %in% colnames(m)) {
message(paste(out_xcol, 'column will be overwritten by this function'))
data.table::set(m, j = out_xcol, value = NULL)
}
if (out_ycol %in% colnames(m)) {
message(paste(out_ycol, 'column will be overwritten by this function'))
data.table::set(m, j = out_ycol, value = NULL)
}
m[, c(out_xcol) := rowMeans(.SD, na.rm = na.rm),
.SDcols = c(data.table::first(id1_coords), data.table::first(id2_coords))]
m[, c(out_ycol) := rowMeans(.SD, na.rm = na.rm),
.SDcols = c(data.table::last(id1_coords), data.table::last(id2_coords))]
data.table::set(m, j = c(id1_coords, id2_coords), value = NULL)
data.table::setcolorder(m, colnames(edges))
return(m[])
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.