R/centroid_fusion.R

Defines functions centroid_fusion

Documented in centroid_fusion

#' Fusion centroid
#'
#' \code{centroid_fusion} calculates the centroid (mean location) of each
#' timestep in fusion events. The function accepts an edge list of fusion events
#' identified by \code{fusion_id} from edge lists generated with
#' \code{edge_dist} and a \code{data.table} with relocation data appended with a
#' \code{timegroup} column from \code{group_times}. It is recommended to use the
#' argument \code{fillNA = FALSE} for \code{edge_dist} when using
#' \code{centroid_fusion} to avoid unnecessarily merging additional rows.
#' Relocation data should be in two columns representing the X and Y
#' coordinates.
#'
#' The \code{edges} and \code{DT} must be \code{data.table}. If your data is a
#' \code{data.frame}, you can convert it by reference using
#' \code{\link[data.table:setDT]{data.table::setDT}} or by reassigning using
#' \code{\link[data.table:data.table]{data.table::data.table}}.
#'
#' The \code{edges} and \code{DT} are internally merged in this function using
#' the columns \code{timegroup} (from \code{group_times}) and \code{ID1} and
#' \code{ID2} (in \code{edges}, from \code{dyad_id}) and \code{id} (in
#' \code{DT}). This function expects a \code{fusionID} present, generated with
#' the \code{fusion_id} function. The \code{timegroup} argument expects the
#' names of a column in \code{edges} which correspond to the timegroup column.
#' The \code{id}, \code{coords} and \code{timegroup} arguments expect the names
#' of a column in \code{DT} which correspond to the id, X and Y coordinates and
#' timegroup columns. The \code{na.rm} argument is passed to the \code{rowMeans}
#' function to control if NA values are removed before calculation.
#'
#' @param edges edge list generated generated by \code{edge_dist} or
#'  \code{edge_nn}, with fusionID column generated by \code{fusion_id}
#' @param DT input data.table with timegroup column generated with
#'   \code{group_times} matching the input data.table used to generate the edge
#'   list with \code{edge_nn} or \code{edge_dist}
#' @inheritParams group_pts
#' @param na.rm if NAs should be removed in calculating mean location, see
#'   \code{rowMeans}
#'
#' @return \code{centroid_fusion} returns the input \code{edges} appended with
#'   centroid columns for the X and Y coordinate columns.
#'
#'   These columns represents the centroid coordinate columns for each timestep
#'   in a fusion event. The naming of these columns will correspond to the
#'   provided coordinate column names prefixed with "centroid_".
#'
#'   Note: due to the merge required within this function, the output needs to
#'   be reassigned unlike some other \code{spatsoc} functions like
#'   \code{fusion_id} and \code{group_pts}.
#'
#'   A message is returned when centroid columns are already exists in the input
#'   \code{edges}, because they will be overwritten.
#'
#' @export
#' @family Centroid functions
#' @seealso [fusion_id] [edge_dist] [group_pts]
#' @examples
#' # Load data.table
#' library(data.table)
#' \dontshow{data.table::setDTthreads(1)}
#'
#' # Read example data
#' DT <- fread(system.file("extdata", "DT.csv", package = "spatsoc"))
#'
#' # Cast the character column to POSIXct
#' DT[, datetime := as.POSIXct(datetime, tz = 'UTC')]
#'
#' # Temporal grouping
#' group_times(DT, datetime = 'datetime', threshold = '20 minutes')
#'
#' # Edge list generation
#' edges <- edge_dist(
#'     DT,
#'     threshold = 100,
#'     id = 'ID',
#'     coords = c('X', 'Y'),
#'     timegroup = 'timegroup',
#'     returnDist = TRUE,
#'     fillNA = FALSE
#'   )
#'
#' # Generate dyad id
#' dyad_id(edges, id1 = 'ID1', id2 = 'ID2')
#'
#' # Generate fusion id
#' fusion_id(edges, threshold = 100)
#'
#' # Calculate fusion centroid
#' centroids <- centroid_fusion(
#'   edges,
#'   DT,
#'   id = 'ID',
#'   coords = c('X', 'Y'),
#'   timegroup = 'timegroup', na.rm = TRUE
#' )
#'
#' print(centroids)
centroid_fusion <- function(
    edges = NULL,
    DT = NULL,
    id = NULL,
    coords = NULL,
    timegroup = 'timegroup',
    na.rm = FALSE) {

  if (is.null(DT)) {
    stop('input DT required')
  }

  if (is.null(edges)) {
    stop('input edges required')
  }

  if (is.null(id)) {
    stop('id column name required')
  }

  if (length(coords) != 2) {
    stop('coords requires a vector of column names for coordinates X and Y')
  }

  if (is.null(timegroup)) {
    stop('timegroup column name required')
  }

  check_cols_edges <- c('fusionID', 'ID1', 'ID2', timegroup)
  if (any(!(check_cols_edges %in% colnames(edges)))) {
    stop(paste0(
      as.character(paste(setdiff(
        check_cols_edges,
        colnames(edges)
      ), collapse = ', ')),
      ' field(s) provided are not present in input DT'
    ))
  }

  if (any(!(
    c(id, coords, timegroup) %in% colnames(DT)
  ))) {
    stop(paste0(
      as.character(paste(setdiff(
        c(id, coords, timegroup),
        colnames(DT)
      ), collapse = ', ')),
      ' field(s) provided are not present in input DT'
    ))
  }

  if (any(!(DT[, vapply(.SD, is.numeric, TRUE), .SDcols = coords]))) {
    stop('coords must be numeric')
  }

  if (is.null(na.rm)) {
    stop('na.rm is required')
  }

  if (!is.logical(na.rm)) {
    stop('na.rm should be a boolean (TRUE/FALSE), see ?mean')
  }

  xcol <- data.table::first(coords)
  ycol <- data.table::last(coords)

  out_xcol <- paste0('centroid_', gsub(' ', '', xcol))
  out_ycol <- paste0('centroid_', gsub(' ', '', ycol))

  id1_coords <- paste0('id1_', coords)
  id2_coords <- paste0('id2_', coords)

  m <- merge(edges,
             DT[, .SD, .SDcols = c(coords, id, 'timegroup')],
             by.x = c('ID1', timegroup),
             by.y = c(id, timegroup),
             all.x = TRUE,
             sort = FALSE)
  data.table::setnames(m, coords, id1_coords)
  m <- merge(m,
             DT[, .SD, .SDcols = c(coords, id, 'timegroup')],
             by.x = c('ID2', timegroup),
             by.y = c(id, timegroup),
             all.x = TRUE,
             sort = FALSE)
  data.table::setnames(m, coords, id2_coords)

  if (out_xcol %in% colnames(m)) {
    message(paste(out_xcol, 'column will be overwritten by this function'))
    data.table::set(m, j = out_xcol, value = NULL)
  }

  if (out_ycol %in% colnames(m)) {
    message(paste(out_ycol, 'column will be overwritten by this function'))
    data.table::set(m, j = out_ycol, value = NULL)
  }

  m[, c(out_xcol) := rowMeans(.SD, na.rm = na.rm),
    .SDcols = c(data.table::first(id1_coords), data.table::first(id2_coords))]
  m[, c(out_ycol) := rowMeans(.SD, na.rm = na.rm),
    .SDcols = c(data.table::last(id1_coords), data.table::last(id2_coords))]

  data.table::set(m, j = c(id1_coords, id2_coords), value = NULL)
  data.table::setcolorder(m, colnames(edges))

  return(m[])
}
ropensci/spatsoc documentation built on Feb. 12, 2025, 7:16 a.m.