R/centroid_dyad.R

Defines functions centroid_dyad

Documented in centroid_dyad

#' Dyad centroid
#'
#' \code{centroid_dyad} calculates the centroid (mean location) of a dyad in
#' each observation identified by \code{edge_nn} or \code{edge_dist}. The
#' function accepts an edge list generated by \code{edge_nn} or \code{edge_dist}
#' and a \code{data.table} with relocation data appended with a \code{timegroup}
#' column from \code{group_times}. It is recommended to use the argument
#' \code{fillNA = FALSE} for \code{edge_dist} when using \code{centroid_dyad} to
#' avoid unnecessarily merging additional rows. Relocation data should be in two
#' columns representing the X and Y coordinates.
#'
#' The \code{edges} and \code{DT} must be \code{data.table}. If your data is a
#' \code{data.frame}, you can convert it by reference using
#' \code{\link[data.table:setDT]{data.table::setDT}} or by reassigning using
#' \code{\link[data.table:data.table]{data.table::data.table}}.
#'
#' The \code{edges} and \code{DT} are internally merged in this function using
#' the columns \code{id}, \code{dyadID} and \code{timegroup}. This function
#' expects a \code{dyadID} present, generated with the \code{dyad_id} function.
#' The \code{dyadID} and \code{timegroup} arguments expect the names of a column
#' in \code{edges} which correspond to the dyadID and timegroup columns. The
#' \code{id} and \code{timegroup} arguments expect the names of a column in
#' \code{DT} which correspond to the X and Y coordinates and group columns. The
#' \code{na.rm} argument is passed to the \code{rowMeans} function to control if
#' NA values are removed before calculation.
#'
#' @param edges edge list generated generated by \code{edge_dist} or
#'  \code{edge_nn}, with dyad ID column generated by \code{dyad_id}
#' @param DT input data.table with timegroup column generated with
#'   \code{group_times} matching the input data.table used to generate the edge
#'   list with \code{edge_nn} or \code{edge_dist}
#' @inheritParams group_pts
#' @param na.rm if NAs should be removed in calculating mean location, see
#'   \code{rowMeans}
#'
#' @return \code{centroid_dyad} returns the input \code{edges} appended with
#'  centroid columns for the X and Y coordinate columns.
#'
#'   These columns represents the centroid coordinate columns for the dyad.
#'   The naming of these columns will correspond to the provided coordinate
#'   column names prefixed with "centroid_".
#'
#'  Note: due to the merge required within this function, the output needs to be
#'  reassigned unlike some other \code{spatsoc} functions like \code{dyad_id}
#'  and \code{group_pts}.
#'
#'   A message is returned when centroid columns are already exists in
#'   the input \code{edges}, because they will be overwritten.
#'
#' @export
#' @family Centroid functions
#' @seealso [dyad_id] [edge_dist] [edge_nn] [group_pts]
#' @examples
#' # Load data.table
#' library(data.table)
#' \dontshow{data.table::setDTthreads(1)}
#'
#' # Read example data
#' DT <- fread(system.file("extdata", "DT.csv", package = "spatsoc"))
#'
#' # Cast the character column to POSIXct
#' DT[, datetime := as.POSIXct(datetime, tz = 'UTC')]
#'
#' # Temporal grouping
#' group_times(DT, datetime = 'datetime', threshold = '20 minutes')
#'
#' # Edge list generation
#' edges <- edge_dist(
#'     DT,
#'     threshold = 100,
#'     id = 'ID',
#'     coords = c('X', 'Y'),
#'     timegroup = 'timegroup',
#'     returnDist = TRUE,
#'     fillNA = FALSE
#'   )
#'
#' # Generate dyad id
#' dyad_id(edges, id1 = 'ID1', id2 = 'ID2')
#'
#' # Calculate dyad centroid
#' centroids <- centroid_dyad(
#'   edges,
#'   DT,
#'   id = 'ID',
#'   coords = c('X', 'Y'),
#'   timegroup = 'timegroup', na.rm = TRUE
#' )
#'
#' print(centroids)
centroid_dyad <- function(
    edges = NULL,
    DT = NULL,
    id = NULL,
    coords = NULL,
    timegroup = 'timegroup',
    na.rm = FALSE) {

  if (is.null(DT)) {
    stop('input DT required')
  }

  if (is.null(edges)) {
    stop('input edges required')
  }

  if (is.null(id)) {
    stop('id column name required')
  }

  if (length(coords) != 2) {
    stop('coords requires a vector of column names for coordinates X and Y')
  }

  if (is.null(timegroup)) {
    stop('timegroup column name required')
  }

  if (any(!(
    c('dyadID', timegroup) %in% colnames(edges)
  ))) {
    stop(paste0(
      as.character(paste(setdiff(
        c('dyadID', timegroup),
        colnames(edges)
      ), collapse = ', ')),
      ' field(s) provided are not present in input DT'
    ))
  }

  if (any(!(
    c(id, coords, timegroup) %in% colnames(DT)
  ))) {
    stop(paste0(
      as.character(paste(setdiff(
        c(id, coords, timegroup),
        colnames(DT)
      ), collapse = ', ')),
      ' field(s) provided are not present in input DT'
    ))
  }

  if (any(!(DT[, vapply(.SD, is.numeric, TRUE), .SDcols = coords]))) {
    stop('coords must be numeric')
  }

  if (is.null(na.rm)) {
    stop('na.rm is required')
  }

  if (!is.logical(na.rm)) {
    stop('na.rm should be a boolean (TRUE/FALSE), see ?mean')
  }

  xcol <- data.table::first(coords)
  ycol <- data.table::last(coords)

  out_xcol <- paste0('centroid_', gsub(' ', '', xcol))
  out_ycol <- paste0('centroid_', gsub(' ', '', ycol))

  id1_coords <- paste0('id1_', coords)
  id2_coords <- paste0('id2_', coords)

  m <- merge(edges,
        DT[, .SD, .SDcols = c(coords, id, 'timegroup')],
        by.x = c('ID1', timegroup),
        by.y = c(id, timegroup),
        all.x = TRUE,
        sort = FALSE)
  data.table::setnames(m, coords, id1_coords)
  m <- merge(m,
             DT[, .SD, .SDcols = c(coords, id, 'timegroup')],
             by.x = c('ID2', timegroup),
             by.y = c(id, timegroup),
             all.x = TRUE,
             sort = FALSE)
  data.table::setnames(m, coords, id2_coords)

  if (out_xcol %in% colnames(m)) {
    message(paste(out_xcol, 'column will be overwritten by this function'))
    data.table::set(m, j = out_xcol, value = NULL)
  }

  if (out_ycol %in% colnames(m)) {
    message(paste(out_ycol, 'column will be overwritten by this function'))
    data.table::set(m, j = out_ycol, value = NULL)
  }

  m[, c(out_xcol) := rowMeans(.SD, na.rm = na.rm),
    .SDcols = c(data.table::first(id1_coords), data.table::first(id2_coords))]
  m[, c(out_ycol) := rowMeans(.SD, na.rm = na.rm),
    .SDcols = c(data.table::last(id1_coords), data.table::last(id2_coords))]

  data.table::set(m, j = c(id1_coords, id2_coords), value = NULL)
  data.table::setcolorder(m, colnames(edges))

  return(m[])
}
ropensci/spatsoc documentation built on Feb. 12, 2025, 7:16 a.m.