R/fusion_id.R

Defines functions fusion_id

Documented in fusion_id

#' Fission-fusion events
#'
#' \code{fusion_id} identifies fusion events in distance based edge lists.
#' The function accepts a distance based edge list generated by
#' \code{edge_dist}, a threshold argument and arguments controlling how fusion
#' events are defined.
#'
#' The \code{edges} must be a \code{data.table} returned by the \code{edge_dist}
#' function. In addition, \code{fusion_id} requires a dyad ID set on the edge
#' list generated by \code{dyad_id}. If your data is a \code{data.frame}, you
#' can convert it by reference using
#' \code{\link[data.table:setDT]{data.table::setDT}}.
#'
#' The \code{threshold} must be provided in the units of the coordinates. The
#' \code{threshold} must be larger than 0. The coordinates must be planar
#' coordinates (e.g.: UTM). In the case of UTM, a \code{threshold} = 50 would
#' indicate a 50 m distance threshold.
#'
#' The \code{n_min_length} argument defines the minimum number of successive
#' fixes that are required to establish a fusion event. The \code{n_max_missing}
#' argument defines the the maximum number of allowable missing observations for
#' the dyad within a fusion event. The \code{allow_split} argument defines if a
#' single observation can be greater than the threshold distance without
#' initiating fission event.
#'
#' @return \code{fusion_id} returns the input \code{edges} appended with a
#'   \code{fusionID} column.
#'
#'   This column represents the fusion event id. As with \code{spatsoc}'s
#'   grouping functions, the actual value of \code{fusionID} is arbitrary and
#'   represents the identity of a given fusion event. If the data was reordered,
#'   the \code{fusionID} may change, but the membership of each fusion event
#'   would not.
#'
#'   A message is returned when a column named \code{fusionID} already exists in
#'   the input \code{edges}, because it will be overwritten.
#'
#'
#' @param edges distance based edge list generated by \code{edge_dist} function,
#'   with dyad ID generated by \code{dyad_ID}
#' @param threshold spatial distance threshold in the units of the projection
#' @param n_min_length minimum length of fusion events
#' @param n_max_missing maximum number of missing observations within a fusion
#'   event
#' @param allow_split boolean defining if a single observation can be greater
#'   than the threshold distance without initiating fission event
#' @export
#' @seealso \code{\link{edge_dist}}
#' @references
#' See examples of identifying fission-fusion events with spatiotemporal data:
#'  * <https://doi.org/10.1111/ele.12457>
#'  * <https://doi.org/10.1016/j.anbehav.2018.03.014>
#'  * <https://doi.org/10.1890/08-0345.1>
#' @examples
#'
#' # Load data.table
#' library(data.table)
#' \dontshow{data.table::setDTthreads(1)}
#'
#' # Read example data
#' DT <- fread(system.file("extdata", "DT.csv", package = "spatsoc"))
#'
#' # Cast the character column to POSIXct
#' DT[, datetime := as.POSIXct(datetime, tz = 'UTC')]
#'
#' # Temporal grouping
#' group_times(DT, datetime = 'datetime', threshold = '20 minutes')
#'
#' # Edge list generation
#' edges <- edge_dist(
#'     DT,
#'     threshold = 100,
#'     id = 'ID',
#'     coords = c('X', 'Y'),
#'     timegroup = 'timegroup',
#'     returnDist = TRUE,
#'     fillNA = TRUE
#'   )
#'
#' dyad_id(edges, 'ID1', 'ID2')
#'
#' fusion_id(
#'   edges = edges,
#'   threshold = 100,
#'   n_min_length = 1,
#'   n_max_missing = 0,
#'   allow_split = FALSE
#'   )
fusion_id <- function(edges = NULL,
                      threshold = 50,
                      n_min_length = 0,
                      n_max_missing = 0,
                      allow_split = FALSE)  {

  # due to NSE notes  in R CMD check
  . <- both_rleid <- distance <- dyadID <- fusionID <- tg_diff <- timegroup <- within_rleid <- NULL

  if (is.null(edges)) {
    stop('input edges required')
  }

  stopifnot('dyadID' %in% colnames(edges))
  stopifnot('timegroup' %in% colnames(edges))
  stopifnot('distance' %in% colnames(edges))

  stopifnot(is.numeric(threshold))
  stopifnot(is.numeric(n_min_length))
  stopifnot(is.numeric(n_max_missing))
  stopifnot(is.logical(allow_split))

  stopifnot(threshold >= 0)

  unique_edges <- unique(edges[, .(dyadID, timegroup, distance)])

  setorder(unique_edges, 'timegroup')

  # Check if edge distance less than threshold
  unique_edges[, within := distance < threshold]

  # If allow split, check if previously within threshold, and
  #   timegroup difference between before and after is only 1
  if (allow_split) {
    unique_edges[, within := data.table::fifelse(
      within | timegroup == min(timegroup),
      within,
      data.table::shift(within, -1) &
        data.table::shift(within, 1) &
        timegroup - data.table::shift(timegroup, 1) == 1
    ), by = dyadID]
  }

  # Runs of within
  unique_edges[, within_rleid := data.table::rleid(within), by = dyadID]
  unique_edges[!(within), within_rleid := NA_integer_]

  # Check timegroup difference, unless first obs for dyad
  unique_edges[, tg_diff := data.table::fifelse(
    within,
    timegroup - data.table::shift(timegroup, 1) <= 1 |
      timegroup == min(timegroup),
    NA
  ), by = dyadID]

  # If missing obs allowed, adjust timegroup difference to allow as long as
  #   previously within threshold distance
  if (n_max_missing > 0) {
    unique_edges[, tg_diff := data.table::fifelse(
      tg_diff,
      tg_diff,
      data.table::shift(within, 1) &
        (timegroup - data.table::shift(timegroup, 1)) <=
        (1 + n_max_missing)
    ), by = dyadID]
  }

  # Get runs on within and timegroup difference. Adjust if runs of isolated
  #  observations together (eg. within T, T but timegroup diff F, F)
  unique_edges[(within), both_rleid := data.table::rleid(within_rleid, tg_diff), by = dyadID]
  unique_edges[(within) & !(tg_diff),
               both_rleid := (both_rleid + seq.int(.N)) * -1,
               by = dyadID]

  # If n minimum length > 0, check nrows and return NA if less than min
  if (n_min_length > 0) {
    unique_edges[!is.na(both_rleid), both_rleid := data.table::fifelse(
      .N >= n_min_length,
      both_rleid,
      NA_integer_),
      by = .(dyadID, both_rleid)]
  }

  # Set fusion id on runs and dyad id
  unique_edges[!is.na(both_rleid), fusionID := .GRP, by = .(dyadID, both_rleid)]

  # Merge fusion id onto input edges
  if ('fusionID' %in% colnames(edges)) {
    message('fusionID column will be overwritten by this function')
    data.table::set(edges, j = 'fusionID', value = NULL)
  }
  edges[unique_edges, fusionID := fusionID, on = .(timegroup, dyadID)]
  return(edges)
}
ropensci/spatsoc documentation built on Feb. 12, 2025, 7:16 a.m.