R/group_pts.R

Defines functions group_pts

Documented in group_pts

#' Group Points
#'
#' \code{group_pts} groups rows into spatial groups. The function accepts a
#' \code{data.table} with relocation data, individual identifiers and a
#' threshold argument. The threshold argument is used to specify the criteria
#' for distance between points which defines a group. Relocation data should be
#' in two columns representing the X and Y coordinates.
#'
#' The \code{DT} must be a \code{data.table}. If your data is a
#' \code{data.frame}, you can convert it by reference using
#' \code{\link[data.table:setDT]{data.table::setDT}}. An error is raised if
#' \code{DT} is not a \code{data.table}.
#'
#' The \code{id}, \code{coords}, \code{timegroup} (and optional \code{splitBy})
#' arguments expect the names of columns in \code{DT} which correspond to the
#' individual identifier, X and Y coordinates, timegroup (typically generated by
#' \code{group_times}) and additional grouping columns.
#'
#' The \code{threshold} must be provided in the units of the coordinates. The
#' \code{threshold} must be a single, non-missing numeric value larger than 0.
#' The coordinates must be planar coordinates (e.g.: UTM). In the case of UTM,
#' a \code{threshold} = 50 would indicate a 50m distance threshold.
#'
#' The \code{timegroup} argument is required to define the temporal groups
#' within which spatial groups are calculated. The intended framework is to
#' group rows temporally with \code{\link{group_times}} then spatially with
#' \code{group_pts} (or \code{\link{group_lines}}, \code{\link{group_polys}}).
#' If you have already calculated temporal groups without
#' \code{\link{group_times}}, you can pass this column to the \code{timegroup}
#' argument. Note that the expectation is that each individual will be observed
#' only once per timegroup; a warning is raised when an individual appears more
#' than once within a timegroup (and \code{splitBy} subgroup). Be careful:
#' accidentally including huge numbers of rows within timegroups can overload
#' your machine since all pairwise distances are calculated within each
#' timegroup.
#'
#' The \code{splitBy} argument offers further control over grouping. If within
#' your \code{DT}, you have multiple populations, subgroups or other distinct
#' parts, you can provide the name of the column which identifies them to
#' \code{splitBy}. The grouping performed by \code{group_pts} will only consider
#' rows within each \code{splitBy} subgroup.
#'
#' @return \code{group_pts} returns the input \code{DT} appended with a
#'   \code{group} column. The \code{DT} is modified by reference.
#'
#'   This column represents the spatiotemporal group. As with the other
#'   grouping functions, the actual value of \code{group} is arbitrary and
#'   represents the identity of a given group where 1 or more individuals are
#'   assigned to a group. If the data was reordered, the \code{group} may
#'   change, but the contents of each group would not.
#'
#'   A message is printed when a column named \code{group} already exists in
#'   the input \code{DT}, because it will be overwritten.
#'
#' @param DT input data.table
#' @param threshold a single numeric distance greater than 0 for grouping
#'   points, in the units of the coordinates
#' @param id Character string of ID column name
#' @param coords Character vector of X coordinate and Y coordinate column names
#' @param timegroup timegroup field in the DT within which the grouping will be
#'   calculated
#' @param splitBy (optional) character string or vector of grouping column
#'   name(s) upon which the grouping will be calculated
#'
#' @export
#'
#' @family Spatial grouping
#' @seealso \code{\link{group_times}}
#'
#' @examples
#' # Load data.table
#' library(data.table)
#' \dontshow{data.table::setDTthreads(1)}
#'
#' # Read example data
#' DT <- fread(system.file("extdata", "DT.csv", package = "spatsoc"))
#'
#' # Select only individuals A, B, C for this example
#' DT <- DT[ID %in% c('A', 'B', 'C')]
#'
#' # Cast the character column to POSIXct
#' DT[, datetime := as.POSIXct(datetime, tz = 'UTC')]
#'
#' # Temporal grouping
#' group_times(DT, datetime = 'datetime', threshold = '20 minutes')
#'
#' # Spatial grouping with timegroup
#' group_pts(DT, threshold = 5, id = 'ID',
#'           coords = c('X', 'Y'), timegroup = 'timegroup')
#'
#' # Spatial grouping with timegroup and splitBy on population
#' group_pts(DT, threshold = 5, id = 'ID', coords = c('X', 'Y'),
#'          timegroup = 'timegroup', splitBy = 'population')
group_pts <- function(DT = NULL,
                     threshold = NULL,
                     id = NULL,
                     coords = NULL,
                     timegroup,
                     splitBy = NULL) {
  # due to NSE notes in R CMD check
  N <- withinGroup <- ..coords <- group <- NULL

  # ---- Argument validation ----
  if (is.null(DT)) {
    stop('input DT required')
  }

  # Everything below relies on data.table's `[` semantics (.SD, :=, by);
  # fail early with a clear message instead of an obscure `[.data.frame` error.
  if (!inherits(DT, 'data.table')) {
    stop('input DT must be a data.table')
  }

  if (is.null(threshold)) {
    stop('threshold required')
  }

  if (!is.numeric(threshold)) {
    stop('threshold must be numeric')
  }

  # `if ()` needs a scalar, non-NA condition: guard before comparing to 0.
  if (length(threshold) != 1L || is.na(threshold)) {
    stop('threshold must be a single non-missing numeric value')
  }

  if (threshold <= 0) {
    stop('threshold must be greater than 0')
  }

  if (is.null(id)) {
    stop('ID field required')
  }

  if (length(coords) != 2) {
    stop('coords requires a vector of column names for coordinates X and Y')
  }

  if (missing(timegroup)) {
    stop('timegroup required')
  }

  # All user-supplied column names must exist in DT.
  missingCols <- setdiff(c(timegroup, id, coords, splitBy), colnames(DT))
  if (length(missingCols) > 0) {
    stop(paste0(
      paste(missingCols, collapse = ', '),
      ' field(s) provided are not present in input DT'
    ))
  }

  if (!all(DT[, vapply(.SD, is.numeric, TRUE), .SDcols = coords])) {
    stop('coords must be numeric')
  }

  # A date/time or character timegroup suggests the raw datetime column was
  # passed instead of the output of group_times.
  if (!is.null(timegroup)) {
    if (any(unlist(lapply(DT[, .SD, .SDcols = timegroup], class)) %in%
            c('POSIXct', 'POSIXlt', 'Date', 'IDate', 'ITime', 'character'))) {
      warning(
        strwrap(
          prefix = " ",
          initial = "",
          x = 'timegroup provided is a date/time
          or character type, did you use group_times?'
        )
      )
    }
  }

  # Drop any pre-existing group column (by reference) before recomputing it.
  if ('group' %in% colnames(DT)) {
    message('group column will be overwritten by this function')
    data.table::set(DT, j = 'group', value = NULL)
  }

  # Each id is expected at most once per timegroup (and splitBy subgroup).
  if (DT[, .N, by = c(id, splitBy, timegroup)][N > 1, sum(N)] != 0) {
    warning(
      strwrap(
        prefix = " ",
        initial = "",
        x = 'found duplicate id in a
          timegroup and/or splitBy -
          does your group_times threshold match the fix rate?'
      )
    )
  }

  # ---- Spatial grouping ----
  # Within each (splitBy, timegroup) subset: compute pairwise Euclidean
  # distances, connect points no farther apart than the threshold, and label
  # each row with the connected component it belongs to.
  DT[, withinGroup := {
    distMatrix <-
      as.matrix(stats::dist(cbind(
        get(..coords[1]), get(..coords[2])
      ),
      method = 'euclidean'))
    graphAdj <-
      igraph::graph_from_adjacency_matrix(distMatrix <= threshold)
    igraph::components(graphAdj)$membership
  },
  by = c(splitBy, timegroup), .SDcols = c(coords, id)]

  # Component labels restart in every subset; .GRP turns each
  # (splitBy, timegroup, withinGroup) combination into one group id.
  DT[, group := .GRP,
     by = c(splitBy, timegroup, 'withinGroup')]

  # Remove the temporary helper column by reference.
  data.table::set(DT, j = 'withinGroup', value = NULL)
  return(DT[])
}
ropensci/spatsoc documentation built on April 15, 2024, 9:59 a.m.