Nothing
#' Group Lines
#'
#' `group_lines` groups rows into spatial groups by generating LINESTRINGs and
#' grouping based on spatial intersection. The function accepts a `data.table`
#' with relocation data, individual identifiers and a distance threshold. The
#' relocation data is transformed into sf LINESTRINGs using [build_lines] and
#' intersecting LINESTRINGs are grouped. The threshold argument is used to
#' specify the distance criteria for grouping. Relocation data should be in two
#' columns representing the X and Y coordinates.
#'
#' ## R-spatial evolution
#'
#' Please note, spatsoc has followed updates from R spatial, GDAL and PROJ for
#' handling projections, see more at
#' <https://r-spatial.org/r/2020/03/17/wkt.html>.
#'
#' In addition, `group_lines` (and [build_lines]) previously used
#' [sp::SpatialLines], rgeos::gIntersects, rgeos::gBuffer but have been
#' updated to use [sf::st_as_sf], [sf::st_linestring], [sf::st_intersects], and
#' [sf::st_buffer] according to the R-spatial evolution, see more at
#' <https://r-spatial.org/r/2022/04/12/evolution.html>.
#'
#' ## Notes on arguments
#' The `DT` must be a `data.table`. If your data is a
#' `data.frame`, you can convert it by reference using [data.table::setDT].
#'
#' The `id`, `coords`, `sortBy` (and optional `timegroup`
#' and `splitBy`) arguments expect the names of respective columns in
#' `DT` which correspond to the individual identifier, X and Y coordinates,
#' sorting, timegroup (generated by [group_times]) and additional grouping
#' columns.
#'
#' The `projection` argument expects a numeric or character defining the
#' coordinate reference system. For example, for UTM zone 36N (EPSG 32736), the
#' projection argument is either `projection = 'EPSG:32736'` or `projection =
#' 32736`. See details in [`sf::st_crs()`] and
#' <https://spatialreference.org> for a list of EPSG codes.
#'
#' The `sortBy` argument is used to order the input `DT` when creating sf
#' LINESTRINGs. It must a column in the input `DT` of type POSIXct to ensure the
#' rows are sorted by date time.
#'
#' The `threshold` must be provided in the units of the coordinates. The
#' `threshold` can be equal to 0 if strict overlap is intended, otherwise it
#' should be some value greater than 0. The coordinates must be planar
#' coordinates (e.g.: UTM). In the case of UTM, a `threshold = 50` would
#' indicate a 50m distance threshold.
#'
#' The `timegroup` argument is optional, but recommended to pair with
#' [group_times]. The intended framework is to group rows temporally with
#' [group_times] then spatially with [group_lines] (or [group_pts],
#' [group_polys]). With [group_lines], pick a relevant [group_times] threshold
#' such as `'1 day'` or `'7 days'` which is informed by your study species,
#' system or question.
#'
#' The `splitBy` argument offers further control building LINESTRINGs. If in
#' your input `DT`, you have multiple temporal groups (e.g.: years) for example,
#' you can provide the name of the column which identifies them and build
#' LINESTRINGs for each individual in each year. The grouping performed by
#' [group_lines] will only consider rows within each `splitBy` subgroup.
#'
#' @return `group_lines` returns the input `DT` appended with a "group"
#' column.
#'
#' This column represents the spatial (and if `timegroup` was provided -
#' spatiotemporal) group calculated by intersecting lines. As with the other
#' grouping functions, the actual value of group is arbitrary and represents
#' the identity of a given group where 1 or more individuals are assigned to a
#' group. If the data was reordered, the group may change, but the contents of
#' each group would not.
#'
#' A message is returned when a column named "group" already exists in the
#' input `DT`, because it will be overwritten.
#'
#' @inheritParams group_pts
#' @inheritParams build_polys
#' @param threshold The width of the buffer around the lines in the units of the
#' projection. Use `threshold = 0` to compare intersection without buffering.
#' @param sfLines Alternatively to providing a DT, provide a simple feature
#' LINESTRING object generated with the sf package. The id argument is
#' required to provide the identifier matching each LINESTRING.
#' If an sfLines object is provided, groups cannot be calculated by timegroup
#' or splitBy.
#' @param sortBy Character string of date time column(s) to sort rows by. Must
#' be a POSIXct.
#'
#' @export
#'
#' @family Spatial grouping
#' @seealso [build_lines] [group_times]
#'
#' @examples
#' # Load data.table
#' library(data.table)
#' \dontshow{data.table::setDTthreads(1)}
#'
#' # Read example data
#' DT <- fread(system.file("extdata", "DT.csv", package = "spatsoc"))
#'
#' # Subset only individuals A, B, and C
#' DT <- DT[ID %in% c('A', 'B', 'C')]
#'
#' # Cast the character column to POSIXct
#' DT[, datetime := as.POSIXct(datetime, tz = 'UTC')]
#'
#' # EPSG code for example data
#' utm <- 32736
#'
#' group_lines(DT, threshold = 50, projection = utm, sortBy = 'datetime',
#' id = 'ID', coords = c('X', 'Y'))
#'
#' ## Daily movement tracks
#' # Temporal grouping
#' group_times(DT, datetime = 'datetime', threshold = '1 day')
#'
#' # Subset only first 50 days
#' DT <- DT[timegroup < 25]
#'
#' # Spatial grouping
#' group_lines(DT, threshold = 50, projection = utm,
#' id = 'ID', coords = c('X', 'Y'),
#' timegroup = 'timegroup', sortBy = 'datetime')
#'
#' ## Daily movement tracks by population
#' group_lines(DT, threshold = 50, projection = utm,
#' id = 'ID', coords = c('X', 'Y'),
#' timegroup = 'timegroup', sortBy = 'datetime',
#' splitBy = 'population')
group_lines <-
function(DT = NULL,
threshold = NULL,
projection = NULL,
id = NULL,
coords = NULL,
timegroup = NULL,
sortBy = NULL,
splitBy = NULL,
sfLines = NULL) {
# due to NSE notes in R CMD check
group <- ..coords <- ..id <- ..sortBy <- withinGroup <- NULL
if (is.null(threshold)) {
message('threshold missing, using 0 by default')
threshold <- 0
} else if (!is.numeric(threshold)) {
stop('threshold must be numeric')
} else if (threshold < 0) {
stop('cannot provide a negative threshold')
}
if (!is.null(sfLines) && !is.null(DT)) {
stop('cannot provide both DT and sfLines')
} else if (is.null(sfLines) && is.null(DT)) {
stop('must provide either DT or sfLines')
} else if (!is.null(sfLines) && is.null(DT)) {
if (!inherits(sfLines, 'sf') ||
!'LINESTRING' %in% sf::st_geometry_type(sfLines)) {
stop('sfLines provided must be a sf object with LINESTRINGs')
}
if (is.null(id)) {
stop('id must be provided')
}
if (uniqueN(sfLines[[id]]) != nrow(sfLines)) {
stop('number of unique values in sfLines does not match nrow(sfLines)')
}
if (threshold == 0) {
inter <- sf::st_intersects(sfLines, sfLines, sparse = FALSE)
} else {
buffered <- sf::st_buffer(sfLines, dist = threshold)
inter <- sf::st_intersects(sfLines, buffered, sparse = FALSE)
}
dimnames(inter) <- list(sfLines[[id]], sfLines[[id]])
g <- igraph::graph_from_adjacency_matrix(inter)
ovr <- igraph::clusters(g)$membership
out <- data.table::data.table(names(ovr),
unlist(ovr))
data.table::setnames(out, c('ID', 'group'))
return(out[])
} else if (is.null(sfLines) && !is.null(DT)) {
if (is.null(projection)) {
stop('projection must be provided when DT is')
}
if (is.null(coords)) {
stop('coords must be provided')
}
if (is.null(id)) {
stop('id must be provided')
}
if (is.null(sortBy)) {
stop('sortBy must be provided')
}
if (any(!(c(id, coords, timegroup, sortBy) %in% colnames(DT)))) {
stop(paste0(
as.character(paste(setdiff(
c(id, coords), colnames(DT)
),
collapse = ', ')),
' field(s) provided are not present in input DT'
))
}
if ('group' %in% colnames(DT)) {
message('group column will be overwritten by this function')
set(DT, j = 'group', value = NULL)
}
}
if (is.null(timegroup)) {
withCallingHandlers({
lns <- build_lines(
DT = DT,
projection = projection,
coords = coords,
id = id,
sortBy = sortBy,
splitBy = splitBy
)},
warning = function(w){
if (startsWith(conditionMessage(w), 'some rows dropped')) {
invokeRestart('muffleWarning')
}
}
)
if (nrow(lns) != 0) {
if (threshold == 0) {
inter <- sf::st_intersects(lns, lns, sparse = FALSE)
} else {
buffered <- sf::st_buffer(lns, dist = threshold)
inter <- sf::st_intersects(lns, buffered, sparse = FALSE)
}
dimnames(inter) <- list(lns[[id]], lns[[id]])
g <- igraph::graph_from_adjacency_matrix(inter)
ovr <- igraph::clusters(g)$membership
ovrDT <- data.table::data.table(ID = names(ovr),
group = unlist(ovr))
} else {
ovrDT <- data.table::data.table(ID = DT[[id]], group = as.integer(NA))
}
data.table::setnames(ovrDT, c(id, 'group'))
DT[ovrDT, group := group, on = id]
if (DT[is.na(group), .N] > 0) {
warning(
strwrap(
prefix = " ",
initial = "",
x = 'some rows were dropped,
cannot build a line with < 2 points.
in this case, group set to NA.'
)
)
}
return(DT[])
} else {
if (is.null(splitBy)) {
splitBy <- timegroup
}
else {
splitBy <- c(splitBy, timegroup)
}
ovrDT <-
DT[, {
withCallingHandlers({
lns <- build_lines(
DT = .SD,
projection = projection,
coords = ..coords,
id = ..id,
sortBy = ..sortBy
)},
warning = function(w){
if (startsWith(conditionMessage(w), 'some rows dropped')) {
invokeRestart('muffleWarning')
}
}
)
if (!is.null(lns)) {
if (threshold == 0) {
inter <- sf::st_intersects(lns, lns, sparse = FALSE)
} else {
buffered <- sf::st_buffer(lns, dist = threshold)
inter <- sf::st_intersects(lns, buffered, sparse = FALSE)
}
dimnames(inter) <- list(lns[[id]], lns[[id]])
g <- igraph::graph_from_adjacency_matrix(inter)
ovr <- igraph::clusters(g)$membership
out <- data.table::data.table(names(ovr),
unlist(ovr))
data.table::setnames(out, c(..id, 'withinGroup'))
} else {
out <- data.table(get(..id), withinGroup = as.double(NA))
data.table::setnames(out, c(..id, 'withinGroup'))
}
}, by = c(splitBy), .SDcols = c(coords, id, sortBy)]
DT[ovrDT, withinGroup := withinGroup, on = c(id, splitBy)]
DT[, group := ifelse(is.na(withinGroup), as.integer(NA), .GRP),
by = c(splitBy, 'withinGroup')]
set(DT, j = 'withinGroup', value = NULL)
if (DT[is.na(group), .N] > 0) {
warning(
strwrap(
prefix = " ",
initial = "",
x = 'some rows were dropped,
cannot build a line with < 2 points.
in this case, group set to NA.'
)
)
}
return(DT[])
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.