# R/get_places.R

# Defines functions get_places

# Documented in get_places

#' Extract locations of stationary activity
#'
#' `get_places()` analyzes periods of stationary activity within a cluster
#' and transforms the coordinates into a single location or 'place'. The input
#' data frame may include spatiotemporal cluster groupings generated by the
#' `circleclust()` function or spatially joined clusters (i.e.
#' `merge_clusters()`). Each place is positioned at the median `lat` and median
#' `lon` of the rows assigned to the cluster; rows with a missing cluster value
#' are ignored. `get_places()` can also summarize the duration of time (hours)
#' spent as well as the first and last timestamp recorded at each location.
#'
#' @details
#' When `summary = TRUE`, the sampling interval is estimated as the 75th
#' percentile of the differences between consecutive values of `dt_field`,
#' expressed in seconds and rounded down to a whole second. The duration of
#' each place (`duration_hrs`) is the number of rows in the cluster multiplied
#' by that interval, converted to hours and rounded to one decimal place.
#' `start_time` and `end_time` are the minimum and maximum of `dt_field`
#' within the cluster.
#'
#' @param df a data frame with spatiotemporal or merged cluster groupings. Must
#' contain the columns `lat` and `lon` in addition to `cluster_col`.
#' @param cluster_col character; name of column indicating cluster group
#' (`sp_temporal_cluster` or `spatial_cluster`)
#' @param nested logical; if TRUE, metadata for each cluster is nested in a list
#' column (`p_data`). If FALSE, the `p_data` column is dropped.
#' @param geometry logical; if TRUE, an `sf` tibble with a corresponding
#' `geometry` column (built from the place coordinates, CRS 4326) is returned
#' @param summary logical; if TRUE, the returned data frame is appended with
#' summary values for duration (`duration_hrs`), start time (`start_time`), and
#' end time (`end_time`) for stationary activity recorded for each cluster
#' @param dt_field character; name of datetime field used to calculate summary
#' information. Required (and must be a column of `df`) when `summary = TRUE`.
#'
#' @return a data frame or `sf` object with one row per place
#' @export
#'
#' @examples
#' \dontrun{
#' # `df` holds clustered coordinates; `date_time` is its datetime column
#' get_places(df, cluster_col = 'sp_temporal_cluster', nested = TRUE,
#'            geometry = TRUE, summary = TRUE, dt_field = 'date_time')
#' }
#'
get_places <- function(df, cluster_col = 'sp_temporal_cluster', nested = TRUE, geometry = TRUE, summary = FALSE, dt_field = NULL) {

  # ---- Input validation (base R only, so bad input fails fast) -------------

  # Require a single string so that NULL or a vector yields the informative
  # error below instead of a zero-length / vector `if` condition failure.
  cluster_col_ok <- is.character(cluster_col) &&
    length(cluster_col) == 1L &&
    cluster_col %in% c('sp_temporal_cluster', 'spatial_cluster')

  if (!cluster_col_ok) {
    stop('`cluster_col` must be set to either `sp_temporal_cluster` or `spatial_cluster.` Did you use `circleclust()` or `merge_clusters()` \n to create the input data frame?',
         call. = FALSE)
  }

  if (!cluster_col %in% names(df)) {
    stop(paste0('Column `', cluster_col, '` is not in the input data frame. Did you use `circleclust()` or `merge_clusters()` to identify periods of stationary activity?'),
         call. = FALSE)
  }

  if (all(is.na(df[[cluster_col]]))) {
    stop('The input data frame does not contain periods of stationary activity/clustered coordinates.',
         call. = FALSE)
  }

  # Check `dt_field` before any grouping work is done, and make sure it names
  # an actual column rather than only checking for NULL.
  if (summary) {
    dt_field_ok <- is.character(dt_field) &&
      length(dt_field) == 1L &&
      dt_field %in% names(df)

    if (!dt_field_ok) {
      stop('Summary data for each place cannot be calculated. Did you assign the correct column to `dt_field`?',
           call. = FALSE)
    }
  }

  # ---- One data frame per cluster, tagged with its median coordinates ------

  l_places <- df %>%
    dplyr::filter(!is.na(.data[[cluster_col]])) %>%
    dplyr::group_split(.data[[cluster_col]]) %>%
    purrr::map(., ~dplyr::mutate(., p_lat = stats::median(lat, na.rm = TRUE),
                                 p_lon = stats::median(lon, na.rm = TRUE)))

  # Collapse each cluster to a single row; the remaining columns are nested
  # into the `p_data` list column.
  dc <- l_places %>%
    purrr::map_df(., ~dplyr::group_nest(., .key = 'p_data', .data[[cluster_col]], p_lat, p_lon))

  if (!nested) {
    dc <- dc %>%
      dplyr::select(-p_data)
  }

  # ---- Optional per-place summary ------------------------------------------

  if (summary) {

    # Convert the timestamp differences to seconds BEFORE flooring. `diff()`
    # chooses its own units (secs/mins/hours/days), so flooring first would
    # truncate e.g. 1.5 mins to 1 min and report 60 s instead of 90 s.
    step <- diff(df[[dt_field]])
    units(step) <- "secs"
    time_unit <- floor(stats::quantile(as.numeric(step), 0.75, names = FALSE))

    # Duration = number of observations x sampling interval, in hours.
    n_obs <- purrr::map_dbl(l_places, nrow)
    durs <- tibble::tibble(duration_hrs = round(n_obs * time_unit / 3600, digits = 1))

    time_range <- purrr::map_df(l_places,
                                ~dplyr::summarise(., start_time = min(.[[dt_field]]),
                                                  end_time = max(.[[dt_field]])))

    dc <- dplyr::bind_cols(dc, durs, time_range)

    message(paste0('Measurements appear to have been recorded at a ', time_unit,
                   ' second sampling interval.'))
  }

  # ---- Optional conversion to an sf object ---------------------------------

  if (geometry) {
    dc <- dc %>%
      sf::st_as_sf(., coords = c('p_lon', 'p_lat'), crs = 4326)
  }

  np <- nrow(dc)

  if (cluster_col == 'spatial_cluster') {
    message(paste0('A total of ', np, ' places were detected.'))
  } else {
    message(paste0('A total of ', np, ' spatiotemporal places were detected.'))
  }

  # Merged clusters can pool visits that are separated in time, so the
  # start/end summary is only the first and last timestamp at the location.
  if (cluster_col == 'spatial_cluster' && summary) {
    warning(paste('  Looks like the input data frame includes merged clusters.',
                  cli::style_bold('Summary start and end times may not represent a continuous interval of time.'),
                  '\n Start and end times represent the first and last timestamp recorded at each location.'),
            call. = FALSE)
  }

  dc

}
# wolfeclw/circleclust documentation built on Aug. 13, 2024, 3:33 a.m.