R/dt_aggregate.R

Defines functions dt_aggregate

Documented in dt_aggregate

#' Aggregate data by time unit
#'
#' `dt_aggregate()` aggregates numeric values to a specified time unit. Columns with
#' uniform character values are retained (i.e. patient ID, sensor name).
#'
#' @param df a data frame with a datetime field.
#' @param dt_field character; name of datetime field.
#' @param unit character; string specifying a time unit or a multiple of a unit to be rounded
#' @param summary_fun character; summary function (i.e. 'mean', 'median'). Default = 'median.'
#' @param na_rm logical; should NA values be removed before the chosen
#' `summary_fun` computes?
#'
#' @export
#'
#' @examples
#' \dontrun{
#'
#' dt_aggregate(df,
#'   dt_field = "Date_Time",
#'   unit = "5 seconds",
#'   summary_fun = "median", na_rm = TRUE
#' )
#' }
#' @importFrom stats median
#'
dt_aggregate <- function(df, dt_field = NULL, unit = "5 seconds",
                         summary_fun = "median", na_rm = TRUE) {
  if (is.null(dt_field)) {
    stop("`dt_field` has not been assigned a value.", call. = FALSE)
  } else if (!lubridate::is.POSIXct(df[[dt_field]])) {
    c_dt_field <- class(df[[dt_field]])
    stop(paste0("`dt_field` must be a datetime. `", {{ dt_field }}, "` is of class ", c_dt_field, "."),
         call. = FALSE
    )
  }

  ###

  d_agg_grp <- df %>%
    dplyr::mutate(agg_dt = lubridate::floor_date(.data[[dt_field]], unit = unit)) %>%
    dplyr::group_by(agg_dt)

  new_agg <- d_agg_grp %>%
    dplyr::select(agg_dt) %>%
    dplyr::distinct() %>%
    dplyr::ungroup() %>%
    dplyr::rename('{dt_field}' := agg_dt)

  if (isTRUE(na_rm)) {

    mean_nag <- function(x) {mean(x, na.rm = TRUE)}
    sd_nag <- function(x) {sd(x, na.rm = TRUE)}
    median_nag <- function(x) {median(x, na.rm = TRUE)}
    min_nag <- function(x) {

      min_v <- suppressWarnings(min(x, na.rm = TRUE))
      min_v[is.infinite(min_v)] <- NA
      min_v
    }
    max_nag <- function(x) {

      max_v <- suppressWarnings(max(x, na.rm = TRUE))
      max_v[is.infinite(max_v)] <- NA
      max_v
    }

    sfun_list <- list(mean = mean_nag,
                      sd = sd_nag,
                      median = median_nag,
                      min = suppressWarnings(min_nag),
                      max = max_nag)
  } else {

    sfun_list <- list(mean = mean,
                      sd = sd,
                      median = median,
                      min = suppressWarnings(min),
                      max = max)
  }

  sfun_wch <- names(sfun_list) %in% summary_fun
  slist <- sfun_list[sfun_wch]

  d_agg <- d_agg_grp %>%
    dplyr::summarise(dplyr::across(dplyr::where(is.numeric), slist, .names = '{.col}'), .groups = 'drop') %>%
    dplyr::rename('{dt_field}' := agg_dt)

  d_agg <- dplyr::left_join(new_agg, d_agg, by = dt_field)

  if ('imputed_coord' %in% names(d_agg)) {
    d_agg$imputed_coord <- ceiling(d_agg$imputed_coord)
  }

  d_agg <- d_agg %>%
    dplyr::mutate(dplyr::across(dplyr::where(is.numeric), ~ifelse(is.nan(.), NA, .)))

  ####

  no_num_cols <- dplyr::select(df, -dplyr::where((is.numeric))) %>%
    dplyr::select(-dplyr::all_of({{dt_field}})) %>%
    purrr::map(., unique)

  no_num_cols_len <- no_num_cols %>%
    purrr::map_dbl(., length)

  if (any(length(no_num_cols_len)) == 0) {
    d_agg
  } else {
    unq_lgl <- no_num_cols_len %>%
      purrr::map_lgl(., ~ . == 1)

    if (any(unq_lgl)) {

      char_keep <- no_num_cols[unq_lgl] %>%
        purrr::map2(., names(.), ~tibble::enframe(.x, name = NULL, value = .y)) %>%
        dplyr::bind_cols()
      d_agg <- dplyr::bind_cols(char_keep, d_agg)
    }

    rm_cols <- no_num_cols[!unq_lgl] %>% names()

    d_agg <- dplyr::relocate(d_agg, {{dt_field}})

    if (length(rm_cols) > 0) {

      cp <- ifelse(length(rm_cols) > 1, 'Columns ', 'Column ')
      is_are <- ifelse(length(rm_cols) > 1, ' are', ' is')
      message( cli::col_magenta(
        paste0(cp, paste0('`', rm_cols, '`', collapse = ", "), is_are,
               " non-numeric and contain more than one unique value. These columns were \n removed during aggregation.")
      ))

      exp_cols_rm <- names(df)[-which(names(df) %in% rm_cols)]
      exp_cols_new <- names(d_agg)[which(!exp_cols_rm %in% names(d_agg))]
      exp_cols <- c(exp_cols_rm, exp_cols_new)

      d_agg <- d_agg[ , exp_cols]
    } else {

      exp_cols_og <- names(df)
      exp_cols_new <- names(d_agg)[which(!names(df) %in% names(d_agg))]
      exp_cols <- c(exp_cols_og, exp_cols_new)
      # exp_cols <- names(df)
      d_agg <- d_agg[ , exp_cols]
    }
  }

  d_agg
}
wolfeclw/circleclust documentation built on Aug. 13, 2024, 3:33 a.m.