R/interval_aggregation.R

Defines functions interval_aggregation

Documented in interval_aggregation

#' Aggregate the raw data into an equally-spaced data frame
#'
#' @param data Data in a data.frame format
#' @param variable Name of variable in the data set you would like to re-structure
#' @param ID Name of ID variable in the data set
#' @param timestamp Name of time variable in the data set
#' @param format Date/time format of timestamp. Default is "%Y-%m-%dT%H:%M:%S" (i.e. yyyy-mm-dd hh:mm:ss).
#' @param interval The interval of time in minute. Default is 60 (i.e. The measurement is evaluated every hour.)
#' @param aggregatefun Function used for aggregation. Default is sum.
#' @param is.na.zero If TRUE, NA is considered as 0. If FALSE, the NA is considered as missing. Defulat is TRUE.
#' @param remove.na If TRUE, the NAs will be removed in the aggregated data. Defulat is TRUE.
#'
#' @importFrom hms hms
#' @importFrom hms as_hms
#' @importFrom magrittr %>%
#' @importFrom dplyr group_by
#' @importFrom dplyr summarise
#'
#' @export

interval_aggregation = function(data, variable, ID, timestamp, format = "%Y-%m-%dT%H:%M:%S", interval = 60,
                                aggregatefun = sum, is.na.zero = TRUE, remove.na = TRUE)
{
  if(is.na.zero && sum(is.na(data[,colnames(data) == variable])) > 0){data[,colnames(data) == variable][is.na(data[,colnames(data) == variable])] = 0}
  data[,colnames(data) == timestamp] = as.character(data[,colnames(data) == timestamp])
  tstamp = strptime(data[,colnames(data) == timestamp], format = format)
  data$date = as.Date(tstamp)
  data$time = as_hms(tstamp)

  data = data[order(data$date, data$time),]

  # Create a sequence of time by a minute
  # Hour, minute
  tseq0 = as_hms(seq(hms(0, 0, 0), hms(00, 00, 24), length = as.numeric(difftime(hms(0, 0, 24), hms(0, 0, 0), units = "min")) + 1))
  tseq0 = tseq0[-length(tseq0)] # 24:00:00
  tlength0 = length(tseq0)

  # Every (interval) minutes
  if(interval == 1) {tseq = tseq0; tlength = tlength0} else{

    timesep = interval * (1:round((tlength0 + 1)/interval)-1) + 1
    tseq = tseq0[timesep] # Aggregated time stamp
    tlength = length(tseq)
  }

  # Time interval indicator
  data$time2 = sapply(data$time, function(x){tseq[findInterval(x = x, vec = tseq)]})
  data$time2 = as_hms(data$time2)

  out = data.frame(data %>% group_by(!!as.name(ID), date, time2) %>% summarise(outvar = aggregatefun(!!as.name(variable))))

  colnames(out) = c(ID, "date", "time", variable)
  return(out)
}
jihuilee/mhealth documentation built on Jan. 1, 2021, 5:30 a.m.