R/time_based_aggregation.R

Defines functions data_aggregation_f prepare_agg_data get_time_grouping_elements th2_agg_func

Documented in data_aggregation_f

th2_agg_func <- function(x = NULL,target_metric = c("mean"),na.rm = TRUE){
  if(class(x)[1] %in% c("Date","POSIXct"))return(min(x)[1])
  if(class(x)[1] %in% c("character"))return(head(x,1))
  if(target_metric == "mean")return(mean(x,na.rm = na.rm))
  if(target_metric == "sum")return(sum(x,na.rm = na.rm))
  if(target_metric == "min")return(min(x,na.rm = na.rm))
  if(target_metric == "max")return(max(x,na.rm = na.rm))
  if(target_metric == "median")return(median(x,na.rm = na.rm))
  if(target_metric == "sd")return(sd(x,na.rm = na.rm))
  if(target_metric == "count")return(length(x))
}

get_time_grouping_elements  <- function(time_unit = NULL){
  if(is.null(time_unit))return(NULL)
  time_features <- c("years","quarters","months","weeks","days","hours","minutes","seconds")
  names(time_features) <- c("year","quarter","month","week","day","hour","minute","second")
  grouping_elements <-  names(time_features)[1:(grep(time_unit,time_features))]
  return(grouping_elements)
}


prepare_agg_data <- function(input_data = NULL, target_ts = NULL, sdukkel = NULL,time_unit = NULL, agg_metric = "Sum"){

  if(agg_metric == "Count"){
    # count requires only one variables, more than one is duplicates
    element_to_select <- c("date",target_ts[1],sdukkel)
    input_data <- input_data%>%
      dplyr::select(!!element_to_select)
    colnames(input_data)[2] <- target_ts <- "number_of_elements"
  }

  if(is.null(sdukkel))return(input_data)

  input_data <- input_data%>%
    tidyr::pivot_wider(id_cols  = "date", values_from = target_ts, names_from = sdukkel)

  grouping_elements1 <- get_time_grouping_elements(time_unit = time_unit)
  time_features <- c(colnames(input_data),grouping_elements1)
  input_data <- input_data%>%
    timetk::tk_augment_timeseries_signature(.date_var = "date")%>%
    dplyr::select(!!time_features)%>%dplyr::group_by(dplyr::across(!!grouping_elements1))%>%
    dplyr::summarise_all(th2_agg_func,"sum")%>%ungroup()%>%
    dplyr::select(-dplyr::all_of(!!grouping_elements1))%>%
    dplyr::arrange(date)
  return(input_data)
}



#' Data Aggregation based on a given time unit and grouping pattern
#' @author Farid Azouaou
#' @param  tisefka  data frame including date variable
#' @param  sdukkel group by a discrete variable
#' @param  base_unit time unit of the raw data
#' @param  time_unit time unit of the aggregated data
#' @param  aggregation_metric aggregation metric (sum mean max min)
#' @return list containing aggregated data

data_aggregation_f <- function(tisefka = NULL, time_unit = NULL, target_ts = NULL, sdukkel = NULL) {

  names(target_ts) <- target_ts
  if(is.null(target_ts))return(NULL)
  target_vars <- c("date",target_ts , sdukkel)
  tisefka <- tisefka%>%
    dplyr::select(!!target_vars)

    grouping_elements1 <- get_time_grouping_elements(time_unit = time_unit)
    time_features <- c(colnames(tisefka),grouping_elements1)

    grouping_elements <- c(sdukkel,grouping_elements1)%>%unique()

    tisefka <- tisefka%>%
      timetk::tk_augment_timeseries_signature(.date_var = "date")%>%
      dplyr::select(!!time_features)

    aggregations <- c("sum","mean","max","min","median","sd","count")
    names(aggregations) <- c("Sum","Average","Max","Min","Median","StD","Count")
    tisefka <- aggregations%>%
      purrr::map(~tisefka%>%dplyr::group_by(dplyr::across(!!grouping_elements))%>%
                   dplyr::summarise_all(th2_agg_func,.x)%>%ungroup()%>%
                   dplyr::select(-dplyr::all_of(!!grouping_elements1)))

    names(target_ts) <- NULL
    tisefka <- tisefka%>%
      purrr::imap(~ .x%>%prepare_agg_data(input_data = ., target_ts = target_ts,sdukkel = sdukkel, time_unit= time_unit, agg_metric = .y ))


  return(tisefka)
}
Aqvayli06/SaldaeModulesUI documentation built on Feb. 4, 2024, 6:25 a.m.