th2_agg_func <- function(x = NULL,target_metric = c("mean"),na.rm = TRUE){
if(class(x)[1] %in% c("Date","POSIXct"))return(min(x)[1])
if(class(x)[1] %in% c("character"))return(head(x,1))
if(target_metric == "mean")return(mean(x,na.rm = na.rm))
if(target_metric == "sum")return(sum(x,na.rm = na.rm))
if(target_metric == "min")return(min(x,na.rm = na.rm))
if(target_metric == "max")return(max(x,na.rm = na.rm))
if(target_metric == "median")return(median(x,na.rm = na.rm))
if(target_metric == "sd")return(sd(x,na.rm = na.rm))
if(target_metric == "count")return(length(x))
}
get_time_grouping_elements <- function(time_unit = NULL){
if(is.null(time_unit))return(NULL)
time_features <- c("years","quarters","months","weeks","days","hours","minutes","seconds")
names(time_features) <- c("year","quarter","month","week","day","hour","minute","second")
grouping_elements <- names(time_features)[1:(grep(time_unit,time_features))]
return(grouping_elements)
}
prepare_agg_data <- function(input_data = NULL, target_ts = NULL, sdukkel = NULL,time_unit = NULL, agg_metric = "Sum"){
if(agg_metric == "Count"){
# count requires only one variables, more than one is duplicates
element_to_select <- c("date",target_ts[1],sdukkel)
input_data <- input_data%>%
dplyr::select(!!element_to_select)
colnames(input_data)[2] <- target_ts <- "number_of_elements"
}
if(is.null(sdukkel))return(input_data)
input_data <- input_data%>%
tidyr::pivot_wider(id_cols = "date", values_from = target_ts, names_from = sdukkel)
grouping_elements1 <- get_time_grouping_elements(time_unit = time_unit)
time_features <- c(colnames(input_data),grouping_elements1)
input_data <- input_data%>%
timetk::tk_augment_timeseries_signature(.date_var = "date")%>%
dplyr::select(!!time_features)%>%dplyr::group_by(dplyr::across(!!grouping_elements1))%>%
dplyr::summarise_all(th2_agg_func,"sum")%>%ungroup()%>%
dplyr::select(-dplyr::all_of(!!grouping_elements1))%>%
dplyr::arrange(date)
return(input_data)
}
#' Data Aggregation based on a given time unit and grouping pattern
#' @author Farid Azouaou
#' @param tisefka data frame including date variable
#' @param sdukkel group by a discrete variable
#' @param base_unit time unit of the raw data
#' @param time_unit time unit of the aggregated data
#' @param aggregation_metric aggregation metric (sum mean max min)
#' @return list containing aggregated data
data_aggregation_f <- function(tisefka = NULL, time_unit = NULL, target_ts = NULL, sdukkel = NULL) {
names(target_ts) <- target_ts
if(is.null(target_ts))return(NULL)
target_vars <- c("date",target_ts , sdukkel)
tisefka <- tisefka%>%
dplyr::select(!!target_vars)
grouping_elements1 <- get_time_grouping_elements(time_unit = time_unit)
time_features <- c(colnames(tisefka),grouping_elements1)
grouping_elements <- c(sdukkel,grouping_elements1)%>%unique()
tisefka <- tisefka%>%
timetk::tk_augment_timeseries_signature(.date_var = "date")%>%
dplyr::select(!!time_features)
aggregations <- c("sum","mean","max","min","median","sd","count")
names(aggregations) <- c("Sum","Average","Max","Min","Median","StD","Count")
tisefka <- aggregations%>%
purrr::map(~tisefka%>%dplyr::group_by(dplyr::across(!!grouping_elements))%>%
dplyr::summarise_all(th2_agg_func,.x)%>%ungroup()%>%
dplyr::select(-dplyr::all_of(!!grouping_elements1)))
names(target_ts) <- NULL
tisefka <- tisefka%>%
purrr::imap(~ .x%>%prepare_agg_data(input_data = ., target_ts = target_ts,sdukkel = sdukkel, time_unit= time_unit, agg_metric = .y ))
return(tisefka)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.