################################################################################
# Time serie component decomposition
################################################################################
#------------------------------------------------------------------------------#
# Interface
#' The default component for time-series decomposition.
#'
#' @description A character vector of the component expected for a time serie
#' decomposition. Note that this is temporary only. Later only 'residual' will
#' be enforced and any decomposition will be allowed.
default_decomp <- c('trend', 'seasonal', 'daily', 'ar', 'residual')
#' Time-serie component decomposition
#'
#' @description
#'
#' @param cols Character vector of the columns that should be decomposed. If the
#' decomposition is missing for one of several column then that column is
#' decomposed with a null decomposition (all components equals to 0 and a
#' residual component equal to the full time-serie) and a warning is issued. But
#' no error is thrown.
#'
#' @param strategy A function that takes as parameter one tse, one column name
#' and that outputs a data.frame with 1 column per component. Right now the
#' default components are: trend, seasonal, daily, ar. They should
#' match the output of the strategy function column names.
#'
#' @return A tse object. It is the modified version of the input tse object. For
#' each column in cols, 4 new columns are created or overwritten in the tse:
#' decomp.col.trend, decomp.col.seasonal, decomp.col.daily and decomp.col.ar
#'
#' @family has_correction, apply_correct
tse_decomp <- function(tse, cols, strategy) {
return(tse)
}
#' Time-serie component composition and recomposition.
#'
#' @description apply_correct apply the correction according to the components
#' selected and put the results in a column called "decomp.cola.corrected".
#'
#' @param cols A character vector of the columns on which should the correction
#' be applied
#'
#' @param comps A character vector which indicates the components to include in
#' the recomposition
#'
#' @return The initial tse with the decomp.*.corrected columns updated
#'
#' @usage apply_correct(g17, "load", "trend")
#'
#' @family tse_decomp, has_correction
#'
#' @export
apply_correct <- function(tse, cols, comps) {
return(tse)
}
#' Check if correction stack if available for specified column.
#'
#' @description The correction stack a.k.a. time-serie decomposition is
#' implemented as special columns in the tse object: columns starting by
#' "decomp." and having a suffix like ".trend" or something else. When the
#' time-serie decomposition is created those column are created. This function
#' checks whether those columns are present or missing. If missing they can be
#' created using a call to tse_decomp.
#' It would be really good if the function could also check that the sum of the
#' componenent + the residual sums up to the original column, but currently that
#' is not implemented.
#'
#' @param col A character vector of length 1, which has to match one of the
#' column names of tse
#'
#' @return A boolean. True if the decomposition is available false otherwise.
#'
#' @family tse_decomp, apply_correct
#'
#' @export
has_correction <- function(tse, col) {
# input checks
stopifnot(is.tse(tse))
# col is a character vector of length 1
stopifnot(is.character(col) & identical(length(col), 1L))
stopifnot(col %in% get_value_cols(tse))
# get the decomposition columns
comps_cols <- get_decomp_cols(tse, col)
# check that the default decomposition columns are present
present <- decomp_col(col, default_decomp) %in% comps_cols
# if not all the value columns are present then it is not a valid
# decomposition and we return FALSE
if(!all(present))
return(FALSE)
# if all the required decomposition columns are present
# we check that they sum to the original column:
all.equal()
}
#------------------------------------------------------------------------------#
# Decomposition utils
#' [Internal] Get the column name of a time-serie component.
#'
#' @param col Character vector of column names.
#' @param comp Character vector of component names.
#' @return Character vector. Column names of the components
decomp_col <- function(col, comp) {
paste('decomp',
interaction(col, comp, lex.order = TRUE) %>% levels,
sep = '.')
}
#' Return all the decomposition columns names of a given value column.
#' @param col Column name
#' @return Vector of column names
get_decomp_cols <- function(tse, col) {
pattern <- paste0('^decomp.', col)
get_value_cols(tse) %>% grep(pattern = pattern, value = TRUE)
}
#------------------------------------------------------------------------------#
# Decomposition strategies
#' Default strategy for decomposing double seasonal time-series.
#'
#' @description compute the trend, seasonality, daily profiles, and
#' auto-regressive part of a time-serie according to the given model parameters
#'
#' @return a data.frame with 4 columns: trend, seasonality, daily, ar
#'
#' @export
compute_correction_stack <- function(data, use_trend,
use_seasonal, n_harmonics,
use_daily, n_clusters,
use_AR, value_var) {
stopifnot(typeof(value_var) == 'character')
trend <- paste(value_var, 'trend', sep = '_')
seasonal <- paste(value_var, 'seasonal', sep = '_')
daily_profiles <- paste(value_var, 'daily_profiles', sep = '_')
ar <- paste(value_var, 'ar', sep = '_')
residual <- paste(value_var, 'residual', sep = '_')
fitted <- paste(value_var, 'fitted', sep = '_')
# initialize all the components to 0
data[,trend] <- 0
data[,seasonal] <- 0
data[,daily_profiles] <- 0
data[,ar] <- 0
# apply the corrections that have been selected
if(use_trend) data[,trend] <- compute_trend(data, value_var)$component
if(use_seasonal) {
temp <- data
temp[, value_var] <- data[, value_var] - data[, trend]
data[, seasonal] <- compute_seasonal(temp, 1/seq_len(n_harmonics), value_var)$component
}
if(use_daily) {
temp <- data
temp[, value_var] <- data[, value_var] - data[, trend] - data[, seasonal]
data[, daily_profiles] <- compute_daily(temp, n_clusters, value_var)$component
}
if(use_AR) {
temp <- data
temp[, value_var] <- data[, value_var] - data[, trend] - data[, seasonal] - data[, daily_profiles]
data[, ar] <- compute_AR(temp, value_var)$component
}
# compute the residual component
data[, residual] <- data[, value_var] - data[, trend] - data[, seasonal] -
data[, daily_profiles] - data[, ar]
# compute the global fit by summing the component from each model
data[, fitted] <- data[, trend] + data[, seasonal] + data[, daily_profiles] + data[, ar]
# return the dataset with the correction
data
}
# computes the trend component of the data
# input: d dataframe with local_time, x1
# output: lm object, trend (fitted values)
compute_trend <- function(d, value_var) {
# compute the linear regression
m <- paste(value_var, 'local_time', sep = ' ~ ') %>% # make the formula
as.formula %>%
lm(data = d)
# return the results
list(model = m, component = fitted(m))
}
# computes the seasonal component of the data
# input: - d dataframe with local_time, x1, year
# - harmonics, vector of periods in years for the regression
# output: lm object, seasonal (fitted values)
compute_seasonal <- function(d, harmonics, value_var) {
n_harmonics <- length(harmonics)
# compute the basis functions for the regression
t <- as.numeric(d$local_time)
t <- 2*pi/3600/24/365 * matrix(rep(t, n_harmonics), ncol = n_harmonics) %*% diag(1/harmonics)
d <- data.frame(x = d[, value_var], cos(t), sin(t))
# compute the linear regression
m <- lm(x ~ ., data = d)
# return the results
list(model = m, component = fitted(m))
}
# computes the daily component of the data
# input: - d dataframe with local_time, x1, day, period
# Should have no incomplete days
# - n_clusters: number of clusters to be used
# output: kmeans object, daily (fitted values), clusters (per point)
compute_daily <- function(d, n_clusters, value_var) {
# we need numeric day for kmeans to work?
d$day <- as.numeric(d$day)
# cast the data into daily profiles
daily_profiles <-
melt(d, id.vars = c('local_time', 'day', 'period'), measure.vars = value_var) %>%
dcast(day ~ period)
# sort daily profiles by day
daily_profiles[order(daily_profiles$day),]
# apply the kmeans clustering algorithm
m <- kmeans(x = select(daily_profiles, -day), centers = n_clusters)
# merge the clustering information back into the dataset
d <-
data.frame(day = daily_profiles$day, clusters = m$cluster) %>%
merge(d, all.y = TRUE)
# flatten the daily component, daily_profiles is sorted by days so we can just string together all the rows
component <- c(t(fitted(m, method = "centers")))
list(model = m, component = component, clusters = d$clusters)
}
# computes the AR component of the data
# input: d dataframe with local_time, x1
# output: AR object, AR (fitted values)
compute_AR <- function(truc, value_var) {
load_ts <- ts(data = truc[, value_var], start = 1, end = dim(truc)[1], frequency = 1)
m <- ar(load_ts)
# the first values of the fit are NA
component <- load_ts - resid(m)
component[seq_len(m$order)] <- load_ts[seq_len(m$order)]
#component %>% length %>% print
list(model = m, component = component)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.