#------------------------------------------------------------------------------#
# Linear model with gap filling -> does not output any NAs
#' Linear regression for forecasting.
#'
#' @description This models uses LM to predict the future values of a variable.
#' It takes into account the day of the week and gap-filling: even if the input
#' values have NAs it will produce complete output.
linear_model <- function(tse, split, X_col, ...,
.orders_list = NULL) {
message('[linear_model] hi! How are you?')
if(missing(.orders_list))
orders <- list(...)
else orders <- .orders_list
# input checks
stopifnot(X_col %in% names(orders)) # must give the orders of the X_col
# all the orders must match some column in the tse
stopifnot(names(orders) %in% colnames(tse))
# all orders must be non-negative
stopifnot(all(c(orders, recursive = TRUE) >= 0))
# split must be logical
stopifnot(is.logical(split))
# construct lagged columns and remove useless ones
tse$temp.group_by <- factor(tse$time.wday)
tse <- tse %>%
subcol(c(names(orders), 'temp.group_by')) %>% # remove useless columns
D_multi(orders_list = orders) # construct the lagged columns
if(!split)
tse <- select(tse, -temp.group_by)
# remove the time columns, since the algorithm does not use them
# and keep only the columns with all values
data <- as.data.frame(tse)[tse$available, get_value_cols(tse)]
rownames(data) <- NULL
print('training data')
data %>% head(15) %>% print
# train the linear model
model <- list(lm = lm(paste(X_col, '~ .') %>% as.formula, data = data))
# train the daily_cluster_model that will be used for load gap filling
model$dcm_gf <- daily_cluster_model(tse, group_by = 'wd_s_sph', X_col)
# save some metadata
model$X_col <- X_col
model$X_order <- orders[[X_col]]
model$orders = orders
model$split = split
structure(model, class = 'linear_model')
}
predict.linear_model <- function(m, tse) {
build_columns(tse, m) %>% predict_one(m) # so clean wow
}
#' Build the lagged columns for the algorithm.
build_columns.linear_model <- function(tse, m) {
# build the column used to split the model
tse$temp.group_by <- factor(tse$time.wday)
# gap fill the predictor variables, not X_col
# for(col in setdiff(names(m$orders), m$X_col))
# tse[, col] <- fill_col(tse, col)
# right now dcm gap filling is not working so using linear interpol. gap
# gap filling
for(col in names(m$orders))
tse[, col] <- fill_col(tse, col)
# gap fill the X_col using dcm
# xcol <- tse[, m$X_col]
# xcol[is.na(xcol)] <- predict(m$dcm_gf, tse)[is.na(xcol)]
# tse[, m$X_col] <- xcol
# keeping only the good columns
tse <- tse %>%
subcol(c(names(m$orders), 'temp.group_by')) %>% # remove useless columns
D_multi(orders_list = m$orders) %>% # construct the lagged columns
# remove non-lagged X_col as it is not used for forecasting
select_(paste0('-', m$X_col)) %>%
update_available
if(!m$split)
tse <- select(tse, -temp.group_by)
# validity check
stopifnot(is.tse(tse))
tse
}
# predict one step, assuming all the lagged columns are already there
predict_one.linear_model <- function(tse, m) {
message('[predict_one] hi')
result <- rep(NA_real_, dim(tse)[1])
result[tse$available] <- predict(m$lm,
as.data.frame(tse)[tse$available,
get_value_cols(tse)])
result
}
#' which one is the X (or forecasted) columns?
get_X.linear_model <- function(m) { return(m$X_col) }
#' which lagged version of the forecasted columns are we using?
get_X_order.linear_model <- function(m) { return(m$X_order) }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.