#' Generate time series features for non-simulated data
#'
#' @param tibble as tibble of data were the first column is a date and each other column is a time series
#' @param date_column_name string for the name of the date column
#' @param frequency seasonal period of the time series to be generated.
#' @return a set of time series features
#' @examples
#' small <- select(dat, 1:5)
#' small %>% make_ts_features %>% str
make_ts_features <- function(tibble, date_column_name = "days_date", frequency = 7) {
# a function which adds ~30 time series characteristics
build_features <- function(ts){
bind_cols(
tsfeatures(ts,features = ,c("acf_features","entropy","lumpiness","flat_spots","crossing_points")),
tsfeatures(ts,"stl_features", s.window='periodic', robust=TRUE),
tsfeatures(ts, "max_kl_shift", width=14),
tsfeatures(ts,c("mean","var"), scale=FALSE, na.rm=TRUE),
tsfeatures(ts,c("max_level_shift","max_var_shift"), trim=TRUE)
)
}
# prep tibble to be passed to build_features
tibble %>%
select(-date_column_name) %>%
# convert to a time series
map(~ts(.,frequency = frequency)) %>%
build_features(.)
}
#' Generate time series features from time series generated from random parameter spaces of the mixture autoregressive (MAR) models.
#'
#' @param n.ts number of time series to be generated.
#' @param freq seasonal period of the time series to be generated.
#' @param nComp number of mixing components when simulating time series using MAR models.
#' @param n.ts length of the generated time series.
#' @param n number of time series to be generated.
#' @return a set of time series features
#' @examples
#' sim_mar <- generate_feature_space()
generate_feature_space <- function(n.ts = 100, freq = 7, nComp = NULL, n = 60){
# makes ts features for simulated data
tsgeneration::generate_ts(n.ts = n.ts, freq = freq, nComp = nComp, n = n) %>%
as_data_frame() %>%
slice(1) %>%
unnest()
}
make_ts_features_sim <- function(tibble, date_column_name = "days_date", frequency = 7) {
# a function which adds ~30 time series characteristics
build_features <- function(ts){
bind_cols(
tsfeatures(ts,features = ,c("acf_features","entropy","lumpiness","flat_spots","crossing_points")),
tsfeatures(ts,"stl_features", s.window='periodic', robust=TRUE),
tsfeatures(ts, "max_kl_shift", width=14),
tsfeatures(ts,c("mean","var"), scale=FALSE, na.rm=TRUE),
tsfeatures(ts,c("max_level_shift","max_var_shift"), trim=TRUE)
)
}
df_names <- tibble %>% names() %>% data_frame("series" = .)
# prep tibble to be passed to build_features
tibble %>%
# select(-date_column_name) %>%
# convert to a time series
map(~ts(.,frequency = frequency)) %>%
build_features(.) %>%
bind_cols(df_names,.)
}
convert_batch_data_to_ts <- function(tibble, frequency = 7){
tibble %>%
# select(-date_column_name) %>%
# convert to a time series
map(~ts(.,frequency = frequency))
}
## forecast function for batch forecasting
# 1. WN: white noise
# 3. ARIMA: arima
# 4. RWD: random walk with drift
# 5. RW: random walk
# 6. THETA: theta
# 7. ETSNTNS: ets without trend and seasonal components
# 8. ETS: ets with trend and seasonal component
# 9. ETSNS: ets with trend component and no seasonal component
# 10. ETSD: ets with damped trend component and no seasonal
# 11. SARIMA: sarima
# 12. SN: seasonal naive
## forecast function for batch forecasting
#------------------------------------------
# 1. WN: white noise
WN <- function(ts, h = 14){
ts %>%
map_dfc(~forecast::meanf(., h = h)$mean) %>%
mutate(ts = "WN", idx = row_number()) %>%
gather(key = series, value = pred_value, -ts, -idx)
}
# 3. ARIMA: arima
ARIMA <- function(ts, h = 14){
ts %>%
map(~forecast::auto.arima(., seasonal = FALSE)) %>%
map_dfc(~forecast::forecast(.,h=h)$mean) %>%
mutate(ts = "ARIMA", idx = row_number()) %>%
gather(key = series, value = pred_value, - ts, -idx)
}
# 4. RWD: random walk with drift
RWD <- function(ts, h = 14){
ts %>%
map_dfc(~forecast::rwf(., drift = TRUE, h = h)$mean) %>%
mutate(ts = "RWD", idx = row_number()) %>%
gather(key = series, value = pred_value, - ts, -idx)
}
# 5. RW: random walk
RW <- function(ts, h = 14){
ts %>%
map_dfc(~forecast::rwf(., drift = FALSE, h = h)$mean) %>%
mutate(ts = "RW", idx = row_number()) %>%
gather(key = series, value = pred_value, - ts, -idx)
}
# 6. THETA: theta
THETA <- function(ts, h = 14){
ts %>%
map(~forecast::thetaf(.)) %>%
map_dfc(~forecast::forecast(.,h=h)$mean) %>%
mutate(ts = "THETA", idx = row_number()) %>%
gather(key = series, value = pred_value, - ts, -idx)
}
# 7. ETSNTNS: ets without trend and seasonal components
ETSNTNS <- function(ts, h = 14){
ts %>%
map(~forecast::ets(., model = "ZNN", damped = FALSE)) %>%
map_dfc(~forecast::forecast(.,h=h)$mean) %>%
mutate(ts = "ETSNTNS", idx = row_number()) %>%
gather(key = series, value = pred_value, - ts, -idx)
}
# 8. ETS: ets with trend and seasonal component
ETS <- function(ts, h = 14){
ts %>%
map(~forecast::ets(., model = "ZZZ", damped = NULL)) %>%
map_dfc(~forecast::forecast(.,h=h)$mean) %>%
mutate(ts = "ETS", idx = row_number()) %>%
gather(key = series, value = pred_value, - ts, -idx)
}
# 9. ETSNS: ets with trend component and no seasonal component
ETSNS <- function(ts, h = 14){
ts %>%
map(~forecast::ets(., model = "ZZN", damped = FALSE)) %>%
map_dfc(~forecast::forecast(.,h=h)$mean) %>%
mutate(ts = "ETSNS", idx = row_number()) %>%
gather(key = series, value = pred_value, - ts, -idx)
}
# 10. ETSD: ets with damped trend component and no seasonal
ETSD <- function(ts, h = 14){
ts %>%
map(~forecast::ets(., model = "ZZN", damped = TRUE)) %>%
map_dfc(~forecast::forecast(.,h=h)$mean) %>%
mutate(ts = "ETSD", idx = row_number()) %>%
gather(key = series, value = pred_value, - ts, -idx)
}
# 11. STL: stl
STL <- function(ts, h = 14){
ts %>%
map(~stl(., s.window = "periodic")) %>%
map_dfc(~forecast::forecast(.,h=h)$mean) %>%
mutate(ts = "STL", idx = row_number()) %>%
gather(key = series, value = pred_value, - ts, -idx)
}
# 12. SN: seasonal naive
SN <- function(ts, h = 14){
ts %>%
map_dfc(~forecast::snaive(., h = h)$mean) %>%
mutate(ts = "SN", idx = row_number()) %>%
gather(key = series, value = pred_value, - ts, -idx)
}
# 13. Prophet?
#------------------------------------------
make_classification_df <- function(dat, features){
# set up in and out of samples
in_samp <- dat[1:60, ]
out_samp <- dat[61:74, ] %>%
mutate(idx = row_number()) %>%
gather(key = series, value = actual_value, -idx)
# convert to ts and forecast next days
my_ts <- in_samp %>% convert_batch_data_to_ts()
mins_df <- bind_rows(list(
my_ts %>% WN,
my_ts %>% ARIMA,
my_ts %>% RWD,
my_ts %>% RW,
my_ts %>% THETA,
my_ts %>% ETSNTNS,
my_ts %>% ETS,
my_ts %>% ETSNS,
my_ts %>% ETSD,
my_ts %>% STL,
my_ts %>% SN
)) %>%
inner_join(out_samp, by = c("series", "idx")) %>%
mutate(ae = abs(actual_value - pred_value)) %>%
group_by(series, ts) %>%
summarise(mae = mean(ae)) %>%
group_by(series) %>%
filter(mae == min(mae))
mins_df <- mins_df[!duplicated(mins_df$series), ]
classification_df <- mins_df %>% inner_join(features, by = "series")
return(classification_df)
}
# keep ts data
make_classification_df_with_ts <- function(dat, features){
# set up in and out of samples
in_samp <- dat[1:60, ]
out_samp <- dat[61:74, ] %>%
mutate(idx = row_number()) %>%
gather(key = series, value = actual_value, -idx)
# convert to ts and forecast next days
my_ts <- in_samp %>% convert_batch_data_to_ts()
mins_df <- bind_rows(list(
my_ts %>% WN,
my_ts %>% ARIMA,
my_ts %>% RWD,
my_ts %>% RW,
my_ts %>% THETA,
my_ts %>% ETSNTNS,
my_ts %>% ETS,
my_ts %>% ETSNS,
my_ts %>% ETSD,
my_ts %>% STL,
my_ts %>% SN
))
return(mins_df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.