R/make_ts_features.R

#' Generate time series features for non-simulated data
#'
#' @param tibble as tibble of data were the first column is a date and each other column is a time series
#' @param date_column_name string for the name of the date column
#' @param frequency seasonal period of the time series to be generated.
#' @return a set of time series features
#' @examples
#' small <- select(dat, 1:5)
#' small %>% make_ts_features %>% str
make_ts_features <- function(tibble, date_column_name = "days_date", frequency = 7) {
  # a function which adds ~30 time series characteristics
  build_features <- function(ts){
    bind_cols(
      tsfeatures(ts,features = ,c("acf_features","entropy","lumpiness","flat_spots","crossing_points")),
      tsfeatures(ts,"stl_features", s.window='periodic', robust=TRUE),
      tsfeatures(ts, "max_kl_shift", width=14),
      tsfeatures(ts,c("mean","var"), scale=FALSE, na.rm=TRUE),
      tsfeatures(ts,c("max_level_shift","max_var_shift"), trim=TRUE)
    )
  }

  # prep tibble to be passed to build_features
  tibble %>%
    select(-date_column_name) %>%
    # convert to a time series
    map(~ts(.,frequency = frequency)) %>%
    build_features(.)
}

#' Generate time series features from time series generated from random parameter spaces of the mixture autoregressive (MAR) models.
#'
#' @param n.ts number of time series to be generated.
#' @param freq seasonal period of the time series to be generated.
#' @param nComp number of mixing components when simulating time series using MAR models.
#' @param n.ts length of the generated time series.
#' @param n number of time series to be generated.
#' @return a set of time series features
#' @examples
#' sim_mar <- generate_feature_space()
generate_feature_space <- function(n.ts = 100, freq = 7, nComp = NULL, n = 60){

  # makes ts features for simulated data

  tsgeneration::generate_ts(n.ts = n.ts, freq = freq, nComp = nComp, n = n) %>%
    as_data_frame() %>%
    slice(1) %>%
    unnest()
}

make_ts_features_sim <- function(tibble, date_column_name = "days_date", frequency = 7) {
  # a function which adds ~30 time series characteristics
  build_features <- function(ts){
    bind_cols(
      tsfeatures(ts,features = ,c("acf_features","entropy","lumpiness","flat_spots","crossing_points")),
      tsfeatures(ts,"stl_features", s.window='periodic', robust=TRUE),
      tsfeatures(ts, "max_kl_shift", width=14),
      tsfeatures(ts,c("mean","var"), scale=FALSE, na.rm=TRUE),
      tsfeatures(ts,c("max_level_shift","max_var_shift"), trim=TRUE)
    )

  }

  df_names <- tibble %>% names() %>% data_frame("series" = .)
  # prep tibble to be passed to build_features
  tibble %>%
    # select(-date_column_name) %>%
    # convert to a time series
    map(~ts(.,frequency = frequency)) %>%
    build_features(.) %>%
    bind_cols(df_names,.)
}

convert_batch_data_to_ts <- function(tibble, frequency = 7){
    tibble %>%
    # select(-date_column_name) %>%
    # convert to a time series
    map(~ts(.,frequency = frequency))
}

## forecast function for batch forecasting
# 1.  WN:      white noise
# 3.  ARIMA:   arima
# 4.  RWD:     random walk with drift
# 5.  RW:      random walk
# 6.  THETA:   theta
# 7.  ETSNTNS: ets without trend and seasonal components
# 8.  ETS:     ets with trend and seasonal component
# 9.  ETSNS:   ets with trend component and no seasonal component
# 10. ETSD:    ets with damped trend component and no seasonal
# 11. SARIMA:  sarima
# 12. SN:      seasonal naive


## forecast function for batch forecasting
#------------------------------------------
# 1.  WN:      white noise
WN <- function(ts, h = 14){
  ts %>%
    map_dfc(~forecast::meanf(., h = h)$mean) %>%
    mutate(ts = "WN", idx = row_number()) %>%
    gather(key = series, value = pred_value, -ts, -idx)
}
# 3.  ARIMA:   arima
ARIMA <- function(ts, h = 14){
  ts %>%
    map(~forecast::auto.arima(., seasonal = FALSE)) %>%
    map_dfc(~forecast::forecast(.,h=h)$mean) %>%
    mutate(ts = "ARIMA", idx = row_number()) %>%
    gather(key = series, value = pred_value, - ts, -idx)
}
# 4.  RWD:     random walk with drift
RWD <- function(ts, h = 14){
  ts %>%
  map_dfc(~forecast::rwf(., drift = TRUE, h = h)$mean) %>%
    mutate(ts = "RWD", idx = row_number()) %>%
    gather(key = series, value = pred_value, - ts, -idx)
}
# 5.  RW:      random walk
RW <- function(ts, h = 14){
  ts %>%
    map_dfc(~forecast::rwf(., drift = FALSE, h = h)$mean) %>%
    mutate(ts = "RW", idx = row_number()) %>%
    gather(key = series, value = pred_value, - ts, -idx)
}
# 6.  THETA:   theta
THETA <- function(ts, h = 14){
  ts %>%
    map(~forecast::thetaf(.)) %>%
    map_dfc(~forecast::forecast(.,h=h)$mean) %>%
    mutate(ts = "THETA", idx = row_number()) %>%
    gather(key = series, value = pred_value, - ts, -idx)
}
# 7.  ETSNTNS: ets without trend and seasonal components
ETSNTNS <- function(ts, h = 14){
  ts %>%
  map(~forecast::ets(., model = "ZNN", damped = FALSE)) %>%
  map_dfc(~forecast::forecast(.,h=h)$mean) %>%
    mutate(ts = "ETSNTNS", idx = row_number()) %>%
    gather(key = series, value = pred_value, - ts, -idx)
}
# 8.  ETS:     ets with trend and seasonal component
ETS <- function(ts, h = 14){
  ts %>%
    map(~forecast::ets(., model = "ZZZ", damped = NULL)) %>%
    map_dfc(~forecast::forecast(.,h=h)$mean) %>%
    mutate(ts = "ETS", idx = row_number()) %>%
    gather(key = series, value = pred_value, - ts, -idx)
}
# 9.  ETSNS:   ets with trend component and no seasonal component
ETSNS <- function(ts, h = 14){
  ts %>%
    map(~forecast::ets(., model = "ZZN", damped = FALSE)) %>%
    map_dfc(~forecast::forecast(.,h=h)$mean) %>%
    mutate(ts = "ETSNS", idx = row_number()) %>%
    gather(key = series, value = pred_value, - ts, -idx)
}
# 10. ETSD:    ets with damped trend component and no seasonal
ETSD <- function(ts, h = 14){
  ts %>%
    map(~forecast::ets(., model = "ZZN", damped = TRUE)) %>%
    map_dfc(~forecast::forecast(.,h=h)$mean) %>%
    mutate(ts = "ETSD", idx = row_number()) %>%
    gather(key = series, value = pred_value, - ts, -idx)
}
# 11. STL:    stl
STL <- function(ts, h = 14){
  ts %>%
    map(~stl(., s.window = "periodic")) %>%
    map_dfc(~forecast::forecast(.,h=h)$mean) %>%
    mutate(ts = "STL", idx = row_number()) %>%
    gather(key = series, value = pred_value, - ts, -idx)
}
# 12. SN:      seasonal naive
SN <- function(ts, h = 14){
  ts %>%
    map_dfc(~forecast::snaive(., h = h)$mean) %>%
    mutate(ts = "SN", idx = row_number()) %>%
    gather(key = series, value = pred_value, - ts, -idx)
}
# 13. Prophet?
#------------------------------------------

make_classification_df <- function(dat, features){

  # set up in and out of samples
  in_samp <- dat[1:60, ]
  out_samp <- dat[61:74, ] %>%
    mutate(idx = row_number()) %>%
    gather(key = series, value = actual_value, -idx)

  # convert to ts and forecast next days
  my_ts <- in_samp %>% convert_batch_data_to_ts()
  mins_df <- bind_rows(list(
    my_ts %>% WN,
    my_ts %>% ARIMA,
    my_ts %>% RWD,
    my_ts %>% RW,
    my_ts %>% THETA,
    my_ts %>% ETSNTNS,
    my_ts %>% ETS,
    my_ts %>% ETSNS,
    my_ts %>% ETSD,
    my_ts %>% STL,
    my_ts %>% SN
  )) %>%
    inner_join(out_samp, by = c("series", "idx")) %>%
    mutate(ae = abs(actual_value - pred_value)) %>%
    group_by(series, ts) %>%
    summarise(mae = mean(ae)) %>%
    group_by(series) %>%
    filter(mae == min(mae))

  mins_df <- mins_df[!duplicated(mins_df$series), ]
  classification_df <- mins_df %>% inner_join(features, by = "series")
  return(classification_df)

}
# keep ts data
make_classification_df_with_ts <- function(dat, features){

  # set up in and out of samples
  in_samp <- dat[1:60, ]
  out_samp <- dat[61:74, ] %>%
    mutate(idx = row_number()) %>%
    gather(key = series, value = actual_value, -idx)

  # convert to ts and forecast next days
  my_ts <- in_samp %>% convert_batch_data_to_ts()
  mins_df <- bind_rows(list(
    my_ts %>% WN,
    my_ts %>% ARIMA,
    my_ts %>% RWD,
    my_ts %>% RW,
    my_ts %>% THETA,
    my_ts %>% ETSNTNS,
    my_ts %>% ETS,
    my_ts %>% ETSNS,
    my_ts %>% ETSD,
    my_ts %>% STL,
    my_ts %>% SN
  )) 

  return(mins_df)

}
alexhallam/tsMetaLearnWrap documentation built on May 31, 2019, 12:44 a.m.