R/BoxCox_Periodicity_Standardization.R

Defines functions BoxCox_Periodicity_Standardization

Documented in BoxCox_Periodicity_Standardization

#'@title Box cox transformation and standardization of the within day periodicity.
#'
#'@description A function that standardize the intra day periodicity on the scale of boxcox transformation.
#'
#'@details This function will together generate a set of motivation / diagnosis plots for the standardization.
#'Also, it will conduct long memory test on the TS before and after standardization.
#'
#'@param ts_vec a vector for the time series waiting to standardize the within day periodicity.
#'@param M an integer indicating the within day sampling frequency.
#'@param t the total number of days in the sample.
#'@param sec the intra sampling times in seconds.
#'@param N the number of the consecutive days the periodicity is defined on, default = 1.
#'@param quantity_label the name of the ts quantity, for the axis labe of the visualization.
#'@param sample_label the name of the sample, for the subtitle of the visualization.
#'@param saving_directory the name of the directory to be saved, control the file names.
#'@param lambda_length the search length of the boxcox exponent parameter over the range [0,1], default = 500.
#'
#'@return a matrix of 2 collumns,
#'one for the not standardized daily sum quantity,
#'one for the standardized daily sum quantity.
#'
#'@examples
#'
#'\dontrun{
#'
#'return_SPY <- readstata13::read.dta13("SPY 30 Sec Summary Return Data.dta")
#'t = length(unique(return_SPY$date))
#'M = table(return_SPY$date)[1]
#'volume_vec = return_SPY$volume
#'rm(return_SPY)
#'
#'#Intra day standardization
#'SherryChapter1::BoxCox_Periodicity_Standardization(ts_vec = volume_vec,
#'                                                   M = M,
#'                                                   t = t,
#'                                                   sec = 30,
#'                                                   N = 1,
#'                                                   quantity_label = "Volume",
#'                                                   sample_label = "SPY 30 sec",
#'                                                   saving_directory = "volume_standardized_day")
#'
#'#Intra week standardization
#'SherryChapter1::BoxCox_Periodicity_Standardization(ts_vec = volume_vec,
#'                                                   M = M,
#'                                                   t = t,
#'                                                   sec = 30,
#'                                                   N = 5,
#'                                                   quantity_label = "Volume",
#'                                                   sample_label = "SPY 30 sec",
#'                                                   saving_directory = "volume_standardized_week")
#'
#'}
#'
#'@import matrixStats
#'@import ggplot2
#'@import magrittr
#'@import reshape2
#'@import MASS
#'@import forecast
#'@import foreign
#'
#'@export
#'
#'
BoxCox_Periodicity_Standardization <- function(ts_vec,
                                               M,
                                               t,
                                               sec,
                                               N = 1,
                                               quantity_label,
                                               sample_label,
                                               saving_directory,
                                               lambda_length = 100){

  stopifnot(is.vector(ts_vec))
  stopifnot(M*t == length(ts_vec))

  ts_M <- t(matrix(ts_vec,
                   nrow = t,
                   byrow = T))

  #1. BoxCox transformation
  lambda <- rep(NA,M)
  for( i in seq_len(M) ) {
    likelihoods <- MASS::boxcox(lm(ts_M[i,]+0.0001~1),plotit = F,lambda = seq(0, 1, length = lambda_length))
    lambda[i] <- likelihoods$x[which.max(likelihoods$y)]
  }

  ts_M_boxcox <- ts_M^lambda

  #2. Domonstrate the heteroschedasity and the U shaped curve.

  #Demonstrate the U shaped curves.


  plot_df <- data.frame(value = c(rowMeans(ts_M),
                                  rowVars(ts_M)),
                        interval = rep(seq_len(M),4),
                        Stats =  rep(c(paste0("Avg(", quantity_label,")"),
                                       paste0("Var(", quantity_label,")")), each = M))
  p1 <- ggplot(plot_df) + geom_line(aes(x = interval, y = value),colour = "dark blue") + facet_wrap(~Stats, scales = "free") + theme_classic() +
    labs(x = paste0(sec, " sec interval"), title = paste0("Original Intraday ",quantity_label), subtitle = sample_label)

  ggsave(paste0("Intraday_",quantity_label, "_Original.pdf"), p1, width = 5.5, height = 2.8)

  plot_df <- data.frame(value = c(rowMeans(ts_M_boxcox),
                                  rowVars(ts_M_boxcox)),
                        interval = rep(seq_len(M),4),
                        Stats =  rep(c(paste0("Avg(", quantity_label, "^Lambda)"),
                                       paste0("Var(", quantity_label, "^Lambda)")), each = M))
  p2 <- ggplot(plot_df) + geom_line(aes(x = interval, y = value),colour = "dark blue") + facet_wrap(~Stats, scales = "free") + theme_classic() +
    labs(x = paste0(sec, " sec interval"), title = paste0("Transformed Intraday ",quantity_label), subtitle = sample_label)

  ggsave(paste0("Intraday_",quantity_label, "_BoxCox.pdf"), p2, width = 5.5, height = 2.8)


  plot_df <- data.frame(Mean_stat = c(rowMeans(ts_M),
                                      rowMeans(ts_M_boxcox)),
                        Var_stat = c(rowVars(ts_M),
                                     rowVars(ts_M_boxcox)),
                        Transform =  rep(rep(c("Original","BoxCox"),2), each = M))

  plot_df$Transform <- factor(plot_df$Transform,levels = c("Original","BoxCox"))

  p3 <- ggplot(plot_df,aes(x = Mean_stat, y = Var_stat)) +
    geom_point(colour = "dark blue", size = 0.5) +
    facet_wrap(~Transform, scales = "free", nrow = 1) +
    theme_classic() +
    geom_smooth() +
    labs(x = paste0("Avg(", quantity_label, ")"),
         y = paste0("Var(", quantity_label, ")"),
         title = paste0("Heteroschedasticity of Intraday ",quantity_label), subtitle = sample_label) +
    theme(axis.text.x = element_text(angle = 310,hjust = 0))

  ggsave(paste0("MeanVar_",quantity_label, "_BoxCox.pdf"), p3, width = 5.5, height = 2.8)

  #3. Demonstrate the boxcox transformation on 3 good time points
  sampling_time <- round(seq(2,M-1,length.out = 3))
  sampling_time_seconds <- sampling_time*sec

  #Return the model matrix

  ts_M_standard <- ((ts_M_boxcox-rowMeans(ts_M_boxcox))/rowSds(ts_M_boxcox))^(1/lambda)

  ts_M_dep <- ((ts_M-rowMedians(ts_M))/rowMads(ts_M))


  feature_standardized <- colSums(ts_M_standard,na.rm = TRUE)

  ts_M_standard <- ((ts_M-rowMeans(ts_M))/rowSds(ts_M))
  ts_M2_standard <- ((ts_M2-rowMeans(ts_M2))/rowSds(ts_M2))

  ts_M_tmp = ts_M_standard
  ts_M_tmp[ts_M_tmp <= quantile(as.vector(ts_M_tmp),.995)] <- NA
  feature_dom <- colSums(ts_M_tmp,na.rm = TRUE)

  ts_M_tmp2 = ts_M2_standard
  ts_M_tmp2[ts_M_tmp2 <= quantile(as.vector(ts_M_tmp2),.985)] <- NA
  feature_dom2 <- colSums(ts_M_tmp2,na.rm = TRUE)


  summary(lm(Y~.,cbind(MM,
                       feature_dom, colSums(ts_M),
                       feature_dom2, colSums(ts_M2))))

}
ZhenWei10/Sherry-Chapter1 documentation built on Oct. 31, 2019, 1:48 a.m.