#'@title Box cox transformation and standardization of the within day periodicity.
#'
#'@description A function that standardize the intra day periodicity on the scale of boxcox transformation.
#'
#'@details This function will together generate a set of motivation / diagnosis plots for the standardization.
#'Also, it will conduct long memory test on the TS before and after standardization.
#'
#'@param ts_vec a vector for the time series waiting to standardize the within day periodicity.
#'@param M an integer indicating the within day sampling frequency.
#'@param t the total number of days in the sample.
#'@param sec the intra sampling times in seconds.
#'@param N the number of the consecutive days the periodicity is defined on, default = 1.
#'@param quantity_label the name of the ts quantity, for the axis labe of the visualization.
#'@param sample_label the name of the sample, for the subtitle of the visualization.
#'@param saving_directory the name of the directory to be saved, control the file names.
#'@param lambda_length the search length of the boxcox exponent parameter over the range [0,1], default = 500.
#'
#'@return a matrix of 2 collumns,
#'one for the not standardized daily sum quantity,
#'one for the standardized daily sum quantity.
#'
#'@examples
#'
#'\dontrun{
#'
#'return_SPY <- readstata13::read.dta13("SPY 30 Sec Summary Return Data.dta")
#'t = length(unique(return_SPY$date))
#'M = table(return_SPY$date)[1]
#'volume_vec = return_SPY$volume
#'rm(return_SPY)
#'
#'#Intra day standardization
#'SherryChapter1::BoxCox_Periodicity_Standardization(ts_vec = volume_vec,
#' M = M,
#' t = t,
#' sec = 30,
#' N = 1,
#' quantity_label = "Volume",
#' sample_label = "SPY 30 sec",
#' saving_directory = "volume_standardized_day")
#'
#'#Intra week standardization
#'SherryChapter1::BoxCox_Periodicity_Standardization(ts_vec = volume_vec,
#' M = M,
#' t = t,
#' sec = 30,
#' N = 5,
#' quantity_label = "Volume",
#' sample_label = "SPY 30 sec",
#' saving_directory = "volume_standardized_week")
#'
#'}
#'
#'@import matrixStats
#'@import ggplot2
#'@import magrittr
#'@import reshape2
#'@import MASS
#'@import forecast
#'@import foreign
#'
#'@export
#'
#'
BoxCox_Periodicity_Standardization <- function(ts_vec,
M,
t,
sec,
N = 1,
quantity_label,
sample_label,
saving_directory,
lambda_length = 100){
stopifnot(is.vector(ts_vec))
stopifnot(M*t == length(ts_vec))
ts_M <- t(matrix(ts_vec,
nrow = t,
byrow = T))
#1. BoxCox transformation
lambda <- rep(NA,M)
for( i in seq_len(M) ) {
likelihoods <- MASS::boxcox(lm(ts_M[i,]+0.0001~1),plotit = F,lambda = seq(0, 1, length = lambda_length))
lambda[i] <- likelihoods$x[which.max(likelihoods$y)]
}
ts_M_boxcox <- ts_M^lambda
#2. Domonstrate the heteroschedasity and the U shaped curve.
#Demonstrate the U shaped curves.
plot_df <- data.frame(value = c(rowMeans(ts_M),
rowVars(ts_M)),
interval = rep(seq_len(M),4),
Stats = rep(c(paste0("Avg(", quantity_label,")"),
paste0("Var(", quantity_label,")")), each = M))
p1 <- ggplot(plot_df) + geom_line(aes(x = interval, y = value),colour = "dark blue") + facet_wrap(~Stats, scales = "free") + theme_classic() +
labs(x = paste0(sec, " sec interval"), title = paste0("Original Intraday ",quantity_label), subtitle = sample_label)
ggsave(paste0("Intraday_",quantity_label, "_Original.pdf"), p1, width = 5.5, height = 2.8)
plot_df <- data.frame(value = c(rowMeans(ts_M_boxcox),
rowVars(ts_M_boxcox)),
interval = rep(seq_len(M),4),
Stats = rep(c(paste0("Avg(", quantity_label, "^Lambda)"),
paste0("Var(", quantity_label, "^Lambda)")), each = M))
p2 <- ggplot(plot_df) + geom_line(aes(x = interval, y = value),colour = "dark blue") + facet_wrap(~Stats, scales = "free") + theme_classic() +
labs(x = paste0(sec, " sec interval"), title = paste0("Transformed Intraday ",quantity_label), subtitle = sample_label)
ggsave(paste0("Intraday_",quantity_label, "_BoxCox.pdf"), p2, width = 5.5, height = 2.8)
plot_df <- data.frame(Mean_stat = c(rowMeans(ts_M),
rowMeans(ts_M_boxcox)),
Var_stat = c(rowVars(ts_M),
rowVars(ts_M_boxcox)),
Transform = rep(rep(c("Original","BoxCox"),2), each = M))
plot_df$Transform <- factor(plot_df$Transform,levels = c("Original","BoxCox"))
p3 <- ggplot(plot_df,aes(x = Mean_stat, y = Var_stat)) +
geom_point(colour = "dark blue", size = 0.5) +
facet_wrap(~Transform, scales = "free", nrow = 1) +
theme_classic() +
geom_smooth() +
labs(x = paste0("Avg(", quantity_label, ")"),
y = paste0("Var(", quantity_label, ")"),
title = paste0("Heteroschedasticity of Intraday ",quantity_label), subtitle = sample_label) +
theme(axis.text.x = element_text(angle = 310,hjust = 0))
ggsave(paste0("MeanVar_",quantity_label, "_BoxCox.pdf"), p3, width = 5.5, height = 2.8)
#3. Demonstrate the boxcox transformation on 3 good time points
sampling_time <- round(seq(2,M-1,length.out = 3))
sampling_time_seconds <- sampling_time*sec
#Return the model matrix
ts_M_standard <- ((ts_M_boxcox-rowMeans(ts_M_boxcox))/rowSds(ts_M_boxcox))^(1/lambda)
ts_M_dep <- ((ts_M-rowMedians(ts_M))/rowMads(ts_M))
feature_standardized <- colSums(ts_M_standard,na.rm = TRUE)
ts_M_standard <- ((ts_M-rowMeans(ts_M))/rowSds(ts_M))
ts_M2_standard <- ((ts_M2-rowMeans(ts_M2))/rowSds(ts_M2))
ts_M_tmp = ts_M_standard
ts_M_tmp[ts_M_tmp <= quantile(as.vector(ts_M_tmp),.995)] <- NA
feature_dom <- colSums(ts_M_tmp,na.rm = TRUE)
ts_M_tmp2 = ts_M2_standard
ts_M_tmp2[ts_M_tmp2 <= quantile(as.vector(ts_M_tmp2),.985)] <- NA
feature_dom2 <- colSums(ts_M_tmp2,na.rm = TRUE)
summary(lm(Y~.,cbind(MM,
feature_dom, colSums(ts_M),
feature_dom2, colSums(ts_M2))))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.