#' @title Generate and save model matrices.
#' @description Builds model matrices from the realized-variance, volume, and
#' trade-number data and writes them to CSV files.
#'
#' @details For each of the three realized-variance files the function builds
#' a data frame holding the response \code{Y} (the \code{RV} column), a
#' \code{Monday} dummy variable derived from the \code{date} column, and the
#' 12 previous values of \code{RV} (columns \code{RV_p1} to \code{RV_p12},
#' padded with \code{NA} at the start).
#'
#' The results of \code{Decompose_Volume} and \code{Decompose_TradeNum},
#' computed from the data in \code{All_dta_dir}, are then column-bound to each
#' of these data frames. Both helpers are called while the working directory
#' is \code{save_dir}, so anything they write to a relative path ends up
#' there. The previous working directory is always restored when the function
#' exits, including on error.
#'
#' The Monday dummy is computed from the numeric weekday, so it does not
#' depend on the language of the session locale.
#'
#' @param All_dta_dir Path to the Stata dta file containing the raw data
#' (read with \code{read.dta13} and passed as \code{Return_data} to
#' \code{Decompose_Volume} and \code{Decompose_TradeNum}).
#' @param RV_raw_dta_dir Path to the dta file containing the realized variance
#' without periodicity standardization. Must provide the columns \code{RV}
#' and \code{date}.
#' @param RV_C_dta_dir Path to the dta file containing the realized variance
#' with constant periodicity standardization. Must provide the columns
#' \code{RV} and \code{date}.
#' @param RV_D_dta_dir Path to the dta file containing the realized variance
#' with daily periodicity standardization. Must provide the columns
#' \code{RV} and \code{date}.
#' @param lag_days The maximum lagging days of the acf plot, default = 10.
#' @param long_memory_tests Whether to perform long memory tests or not,
#' default = TRUE.
#' @param acf Whether to save acf plots or not, default = TRUE.
#' @param sec Seconds between intervals, default = 1800.
#' @param peak_param The thresholds for the peak value quantiles. The first
#' element is passed to \code{Decompose_Volume}, the second to
#' \code{Decompose_TradeNum}; a single value is used for both.
#' Default = c(0.995, 0.985).
#' @param sub_title The subtitle of the figures, default = "".
#' @param save_dir The directory to save the model matrices and the figures,
#' default = "unnamed". It is created if it does not exist.
#' @param ... Further arguments passed on to both \code{Decompose_Volume} and
#' \code{Decompose_TradeNum}.
#'
#' @return Called for its side effects: the three model matrices are written
#' to \code{MM_raw_RV.csv}, \code{MM_fC_RV.csv} and \code{MM_fD_RV.csv} inside
#' \code{save_dir}. The path of that output directory is returned invisibly.
#'
#' @examples
#'
#' \dontrun{
#'
#' SherryChapter1::Model_Matrix_generation(
#'   All_dta_dir = "MSFT 1800 Sec Summary Return Data.dta",
#'   RV_raw_dta_dir = "MSFT 1800 Sec my daily.dta",
#'   RV_C_dta_dir = "MSFTC 1800 Sec my daily.dta",
#'   RV_D_dta_dir = "MSFTD 1800 Sec my daily.dta",
#'   sec = 1800,
#'   sub_title = "MSFT",
#'   save_dir = "MSFT_1800",
#'   peak_param = c(0.995, 0.985)
#' )
#'
#' }
#'
#' @import matrixStats
#' @import readstata13
#'
#' @export
#'
Model_Matrix_generation <- function(
    All_dta_dir,
    RV_raw_dta_dir,
    RV_C_dta_dir,
    RV_D_dta_dir,
    lag_days = 10,
    long_memory_tests = TRUE,
    acf = TRUE,
    sec = 1800,
    peak_param = c(0.995, 0.985),
    sub_title = "",
    save_dir = "unnamed",
    ...) {
  # A single threshold is recycled so that both decompositions receive a
  # usable value instead of the second one silently becoming NA.
  peak_param <- rep_len(peak_param, 2L)

  # The list names become the suffixes of the output file names.
  volatility_lst <- list(
    raw_RV = read.dta13(RV_raw_dta_dir),
    fC_RV = read.dta13(RV_C_dta_dir),
    fD_RV = read.dta13(RV_D_dta_dir)
  )

  # Response, Monday dummy and lagged realized variance for one RV data set.
  build_rv_features <- function(x, lag = 12) {
    n_obs <- nrow(x)
    features <- data.frame(
      Y = x$RV,
      # wday runs 0-6 with Sunday = 0, so 1 is Monday. Unlike comparing
      # weekdays() with a day name, this does not depend on the locale.
      Monday = as.POSIXlt(as.POSIXct(x$date))$wday == 1L
    )
    for (i in seq_len(lag)) {
      # Shift RV down by i rows: pad with NA on top, then cut to n_obs rows.
      features[[paste0("RV_p", i)]] <- c(rep(NA, i), x$RV)[seq_len(n_obs)]
    }
    features
  }
  rv_feature_list <- lapply(volatility_lst, build_rv_features)

  # Read the raw data before changing directory so that a relative
  # All_dta_dir is still resolved against the caller's working directory.
  Return_data <- read.dta13(All_dta_dir)

  if (!dir.exists(save_dir)) {
    dir.create(save_dir, recursive = TRUE)
  }
  # The helpers and write.csv below use relative paths, so work inside
  # save_dir and guarantee the caller's directory is restored on any exit.
  previous_dir <- setwd(save_dir)
  on.exit(setwd(previous_dir), add = TRUE)

  Feature_V <- Decompose_Volume(
    Return_data = Return_data,
    lag_days = lag_days,
    long_memory_tests = long_memory_tests,
    acf = acf,
    sec = sec,
    peak_param = peak_param[1],
    sub_title = sub_title,
    ...
  )
  Feature_TN <- Decompose_TradeNum(
    Return_data = Return_data,
    lag_days = lag_days,
    long_memory_tests = long_memory_tests,
    acf = acf,
    sec = sec,
    peak_param = peak_param[2],
    sub_title = sub_title,
    ...
  )
  rm(Return_data)

  feature_list <- lapply(
    rv_feature_list,
    function(x) cbind(x, Feature_V, Feature_TN)
  )
  for (i in names(feature_list)) {
    write.csv(feature_list[[i]], paste0("MM_", i, ".csv"))
  }

  # Same value the original returned from its final setwd() call: the
  # output directory, evaluated before on.exit() switches back.
  invisible(getwd())
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.