R/Model_Matrix_generation.R

Defines functions Model_Matrix_generation

Documented in Model_Matrix_generation

#'@title Generate and save model matrices.
#'@description Builds model matrices from realized variance, volume, and trade number data and writes them to CSV files.
#'
#'@details For each of the three realized variance files (raw, constant periodicity standardization, daily periodicity standardization) a feature table is built containing:
#'
#'\itemize{
#'  \item \code{Y}: the \code{RV} column of the file;
#'  \item \code{Monday}: a logical dummy that is \code{TRUE} when the \code{date} column falls on a Monday;
#'  \item \code{RV_p1}, ..., \code{RV_p12}: the \code{RV} column lagged by 1 to 12 rows (leading values are \code{NA}).
#'}
#'
#'The features returned by \code{Decompose_Volume} and \code{Decompose_TradeNum} are column-bound to each of these tables.
#'
#'The working directory is switched to \code{save_dir} while the decomposition functions run and the CSV files are written, and it is restored when the function exits, even if an error occurs.
#'
#'@param All_dta_dir Path of the dta file (read with \code{read.dta13}) that is passed as \code{Return_data} to \code{Decompose_Volume} and \code{Decompose_TradeNum}.
#'@param RV_raw_dta_dir Path of the dta file containing the realized variance without periodicity standardization. Must provide the columns \code{RV} and \code{date}.
#'@param RV_C_dta_dir Path of the dta file containing the realized variance with constant periodicity standardization. Must provide the columns \code{RV} and \code{date}.
#'@param RV_D_dta_dir Path of the dta file containing the realized variance with daily periodicity standardization. Must provide the columns \code{RV} and \code{date}.
#'@param lag_days The maximum lagging days of acf plot, default = 10. Forwarded to the decomposition functions.
#'@param long_memory_tests Whether to perform long memory tests or not, default = TRUE. Forwarded to the decomposition functions.
#'@param acf Whether to save acf plots or not, default = TRUE. Forwarded to the decomposition functions.
#'@param sec Seconds between intervals, default = 1800. Forwarded to the decomposition functions.
#'@param peak_param The thresholds for peak value quantiles, default = c(0.995, 0.985). The first element is passed to \code{Decompose_Volume} and the second to \code{Decompose_TradeNum}; a single value is used for both.
#'@param sub_title The subtitle of the figures, default = "". Forwarded to the decomposition functions.
#'@param save_dir The directory to save the model matrix and the figures, default = "unnamed". It is created if it does not exist.
#'@param ... Further arguments forwarded to both \code{Decompose_Volume} and \code{Decompose_TradeNum}.
#'
#'@return Three model matrices are saved under \code{save_dir} as \code{MM_raw_RV.csv}, \code{MM_fC_RV.csv} and \code{MM_fD_RV.csv}. The path of the output directory is returned invisibly.
#'
#'@examples
#'
#'\dontrun{
#'
#'SherryChapter1::Model_Matrix_generation(
#'All_dta_dir = "MSFT 1800 Sec Summary Return Data.dta",
#'RV_raw_dta_dir = "MSFT 1800 Sec my daily.dta",
#'RV_C_dta_dir = "MSFTC 1800 Sec my daily.dta",
#'RV_D_dta_dir = "MSFTD 1800 Sec my daily.dta",
#'sec = 1800,
#'sub_title = "MSFT",
#'save_dir = "MSFT_1800",
#'peak_param = c(0.995,0.985)
#')
#'
#'}
#'
#'@import matrixStats
#'@import readstata13
#'
#'@export
#'
Model_Matrix_generation <- function(
  All_dta_dir,
  RV_raw_dta_dir,
  RV_C_dta_dir,
  RV_D_dta_dir,
  lag_days = 10,
  long_memory_tests = TRUE,
  acf = TRUE,
  sec = 1800,
  peak_param = c(0.995,0.985),
  sub_title = "",
  save_dir = "unnamed",
  ...
){

  # A scalar threshold is reused for both decompositions; without this the
  # second element would be NA. Length-2 input is left unchanged.
  peak_param <- rep_len(peak_param, 2L)

  # The list names become the suffixes of the output file names.
  volatility_lst <- list(
    raw_RV = read.dta13(RV_raw_dta_dir),
    fC_RV = read.dta13(RV_C_dta_dir),
    fD_RV = read.dta13(RV_D_dta_dir)
  )

  # Build the response, the Monday dummy, and the lagged RV columns for one
  # realized variance table.
  build_return_features <- function(x, lag = 12) {
    n_rows <- nrow(x)
    features <- data.frame(
      Y = x$RV,
      # wday is numeric (0 = Sunday, 1 = Monday), so the dummy does not depend
      # on the session locale the way a comparison with weekdays() names does.
      Monday = as.POSIXlt(as.POSIXct(x$date))$wday == 1L
    )
    for (i in seq_len(lag)) {
      # Shift RV down by i rows, padding the top with NA and truncating the
      # tail so the column keeps n_rows entries.
      features[[paste0("RV_p", i)]] <- c(rep(NA, i), x$RV)[seq_len(n_rows)]
    }
    features
  }

  return_feature_M <- lapply(volatility_lst, build_return_features)

  # Generate the feature matrix unique for this study.
  # The input is read before the working directory changes so that relative
  # input paths are resolved against the caller's directory.
  Return_data <- read.dta13(All_dta_dir)

  if (!dir.exists(save_dir)) {
    dir.create(save_dir, recursive = TRUE)
  }

  # Everything below runs inside save_dir; on.exit guarantees the caller's
  # working directory is restored even when a step fails.
  previous_dir <- setwd(save_dir)
  on.exit(setwd(previous_dir), add = TRUE)
  output_dir <- getwd()

  Feature_V <- Decompose_Volume(Return_data = Return_data,
                                lag_days = lag_days,
                                long_memory_tests = long_memory_tests,
                                acf = acf,
                                sec = sec,
                                peak_param = peak_param[1],
                                sub_title = sub_title,
                                ...)

  Feature_TN <- Decompose_TradeNum(Return_data = Return_data,
                                   lag_days = lag_days,
                                   long_memory_tests = long_memory_tests,
                                   acf = acf,
                                   sec = sec,
                                   peak_param = peak_param[2],
                                   sub_title = sub_title,
                                   ...)

  rm(Return_data)

  feature_list <- lapply(
    return_feature_M,
    function(x) cbind(x, Feature_V, Feature_TN)
  )

  for (i in names(feature_list)) {
    write.csv(feature_list[[i]], paste0("MM_", i, ".csv"))
  }

  # Same value the trailing setwd() call used to return invisibly.
  invisible(output_dir)
}
ZhenWei10/Sherry-Chapter1 documentation built on Oct. 31, 2019, 1:48 a.m.