# Chunk default for the knitted document: echo the source of every chunk.
knitr::opts_chunk$set(echo = TRUE)
# Figure: R squared of the default model, by RV filter ----
# Stacks the linear-model tables ("*_OLR_adj_R2.csv") and the random-forest
# tables ("*_RF_cv_R2.csv"), one CSV per stock and sampling interval, keeps the
# rows whose predictor set is "add_V", and saves a dodged bar chart faceted by
# regressor and stock to "R_squared_RV_filter.pdf".
library(ggplot2)
library(reshape2)

intervals <- c(5, 30, 60, 300, 1800)
stocks <- c("MSFT", "SPY", "XRX")

# Read every "<stock>_<interval><suffix>" file in `dir`, melt each table to
# long format (id column "X"), tag it with its stock and interval, and stack
# the pieces (interval-major, stock-minor, as the original nested loops did).
read_r2_long <- function(dir, suffix, stocks, intervals) {
  grid <- expand.grid(
    stock = stocks,
    time_interval = intervals,
    stringsAsFactors = FALSE
  )
  pieces <- lapply(seq_len(nrow(grid)), function(k) {
    stock <- grid$stock[k]
    interval <- grid$time_interval[k]
    wide <- read.csv(file.path(dir, paste0(stock, "_", interval, suffix)))
    long <- melt(wide, id = "X")
    long$variable <- as.character(long$variable)
    long$stock <- stock
    long$time_interval <- interval
    long
  })
  # One rbind over the list instead of growing a data frame inside the loop
  # from an all-NA seed row that then has to be dropped again.
  do.call(rbind, pieces)
}

r2_columns <- c("predictors", "RV_Filter", "Rsquared", "stock", "time_interval")

plot_df <- read_r2_long("OLR_R2_all", "_OLR_adj_R2.csv", stocks, intervals)
colnames(plot_df) <- r2_columns
plot_df$regressor <- "Linear regression"

plot_df2 <- read_r2_long(
  "RandomForest_R2_all", "_RF_cv_R2.csv", stocks, intervals
)
colnames(plot_df2) <- r2_columns
plot_df2$regressor <- "Random Forest regression"

plot_df <- rbind(plot_df, plot_df2)

# Discrete x axis in ascending interval order.
plot_df$time_interval <- factor(plot_df$time_interval, levels = intervals)

# NOTE(review): RV_fD is labelled "constant" here, while the later figures in
# this file label the RV_fD pairing "by week" -- confirm which is intended.
plot_df$RV_Filter <- factor(
  plot_df$RV_Filter,
  levels = c("RV_raw", "RV_fD", "RV_fC"),
  labels = c("no filter", "constant", "day")
)

p1 <- ggplot(
  plot_df[plot_df$predictors == "add_V", ],
  aes(x = time_interval, y = Rsquared)
) +
  geom_bar(
    aes(fill = RV_Filter),
    position = "dodge", width = 0.7, color = 1, stat = "identity", size = .3
  ) +
  facet_grid(regressor ~ stock, scales = "free") +
  theme_bw() +
  labs(
    x = "Sampling Interval (Secs)",
    y = "R Squared",
    title = "R Squared of Default Model"
  )

ggsave("R_squared_RV_filter.pdf", p1, width = 7.5, height = 4.3)
# Figure: increase in adjusted R squared of the linear model ----
# Reads the "*_OLR_increase_in_R2.csv" tables (one per stock and sampling
# interval), keeps the rows where predictors and response carry the matching
# filter, and saves a dodged bar chart faceted by stock and interval to
# "R_squared_increase_OLS.pdf".
library(ggplot2) # attached here so the chunk does not rely on an earlier one
library(reshape2)

intervals <- c(5, 30, 60, 300, 1800)
stocks <- c("MSFT", "SPY", "XRX")

# Read every "<stock>_<interval><suffix>" file in `dir`, melt each table to
# long format (id column "X"), tag it with its stock and interval, and stack
# the pieces (interval-major, stock-minor, as the original nested loops did).
read_r2_long <- function(dir, suffix, stocks, intervals) {
  grid <- expand.grid(
    stock = stocks,
    time_interval = intervals,
    stringsAsFactors = FALSE
  )
  pieces <- lapply(seq_len(nrow(grid)), function(k) {
    stock <- grid$stock[k]
    interval <- grid$time_interval[k]
    wide <- read.csv(file.path(dir, paste0(stock, "_", interval, suffix)))
    long <- melt(wide, id = "X")
    long$variable <- as.character(long$variable)
    long$stock <- stock
    long$time_interval <- interval
    long
  })
  # One rbind over the list instead of growing a data frame inside the loop
  # from an all-NA seed row that then has to be dropped again.
  do.call(rbind, pieces)
}

plot_df <- read_r2_long(
  "OLR_R2_all", "_OLR_increase_in_R2.csv", stocks, intervals
)
colnames(plot_df) <- c(
  "predictors", "response", "Rsquared_change", "stock", "time_interval"
)

# Facet labels such as "5 Sec", kept in ascending interval order.
plot_df$time_interval <- factor(
  paste(plot_df$time_interval, "Sec"),
  levels = paste(intervals, "Sec")
)

# Filter applied to the predictors, read off the predictor name; a name that
# contains both "fd" and "fw" ends up as "Pred_fw" (assigned last).
plot_df$predictors_filter <- "Pred_raw"
plot_df$predictors_filter[grepl("fd", plot_df$predictors)] <- "Pred_fd"
plot_df$predictors_filter[grepl("fw", plot_df$predictors)] <- "Pred_fw"

# Predictor set with the filter tag stripped.
# NOTE(review): the labels are assigned positionally to the sorted levels, so
# this assumes exactly three predictor sets that sort as trade number,
# volume, both -- confirm against the CSV contents.
plot_df$predictors_add <- factor(gsub("fd|fw", "", plot_df$predictors))
levels(plot_df$predictors_add) <- c("+ Trade Number", "+ Volume", "+ Both")

# Keep only rows where predictors and response use the paired filter.
is_raw <- plot_df$predictors_filter == "Pred_raw" & plot_df$response == "RV_raw"
is_day <- plot_df$predictors_filter == "Pred_fd" & plot_df$response == "RV_fC"
is_week <- plot_df$predictors_filter == "Pred_fw" & plot_df$response == "RV_fD"

plot_df$Filter <- "no filter"
plot_df$Filter[is_week] <- "by week"
plot_df$Filter[is_day] <- "by day"
plot_df <- plot_df[is_raw | is_day | is_week, ]
plot_df$Filter <- factor(
  plot_df$Filter,
  levels = c("no filter", "by day", "by week")
)

p2 <- ggplot(plot_df, aes(x = predictors_add, y = Rsquared_change)) +
  geom_bar(
    aes(fill = Filter),
    position = "dodge", width = 0.7, color = 0, stat = "identity", size = 1
  ) +
  scale_fill_brewer(palette = "Paired") +
  facet_grid(stock ~ time_interval, scales = "fixed") +
  theme_bw() +
  labs(
    x = "New Predictors added",
    y = "Increase on adj R Squared",
    title = "Increase on Adjusted R Squared of OLR"
  ) +
  theme(axis.text.x = element_text(angle = 310, hjust = 0))

ggsave("R_squared_increase_OLS.pdf", p2, width = 7.5, height = 4.7)
# Figure: increase in cross-validated R squared of the random forest ----
# Reads the "*_RF_increase_in_R2.csv" tables (one per stock and sampling
# interval), keeps the rows where predictors and response carry the matching
# filter, and saves a dodged bar chart faceted by stock and interval to
# "R_squared_increase_RF.pdf".
library(ggplot2) # attached here so the chunk does not rely on an earlier one
library(reshape2)

intervals <- c(5, 30, 60, 300, 1800)
stocks <- c("MSFT", "SPY", "XRX")

# Read every "<stock>_<interval><suffix>" file in `dir`, melt each table to
# long format (id column "X"), tag it with its stock and interval, and stack
# the pieces (interval-major, stock-minor, as the original nested loops did).
read_r2_long <- function(dir, suffix, stocks, intervals) {
  grid <- expand.grid(
    stock = stocks,
    time_interval = intervals,
    stringsAsFactors = FALSE
  )
  pieces <- lapply(seq_len(nrow(grid)), function(k) {
    stock <- grid$stock[k]
    interval <- grid$time_interval[k]
    wide <- read.csv(file.path(dir, paste0(stock, "_", interval, suffix)))
    long <- melt(wide, id = "X")
    long$variable <- as.character(long$variable)
    long$stock <- stock
    long$time_interval <- interval
    long
  })
  # One rbind over the list instead of growing a data frame inside the loop
  # from an all-NA seed row that then has to be dropped again.
  do.call(rbind, pieces)
}

plot_df <- read_r2_long(
  "RandomForest_R2_all", "_RF_increase_in_R2.csv", stocks, intervals
)
colnames(plot_df) <- c(
  "predictors", "response", "Rsquared_change", "stock", "time_interval"
)

# Facet labels such as "5 Sec", kept in ascending interval order.
plot_df$time_interval <- factor(
  paste(plot_df$time_interval, "Sec"),
  levels = paste(intervals, "Sec")
)

# Filter applied to the predictors, read off the predictor name; a name that
# contains both "fd" and "fw" ends up as "Pred_fw" (assigned last).
plot_df$predictors_filter <- "Pred_raw"
plot_df$predictors_filter[grepl("fd", plot_df$predictors)] <- "Pred_fd"
plot_df$predictors_filter[grepl("fw", plot_df$predictors)] <- "Pred_fw"

# Predictor set with the filter tag stripped.
# NOTE(review): the labels are assigned positionally to the sorted levels, so
# this assumes exactly three predictor sets that sort as trade number,
# volume, both -- confirm against the CSV contents.
plot_df$predictors_add <- factor(gsub("fd|fw", "", plot_df$predictors))
levels(plot_df$predictors_add) <- c("+ Trade Number", "+ Volume", "+ Both")

# Keep only rows where predictors and response use the paired filter.
is_raw <- plot_df$predictors_filter == "Pred_raw" & plot_df$response == "RV_raw"
is_day <- plot_df$predictors_filter == "Pred_fd" & plot_df$response == "RV_fC"
is_week <- plot_df$predictors_filter == "Pred_fw" & plot_df$response == "RV_fD"

plot_df$Filter <- "no filter"
plot_df$Filter[is_week] <- "by week"
plot_df$Filter[is_day] <- "by day"
plot_df <- plot_df[is_raw | is_day | is_week, ]
plot_df$Filter <- factor(
  plot_df$Filter,
  levels = c("no filter", "by day", "by week")
)

p3 <- ggplot(plot_df, aes(x = predictors_add, y = Rsquared_change)) +
  geom_bar(
    aes(fill = Filter),
    position = "dodge", width = 0.7, color = 0, stat = "identity", size = 1
  ) +
  scale_fill_brewer(palette = "Paired") +
  facet_grid(stock ~ time_interval, scales = "fixed") +
  theme_bw() +
  labs(
    x = "New Predictors added",
    y = "Increase on CV R Squared",
    title = "Increase on R Squared of Random Forest with 30 folds CV"
  ) +
  theme(axis.text.x = element_text(angle = 310, hjust = 0))

ggsave("R_squared_increase_RF.pdf", p3, width = 7.5, height = 4.7)
# Figure: increase in adjusted R squared of the linear model, RV_fD only ----
# Reads the "*_OLR_increase_in_R2.csv" tables (one per stock and sampling
# interval), keeps only the rows whose response is "RV_fD", colours the bars by
# the filter applied to the predictors, and saves the chart to
# "R_squared_increase_OLS_2.pdf".
library(ggplot2) # attached here so the chunk does not rely on an earlier one
library(reshape2)

intervals <- c(5, 30, 60, 300, 1800)
stocks <- c("MSFT", "SPY", "XRX")

# Read every "<stock>_<interval><suffix>" file in `dir`, melt each table to
# long format (id column "X"), tag it with its stock and interval, and stack
# the pieces (interval-major, stock-minor, as the original nested loops did).
read_r2_long <- function(dir, suffix, stocks, intervals) {
  grid <- expand.grid(
    stock = stocks,
    time_interval = intervals,
    stringsAsFactors = FALSE
  )
  pieces <- lapply(seq_len(nrow(grid)), function(k) {
    stock <- grid$stock[k]
    interval <- grid$time_interval[k]
    wide <- read.csv(file.path(dir, paste0(stock, "_", interval, suffix)))
    long <- melt(wide, id = "X")
    long$variable <- as.character(long$variable)
    long$stock <- stock
    long$time_interval <- interval
    long
  })
  # One rbind over the list instead of growing a data frame inside the loop
  # from an all-NA seed row that then has to be dropped again.
  do.call(rbind, pieces)
}

plot_df <- read_r2_long(
  "OLR_R2_all", "_OLR_increase_in_R2.csv", stocks, intervals
)
colnames(plot_df) <- c(
  "predictors", "response", "Rsquared_change", "stock", "time_interval"
)

# Facet labels such as "5 Sec", kept in ascending interval order.
plot_df$time_interval <- factor(
  paste(plot_df$time_interval, "Sec"),
  levels = paste(intervals, "Sec")
)

# Filter applied to the predictors, read off the predictor name; a name that
# contains both "fd" and "fw" ends up as "Pred_fw" (assigned last).
plot_df$predictors_filter <- "Pred_raw"
plot_df$predictors_filter[grepl("fd", plot_df$predictors)] <- "Pred_fd"
plot_df$predictors_filter[grepl("fw", plot_df$predictors)] <- "Pred_fw"

# Predictor set with the filter tag stripped.
# NOTE(review): the labels are assigned positionally to the sorted levels, so
# this assumes exactly three predictor sets that sort as trade number,
# volume, both -- confirm against the CSV contents.
plot_df$predictors_add <- factor(gsub("fd|fw", "", plot_df$predictors))
levels(plot_df$predictors_add) <- c("+ Trade Number", "+ Volume", "+ Both")

# Single response; the bars compare predictor filters against it.
plot_df <- plot_df[plot_df$response == "RV_fD", ]

plot_df$Pred_Filter <- "no filter"
plot_df$Pred_Filter[plot_df$predictors_filter == "Pred_fw"] <- "by week"
plot_df$Pred_Filter[plot_df$predictors_filter == "Pred_fd"] <- "by day"
plot_df$Pred_Filter <- factor(
  plot_df$Pred_Filter,
  levels = c("no filter", "by day", "by week")
)

p2 <- ggplot(plot_df, aes(x = predictors_add, y = Rsquared_change)) +
  geom_bar(
    aes(fill = Pred_Filter),
    position = "dodge", width = 0.7, color = 0, stat = "identity", size = 1
  ) +
  scale_fill_brewer(palette = "Paired") +
  facet_grid(stock ~ time_interval, scales = "fixed") +
  theme_bw() +
  labs(
    x = "New Predictors added",
    y = "Increase on adj R Squared",
    title = "Increase on Adjusted R Squared of OLR"
  ) +
  theme(axis.text.x = element_text(angle = 310, hjust = 0))

ggsave("R_squared_increase_OLS_2.pdf", p2, width = 7.5, height = 4.7)
# Figure: increase in CV R squared of the random forest, RV_fD only ----
# Reads the "*_RF_increase_in_R2.csv" tables (one per stock and sampling
# interval), keeps only the rows whose response is "RV_fD", colours the bars by
# the filter applied to the predictors, and saves the chart to
# "R_squared_increase_RF_2.pdf".
library(ggplot2) # attached here so the chunk does not rely on an earlier one
library(reshape2)

intervals <- c(5, 30, 60, 300, 1800)
stocks <- c("MSFT", "SPY", "XRX")

# Read every "<stock>_<interval><suffix>" file in `dir`, melt each table to
# long format (id column "X"), tag it with its stock and interval, and stack
# the pieces (interval-major, stock-minor, as the original nested loops did).
read_r2_long <- function(dir, suffix, stocks, intervals) {
  grid <- expand.grid(
    stock = stocks,
    time_interval = intervals,
    stringsAsFactors = FALSE
  )
  pieces <- lapply(seq_len(nrow(grid)), function(k) {
    stock <- grid$stock[k]
    interval <- grid$time_interval[k]
    wide <- read.csv(file.path(dir, paste0(stock, "_", interval, suffix)))
    long <- melt(wide, id = "X")
    long$variable <- as.character(long$variable)
    long$stock <- stock
    long$time_interval <- interval
    long
  })
  # One rbind over the list instead of growing a data frame inside the loop
  # from an all-NA seed row that then has to be dropped again.
  do.call(rbind, pieces)
}

plot_df <- read_r2_long(
  "RandomForest_R2_all", "_RF_increase_in_R2.csv", stocks, intervals
)
colnames(plot_df) <- c(
  "predictors", "response", "Rsquared_change", "stock", "time_interval"
)

# Facet labels such as "5 Sec", kept in ascending interval order.
plot_df$time_interval <- factor(
  paste(plot_df$time_interval, "Sec"),
  levels = paste(intervals, "Sec")
)

# Filter applied to the predictors, read off the predictor name; a name that
# contains both "fd" and "fw" ends up as "Pred_fw" (assigned last).
plot_df$predictors_filter <- "Pred_raw"
plot_df$predictors_filter[grepl("fd", plot_df$predictors)] <- "Pred_fd"
plot_df$predictors_filter[grepl("fw", plot_df$predictors)] <- "Pred_fw"

# Predictor set with the filter tag stripped.
# NOTE(review): the labels are assigned positionally to the sorted levels, so
# this assumes exactly three predictor sets that sort as trade number,
# volume, both -- confirm against the CSV contents.
plot_df$predictors_add <- factor(gsub("fd|fw", "", plot_df$predictors))
levels(plot_df$predictors_add) <- c("+ Trade Number", "+ Volume", "+ Both")

# Single response; the bars compare predictor filters against it.
plot_df <- plot_df[plot_df$response == "RV_fD", ]

plot_df$Pred_Filter <- "no filter"
plot_df$Pred_Filter[plot_df$predictors_filter == "Pred_fw"] <- "by week"
plot_df$Pred_Filter[plot_df$predictors_filter == "Pred_fd"] <- "by day"
plot_df$Pred_Filter <- factor(
  plot_df$Pred_Filter,
  levels = c("no filter", "by day", "by week")
)

p3 <- ggplot(plot_df, aes(x = predictors_add, y = Rsquared_change)) +
  geom_bar(
    aes(fill = Pred_Filter),
    position = "dodge", width = 0.7, color = 0, stat = "identity", size = 1
  ) +
  scale_fill_brewer(palette = "Paired") +
  facet_grid(stock ~ time_interval, scales = "fixed") +
  theme_bw() +
  labs(
    x = "New Predictors added",
    y = "Increase on CV R Squared",
    title = "Increase on R Squared of Random Forest with 30 folds CV"
  ) +
  theme(axis.text.x = element_text(angle = 310, hjust = 0))

ggsave("R_squared_increase_RF_2.pdf", p3, width = 7.5, height = 4.7)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.