# Packages this script needs; they are processed in the order listed here.
required_packages <- c(
  "mlr3verse",
  "data.table",
  "xts",
  "openxlsx",
  "glmnet",
  "readxl",
  "PerformanceAnalytics",
  "outliers",
  "ggplot2"
)

# Install any missing packages, then attach every package in `packages`.
#
# Arguments:
#   packages  Character vector of package names. An empty vector is a no-op.
#
# Returns NULL invisibly; the function is called for its side effects
# (installing packages and attaching them to the search path).
install_and_load <- function(packages) {
  for (package in packages) {
    # requireNamespace() only tests whether the package can be loaded. Unlike
    # require(), it does not attach the package and, with quietly = TRUE, does
    # not warn when the package is missing.
    if (!requireNamespace(package, quietly = TRUE)) {
      install.packages(package, dependencies = TRUE)
    }
    # library() stops with an error if the package still cannot be loaded, so
    # a failed installation is reported here instead of surfacing later.
    library(package, character.only = TRUE)
  }
  invisible(NULL)
}

# Make sure every package in required_packages is installed and attached.
install_and_load(required_packages)
# NOTE(review): setwd('..') is relative to the current working directory, so
# the two calls below move up two levels in total when this file is run in a
# single R session. Presumably these came from separate notebook chunks whose
# working directory is reset between chunks -- confirm the intended directory
# before running this as a plain script.
setwd('..')
# presumably defines regr_data, which is used below -- verify in sampledata.R
source(file.path('sandbox', 'sampledata.R'))
setwd('..')
# presumably defines the TSML generator used below -- verify in TSML.R
source(file.path('sandbox', 'TSML.R'))
source(file.path('sandbox', 'MLutils.R'))
# Build a TSML object on regr_data with "DATE" as ts_var and "WI.RET" as y.
testtask <- TSML$new(data = regr_data,
                     ts_var = "DATE",
                     y = "WI.RET")
# cutoff = 0.8 is presumably the share of observations kept for training --
# TODO confirm against TSML$train_test_split.
testtask$train_test_split(cutoff = 0.8)

# Preprocessing ----

# NOTE(review): another relative setwd('..'); in a single R session this moves
# one more level up from wherever the previous setwd() calls left the working
# directory. Confirm that 'sandbox' is reachable from the resulting directory.
setwd('..')
# Outlier detection and removal
source(file.path('sandbox', 'outliers.R'))

# Feature Selection
source(file.path('sandbox', 'featureselection.R'))

# Rescaling
source(file.path('sandbox', 'rescale.R'))

# ML Utils
# NOTE(review): MLutils.R is also sourced earlier in this script; this second
# call re-evaluates the file. Presumably redundant -- confirm before removing.
source(file.path('sandbox', 'MLutils.R'))
# Predictor columns: every column of regr_data except "DATE" (the time
# variable) and "WI.RET" (the response).
varnames <- names(regr_data[, !c("DATE", "WI.RET")])

# Apply each preprocessing helper to the predictor columns. The helpers are
# presumably defined by the sandbox scripts sourced earlier -- verify there.
interval_outlier <- interval(regr_data, varnames = varnames)
winsorize_outlier <- winsorize(regr_data, varnames = varnames)
feature_corr <- feature_correlation(regr_data, varnames = varnames)

# Split on the "DATE" column; the first element of the result is kept as the
# training set and the second as the test set.
split_data <- train_test_split(regr_data, ts_var = "DATE")
regr_train <- split_data[[1L]]
regr_test <- split_data[[2L]]
# NOTE(review): relative move; the resulting directory depends on every
# earlier setwd() call in this session -- confirm 'sandbox' resolves from it.
setwd("..")

# Source one script per learner from sandbox/, in this order. source()
# evaluates in the global environment by default, so wrapping it in lapply()
# defines the same top-level objects as individual source() calls.
invisible(lapply(
  c("cvglmnet.R", "rpart.R", "ranger.R", "svm.R", "lda.R", "qda.R",
    "naiveBayes.R", "KNN.R", "nnet.R"),
  function(script) source(file.path("sandbox", script))
))

# Call the cvglmnet and rpart test helpers on the train/test sets with the
# "DATE" column dropped; "WI.RET" is passed as the third argument (presumably
# the response column name -- confirm in the helper definitions).
test_cvglmnet(regr_train[, !"DATE"], regr_test[, !"DATE"], "WI.RET")
test_rpart(regr_train[, !"DATE"], regr_test[, !"DATE"], "WI.RET")


# JustinMShea/ExpectedReturns documentation built on June 28, 2024, 5:37 p.m.