Nothing
#' Function to train Deep Learning regression model for a single asset
#'
#' @description Function is training h2o deep learning model to match future
#' prices of the asset to the indicator pattern. Main idea is to be able
#' to predict future prices by solely relying on the recently retrieved
#' indicator pattern. This is to mimic traditional algorithmic systems
#' based on the indicator rule and to attempt automated optimization with AI.
#'
#' `r lifecycle::badge('experimental')`
#'
#' @details Deep learning model structure is obtained from several random
#' combinations of neurons within 3 hidden layers of the network.
#' The most accurate model configuration will be automatically selected
#' based on either RMSE or the Objective Test. In addition, the function will
#' check if there is a need to update the model. To do that, the function
#' checks the results of the function aml_test_model.R.
#'
#' Function is using the dataset prepared by the function aml_collect_data.R.
#' Note that the function will start to train the model as soon as there are
#' more than 1000 rows in the dataset.
#'
#' @concept see https://docs.h2o.ai/h2o-tutorials/latest-stable/tutorials/deeplearning/index.html
#'
#' @author (C) 2020, 2021 Vladimir Zhbanko
#'
#' @param symbol Character symbol of the asset for which to train the model
#' @param timeframe Integer, value in minutes, e.g. 60 min
#' @param path_model Path where the models shall be stored
#' @param path_data Path where the aggregated historical data is stored, if exists in rds format
#' @param force_update Boolean, by setting this to TRUE function will generate new model
#' (useful after h2o engine update)
#' @param objective_test Boolean, option to use trading objective test as a parameter to validate best model,
#' defaults to FALSE
#' @param num_nn_options Integer, value from 0 to 24 or more. Used to change number of variants
#' of the random neural network structures, when value is 0 uses fixed structure
#' Higher number may lead to long code execution. Select value multiple of 3 otherwise
#' function will generate warning. E.g. 12, 24, 48, etc
#' @param fixed_nn_struct Integer vector with numeric elements, see par hidden in ?h2o.deeplearning,
#' default value is c(100,100). Note this will only work if num_nn_options is 0
#' @param num_epoch Integer, see parameter epochs in ?h2o.deeplearning, default value is 100
#' Higher number may lead to long code execution
#' @param num_bars_test Integer, value of bars used for model testing
#' @param num_bars_ahead Integer, value to specify how far should the function predict. Default 34 bars.
#' @param min_perf Double, value greater than 0. Used to set minimum value of model performance.
#' Higher value will increase computation time
#' @param num_cols_used Integer, number of columns to use for training the model, defaults to 16
#'
#' @return Function is writing a file object with the best Deep Learning Regression model
#' @export
#'
#' @examples
#'
#' \donttest{
#'
#'
#' library(dplyr)
#' library(readr)
#' library(h2o)
#' library(lazytrade)
#' library(lubridate)
#' library(magrittr)
#'
#' path_model <- normalizePath(tempdir(),winslash = "/")
#' path_data <- normalizePath(tempdir(),winslash = "/")
#'
#' ind = system.file("extdata", "AI_RSIADXUSDJPY60.csv",
#' package = "lazytrade") %>% read_csv(col_names = FALSE)
#'
#' ind$X1 <- ymd_hms(ind$X1)
#'
#' tick = system.file("extdata", "TickSize_AI_RSIADX.csv",
#' package = "lazytrade") %>% read_csv(col_names = FALSE)
#'
#' write_csv(tick, file.path(path_data, "TickSize_AI_RSIADX.csv"), col_names = FALSE)
#'
#' # data transformation using the custom function for one symbol
#' aml_collect_data(indicator_dataset = ind,
#' symbol = 'USDJPY',
#' timeframe = 60,
#' path_data = path_data)
#'
#' # dataset will be written to the temp directory
#'
#' # start h2o engine
#' h2o.init(nthreads = 2)
#'
#'
#' # performing Deep Learning Regression using 2 random neural network structures and objective test
#' aml_make_model(symbol = 'USDJPY',
#' timeframe = 60,
#' path_model = path_model,
#' path_data = path_data,
#' force_update=FALSE,
#' objective_test = TRUE,
#' num_nn_options = 6,
#' num_epoch = 10,
#' min_perf = 0,
#' num_bars_test = 600,
#' num_bars_ahead = 34,
#' num_cols_used = 16)
#'
#' # performing DL Regression using 2 random neural network structures
#' # with objective test, all columns
#' aml_make_model(symbol = 'USDJPY',
#' timeframe = 60,
#' path_model = path_model,
#' path_data = path_data,
#' force_update=FALSE,
#' objective_test = TRUE,
#' num_nn_options = 6,
#' num_epoch = 10,
#' min_perf = 0,
#' num_bars_test = 600,
#' num_bars_ahead = 34,
#' num_cols_used = 0)
#'
#'
#' # performing Deep Learning Regression using the custom function
#' aml_make_model(symbol = 'USDJPY',
#' timeframe = 60,
#' path_model = path_model,
#' path_data = path_data,
#' force_update=FALSE,
#' objective_test = FALSE,
#' num_nn_options = 6,
#' num_epoch = 10,
#' min_perf = 0,
#' num_bars_test = 600,
#' num_bars_ahead = 34,
#' num_cols_used = 16)
#'
#' # performing Deep Learning Regression, fixed mode
#' aml_make_model(symbol = 'USDJPY',
#' timeframe = 60,
#' path_model = path_model,
#' path_data = path_data,
#' force_update=TRUE,
#' num_nn_options = 0,
#' fixed_nn_struct = c(100, 100),
#' num_epoch = 10,
#' min_perf = 0)
#'
#' # stop h2o engine
#' h2o.shutdown(prompt = FALSE)
#'
#' # set delay to ensure the h2o unit closes properly before the next test
#' Sys.sleep(5)
#'
#' }
#'
#'
#'
aml_make_model <- function(symbol, timeframe = 60,
                           path_model, path_data,
                           force_update=FALSE,
                           objective_test = FALSE,
                           num_nn_options = 12,
                           fixed_nn_struct = c(100, 100),
                           num_epoch = 100,
                           num_bars_test = 600,
                           num_bars_ahead = 34,
                           num_cols_used = 16,
                           min_perf = 0.3){

  requireNamespace("dplyr", quietly = TRUE)
  requireNamespace("readr", quietly = TRUE)
  requireNamespace("h2o", quietly = TRUE)

  ## ---------- Decide whether a model must be (re)trained ---------------
  # file holding the result of the previous model test (column MaxPerf)
  dec_file_name <- paste0("StrTest-", symbol, "M", timeframe, ".csv")
  dec_file_path <- file.path(path_model, dec_file_name)
  # name of the model file; the same string is used as the h2o model id
  m_name <- paste0("DL_Regression", "-", symbol, "-", timeframe)
  m_path <- file.path(path_model, m_name)

  # model_status: -1 forces a new model, 0 means "no test result available",
  # otherwise it is the MaxPerf value read from the test result file
  if (force_update) {
    model_status <- -1
  } else if (file.exists(dec_file_path)) {
    model_status <- readr::read_csv(dec_file_path, col_types = readr::cols()) %>%
      dplyr::select(MaxPerf) %$%
      MaxPerf
  } else {
    model_status <- 0
  }

  ## ---------- Read input data ---------------
  # dataset prepared by aml_collect_data; NULL when it cannot be read
  f_name <- paste0("AI_RSIADX", symbol, timeframe, ".rds")
  full_path <- file.path(path_data, f_name)
  x <- tryCatch(readr::read_rds(full_path), error = function(e) NULL)

  # z is the tick value of this symbol (column X2 of the row where X1 == symbol)
  path_tick <- file.path(path_data, "TickSize_AI_RSIADX.csv")
  z <- readr::read_csv(path_tick, col_names = FALSE) %>%
    dplyr::filter(X1 == symbol) %$%
    X2

  # nothing to do while the dataset is missing or has 1000 rows or less
  enough_data <- is.data.frame(x) && nrow(x) > 1000
  if (!enough_data) {
    return(invisible(NULL))
  }
  # train only when the last test result does not exceed min_perf or when
  # there is no model file yet
  needs_training <- model_status <= min_perf || !file.exists(m_path)
  if (!needs_training) {
    return(invisible(NULL))
  }

  ## ---------- Generate dataset required for neural network training --------
  dat12 <- x %>%
    # shift the LABEL column by num_bars_ahead rows
    dplyr::mutate(LABEL = dplyr::lag(LABEL, n = num_bars_ahead)) %>%
    # remove incomplete rows and rows consisting of zeros only
    stats::na.omit() %>%
    dplyr::filter_all(dplyr::any_vars(. != 0)) %>%
    dplyr::select(-X1, -X2, -X3)

  # split data: the first num_bars_test rows are used for testing,
  # the remaining rows are used for training
  test_ind <- 1:num_bars_test
  dat21 <- dat12[test_ind, ]
  dat22 <- dat12[-test_ind, ]

  # number of columns excluding the LABEL column
  dat22_ncol <- ncol(dat22) - 1
  # fail safe: check number of columns with parameter num_cols_used
  if (num_cols_used > dat22_ncol) {
    stop("Parameter num_cols_used is greater than what is in the available data",
         call. = FALSE)
  }
  # value 0 means: use the full set of columns
  if (num_cols_used == 0) {
    num_cols_used <- dat22_ncol
  }

  ## ---------- Data Modelling ---------------
  # load training data into the h2o environment
  macd_ML <- h2o::as.h2o(x = dat22, destination_frame = "macd_ML")
  # names of the predictor columns; identical for every candidate network
  predictors <- names(macd_ML[, 1:num_cols_used])

  # the evaluation frame does not depend on the candidate network,
  # hence it is uploaded only once
  if (objective_test) {
    # predictors of the first num_bars_test rows of the collected data
    te <- x %>%
      dplyr::select(-X1, -X2, -X3, -LABEL) %>%
      utils::head(num_bars_test)
    recent_ML <- h2o::as.h2o(x = te, destination_frame = "recent_ML")
    # X1 (time index) and X2 (price) of the same rows, used for the simulation
    sim_prices <- x %>%
      utils::head(num_bars_test) %>%
      dplyr::select(X1, X2)
  } else {
    recent_ML <- h2o::as.h2o(x = dat21, destination_frame = "recent_ML")
  }

  # candidate structures: one row of nn_sets per network to try
  if (num_nn_options == 0) {
    nn_sets <- matrix(fixed_nn_struct, ncol = length(fixed_nn_struct))
  } else {
    # random structures with 3 hidden layers; a warning is generated by
    # matrix() when num_nn_options is not a multiple of 3
    nn_sets <- matrix(sample.int(n = 100, num_nn_options), ncol = 3)
  }

  n_models <- nrow(nn_sets)
  # one score per candidate: PnL (objective test) or RMSE.
  # A local vector is used on purpose: relying on exists() to create the
  # results table would also find objects with the same name in the calling
  # or global environment
  scores <- rep(NA_real_, n_models)
  # entry threshold applied to the predicted value in the objective test
  trigger <- 20

  for (i in seq_len(n_models)) {

    ModelC <- h2o::h2o.deeplearning(
      model_id = m_name,
      x = predictors,
      y = "LABEL",
      training_frame = macd_ML,
      activation = "Tanh",
      autoencoder = FALSE,
      hidden = nn_sets[i, ],
      loss = "Automatic",
      sparse = TRUE,
      l1 = 1e-4,
      distribution = "AUTO",
      stopping_metric = "MSE",
      epochs = num_epoch,
      diagnostics = FALSE)

    if (objective_test) {
      ### score the model using the objective test
      result_R1 <- h2o::h2o.predict(ModelC, recent_ML) %>% as.data.frame()

      trades <- sim_prices %>%
        # add column with the predicted value
        dplyr::bind_cols(result_R1) %>%
        # trade direction scaled by the tick value, 0 means no trade
        dplyr::mutate(Risk = dplyr::if_else(predict > trigger, 1/z,
                                            dplyr::if_else(predict < -trigger, -1/z, 0))) %>%
        # X2 shifted by the number of bars the position is held
        dplyr::mutate(X2_NB = dplyr::lag(X2, num_bars_ahead)) %>%
        stats::na.omit() %>%
        # result of holding each position
        dplyr::mutate(Hold_NB = Risk * (X2_NB - X2)) %>%
        # keep only rows where a trade would be opened
        dplyr::filter(Risk != 0)

      scores[i] <- sum(trades$Hold_NB)

    } else {
      ### score the model using RMSE on the test data
      scores[i] <- h2o::h2o.performance(ModelC, newdata = recent_ML)@metrics$RMSE
    }

    # save intermediate model into its own sub-folder named after i
    temp_model_path <- file.path(path_model, i)
    if (!dir.exists(temp_model_path)) {
      dir.create(temp_model_path)
    }
    h2o::h2o.saveModel(ModelC, path = temp_model_path, force = TRUE)

  } # end of for loop

  # best candidate: highest PnL in the objective test, otherwise lowest RMSE
  best_row <- if (objective_test) which.max(scores) else which.min(scores)

  ## copy the best model object into path_model
  best_model_location <- file.path(path_model, best_row, m_name)
  file.copy(best_model_location, path_model, overwrite = TRUE)

}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.