#' @include sim_class.R generics.R model_helper.R
NULL
#' Validity Checker for auto_arima_sim Object
#'
#' @param object A auto_arima_sim object.
#' @return \code{TRUE} if the input sim object is valid, vector of error messages otherwise.
#' @keywords internal
check_valid_auto_arima_sim <- function(object) {
errors <- character()
window_type_choices <- c("max", "avg")
res_dist_choices <- c("normal", "skew_norm", "empirical", "discretized")
outlier_type_choices <- c("AO", "IO", "LS", "None", "All")
outlier_prediction_choices <- c("None", "Categorical", "Categorical-Dirichlet")
if (any(is.na(object@window_type_for_reg)) | all(object@window_type_for_reg != window_type_choices)) {
msg <- paste0("window_type_for_reg must be one of ", paste(window_type_choices, collapse = " "))
errors <- c(errors, msg)
}
if (length(object@res_dist) != 1 | is.na(object@res_dist) | all(object@res_dist != res_dist_choices)) {
msg <- paste0("res_dist must be one of ", paste(res_dist_choices, collapse = " "), ".")
errors <- c(errors, msg)
}
if (length(object@outlier_type) != 1 | is.na(object@outlier_type) | all(object@outlier_type != outlier_type_choices)) {
msg <- paste0("outlier_type must be one of ", paste(outlier_type_choices, collapse = " "), ".")
errors <- c(errors, msg)
}
if (length(object@outlier_cval) != 1 | any(object@outlier_cval < 3, na.rm = TRUE) | any(object@outlier_cval > 4, na.rm = TRUE)) {
msg <- paste0("outlier_cval must be a numeric value within 3 and 4, inclusively, or NA.")
errors <- c(errors, msg)
}
if (length(object@outlier_prediction) != 1 | is.na(object@outlier_prediction) | all(object@outlier_prediction != outlier_prediction_choices)) {
msg <- paste0("outlier_prediction must be one of ", paste(res_dist_choices, collapse = " "), ".")
errors <- c(errors, msg)
}
if (length(object@outlier_prediction_update_param) != 1 | is.na(object@outlier_prediction_update_param)) {
msg <- paste0("outlier_prediction_update_param must be length one logical value.")
errors <- c(errors, msg)
}
if (length(object@state_num) != 1 | (!is.na(object@state_num) & (object@state_num < 0 | object@state_num %% 1 != 0)) | (is.na(object@state_num) & object@granularity == 0)) {
msg <- paste0("state_num must be NA or positive integer, if granularity is 0, then state_num cannot be NA.")
errors <- c(errors, msg)
}
if (length(errors) == 0) {
return(TRUE)
} else {
return(errors)
}
}
#' @rdname sim-class
#' @param res_dist A character representing the distribution of residual, \code{"normal"} for normal distribution, \code{"skew_norm"} for skewed normal distribution, or \code{"empirical"} for empirical distribution. Default value is \code{"normal"}.
#' @param outlier_type A character representing the type of outlier it will be treated, it can be None for not checking outliers, AO as additive outliers, IO as innovative outliers, LS as level shift, or All for taking account of all outlier types. Default value is \code{"None"}.
#' @param outlier_cval A numeric value representing the critical value to determine the significance of each type of outlier. If NA_real_ is supplied, then it uses defaults: If n ≤ 50 then cval is set equal to 3.0; If n ≥ 450 then cval is set equal to 4.0; otherwise cval is set equal to 3 + 0.0025 * (n - 50). Default value is \code{NA_real_}.
#' @param outlier_prediction A character representing the distribution of occurences of outliers, and the prior distribution for probability of occurences. Current choices are "None", "Categorical" for predictions based on MLE, "Categorical-Dirichlet" for Bayesian prediction and prior distribution.
#' @param outlier_prediction_prior A numeric vector representing the starting value of the hyperparameters of prior distirbution.
#' @param outlier_prediction_update_param A logical value representing whether to update the value of the hyperparameters of prior distribution or MLE estimations of parameters depending on \code{outlier_prediction}.
#' @param freq A numeric value representing the number of observations per unit of time.
#' @param state_num A numeric number that represents the number of states in Multinomial chain.
#' @param train_args A list representing additional call passed into the training function, \code{forecast::auto.arima}. Default value is \code{list("order" = c(1, 0, 0))}.
#' @export auto_arima_sim
auto_arima_sim <- setClass("auto_arima_sim",
slots = list(window_size_for_reg = "numeric",
window_type_for_reg = "character",
res_dist = "character",
outlier_type = "character",
outlier_cval = "numeric",
outlier_prediction = "character",
outlier_prediction_prior = "numeric",
outlier_prediction_update_param = "logical",
freq = "numeric",
state_num = "numeric",
train_args = "list"),
contains = "sim",
prototype = list(window_size_for_reg = NA_real_,
window_type_for_reg = NA_character_,
name = "ARIMA",
res_dist = "normal",
outlier_type = "None",
outlier_cval = NA_real_,
outlier_prediction = "None",
outlier_prediction_prior = NA_real_,
outlier_prediction_update_param = TRUE,
freq = 12,
state_num = NA_real_,
train_args = list()),
validity = check_valid_auto_arima_sim)
#' @describeIn train_model Train ARMA Model specific to auto_arima_sim object.
setMethod("train_model",
signature(object = "auto_arima_sim", train_x = "matrix", train_xreg = "NULL", trained_model = "list"),
function(object, train_x, train_xreg, trained_model) {
if (!is.na(object@freq)) {
new_train_x <- stats::ts(convert_frequency_dataset(train_x, object@window_size, object@response, keep.names = TRUE), frequency = object@freq)
} else {
new_train_x <- stats::ts(convert_frequency_dataset(train_x, object@window_size, object@response, keep.names = TRUE))
}
args.tsmethod <- list()
for (i in names(object@train_args)) {
args.tsmethod[[i]] <- object@train_args[[i]]
}
if (object@outlier_type == "None") {
trained_result <- do.call(forecast::auto.arima, c(list("y" = new_train_x), args.tsmethod))
trained_result$call$x <- new_train_x
trained_result$call$orig_x <- train_x
} else {
if (is.na(object@outlier_cval)) {
cval <- ifelse(length(new_train_x) <= 50, 3, ifelse(length(new_train_x) >= 450, 4, 3 + 0.0025 * (length(new_train_x) - 50)))
} else {
cval <- object@outlier_cval
}
if (object@outlier_type == "All") {
ol_type <- c("AO", "IO", "TC")
} else {
ol_type <- object@outlier_type
}
trained_result <- tryCatch({
tso_model <- tsoutliers::tso(y = new_train_x, types = ol_type, cval = cval, maxit = 4, tsmethod = "auto.arima", args.tsmethod = args.tsmethod, maxit.oloop = 20, maxit.iloop = 10)
tso_model$fit$call$orig_x <- train_x
param_mle <- estimate_outliers(object, ol_type, new_train_x, tso_model, trained_model)
c(tso_model$fit, list("param_mle" = param_mle))
}, error = function(e) {
ts_model <- do.call(forecast::auto.arima, c(list("y" = new_train_x), args.tsmethod))
ts_model$call$x <- new_train_x
ts_model$call$orig_x <- train_x
ts_model
})
}
if (object@res_dist == "skew_norm") {
trained_result <- c(trained_result, skew_norm_param_estimation(trained_result$residuals))
}
return(list(trained_result))
})
#' @describeIn do_prediction Do prediction based on trained AR1 Model.
setMethod("do_prediction",
signature(object = "auto_arima_sim", trained_result = "list", predict_info = "data.frame", test_x = "matrix", test_xreg = "NULL"),
function(object, trained_result, predict_info, test_x, test_xreg) {
trained_result <- trained_result[[1]]
level <- (1 - object@cut_off_prob * 2) * 100
if (object@res_dist == "empirical") {
bootstrap <- TRUE
} else {
bootstrap <- FALSE
}
if (nrow(predict_info) == 0) {
if (is.null(trained_result$call$xreg)) {
target_model <- forecast::Arima(y = trained_result$call$x, model = trained_result)
} else {
target_model <- forecast::Arima(y = trained_result$call$x, xreg = trained_result$call$xreg, model = trained_result)
}
} else {
new_x <- c(trained_result$call$x, predict_info$actual)
res <- stats::ts(c(trained_result$residuals, predict_info$residuals))
if (object@outlier_type != "None") {
new_x <- find_and_remove_outliers_testing_set(object, trained_result, new_x, res)
}
prev_xreg <- trained_result$call$xreg
if (is.null(prev_xreg)) {
# Outliers are not found.
target_model <- forecast::Arima(new_x, model = trained_result)
} else {
# Outliers are considered and found
new_xreg <- matrix(0, nrow = length(new_x) - length(trained_result$call$x), ncol = ncol(prev_xreg))
colnames(new_xreg) <- colnames(prev_xreg)
new_xreg <- rbind(prev_xreg, new_xreg)
target_model <- forecast::Arima(new_x, xreg = new_xreg, model = trained_result)
}
}
if (is.null(trained_result$call$xreg)) {
# Outliers are not considered or outliers are not found
predict_result <- forecast::forecast(target_model, h = object@extrap_step, bootstrap = bootstrap, npaths = length(trained_result$call$x), level = level)
} else {
dxreg <- matrix(0, nrow = object@extrap_step, ncol = ncol(trained_result$call$xreg))
colnames(dxreg) <- colnames(trained_result$call$xreg)
predict_result <- forecast::forecast(target_model, xreg = dxreg, h = object@extrap_step, bootstrap = bootstrap, npaths = length(trained_result$call$x), level = level)
}
expected <- stats::setNames(as.data.frame(as.numeric(predict_result$mean)), "expected")
pi_up <- stats::setNames(as.data.frame(predict_result$upper), paste0("Quantile_", sort(1 - object@cut_off_prob)))
predicted_params <- data.frame("mean" = as.numeric(predict_result$mean), "sd" = (pi_up[,1] - expected[,1]) / stats::qnorm(sort(1 - object@cut_off_prob)[1]))
if (object@res_dist == "skew_norm") {
skewnorm_prediction_result <- skew_norm_param_prediction(object, trained_result, as.numeric(predict_result$mean), level)
expected <- skewnorm_prediction_result$expected
pi_up <- skewnorm_prediction_result$pi_up
predicted_params <- skewnorm_prediction_result$predicted_params
} else if (object@res_dist == "discretized") {
compute_pi_up <- function(prob, to_states) {
current_state <- 1
current_prob <- 0
while (current_state <= length(to_states)) {
current_prob <- current_prob + to_states[current_state]
if (current_prob < prob) {
current_state <- current_state + 1
} else {
break
}
}
pi_up <- current_state * (100 / length(to_states))
return(pi_up)
}
predicted_params <- discretized_from_normal_param_prediction(object, expected, pi_up)
pi_up <- matrix(0, nrow = object@extrap_step, ncol = length(1 - object@cut_off_prob))
for (j in 1:nrow(pi_up)) {
pi_up[j,] <- sapply(sort(1 - object@cut_off_prob), function(i) {
compute_pi_up(i, predicted_params[j,])
})
}
pi_up <- as.data.frame(pi_up)
colnames(pi_up) <- paste0("Quantile_", sort(1 - object@cut_off_prob))
expected <- data.frame("expected" = sapply(1:object@extrap_step, function(i) {
find_expectation_state_based_dist(predicted_params[i, grep("prob_dist.", colnames(predicted_params))])
}))
} else {
if (object@res_dist == "normal" & object@outlier_type != "None" & object@outlier_prediction != "None") {
if (is.null(trained_result$param_mle)) {
if (object@outlier_type == "All") {
ol_type <- c("AO", "IO", "TC")
} else {
ol_type <- object@outlier_type
}
expected <- expected
pi_up <- pi_up
mu <- stats::setNames(as.data.frame(do.call(rbind, lapply(1:object@extrap_step, function(h) {
rep(0, times = length(ol_type) + 1)
}))), paste0("mean.", c(ol_type, "NO")))
sd <- stats::setNames(as.data.frame(do.call(rbind, lapply(1:object@extrap_step, function(h) {
rep(0, times = length(ol_type) + 1)
}))), paste0("sd.", c(ol_type, "NO")))
pro <- stats::setNames(as.data.frame(do.call(rbind, lapply(1:object@extrap_step, function(h) {
rep(0, times = length(ol_type) + 1)
}))), paste0("pro.", c(ol_type, "NO")))
predicted_params <- cbind(mu, sd, pro)
} else {
outlier_prediction_result <- prediction_including_outlier_effect(object, trained_result, pi_up, expected, level)
expected <- outlier_prediction_result$expected
pi_up <- outlier_prediction_result$pi_up
predicted_params <- outlier_prediction_result$predicted_params
}
}
}
return(list("predicted_quantiles" = cbind(expected, pi_up), "predicted_params" = predicted_params))
})
#' @describeIn train_model Train ARMA Model specific to auto_arima_sim object.
setMethod("train_model",
signature(object = "auto_arima_sim", train_x = "matrix", train_xreg = "matrix", trained_model = "list"),
function(object, train_x, train_xreg, trained_model) {
if (!is.na(object@freq)) {
new_train_x <- stats::ts(convert_frequency_dataset(stats::setNames(train_x[(max(object@window_size_for_reg, object@window_size) + (object@extrap_step - 1) * object@window_size + 1):nrow(train_x),1], rownames(train_x)[(max(object@window_size_for_reg, object@window_size) + (object@extrap_step - 1) * object@window_size + 1):length(train_x)]),
object@window_size,
object@response,
keep.names = TRUE,
right.aligned = TRUE),
frequency = object@freq)
} else {
new_train_x <- stats::ts(convert_frequency_dataset(stats::setNames(train_x[(max(object@window_size_for_reg, object@window_size) + (object@extrap_step - 1) * object@window_size + 1):nrow(train_x),1], rownames(train_x)[(max(object@window_size_for_reg, object@window_size) + (object@extrap_step - 1) * object@window_size + 1):length(train_x)]),
object@window_size,
object@response,
keep.names = TRUE,
right.aligned = TRUE))
}
new_train_xreg <- as.matrix(convert_frequency_dataset_overlapping(stats::setNames(train_xreg[1:(nrow(train_xreg) - object@window_size * object@extrap_step),1], rownames(train_xreg)[1:(nrow(train_xreg) - object@window_size * object@extrap_step)]),
object@window_size_for_reg,
object@window_type_for_reg,
keep.names = TRUE,
jump = object@window_size,
right.aligned = TRUE,
length.out = length(new_train_x)))
colnames(new_train_xreg) <- "xreg"
args.tsmethod <- list()
for (i in names(object@train_args)) {
args.tsmethod[[i]] <- object@train_args[[i]]
}
if (object@outlier_type == "None") {
trained_result <- do.call(forecast::auto.arima, c(list("y" = new_train_x, "xreg" = new_train_xreg), args.tsmethod))
trained_result$call$x <- new_train_x
trained_result$call$xreg <- new_train_xreg
trained_result$call$orig_x <- train_x
trained_result$call$orig_xreg <- train_xreg
} else {
if (is.na(object@outlier_cval)) {
cval <- ifelse(length(new_train_x) <= 50, 3, ifelse(length(new_train_x) >= 450, 4, 3 + 0.0025 * (length(new_train_x) - 50)))
} else {
cval <- object@outlier_cval
}
if (object@outlier_type == "All") {
ol_type <- c("AO", "IO", "TC")
} else {
ol_type <- object@outlier_type
}
trained_result <- tryCatch({
tso_model <- tsoutliers::tso(y = new_train_x, xreg = new_train_xreg, types = ol_type, cval = cval, maxit = 4, tsmethod = "auto.arima", args.tsmethod = args.tsmethod, maxit.oloop = 20, maxit.iloop = 10)
tso_model$fit$call$orig_x <- train_x
tso_model$fit$call$orig_xreg <- train_xreg
param_mle <- estimate_outliers(object, ol_type, new_train_x, tso_model, trained_model)
c(tso_model$fit, list("param_mle" = param_mle))
}, error = function(e) {
ts_model <- do.call(forecast::auto.arima, c(list("y" = new_train_x, "xreg" = new_train_xreg), args.tsmethod))
ts_model$call$x <- new_train_x
ts_model$call$xreg <- new_train_xreg
ts_model$call$orig_x <- train_x
ts_model$call$orig_xreg <- train_xreg
ts_model
})
}
if (object@res_dist == "skew_norm") {
trained_result <- c(trained_result, skew_norm_param_estimation(trained_result$residuals))
}
return(list(trained_result))
})
#' @describeIn do_prediction Do prediction based on trained AR1 Model.
setMethod("do_prediction",
signature(object = "auto_arima_sim", trained_result = "list", predict_info = "data.frame", test_x = "matrix", test_xreg = "matrix"),
function(object, trained_result, predict_info, test_x, test_xreg) {
trained_result <- trained_result[[1]]
level <- (1 - object@cut_off_prob * 2) * 100
if (object@res_dist == "empirical") {
bootstrap <- TRUE
} else {
bootstrap <- FALSE
}
if (nrow(predict_info) == 0) {
target_model <- forecast::Arima(y = trained_result$call$x, xreg = trained_result$call$xreg, model = trained_result)
} else {
new_x <- c(trained_result$call$x, predict_info$actual)
res <- stats::ts(c(trained_result$residuals, predict_info$residuals))
if (object@outlier_type != "None") {
new_x <- find_and_remove_outliers_testing_set(object, trained_result, new_x, res)
}
prev_xreg <- trained_result$call$xreg
new_xreg <- c(trained_result$call$orig_xreg[,1], test_xreg[,1])
new_xreg <- as.matrix(convert_frequency_dataset_overlapping(new_xreg[1:(length(new_xreg) - object@window_size * object@extrap_step)],
object@window_size_for_reg,
object@window_type_for_reg,
keep.names = TRUE,
jump = object@window_size,
right.aligned = TRUE,
length.out = length(predict_info$actual)))
if (ncol(prev_xreg) > 1) {
# Outliers are considered and found.
new_ol <- matrix(0, nrow = nrow(new_xreg), ncol = ncol(prev_xreg) - 1)
new_xreg <- cbind(new_xreg, new_ol)
}
colnames(new_xreg) <- colnames(prev_xreg)
new_xreg <- rbind(prev_xreg, new_xreg)
target_model <- forecast::Arima(new_x, xreg = new_xreg, model = trained_result)
}
dxreg <- c(trained_result$call$orig_xreg[,1], test_xreg[,1])
dxreg <- as.matrix(convert_frequency_dataset_overlapping(dxreg,
object@window_size_for_reg,
object@window_type_for_reg,
keep.names = TRUE,
jump = object@window_size,
right.aligned = TRUE,
length.out = object@extrap_step))
if (ncol(dxreg) < ncol(trained_result$call$xreg)) {
dxreg <- cbind(dxreg, matrix(0, nrow = object@extrap_step, ncol = (ncol(trained_result$call$xreg) - ncol(dxreg))))
}
colnames(dxreg) <- colnames(trained_result$call$xreg)
predict_result <- forecast::forecast(target_model, xreg = dxreg, h = object@extrap_step, bootstrap = bootstrap, npaths = length(trained_result$call$x), level = level)
expected <- stats::setNames(as.data.frame(as.numeric(predict_result$mean)), "expected")
pi_up <- stats::setNames(as.data.frame(predict_result$upper), paste0("Quantile_", sort(1 - object@cut_off_prob)))
predicted_params <- data.frame("mean" = as.numeric(predict_result$mean), "sd" = (pi_up[,1] - expected[,1]) / stats::qnorm(sort(1 - object@cut_off_prob)[1]))
if (object@res_dist == "skew_norm") {
skewnorm_prediction_result <- skew_norm_param_prediction(object, trained_result, as.numeric(predict_result$mean), level)
expected <- skewnorm_prediction_result$expected
pi_up <- skewnorm_prediction_result$pi_up
predicted_params <- skewnorm_prediction_result$predicted_params
} else if (object@res_dist == "discretized") {
compute_pi_up <- function(prob, to_states) {
current_state <- 1
current_prob <- 0
while (current_state <= length(to_states)) {
current_prob <- current_prob + to_states[current_state]
if (current_prob < prob) {
current_state <- current_state + 1
} else {
break
}
}
pi_up <- current_state * (100 / length(to_states))
return(pi_up)
}
predicted_params <- discretized_from_normal_param_prediction(object, expected, pi_up)
pi_up <- matrix(0, nrow = object@extrap_step, ncol = length(1 - object@cut_off_prob))
for (j in 1:nrow(pi_up)) {
pi_up[j,] <- sapply(sort(1 - object@cut_off_prob), function(i) {
compute_pi_up(i, predicted_params[j,])
})
}
pi_up <- as.data.frame(pi_up)
colnames(pi_up) <- paste0("Quantile_", sort(1 - object@cut_off_prob))
expected <- data.frame("expected" = sapply(1:object@extrap_step, function(i) {
find_expectation_state_based_dist(predicted_params[i, grep("prob_dist.", colnames(predicted_params))])
}))
} else {
if (object@res_dist == "normal" & object@outlier_type != "None" & object@outlier_prediction != "None") {
if (is.null(trained_result$param_mle)) {
if (object@outlier_type == "All") {
ol_type <- c("AO", "IO", "TC")
} else {
ol_type <- object@outlier_type
}
expected <- expected
pi_up <- pi_up
mu <- stats::setNames(as.data.frame(do.call(rbind, lapply(1:object@extrap_step, function(h) {
rep(0, times = length(ol_type) + 1)
}))), paste0("mean.", c(ol_type, "NO")))
sd <- stats::setNames(as.data.frame(do.call(rbind, lapply(1:object@extrap_step, function(h) {
rep(0, times = length(ol_type) + 1)
}))), paste0("sd.", c(ol_type, "NO")))
pro <- stats::setNames(as.data.frame(do.call(rbind, lapply(1:object@extrap_step, function(h) {
rep(0, times = length(ol_type) + 1)
}))), paste0("pro.", c(ol_type, "NO")))
predicted_params <- cbind(mu, sd, pro)
} else {
outlier_prediction_result <- prediction_including_outlier_effect(object, trained_result, pi_up, expected, level)
expected <- outlier_prediction_result$expected
pi_up <- outlier_prediction_result$pi_up
predicted_params <- outlier_prediction_result$predicted_params
}
}
}
return(list("predicted_quantiles" = cbind(expected, pi_up), "predicted_params" = predicted_params))
})
#' @describeIn train_model Train ARMA Model specific to auto_arima_sim object.
setMethod("train_model",
signature(object = "auto_arima_sim", train_x = "matrix", train_xreg = "list", trained_model = "list"),
function(object, train_x, train_xreg, trained_model) {
if (!is.na(object@freq)) {
new_train_x <- stats::ts(convert_frequency_dataset(stats::setNames(train_x[(max(object@window_size_for_reg, object@window_size) + (object@extrap_step - 1) * object@window_size + 1):nrow(train_x),1], rownames(train_x)[(max(object@window_size_for_reg, object@window_size) + (object@extrap_step - 1) * object@window_size + 1):length(train_x)]),
object@window_size,
object@response,
keep.names = TRUE,
right.aligned = TRUE),
frequency = object@freq)
} else {
new_train_x <- stats::ts(convert_frequency_dataset(stats::setNames(train_x[(max(object@window_size_for_reg, object@window_size) + (object@extrap_step - 1) * object@window_size + 1):nrow(train_x),1], rownames(train_x)[(max(object@window_size_for_reg, object@window_size) + (object@extrap_step - 1) * object@window_size + 1):length(train_x)]),
object@window_size,
object@response,
keep.names = TRUE,
right.aligned = TRUE))
}
new_train_xreg <- do.call(cbind, lapply(1:length(train_xreg), function(reg) {
temp_reg <- train_xreg[[reg]]
as.matrix(convert_frequency_dataset_overlapping(stats::setNames(temp_reg[1:(nrow(temp_reg) - object@window_size * object@extrap_step),1], rownames(temp_reg)[1:(nrow(temp_reg) - object@window_size * object@extrap_step)]),
ifelse(length(object@window_size_for_reg) == 1, object@window_size_for_reg, object@window_size_for_reg[reg]),
ifelse(length(object@window_type_for_reg) == 1, object@window_type_for_reg, object@window_type_for_reg[reg]),
keep.names = TRUE,
jump = object@window_size,
right.aligned = TRUE,
length.out = length(new_train_x)))
}))
colnames(new_train_xreg) <- names(train_xreg)
args.tsmethod <- list()
for (i in names(object@train_args)) {
args.tsmethod[[i]] <- object@train_args[[i]]
}
if (object@outlier_type == "None") {
trained_result <- do.call(forecast::auto.arima, c(list("y" = new_train_x, "xreg" = new_train_xreg), args.tsmethod))
trained_result$call$x <- new_train_x
trained_result$call$xreg <- new_train_xreg
trained_result$call$orig_x <- train_x
trained_result$call$orig_xreg <- train_xreg
} else {
if (is.na(object@outlier_cval)) {
cval <- ifelse(length(new_train_x) <= 50, 3, ifelse(length(new_train_x) >= 450, 4, 3 + 0.0025 * (length(new_train_x) - 50)))
} else {
cval <- object@outlier_cval
}
if (object@outlier_type == "All") {
ol_type <- c("AO", "IO", "TC")
} else {
ol_type <- object@outlier_type
}
trained_result <- tryCatch({
tso_model <- tsoutliers::tso(y = new_train_x, xreg = new_train_xreg, types = ol_type, cval = cval, maxit = 4, tsmethod = "auto.arima", args.tsmethod = args.tsmethod, maxit.oloop = 20, maxit.iloop = 10)
tso_model$call$orig_x <- train_x
tso_model$call$orig_xreg <- train_xreg
param_mle <- estimate_outliers(object, ol_type, new_train_x, tso_model, trained_model)
c(tso_model$fit, list("param_mle" = param_mle))
}, error = function(e) {
ts_model <- do.call(forecast::auto.arima, c(list("y" = new_train_x, "xreg" = new_train_xreg), args.tsmethod))
ts_model$call$x <- new_train_x
ts_model$call$xreg <- new_train_xreg
ts_model$call$orig_x <- train_x
ts_model$call$orig_xreg <- train_xreg
ts_model
})
}
if (object@res_dist == "skew_norm") {
trained_result <- c(trained_result, skew_norm_param_estimation(trained_result$residuals))
}
return(list(trained_result))
})
#' @describeIn do_prediction Do prediction based on trained AR1 Model.
setMethod("do_prediction",
signature(object = "auto_arima_sim", trained_result = "list", predict_info = "data.frame", test_x = "matrix", test_xreg = "list"),
function(object, trained_result, predict_info, test_x, test_xreg) {
trained_result <- trained_result[[1]]
level <- (1 - object@cut_off_prob * 2) * 100
if (object@res_dist == "empirical") {
bootstrap <- TRUE
} else {
bootstrap <- FALSE
}
if (nrow(predict_info) == 0) {
target_model <- forecast::Arima(y = trained_result$call$x, xreg = trained_result$call$xreg, model = trained_result)
} else {
new_x <- c(trained_result$call$x, predict_info$actual)
res <- stats::ts(c(trained_result$residuals, predict_info$residuals))
if (object@outlier_type != "None") {
new_x <- find_and_remove_outliers_testing_set(object, trained_result, new_x, res)
}
prev_xreg <- trained_result$call$xreg
new_xreg <- do.call(cbind, lapply(1:length(test_xreg), function(reg) {
temp_xreg <- c(trained_result$call$orig_xreg[[reg]][,1], test_xreg[[reg]][,1])
convert_frequency_dataset_overlapping(temp_xreg[1:(length(temp_xreg) - object@window_size * object@extrap_step)],
ifelse(length(object@window_size_for_reg) == 1, object@window_size_for_reg, object@window_size_for_reg[reg]),
ifelse(length(object@window_type_for_reg) == 1, object@window_type_for_reg, object@window_type_for_reg[reg]),
keep.names = TRUE,
jump = object@window_size,
right.aligned = TRUE,
length.out = length(predict_info$actual))
}))
if (ncol(prev_xreg) > ncol(new_xreg)) {
# Outliers are considered and found.
new_ol <- matrix(0, nrow = nrow(new_xreg), ncol = ncol(prev_xreg) - ncol(new_xreg))
new_xreg <- cbind(new_xreg, new_ol)
}
colnames(new_xreg) <- colnames(prev_xreg)
new_xreg <- rbind(prev_xreg, new_xreg)
target_model <- forecast::Arima(new_x, xreg = new_xreg, model = trained_result)
}
dxreg <- do.call(cbind, lapply(1:length(test_xreg), function(reg) {
temp_xreg <- c(trained_result$call$orig_xreg[[reg]][,1], test_xreg[[reg]][,1])
convert_frequency_dataset_overlapping(temp_xreg,
ifelse(length(object@window_size_for_reg) == 1, object@window_size_for_reg, object@window_size_for_reg[reg]),
ifelse(length(object@window_type_for_reg) == 1, object@window_type_for_reg, object@window_type_for_reg[reg]),
keep.names = TRUE,
jump = object@window_size,
right.aligned = TRUE,
length.out = object@extrap_step)
}))
if (ncol(dxreg) < ncol(trained_result$call$xreg)) {
dxreg <- cbind(dxreg, matrix(0, nrow = object@extrap_step, ncol = (ncol(trained_result$call$xreg) - ncol(test_xreg))))
}
colnames(dxreg) <- colnames(trained_result$call$xreg)
predict_result <- forecast::forecast(target_model, xreg = dxreg, h = object@extrap_step, bootstrap = bootstrap, npaths = length(trained_result$call$x), level = level)
expected <- stats::setNames(as.data.frame(as.numeric(predict_result$mean)), "expected")
pi_up <- stats::setNames(as.data.frame(predict_result$upper), paste0("Quantile_", sort(1 - object@cut_off_prob)))
predicted_params <- data.frame("mean" = as.numeric(predict_result$mean), "sd" = (pi_up[,1] - expected[,1]) / stats::qnorm(sort(1 - object@cut_off_prob)[1]))
if (object@res_dist == "skew_norm") {
skewnorm_prediction_result <- skew_norm_param_prediction(object, trained_result, as.numeric(predict_result$mean), level)
expected <- skewnorm_prediction_result$expected
pi_up <- skewnorm_prediction_result$pi_up
predicted_params <- skewnorm_prediction_result$predicted_params
} else if (object@res_dist == "discretized") {
compute_pi_up <- function(prob, to_states) {
current_state <- 1
current_prob <- 0
while (current_state <= length(to_states)) {
current_prob <- current_prob + to_states[current_state]
if (current_prob < prob) {
current_state <- current_state + 1
} else {
break
}
}
pi_up <- current_state * (100 / length(to_states))
return(pi_up)
}
predicted_params <- discretized_from_normal_param_prediction(object, expected, pi_up)
pi_up <- matrix(0, nrow = object@extrap_step, ncol = length(1 - object@cut_off_prob))
for (j in 1:nrow(pi_up)) {
pi_up[j,] <- sapply(sort(1 - object@cut_off_prob), function(i) {
compute_pi_up(i, predicted_params[j,])
})
}
pi_up <- as.data.frame(pi_up)
colnames(pi_up) <- paste0("Quantile_", sort(1 - object@cut_off_prob))
expected <- data.frame("expected" = sapply(1:object@extrap_step, function(i) {
find_expectation_state_based_dist(predicted_params[i, grep("prob_dist.", colnames(predicted_params))])
}))
} else {
if (object@res_dist == "normal" & object@outlier_type != "None" & object@outlier_prediction != "None") {
if (is.null(trained_result$param_mle)) {
if (object@outlier_type == "All") {
ol_type <- c("AO", "IO", "TC")
} else {
ol_type <- object@outlier_type
}
expected <- expected
pi_up <- pi_up
mu <- stats::setNames(as.data.frame(do.call(rbind, lapply(1:object@extrap_step, function(h) {
rep(0, times = length(ol_type) + 1)
}))), paste0("mean.", c(ol_type, "NO")))
sd <- stats::setNames(as.data.frame(do.call(rbind, lapply(1:object@extrap_step, function(h) {
rep(0, times = length(ol_type) + 1)
}))), paste0("sd.", c(ol_type, "NO")))
pro <- stats::setNames(as.data.frame(do.call(rbind, lapply(1:object@extrap_step, function(h) {
rep(0, times = length(ol_type) + 1)
}))), paste0("pro.", c(ol_type, "NO")))
predicted_params <- cbind(mu, sd, pro)
} else {
outlier_prediction_result <- prediction_including_outlier_effect(object, trained_result, pi_up, expected, level)
expected <- outlier_prediction_result$expected
pi_up <- outlier_prediction_result$pi_up
predicted_params <- outlier_prediction_result$predicted_params
}
}
}
return(list("predicted_quantiles" = cbind(expected, pi_up), "predicted_params" = predicted_params))
})
#' @return A list containing all numeric parameter information.
#' @rdname get_param_slots
#' @export
setMethod("get_param_slots",
signature(object = "auto_arima_sim"),
function(object) {
numeric_lst <- methods::callNextMethod(object)
numeric_lst[["freq"]] <- methods::slot(object, "freq")
numeric_lst[["outlier_cval"]] <- methods::slot(object, "outlier_cval")
numeric_lst[["state_num"]] <- methods::slot(object, "state_num")
return(numeric_lst)
})
#' @return A list containing all character parameter information.
#' @rdname get_characteristic_slots
#' @export
setMethod("get_characteristic_slots",
signature(object = "auto_arima_sim"),
function(object) {
character_lst <- methods::callNextMethod(object)
character_lst[["res_dist"]] <- methods::slot(object, "res_dist")
character_lst[["outlier_type"]] <- methods::slot(object, "outlier_type")
character_lst[["outlier_prediction"]] <- methods::slot(object, "outlier_prediction")
character_lst[["outlier_prediction_update_param"]] <- methods::slot(object, "outlier_prediction_update_param")
return(character_lst)
})
#' @return A list containing all character parameter informations.
#' @rdname get_hidden_slots
#' @export
setMethod("get_hidden_slots",
signature(object = "auto_arima_sim"),
function(object) {
hidden_lst <- methods::callNextMethod(object)
hidden_lst[["train_args"]] <- methods::slot(object, "train_args")
hidden_lst[["outlier_prediction_prior"]] <- methods::slot(object, "outlier_prediction_prior")
hidden_lst[["window_size_for_reg"]] <- methods::slot(object, "window_size_for_reg")
hidden_lst[["window_type_for_reg"]] <- methods::slot(object, "window_type_for_reg")
return(hidden_lst)
})
#' @export
setAs("data.frame", "auto_arima_sim",
function(from) {
object <- methods::new("auto_arima_sim")
for (i in names(from)) {
if (i %in% methods::slotNames(object)) {
if (methods::is(from[, i], "character")) {
if (length(strsplit(from[, i], ",")[[1]]) == 1) {
methods::slot(object, i) <- from[, i]
} else {
methods::slot(object, i) <- as.numeric(strsplit(from[, i], ",")[[1]])
}
} else {
methods::slot(object, i) <- from[, i]
}
}
}
return(object)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.