R/vari11_sim.R

Defines functions check_valid_vari11_sim

Documented in check_valid_vari11_sim

#' @include sim_class.R generics.R
NULL

#' Validity Checker for vari11_sim Object
#'
#' @param object A vari11_sim object
#' @return \code{TRUE} if the input sim object is valid, vector of error messages otherwise.
#' @keywords internal
check_valid_vari11_sim <- function(object) {
  errors <- character()
  res_dist_choices <- c("norm", "skew_norm")
  if (length(object@res_dist) != 1 | is.na(object@res_dist) |  all(object@res_dist != res_dist_choices)) {
    msg <- paste0("train_policy must be one of ", paste(res_dist_choices, collapse = " "), ".")
    errors <- c(errors, msg)
  }
  if (object@reg_num != 2) {
    msg <- paste0("reg_num must be fixed to 2 for vari11 sim model.")
    errors <- c(errors, msg)
  }
  if (length(errors) == 0) {
    return(TRUE)
  } else {
    return(errors)
  }
}


#' @rdname sim-class
#' @param res_dist The distribution of residual.
#' @param reg_num The number of past regressive observations needed to forecast next observation.
#' @export vari11_sim
vari11_sim <- setClass("vari11_sim",
                     slots = list(res_dist = "character", reg_num = "numeric"),
                     contains = "sim",
                     prototype = list(name = "VARI11",
                                      res_dist = "norm",
                                      reg_num = 2),
                     validity = check_valid_vari11_sim)


#' @describeIn train_model Train VARI11 Model specific to vari11_sim object.
setMethod("train_model",
          signature(object = "vari11_sim", trainset_max = "numeric", trainset_avg = "numeric"),
          function(object, trainset_max, trainset_avg) {
            new_trainset_max <- convert_frequency_dataset(trainset_max, object@window_size, "max")
            new_trainset_avg <- convert_frequency_dataset(trainset_avg, object@window_size, "avg")
            if (object@response == "max") {
              new_trainset_max_diff <- diff(new_trainset_max)
              new_trainset_avg_diff <- diff(new_trainset_avg)
              uni_data_matrix <- matrix(nrow = length(new_trainset_max_diff), ncol = 2)
              uni_data_matrix[,1] <- new_trainset_max_diff
              uni_data_matrix[,2] <- new_trainset_avg_diff
              ts_model <- MTS::VAR(uni_data_matrix, p = 1, include.mean = TRUE, output = FALSE)
            } else {
              new_trainset_max_diff <- diff(new_trainset_max)
              new_trainset_avg_diff <- diff(new_trainset_avg)
              uni_data_matrix <- matrix(nrow = length(new_trainset_avg_diff), ncol = 2)
              uni_data_matrix[,1] <- new_trainset_avg_diff
              uni_data_matrix[,2] <- new_trainset_max_diff
              ts_model <- MTS::VAR(uni_data_matrix, p = 1, include.mean = TRUE, output = FALSE)
            }
            if (object@res_dist == "norm") {
              # phi
              phi <- ts_model$Phi[,1]

              # mu
              mu <- ts_model$Ph0[1]

              # sigma
              sigma <- sqrt(ts_model$Sigma[1,1])

              trained_result <- list("phi" = phi, "mu" = mu, "sigma" = sigma, "residuals" = ts_model$residuals[,1], "intercept" = ts_model$Ph0[1])
            } else {
              # phi
              phi <- ts_model$Phi[,1]

              res <- ts_model$residuals[,1]
              skew_res <- sample_moment_lag(res, k = 0, r = 3, s = 0) / (sample_moment_lag(res, k = 0, r = 2, s = 0) ^ (3/2))
              abs_skew_res <- min(abs(skew_res), 0.99)

              # alpha parameter
              delta <- sign(skew_res) * sqrt((pi / 2) * (abs_skew_res^(2/3)) / ((abs_skew_res ^ (2/3)) + (2 - 0.5 * pi) ^ (2/3)))
              alpha <- delta / sqrt(1 - delta ^ 2)

              # omega parameter
              omega2 <- sample_moment_lag(res, k = 0, r = 2, s = 0) / (1 - 2 / pi * delta ^ (2))
              omega <- sqrt(omega2)

              # xi parameter
              xi <- ts_model$Ph0[1] - sqrt(pi / 2) * omega * delta

              trained_result <- list("phi" = phi, "xi" = xi, "omega" = omega, "alpha" = alpha, "residuals" = ts_model$residuals[,1], "intercept" = ts_model$Ph0[1])
            }
            return(trained_result)
          })


#' @describeIn do_prediction Do prediction based on trained VAR Model.
setMethod("do_prediction",
          signature(object = "vari11_sim", trained_result = "list", last_obs_max = "numeric", last_obs_avg = "numeric", last_res = "numeric", level = "numeric"),
          function(object, trained_result, last_obs_max, last_obs_avg, last_res, level) {
            last_obs_max_diff <- last_obs_max[2] - last_obs_max[1]
            last_obs_avg_diff <- last_obs_avg[2] - last_obs_avg[1]
            if (object@response == "max") {
              last_obs_diff <- c(last_obs_max_diff, last_obs_avg_diff)
              if (object@res_dist == "norm") {
                mu <- trained_result$mu + sum(trained_result$phi * last_obs_diff)
                mu <- last_obs_max[2] + mu
              } else {
                xi <- trained_result$xi + sum(trained_result$phi * last_obs_diff)
                xi <- last_obs_max[2] + xi
              }
            } else {
              last_obs_diff <- c(last_obs_avg_diff, last_obs_max_diff)
              if (object@res_dist == "norm") {
                mu <- trained_result$mu + sum(trained_result$phi * last_obs_diff)
                mu <- last_obs_avg[2] + mu
              } else {
                xi <- trained_result$xi + sum(trained_result$phi * last_obs_diff)
                xi <- last_obs_avg[2] + xi
              }
            }

            if (object@res_dist == "norm") {
              sd <- trained_result$sigma
              prob <- NULL
              if (!is.na(level)) {
                prob <- 1 - stats::pnorm(q = level, mean = mu, sd = sd)
              }
              predicted_result <- list("prob" = as.numeric(prob), "mean" = mu, "sd" = sd, "expected" = mu)
            } else {
              omega <- trained_result$omega
              alpha <- trained_result$alpha
              prob <- NULL
              if (!is.na(level)) {
                prob <- 1 - sn::psn(x = level, xi = xi, omega = omega, alpha = alpha)
              }
              predicted_result <- list("prob" = as.numeric(prob), "xi" = xi, "omega" = omega, "alpha" = alpha, "expected" = xi)
            }
            return(predicted_result)
          })


#' @describeIn compute_pi_up Compute prediction interval Upper Bound based on trained AR1 Model.
setMethod("compute_pi_up",
          signature(object = "vari11_sim", predicted_result = "list"),
          function(object, predicted_result) {
            if (object@res_dist == "norm") {
              mu <- predicted_result$mean
              sd <- predicted_result$sd
              upper_bounds <- min(stats::qnorm(1 - object@cut_off_prob, mean = mu, sd = sd), 100)
            } else {
              xi <- predicted_result$xi
              omega <- predicted_result$omega
              alpha <- predicted_result$alpha
              upper_bounds <- min(sn::qsn(1 - object@cut_off_prob, xi = xi, omega = omega, alpha = alpha), 100)
            }
            return(upper_bounds)
          })


#' @return A list containing all numeric parameter informations.
#' @rdname get_param_slots
#' @export
setMethod("get_param_slots",
          signature(object = "vari11_sim"),
          function(object) {
            numeric_slots <- c("cut_off_prob", "granularity", "train_size", "update_freq", "tolerance1", "tolerance2")
            numeric_lst <- list()
            for (i in numeric_slots) {
              numeric_lst[[i]] <- methods::slot(object, i)
            }
            return(numeric_lst)
          })


#' @return A list containing all character parameter informations.
#' @rdname get_characteristic_slots
#' @export
setMethod("get_characteristic_slots",
          signature(object = "vari11_sim"),
          function(object) {
            character_slots <- c("name", "type", "window_size", "train_policy", "schedule_policy", "adjust_policy", "response", "res_dist")
            character_lst <- list()
            for (i in character_slots) {
              character_lst[[i]] <- methods::slot(object, i)
            }
            return(character_lst)
          })


#' @rdname plot_sim_tracewise
#' @export
setMethod("plot_sim_tracewise",
          signature(object = "vari11_sim", index = "numeric", trace_name = "character", trainset = "data.frame", testset = "data.frame", prev_score = "data.frame", last_score = "numeric", decision = "list"),
          function(object, index, trace_name, trainset, testset, prev_score, last_score, decision) {
            if (object@response == "max") {
              target_dataset <- c(utils::tail(trainset$trainset_max, 4 * nrow(testset)), testset$testset_max)
            } else {
              target_dataset <- c(utils::tail(trainset$trainset_avg, 4 * nrow(testset)), testset$testset_avg)
            }

            train_or_test <- c(rep("train", 4 * nrow(testset)), rep("test", nrow(testset)))
            t <- as.numeric(c(utils::tail(rownames(trainset), 4 * nrow(testset)), rownames(testset))) / 60

            train_decision <- decision$train_decision
            test_decision <- decision$test_decision

            train_sig <- train_decision$train_sig
            iter <- train_decision$iter
            trained_result <- train_decision$trained_result
            res <- trained_result$residuals

            pi_up <- c(rep(NA_real_, 4 * nrow(testset)), test_decision$pi_up)
            adjust_switch <- c(rep(NA_integer_, 4 * nrow(testset)), test_decision$adjust_switch)
            decision_opt <- c(rep(NA_integer_, 4 * nrow(testset)), test_decision$decision_opt)

            msg1 <- paste("Current batch has performance of", paste(last_score, collapse = " "))
            if (nrow(prev_score) == 0) {
              msg2 <- paste("No historical score to compare with on score1.")
              msg3 <- paste("No historical score to compare with on score2.")
            } else {
              msg2 <- paste("Current batch has performance not failing", sum(last_score[1] >= prev_score$prev_score1, na.rm = TRUE) / nrow(prev_score), "on score 1.")
              msg3 <- paste("Current batch has performance not failing", sum(last_score[2] >= prev_score$prev_score2, na.rm = TRUE) / nrow(prev_score), "on score 2.")
            }
            msg4 <- paste("Based on tolerance level of", object@tolerance1, object@tolerance2, "the training signal is", train_sig, "for next iteration.")
            wn_test <- stats::Box.test(res, lag = round(sqrt(length(res))), type = "Ljung-Box", fitdf = 2)
            msg5 <- paste("The White Noise Test for Residuals has p-value of", round(wn_test$p.value, 3), "for one-tailed test.")

            # Time Series Plot
            result <- data.frame("target_dataset" = target_dataset, "time" = t, "train_or_test" = train_or_test, "pi_up" = pi_up, "adjust_switch" = adjust_switch, "decision_opt" = decision_opt)
            ts_plt <- ggplot2::ggplot(result, aes(x = result$time, y = result$target_dataset)) +
              ggplot2::geom_line(aes(color = factor(result$train_or_test))) +
              ggplot2::geom_line(aes(y = result$pi_up, color = as.factor(result$adjust_switch)), na.rm = TRUE) +
              ggplot2::geom_line(aes(y = result$decision_opt), color = "darkcyan", na.rm = TRUE) +
              ggplot2::geom_hline(yintercept = 100, linetype = "dashed", color = "purple") +
              ggplot2::xlab("Time (minutes)") +
              ggplot2::ylab("Cpu (percent)") +
              ggplot2::theme(legend.position = "none") +
              ggplot2::ggtitle(paste("Diagnostic Plot of", trace_name, "at Iteration", iter)) +
              ggplot2::annotate("text", x = -Inf, y = Inf, vjust = c(2, 3.25, 4.5, 5.75, 7), hjust = 0, label = c(msg1, msg2, msg3, msg4, msg5))

            # Density Plot of Residuals
            residual <- data.frame("x" = as.numeric(res))
            dens_res <- ggplot2::ggplot(residual, aes(x = residual$x)) +
              ggplot2::geom_density(fill = "red", alpha = 0.5)
            if (object@res_dist == "norm") {
              mu <- trained_result$mu - trained_result$intercept
              sd <- trained_result$sigma
              dens_res <- dens_res +
                ggplot2::stat_function(fun = stats::dnorm, n = length(res), args = list("mean" = mu, "sd" = sd), color = "blue")
            } else {
              xi <- trained_result$xi - trained_result$intercept
              omega <- trained_result$omega
              alpha <- trained_result$alpha
              dens_res <- dens_res +
                ggplot2::stat_function(fun = sn::dsn, n = length(res), args = list("xi" = xi, "omega" = omega, "alpha" = alpha), color = "blue")
            }
            dens_res <- dens_res +
              ggplot2::theme(legend.position = "none") +
              ggplot2::ylab("density of residuals") +
              ggplot2::xlab("residuals")

            # Time Series Plot of Residuals
            t <- seq(to = as.numeric(rownames(trainset)[nrow(trainset)]), from = as.numeric(rownames(trainset)[nrow(trainset)]) - (length(res) - 1) * object@window_size * 300, by = object@window_size * 300) / 60
            residual <- data.frame("x" = as.numeric(res), "time" = t)
            ts_res <- ggplot2::ggplot(residual, aes(x = residual$time, y = residual$x)) +
              ggplot2::geom_line(color = "green") +
              ggplot2::theme(legend.position = "none") +
              ggplot2::ylab("residuals") +
              ggplot2::xlab("time")

            plt <- gridExtra::arrangeGrob(ts_plt, dens_res, ts_res, ncol = 2, nrow = 2, layout_matrix = rbind(c(1,1), c(2,3)))

            file_name <- paste(unlist(get_characteristic_slots(object)), collapse = " ")
            fp <- fs::path(paste0(object@result_loc, "tracewise_plots/", file_name, " index ", index, " trace ", trace_name, " iter ", iter), ext = "png")
            ggplot2::ggsave(fp, plot = plt, width = 12, height = 7)
          })
carlonlv/DataCenterSim documentation built on March 7, 2020, 8:51 a.m.