R/op.arima.R
In popstudy: Applied Techniques to Demographic and Time Series Analysis

#' op.arima
#'
#' Estimates the best predictive ARIMA model using overparameterization.
#'
#' @param arima_process numeric. The ARIMA(p,d,q)(P,D,Q) process.
#'
#' @param seasonal_periodicity numeric. The seasonal periodicity, 12 for monthly data.
#'
#' @param time_serie ts. The univariate time series object to estimate the models.
#'
#' @param reg Optionally, a vector or matrix of external regressors, which must have the same number of rows as time_serie.
#'
#' @param horiz numeric. The forecast horizon.
#'
#' @param prop numeric. Data proportion for training dataset.
#'
#' @param training_weight numeric. Importance weight for the goodness of fit and precision measures in the training dataset.
#'
#' @param testing_weight numeric. Importance weight for the goodness of fit and precision measures in the testing dataset.
#'
#' @param parallelize logical. If TRUE, then use parallel processing.
#'
#' @param clusters numeric. The number of clusters for the parallel process.
#'
#' @param LAMBDA Optionally. See \code{\link[forecast:Arima]{forecast::Arima()}} for details.
#'
#' @param ISP numeric. Overparameterization indicator to filter the estimated models in the (0,100] interval.
#'
#' @param ... additional arguments to be passed to \code{\link[forecast:Arima]{forecast::Arima()}}.
#'
#' @return \code{op.arima} returns an object of class \code{list} with the following components:
#'
#' \item{arima_models}{all models defined by the \code{arima_process} argument.}
#' \item{final_measures}{goodness of fit and precision measures for each model.}
#' \item{bests}{a sorted list with the best ARIMA models.}
#' \item{best_model}{a list of "Arima", see \code{\link[forecast:Arima]{forecast::Arima()}}}
#'
#' @examples
#'
#' \donttest{
#'
#' op.arima(arima_process = c(2,1,2,2,1,2),
#' time_serie = AirPassengers,
#' seasonal_periodicity = 12, parallelize=FALSE)
#'
#' }
#'
#' @author Cesar Gamboa-Sanabria
#'
#' @references
#'
#' \insertRef{tesis}{popstudy}
#'
#' @export
op.arima <- function (arima_process = c(
    p = 1,
    d = 1,
    q = 1,
    P = 1,
    D = 1,
    Q = 1
),
seasonal_periodicity,
time_serie,
reg = NULL,
horiz = 12,
prop = 0.8,
training_weight = 0.2,
testing_weight = 0.8,
parallelize = FALSE,
clusters = detectCores(logical = FALSE),
LAMBDA = NULL,
ISP = 100,
...)
{
    data_partition <- round(length(time_serie) * prop, 0)
    # train <<- subset(time_serie, end = data_partition)
    # test <<- subset(time_serie, start = data_partition + 1)
    train <- subset(time_serie, end = data_partition)
    test <- subset(time_serie, start = data_partition + 1)
    arima_model <- function(time_serie,
                            non_seasonal,
                            seasonal,
                            periodic,
                            regr = reg,
                            lambda = LAMBDA,
                            ...) {
        if (is.list(non_seasonal)) {
            non_seasonal <- unlist(non_seasonal)
        }
        if (is.list(seasonal)) {
            seasonal <- unlist(seasonal)
        }
        seasonal_part <- list(order = seasonal, period = periodic)
        if (is.null(regr)) {
            arima_model <- tryCatch({
                Arima(
                    time_serie,
                    order = non_seasonal,
                    seasonal = seasonal_part,
                    lambda = LAMBDA,
                    ...
                )
            }, error = function(e)
                NULL)
        }
        if (!is.null(regr)) {
            arima_model <- tryCatch({
                Arima(
                    time_serie,
                    order = non_seasonal,
                    seasonal = seasonal_part,
                    xreg = regr,
                    lambda = LAMBDA,
                    ...
                )
            }, error = function(e)
                NULL)
        }
        if (!is.null(arima_model)) {
            degrees_of_freedom <- arima_model$nobs - length(arima_model$coef)
            t_value <-
                arima_model$coef / sqrt(diag(arima_model$var.coef))
            prob <-
                stats::pf(
                    t_value ^ 2,
                    df1 = 1,
                    df2 = degrees_of_freedom,
                    lower.tail = FALSE
                )
            ifelse(sum(1 * prob > 0.05) < 1, return(arima_model),
                   1)
        }
    }
    arima_measures <- function(arima_model, testing, horizon,
                               regr = NULL) {
        model_spec <- capture.output(arima_model)
        model_spec <- substr(model_spec[2], 1, 23)
        data <- capture.output(summary(arima_model))
        data <- data[grepl("AIC", data) == T]
        model_info <- strsplit(data, " ")
        pos <- which(sapply(model_info, nchar) > 0)
        model_info <- model_info[[1]][pos]
        model_info <-
            do.call("rbind", strsplit(model_info, "=")) %>%
            data.frame()
        colnames(model_info) <- c("Medida", "Valor")
        model_info <-
            model_info %>% mutate(Valor = as.numeric(as.character(Valor))) %>%
            spread(Medida, Valor) %>% data.frame(arima_model = model_spec)
        model_performance <- data.frame(arima_model = c(model_spec,
                                                        paste(model_spec, "Validacion")),
                                        accuracy(forecast(arima_model,
                                                          horizon, xreg = regr), testing))
        merge(model_info,
              model_performance,
              by = "arima_model",
              all = TRUE) %>% select(arima_model, AIC, AICc, BIC,
                                     MAE, RMSE, MASE)
    }
    arima_selected <-
        function(model_table,
                 Wtrain = training_weight,
                 Wtest = testing_weight) {
            model_table <- model_table %>% distinct(arima_model,
                                                    .keep_all = TRUE)
            model_table <-
                model_table %>% mutate(mod = as.character(c(0,
                                                            rep(
                                                                1:(nrow(model_table) - 1) %/% 2
                                                            ))))
            tabla2 <- model_table %>% mutate_at(vars(contains("C")),
                                                function(x) {
                                                    x - min(x, na.rm = TRUE)
                                                }) %>% mutate_if(is.numeric, function(x)
                                                    ifelse(is.na(x),
                                                           0, x)) %>% mutate(
                                                               puntaje = AIC + AICc + BIC + MAE +
                                                                   RMSE + MASE,
                                                               ponde = ifelse(grepl("Validacion", arima_model) ==
                                                                                  TRUE, Wtest, Wtrain),
                                                               puntaje = puntaje * ponde
                                                           )
            suppressMessages({
                minimal_score <-
                    tabla2 %>% group_by(mod) %>% summarise(puntaje = sum(puntaje)) %>%
                    ungroup
            })
            pos <- minimal_score$mod[which(minimal_score$puntaje ==
                                               min(minimal_score$puntaje))]
            model_table %>% filter(mod %in% pos) %>% dplyr::select(arima_model:MASE)
        }
    suppressWarnings({
        valores <-
            expand.grid(
                p = 0:arima_process[1],
                d = 0:arima_process[2],
                q = 0:arima_process[3],
                P = 0:arima_process[4],
                D = 0:arima_process[5],
                Q = 0:arima_process[6]
            )

        pesos <-
            rep(c(no_estacional = 7.424944, estacional = 36.043200),
                each = 2)

        valores <- valores %>%
            mutate(
                isp = apply(
                    dplyr::select(valores, p, q, P, Q),
                    1,
                    weighted.mean,
                    pesos
                ),
                isp = scales::rescale(isp, to = c(0, 100))
            ) %>%
            filter(isp <= ISP) %>%
            dplyr::select(-isp)


        non_seasonal_values <- split(as.matrix(valores[, 1:3]),
                                     row(valores[, 1:3]))
        seasonal_values <-
            split(as.matrix(valores[, 4:6]), row(valores[,
                                                         4:6]))
        if (parallelize == FALSE) {
            arima_models <-
                mapply(
                    arima_model,
                    non_seasonal = non_seasonal_values,
                    seasonal = seasonal_values,
                    MoreArgs = list(
                        time_serie = train,
                        regr = reg,
                        periodic = seasonal_periodicity
                    ),
                    SIMPLIFY = FALSE
                )
        }
        else
            ({
                clp <- makeCluster(clusters, type = "SOCK", useXDR = FALSE)
                clusterEvalQ(clp, expr = {
                    library(forecast)
                })
                arima_models <- clusterMap(
                    cl = clp,
                    fun = arima_model,
                    non_seasonal = non_seasonal_values,
                    seasonal = seasonal_values,
                    MoreArgs = list(
                        time_serie = train,
                        regr = reg,
                        periodic = seasonal_periodicity
                    ),
                    SIMPLIFY = FALSE,
                    .scheduling = "dynamic"
                )
                stopCluster(clp)
            })
        pos <- which(sapply(lapply(arima_models, class), length) >
                         1)
        final_measures <-
            do.call("rbind",
                    lapply(arima_models[pos],
                           function(x) {
                               tryCatch(
                                   expr = {
                                       arima_measures(
                                           x,
                                           testing = test,
                                           horizon = horiz,
                                           regr = reg
                                       )
                                   },
                                   error = function(e)
                                       data.frame(
                                           arima_model = NA,
                                           AIC = NA,
                                           AICc = NA,
                                           BIC = NA,
                                           MAE = NA,
                                           RMSE = NA,
                                           MASE = NA
                                       )
                               )
                           })) %>% mutate_if(is.numeric, round, 2)
        final_list <- list(
            arima_models = arima_models[pos],
            final_measures = final_measures,
            bests = arima_selected(final_measures,
                                   Wtrain = training_weight, Wtest = testing_weight)
        )
        mod_index <-
            final_list$bests %>% row.names %>% as.numeric %>%
            floor %>% unique %>% as.character
        final_list$best_model <-
            eval(parse(
                text = paste0("final_list$arima_models$",
                              "`", mod_index, "`")
            ))
        final_list
    })
}
Any scripts or data that you put into this service are public.
popstudy documentation built on Oct. 18, 2023, 1:20 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
popstudy
Applied Techniques to Demographic and Time Series Analysis

R/op.arima.R
In popstudy: Applied Techniques to Demographic and Time Series Analysis

Try the popstudy package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

popstudy Applied Techniques to Demographic and Time Series Analysis

R/op.arima.R In popstudy: Applied Techniques to Demographic and Time Series Analysis

Try the popstudy package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

popstudy
Applied Techniques to Demographic and Time Series Analysis

R/op.arima.R
In popstudy: Applied Techniques to Demographic and Time Series Analysis