options(width = 80) knitr::opts_chunk$set(collapse = TRUE, comment = "#R", error = FALSE)
This vignette shows comparisons in terms of computation time with other packages.
Specifically, we will make comparisons with the
roll
package and zoo
package. It should be stressed though that the other
solutions do additional things than this package does. E.g., there is not
performed any rank test in the function in this package. We start by showing
the comparisons of computation times and then we show different options.
Some function definitions are shown at the end.
##### # simple R version roll_regress_R_for_loop <- function(X, y, width){ n <- nrow(X) p <- ncol(X) out <- matrix(NA_real_, n, p) for(i in width:n){ idx <- (i - width + 1L):i out[i, ] <- lm.fit(X[idx, , drop = FALSE], y[idx])$coefficients } out } ##### # zoo version .zoo_inner <- function(Z) { fit <- lm.fit(Z[, -1, drop = FALSE], Z[, 1]) fit$coefficients } library(zoo) roll_regress_zoo <- function(x, y, width){ Z <- cbind(y, X) rollapply(Z, width, FUN = .zoo_inner, by.column = FALSE, align = "right", fill = NA_real_) } ##### # roll_lm library(roll) roll_lm_func <- function(x, y ,width) roll_lm(X, matrix(y, ncol = 1), wdth, intercept = FALSE)$coefficients # use one thread as other methods are easily to compute in parallel too RcppParallel::setThreadOptions(numThreads = 1) # compile functions library(compiler) roll_regress_R_for_loop <- cmpfun(roll_regress_R_for_loop) .zoo_inner <- cmpfun(.zoo_inner) roll_regress_zoo <- cmpfun(roll_regress_zoo) roll_lm_func <- cmpfun(roll_lm_func)
We start by simulating the data
set.seed(32981054) n <- 10000 p <- 6 wdth = 50 X <- matrix(rnorm(p * n), n, p) y <- drop(X %*% runif(p)) + rnorm(n) df <- data.frame(y, X) frm <- eval(parse(text = paste0( "formula(y ~ -1 + ", paste0("X", 1:p, collapse = " + "), ")")))
Then we check that the functions give the same (the function definitions are at the end of this document)
library(rollRegres) base_res <- roll_regress_R_for_loop(X, y, wdth) all.equal(base_res, roll_regres.fit(X, y, wdth)$coefs, check.attributes = FALSE) all.equal(base_res, roll_regres(frm, df, wdth)$coefs, check.attributes = FALSE) all.equal(base_res, roll_regress_zoo(X, y, wdth), check.attributes = FALSE) all.equal(base_res, roll_lm_func(X, y, wdth), check.attributes = FALSE)
and here we compare the computation time
microbenchmark::microbenchmark( roll_regress = roll_regres.fit(X, y, wdth), # this will be slower due to call to `model.matrix` and `model.frame` roll_regress_df = roll_regres(frm, df, wdth), roll_regress_zoo = roll_regress_zoo(X, y, wdth), roll_regress_R_for_loop = roll_regress_R_for_loop(X, y, wdth), roll_lm = roll_lm_func(X, y, wdth), times = 5) # here is the formula used above frm
This section will cover some additional features.
Here are expanding window regressions with additional output
##### # simulate data set.seed(65731482) n <- 100 p <- 2 X <- matrix(rnorm(p * n), n, p) y <- drop(X %*% runif(p)) + rnorm(n) ##### # use package function pck_out <- roll_regres.fit( X, y, width = 50L, do_downdates = FALSE, do_compute = c("sigmas", "r.squareds", "1_step_forecasts")) ##### # assign R-version R_func <- function(X, y, width){ n <- nrow(X) p <- ncol(X) out <- matrix(NA_real_, n, p) sigmas <- rep(NA_real_, n) r.squared <- rep(NA_real_, n) one_step_forecasts <- rep(NA_real_, n) for(i in width:n){ idx <- 1:i fit <- lm(y[idx] ~ -1 + X[idx, , drop = FALSE]) out[i, ] <- fit$coefficients su <- summary(fit) sigmas[i] <- su$sigma ss1 <- sum((y[idx] - mean(y[idx]))^2) ss2 <- sum(fit$residuals^2) r.squared[i] <- 1 - ss2 / ss1 if(i < n){ next_i <- i + 1L one_step_forecasts[next_i] <- fit$coefficients %*% X[next_i, ] } } list(coef = out, sigmas = sigmas, r.squared = r.squared, one_step_forecasts = one_step_forecasts) } R_out <- R_func(X, y, 50L) ##### # gives the same stopifnot( isTRUE(all.equal(R_out$coef , pck_out$coefs)), isTRUE(all.equal(R_out$sigmas , pck_out$sigmas)), isTRUE(all.equal(R_out$r.squared , pck_out$r.squared)), isTRUE(all.equal(R_out$one_step_forecasts, pck_out$one_step_forecasts)))
You can use the grp
argument to make updates in blocks. E.g., here is an
example with weekly data
##### # simulate data set.seed(68799379) week <- as.integer(gl(25, 7)) head(week[1:10]) n <- length(week) p <- 2 X <- matrix(rnorm(p * n), n, p) y <- drop(X %*% runif(p)) + rnorm(n) ##### # use package function pck_out <- roll_regres.fit( X, y, width = 10L, grp = week, do_compute = c("sigmas", "r.squareds", "1_step_forecasts")) ##### # assign R-version R_func <- function(X, y, width, grp){ grp <- grp + 1L - min(grp) u_grp = unique(grp) n <- nrow(X) p <- ncol(X) out <- matrix(NA_real_, n, p) sigmas <- rep(NA_real_, n) r.squared <- rep(NA_real_, n) one_step_forecasts <- rep(NA_real_, n) start_val <- min(which(u_grp >= width)) for(g in u_grp[start_val:length(u_grp)]){ idx <- which(grp %in% (g - width + 1L):g) i <- which(grp == g) fit <- lm(y[idx] ~ -1 + X[idx, , drop = FALSE]) out[i, ] <- sapply(fit$coefficients, rep, times = length(i)) su <- summary(fit) sigmas[i] <- su$sigma ss1 <- sum((y[idx] - mean(y[idx]))^2) ss2 <- sum(fit$residuals^2) r.squared[i] <- 1 - ss2 / ss1 if(g != max(grp)){ next_g <- grp[min(which(grp > g))] next_g <- which(grp == next_g) one_step_forecasts[next_g] <- X[next_g, ] %*% fit$coefficients } } list(coef = out, sigmas = sigmas, r.squared = r.squared, one_step_forecasts = one_step_forecasts) } R_out <- R_func(X, y, 10L, grp = week) ##### # gives the same stopifnot( isTRUE(all.equal(R_out$coef , pck_out$coefs)), isTRUE(all.equal(R_out$sigmas , pck_out$sigmas)), isTRUE(all.equal(R_out$r.squared , pck_out$r.squared)), isTRUE(all.equal(R_out$one_step_forecasts, pck_out$one_step_forecasts)))
Suppose that we are performing rolling regression on stock data over a yearly window. Further, suppose that there are some gaps in the data where we do not have data and we require at least 6 months of data. This can be done as follows
##### # simulate data set.seed(96235555) n <- 10L * 12L * 21L # x years w/ 12 months of 21 trading days month <- (seq_len(n) - 1L) %/% 21L + 1L # group by months set.seed(29478439) X <- matrix(rnorm(n * 4L), ncol = 4L) y <- rnorm(n) # randomly drop rows keep <- seq_along(y) %in% sample.int(nrow(X), as.integer(n * .5)) X <- X [keep, ] y <- y [keep] month <- month[keep] ##### # use package function pck_out <- roll_regres.fit( X, y, width = 12L, grp = month, min_obs = 21L * 6L, do_downdates = TRUE, do_compute = c("sigmas", "r.squareds")) ##### # assign R-version R_func <- function(X, y, width, grp, downdate, min_obs){ grp <- grp + 1L - min(grp) u_grp = unique(grp) n <- nrow(X) p <- ncol(X) out <- matrix(NA_real_, n, p) sigmas <- rep(NA_real_, n) r.squared <- rep(NA_real_, n) start_val <- min(which(u_grp >= width)) for(g in u_grp[start_val:length(u_grp)]){ idx <- if(downdate) which(grp %in% (g - width + 1L):g) else which(grp %in% 1:g) i <- which(grp == g) if(length(idx) < min_obs) next fit <- lm(y[idx] ~ -1 + X[idx, , drop = FALSE]) out[i, ] <- sapply(fit$coefficients, rep, times = length(i)) su <- summary(fit) sigmas[i] <- su$sigma ss1 <- sum((y[idx] - mean(y[idx]))^2) ss2 <- sum(fit$residuals^2) r.squared[i] <- 1 - ss2 / ss1 } list(coef = out, sigmas = sigmas, r.squared = r.squared) } R_out <- R_func(X, y, width = 12L, downdate = TRUE, grp = month, min_obs = 21L * 6L) ##### # gives the same stopifnot( isTRUE(all.equal(R_out$coef , pck_out$coefs)), isTRUE(all.equal(R_out$sigmas , pck_out$sigmas)), isTRUE(all.equal(R_out$r.squared, pck_out$r.squared)))
sessionInfo()
Here are the function definitions
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.