Comparisons.R
In rollRegres: Fast Rolling and Expanding Window Linear Regression

## ----setup, include = FALSE---------------------------------------------------
options(width = 80)
knitr::opts_chunk$set(collapse = TRUE, comment = "#R", error = FALSE)

## ----def_roll_funcs, echo = FALSE, message=FALSE------------------------------
#####
# simple R version
roll_regress_R_for_loop <- function(X, y, width){
  n <- nrow(X)
  p <- ncol(X)
  out <- matrix(NA_real_, n, p)

  for(i in width:n){
    idx <- (i - width + 1L):i
    out[i, ] <- lm.fit(X[idx, , drop = FALSE], y[idx])$coefficients
  }

  out
}

#####
# zoo version
.zoo_inner <- function(Z) {
  fit <- lm.fit(Z[, -1, drop = FALSE], Z[, 1])
  fit$coefficients
}
library(zoo)
roll_regress_zoo <- function(x, y, width){
  Z <- cbind(y, X)
  rollapply(Z, width, FUN = .zoo_inner,  
            by.column = FALSE,  align = "right", fill = NA_real_)
}

#####
# roll_lm
library(roll)
roll_lm_func <- function(x, y ,width)
  roll_lm(X, matrix(y, ncol = 1), wdth, intercept = FALSE)$coefficients

# use one thread as other methods are easily to compute in parallel too
RcppParallel::setThreadOptions(numThreads = 1)

# compile functions
library(compiler)
roll_regress_R_for_loop <- cmpfun(roll_regress_R_for_loop)
.zoo_inner              <- cmpfun(.zoo_inner)
roll_regress_zoo        <- cmpfun(roll_regress_zoo)
roll_lm_func            <- cmpfun(roll_lm_func)

## ----sim_data-----------------------------------------------------------------
set.seed(32981054)
n <- 10000
p <- 6
wdth = 50
X <- matrix(rnorm(p * n), n, p)
y <- drop(X %*% runif(p)) + rnorm(n)
df <- data.frame(y, X)
frm <- eval(parse(text = paste0(
  "formula(y ~ -1 + ", paste0("X", 1:p, collapse = " + "), ")")))

## ----roll_gives_same----------------------------------------------------------
library(rollRegres)
base_res <- roll_regress_R_for_loop(X, y, wdth)
all.equal(base_res, roll_regres.fit(X, y, wdth)$coefs, 
          check.attributes = FALSE)
all.equal(base_res, roll_regres(frm, df, wdth)$coefs, 
          check.attributes = FALSE)
all.equal(base_res, roll_regress_zoo(X, y, wdth), 
          check.attributes = FALSE)
all.equal(base_res, roll_lm_func(X, y, wdth), 
          check.attributes = FALSE)

## ----benchmark_roll-----------------------------------------------------------
microbenchmark::microbenchmark(
  roll_regress            = roll_regres.fit(X, y, wdth),
  # this will be slower due to call to `model.matrix` and `model.frame`
  roll_regress_df         = roll_regres(frm, df, wdth),
  roll_regress_zoo        = roll_regress_zoo(X, y, wdth),
  roll_regress_R_for_loop = roll_regress_R_for_loop(X, y, wdth),
  roll_lm = roll_lm_func(X, y, wdth),
  times = 5)

# here is the formula used above
frm

## ----expanding_window---------------------------------------------------------
#####
# simulate data
set.seed(65731482)
n <- 100
p <- 2
X <- matrix(rnorm(p * n), n, p)
y <- drop(X %*% runif(p)) + rnorm(n)

#####
# use package function
pck_out <- roll_regres.fit(
  X, y, width = 50L, do_downdates = FALSE, 
  do_compute = c("sigmas", "r.squareds", "1_step_forecasts"))

#####
# assign R-version
R_func <- function(X, y, width){
  n <- nrow(X)
  p <- ncol(X)
  out <- matrix(NA_real_, n, p)
  sigmas             <- rep(NA_real_, n)
  r.squared          <- rep(NA_real_, n)
  one_step_forecasts <- rep(NA_real_, n)

  for(i in width:n){
    idx <- 1:i
    fit <- lm(y[idx] ~ -1 + X[idx, , drop = FALSE])
    out[i, ] <- fit$coefficients

    su <- summary(fit)
    sigmas[i] <- su$sigma

    ss1 <- sum((y[idx] - mean(y[idx]))^2)
    ss2 <- sum(fit$residuals^2)
    r.squared[i] <- 1 - ss2 / ss1

    if(i < n){
      next_i <- i + 1L
      one_step_forecasts[next_i] <- fit$coefficients %*% X[next_i, ]
    }
  }

  list(coef = out, sigmas = sigmas, r.squared = r.squared,
       one_step_forecasts = one_step_forecasts)
}

R_out <- R_func(X, y, 50L)

#####
# gives the same
stopifnot(
  isTRUE(all.equal(R_out$coef              , pck_out$coefs)), 
  isTRUE(all.equal(R_out$sigmas            , pck_out$sigmas)), 
  isTRUE(all.equal(R_out$r.squared         , pck_out$r.squared)), 
  isTRUE(all.equal(R_out$one_step_forecasts, pck_out$one_step_forecasts)))

## ----block_example------------------------------------------------------------
#####
# simulate data
set.seed(68799379)
week <- as.integer(gl(25, 7))
head(week[1:10])
n <- length(week)
p <- 2
X <- matrix(rnorm(p * n), n, p)
y <- drop(X %*% runif(p)) + rnorm(n)

#####
# use package function
pck_out <- roll_regres.fit(
  X, y, width = 10L, grp = week, 
  do_compute = c("sigmas", "r.squareds", "1_step_forecasts"))

#####
# assign R-version
R_func <- function(X, y, width, grp){
  grp <- grp + 1L - min(grp)
  u_grp = unique(grp)
  n <- nrow(X)
  p <- ncol(X)
  out <- matrix(NA_real_, n, p)
  sigmas             <- rep(NA_real_, n)
  r.squared          <- rep(NA_real_, n)
  one_step_forecasts <- rep(NA_real_, n)

  start_val <- min(which(u_grp >= width))
  for(g in u_grp[start_val:length(u_grp)]){
    idx <- which(grp %in% (g - width + 1L):g)
    i <- which(grp == g)
    fit <- lm(y[idx] ~ -1 + X[idx, , drop = FALSE])
    out[i, ] <- sapply(fit$coefficients, rep, times = length(i))

    su <- summary(fit)
    sigmas[i] <- su$sigma

    ss1 <- sum((y[idx] - mean(y[idx]))^2)
    ss2 <- sum(fit$residuals^2)
    r.squared[i] <- 1 - ss2 / ss1

    if(g != max(grp)){
      next_g <- grp[min(which(grp > g))]
      next_g <- which(grp == next_g)
      one_step_forecasts[next_g] <- X[next_g, ] %*% fit$coefficients
    }
  }

  list(coef = out, sigmas = sigmas, r.squared = r.squared,
       one_step_forecasts = one_step_forecasts)
}

R_out <- R_func(X, y, 10L, grp = week)

#####
# gives the same
stopifnot(
  isTRUE(all.equal(R_out$coef              , pck_out$coefs)), 
  isTRUE(all.equal(R_out$sigmas            , pck_out$sigmas)), 
  isTRUE(all.equal(R_out$r.squared         , pck_out$r.squared)), 
  isTRUE(all.equal(R_out$one_step_forecasts, pck_out$one_step_forecasts)))

## ----min_obs------------------------------------------------------------------
#####
# simulate data
set.seed(96235555)
n   <- 10L * 12L * 21L # x years w/ 12 months of 21 trading days
month <- (seq_len(n) - 1L) %/% 21L + 1L # group by months
set.seed(29478439)
X <- matrix(rnorm(n * 4L), ncol = 4L)
y <- rnorm(n)

# randomly drop rows
keep <- seq_along(y) %in% sample.int(nrow(X), as.integer(n * .5))
X     <- X    [keep, ]
y     <- y    [keep]
month <- month[keep]

#####
# use package function
pck_out <- roll_regres.fit(
  X, y, width = 12L, grp = month, min_obs = 21L * 6L, do_downdates = TRUE, 
  do_compute = c("sigmas", "r.squareds"))

#####
# assign R-version
R_func <- function(X, y, width, grp, downdate, min_obs){
  grp <- grp + 1L - min(grp)
  u_grp = unique(grp)
  n <- nrow(X)
  p <- ncol(X)
  out <- matrix(NA_real_, n, p)
  sigmas    <- rep(NA_real_, n)
  r.squared <- rep(NA_real_, n)

  start_val <- min(which(u_grp >= width))
  for(g in u_grp[start_val:length(u_grp)]){
    idx <-
      if(downdate)
        which(grp %in% (g - width + 1L):g) else
          which(grp %in% 1:g)
    i <- which(grp == g)
    if(length(idx) < min_obs)
      next
    fit <- lm(y[idx] ~ -1 + X[idx, , drop = FALSE])
    out[i, ] <- sapply(fit$coefficients, rep, times = length(i))

    su <- summary(fit)
    sigmas[i] <- su$sigma

    ss1 <- sum((y[idx] - mean(y[idx]))^2)
    ss2 <- sum(fit$residuals^2)
    r.squared[i] <- 1 - ss2 / ss1
  }

  list(coef = out, sigmas = sigmas, r.squared = r.squared)
}

R_out <- R_func(X, y, width = 12L, downdate = TRUE, grp = month, 
                min_obs = 21L * 6L)

#####
# gives the same
stopifnot(
  isTRUE(all.equal(R_out$coef     , pck_out$coefs)),
  isTRUE(all.equal(R_out$sigmas   , pck_out$sigmas)),
  isTRUE(all.equal(R_out$r.squared, pck_out$r.squared)))

## ----ses_info-----------------------------------------------------------------
sessionInfo()

## ----def_roll_funcs, eval = FALSE---------------------------------------------
#  #####
#  # simple R version
#  roll_regress_R_for_loop <- function(X, y, width){
#    n <- nrow(X)
#    p <- ncol(X)
#    out <- matrix(NA_real_, n, p)
#  
#    for(i in width:n){
#      idx <- (i - width + 1L):i
#      out[i, ] <- lm.fit(X[idx, , drop = FALSE], y[idx])$coefficients
#    }
#  
#    out
#  }
#  
#  #####
#  # zoo version
#  .zoo_inner <- function(Z) {
#    fit <- lm.fit(Z[, -1, drop = FALSE], Z[, 1])
#    fit$coefficients
#  }
#  library(zoo)
#  roll_regress_zoo <- function(x, y, width){
#    Z <- cbind(y, X)
#    rollapply(Z, width, FUN = .zoo_inner,
#              by.column = FALSE,  align = "right", fill = NA_real_)
#  }
#  
#  #####
#  # roll_lm
#  library(roll)
#  roll_lm_func <- function(x, y ,width)
#    roll_lm(X, matrix(y, ncol = 1), wdth, intercept = FALSE)$coefficients
#  
#  # use one thread as other methods are easily to compute in parallel too
#  RcppParallel::setThreadOptions(numThreads = 1)
#  
#  # compile functions
#  library(compiler)
#  roll_regress_R_for_loop <- cmpfun(roll_regress_R_for_loop)
#  .zoo_inner              <- cmpfun(.zoo_inner)
#  roll_regress_zoo        <- cmpfun(roll_regress_zoo)
#  roll_lm_func            <- cmpfun(roll_lm_func)

Any scripts or data that you put into this service are public.

rollRegres documentation built on May 5, 2022, 1:06 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

rollRegres
Fast Rolling and Expanding Window Linear Regression

inst/doc/Comparisons.R
In rollRegres: Fast Rolling and Expanding Window Linear Regression

Try the rollRegres package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

rollRegres Fast Rolling and Expanding Window Linear Regression

inst/doc/Comparisons.R In rollRegres: Fast Rolling and Expanding Window Linear Regression

Try the rollRegres package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

rollRegres
Fast Rolling and Expanding Window Linear Regression

inst/doc/Comparisons.R
In rollRegres: Fast Rolling and Expanding Window Linear Regression