opts_chunk$set(fig.path = "figures/", comment = NA, results = 'markup', tidy = F, message = F, warning = F, echo = T, cache = T)
library(plyr)
library(reshape2)

library(ggplot2)
library(gridExtra)

library(pander)
theme_set(theme_light())
panderOptions('table.split.table', Inf)
panderOptions('knitr.auto.asis', FALSE)

About

References

Include

library(Rcpp)
library(RcppArmadillo)

library(Matrix)

library(microbenchmark)

Xy operation

sourceCpp(code ='
  #include <RcppArmadillo.h>
  // [[Rcpp::depends(RcppArmadillo)]]

  using namespace Rcpp;
  using namespace arma;

  // [[Rcpp::export]]
  arma::vec A_matvec_mult(const arma::mat & X, const arma::vec & y){
    arma::vec out = X * y;  
    return out;
  }'
) 

Testing on a small example

A <- matrix(c(-261, 209, -49, 
  -530, 422, -98,
  -800, 631, -144),
  ncol = 3, nrow = 3, byrow = TRUE)

v <- c(1, 2, 3)
A %*% v
A_matvec_mult(A, v)

microbenchmark

n <- 50000
k <- 50
X <- matrix(rnorm(n*k), nrow = k)
e <- rnorm(n)

out <- microbenchmark(R = X %*% e, 
  R_crossprod = tcrossprod(X, t(e)),  
  RcppArmadillo = A_matvec_mult(X, e),
  times = 10)

autoplot(out)  

Power method

sourceCpp("source/eigenPower_Rcpp.cpp") 
eigenPower_wrapper <- function(A, v0, ...)
{
  stopifnot(!missing(A))
  if(missing(v0)) {
    v0 <- runif(ncol(A))
  }

  eigenPower_Rcpp(A, v0, ...)
}
eigenPower <- function(A, v0, tol = 1e-6, maxit = 1e3, 
  sparse = FALSE, sparseSymm = FALSE, 
  verbose = 0)
{
  timing <- list()
  timing$args <- proc.time()

  ### arguments
  stopifnot(!missing(A))

  if(missing(v0)) {
    v0 <- runif(ncol(A))
  }

  ### convert into sparse matrices
  if(any(sparse, sparseSymm)) {
    stopifnot(require(Matrix))

    if(sparse) {
      A <- Matrix(A, sparse = T)
    }

    if(sparseSymm) {
      A <- Matrix(A, sparse = T)
      A <- as(A, "symmetricMatrix")
    }
  }

  ### vars
  sparseMatrix <- FALSE
  cl <- attr(class(A), "package")
  if(!is.null(cl)) {
    if(cl == "Matrix") {
      sparseMatrix <- TRUE
    }
  }

  ### preparation before looping
  timing$algo <- proc.time()

  v0 <- as.numeric(v0)

  v <- v0
  v <- v / sqrt(v %*% v)

  ### loop
  lambda0 <- 0
  for(it in 1:maxit) {
    if(verbose > 1) {
      cat(" * it:", it, "/", maxit, "\n")
    }

    b <- tcrossprod(A, t(v)) # A %*% v
    v <- b / sqrt(as.numeric(crossprod(b)))
    lambda <- as.numeric(crossprod(v, b)) # t(v) %*% b

    delta <- abs((lambda - lambda0) / lambda)
    if(delta < tol) {
      break
    }

    lambda0 <- lambda
  }

  ### post-process
  converged <- (it < maxit)

  ### output
  timing$return <- proc.time()

  timing$targs <- timing$algo[["elapsed"]] - timing$args[["elapsed"]]
  timing$talgo <- timing$return[["elapsed"]] - timing$algo[["elapsed"]]

  out <- list(v0 = v0, tol = tol, maxit = maxit,
    it = it, delta = delta, converged = converged, 
    sparseMatrix = sparseMatrix,
    timing = timing,
    lambda = lambda, v = v)

  return(out)
}
sourceCpp("source/eigenPower_RcppParallel.cpp")
eigenPower_wrapper_Parallel <- function(A, v0, ...)
{
  stopifnot(!missing(A))
  if(missing(v0)) {
    v0 <- runif(ncol(A))
  }

  eigenPower_Rcpp_Parallel(A, v0, ...)
}

Big examples

n <- seq(250, 1500, by = 250)

out <- lapply(n, function(ni) {
  set.seed(1)
  M <-  matrix(runif(ni * ni), ni, ni)

  lt <- lower.tri(M)
  M[lt] <- t(M)[lt]

  list(n = ni,
    t.eigenPower = system.time(eigenPower(M))[["elapsed"]],
    t.eigenPower_Rcpp = system.time(eigenPower_wrapper(M))[["elapsed"]]
  )
})  
df <- ldply(out, function(x) data.frame(n = x$n,
  t_eigenPower = x$t.eigenPower,
  t_eigenPower_Rcpp = x$t.eigenPower_Rcpp))

pf <- melt(df, id.vars = "n")

ggplot(pf, aes(n, value, color = variable)) + geom_point() + geom_line()

Direct call of eigenPower_Rcpp

n <- seq(250, 1500, by = 250)

out <- lapply(n, function(ni) {
  set.seed(1)
  M <-  matrix(runif(ni * ni), ni, ni)

  lt <- lower.tri(M)
  M[lt] <- t(M)[lt]

  list(n = ni,
    t.eigenPower = system.time(eigenPower(M))[["elapsed"]],
    t.eigenPower_Rcpp_wrapper = system.time(eigenPower_wrapper(M))[["elapsed"]],    
    t.eigenPower_Rcpp = system.time(eigenPower_Rcpp(M, runif(ncol(M))))[["elapsed"]]
  )
})  
df <- ldply(out, function(x) data.frame(n = x$n,
  t_eigenPower = x$t.eigenPower,
  t_eigenPower_Rcpp_wrapper = x$t.eigenPower_Rcpp_wrapper,
  t_eigenPower_Rcpp = x$t.eigenPower_Rcpp))

pf <- melt(df, id.vars = "n")

ggplot(pf, aes(n, value, color = variable)) + geom_point() + geom_line()

microbenchmark

Random matrix

n <- 1000
M <- matrix(runif(n * n), n, n)

out <- microbenchmark(eigenPower = eigenPower(M), 
  eigenPower_Rcpp_wrapper = eigenPower_wrapper(M),
  eigenPower_Rcpp_Parallel_wrapper = eigenPower_wrapper_Parallel(M),    
  times = 10)

autoplot(out)  

0.75 Sparse symmetric matrix

n <- 1000
#M <- matrix(runif(n * n), n, n)
M <- as.matrix(rsparsematrix(n, n, 0.75, symmetric = TRUE))

out <- microbenchmark(eigenPower = eigenPower(M), 
  eigenPower_Rcpp_wrapper = eigenPower_wrapper(M),
  eigenPower_Rcpp_Parallel_wrapper = eigenPower_wrapper_Parallel(M),    
  times = 10)

autoplot(out)  


variani/cpca documentation built on May 3, 2019, 4:34 p.m.