inst/doc/Introducing_MatrixExtra.R

## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>"
)
options(rmarkdown.html_vignette.check_title = FALSE)

## ---- echo=FALSE--------------------------------------------------------------
### Don't want to run compute-heavy code on CRAN
### Should build it with 'R CMD build --no-build-vignettes MatrixExtra'
### Then set this to TRUE is the vignette is ever to be rebuilt
RUN_ALL <- FALSE
if (!RUN_ALL) {
    options("MatrixExtra.nthreads" = 1L)
}

## -----------------------------------------------------------------------------
library(Matrix)
### Will construct this Matrix
### [ 1, 0, 2 ]
### [ 0, 0, 3 ]
### [ 0, 4, 0 ]
### Non-zero coordinates are:
### [(1,1), (1,3), (2,3), (3,2)]
### Row and column coordinates go separate
row_ix <- c(1, 1, 2, 3)
col_ix <- c(1, 3, 3, 2)
values <- c(1, 2, 3, 4)
X <- Matrix::sparseMatrix(
    i=row_ix, j=col_ix, x=values,
    index1=TRUE, repr="T"
)
X

## -----------------------------------------------------------------------------
as(X, "RsparseMatrix")

## -----------------------------------------------------------------------------
X + X

## -----------------------------------------------------------------------------
Xr <- as(X, "RsparseMatrix")
### This will forcibly convert the matrix to triplets
Xr[1:2, ]

## -----------------------------------------------------------------------------
### This will forcibly convert the matrix to CSC
rbind(Xr, Xr)

## -----------------------------------------------------------------------------
### This will forcibly convert the matrix to CSC
X * X

## ---- eval=RUN_ALL------------------------------------------------------------
#  library(microbenchmark)
#  set.seed(1)
#  X_big_csc <- Matrix::rsparsematrix(1e4, 1e4, .05, repr="C")
#  X_big_csr <- as(t(X_big_csc), "RsparseMatrix")
#  microbenchmark({X_slice <- X_big_csr[1:10, ]}, times=10L)

## ---- eval=RUN_ALL------------------------------------------------------------
#  microbenchmark({X_slice <- X_big_csc[, 1:10]}, times=10L)

## ---- eval=RUN_ALL------------------------------------------------------------
#  microbenchmark({X_col <- X_big_csc[, 100, drop=FALSE]}, times=10L)

## ---- eval=RUN_ALL------------------------------------------------------------
#  set.seed(1)
#  Y_dense <- matrix(rnorm(1e2*nrow(X_big_csc)), nrow=1e2)
#  microbenchmark({Z <- Y_dense %*% X_big_csc}, times=10L)

## -----------------------------------------------------------------------------
library(MatrixExtra)

## -----------------------------------------------------------------------------
Xr

## -----------------------------------------------------------------------------
options("MatrixExtra.quick_show" = FALSE)
Xr

## -----------------------------------------------------------------------------
### This will not change the format
Xr[1:2, ]

## -----------------------------------------------------------------------------
### This will not change the format
rbind(Xr, Xr)

## -----------------------------------------------------------------------------
### This will not change the format
Xr * Xr

## ---- eval=RUN_ALL------------------------------------------------------------
#  microbenchmark({X_slice <- X_big_csr[1:10, ]}, times=10L)

## ---- eval=RUN_ALL------------------------------------------------------------
#  microbenchmark({X_col <- X_big_csc[, 100, drop=FALSE]}, times=10L)

## ---- eval=RUN_ALL------------------------------------------------------------
#  microbenchmark({Z <- Y_dense %*% X_big_csc}, times=10L)

## -----------------------------------------------------------------------------
as(Xr, "ngRMatrix")

## -----------------------------------------------------------------------------
MatrixExtra::as.csr.matrix(Xr, binary=TRUE)

## -----------------------------------------------------------------------------
### Here Matrix would return a 'dgRMatrix'
t(Xr)

## -----------------------------------------------------------------------------
### Here Matrix would return a dense vector
Xr[1,]

## -----------------------------------------------------------------------------
restore_old_matrix_behavior()
set_new_matrix_behavior()

## ---- eval=RUN_ALL------------------------------------------------------------
#  library(readsparse)
#  data <- readsparse::read.sparse("real-sim")
#  X <- data$X
#  y <- as.numeric(factor(data$y))-1 ### convert to 0/1
#  X

## ---- eval=RUN_ALL------------------------------------------------------------
#  X <- cbind(rep(1, nrow(X)), X) ### Accelerated by 'MatrixExtra'
#  set.seed(1)
#  ix_train <- sample(nrow(X), floor(.5*nrow(X)), replace=FALSE)
#  X_train <- X[ix_train,] ### Accelerated by 'MatrixExtra'
#  y_train <- y[ix_train]
#  X_test <- X[-ix_train,] ### Accelerated by 'MatrixExtra'
#  y_test <- y[-ix_train]

## ---- eval=RUN_ALL------------------------------------------------------------
#  logistic_fun <- function(coefs, X, y, lambda) {
#      pred <- 1 / (1 + exp(-as.numeric(X %*% coefs))) ### Accelerated by 'MatrixExtra'
#      ll <- mean(y * log(pred) + (1 - y) * log(1 - pred))
#      reg <- lambda * as.numeric(coefs %*% coefs)
#      ### Don't regularize the intercept
#      reg <- reg - lambda * (coefs[1]^2)
#      return(-ll + reg)
#  }
#  
#  logistic_grad <- function(coefs, X, y, lambda) {
#      pred <- 1 / (1 + exp(-(X %*% coefs))) ### Accelerated by 'MatrixExtra'
#      grad <- colMeans(X * as.numeric(pred - y)) ### Accelerated by 'MatrixExtra'
#      grad <- grad + 2 * lambda * as.numeric(coefs)
#      ### Don't regularize the intercept
#      grad[1] <- grad[1] - 2 * lambda * coefs[1]
#      return(as.numeric(grad))
#  }
#  
#  lambda <- 1e-5 ### <- Regularization parameter
#  res <- optim(numeric(ncol(X_train)),
#               logistic_fun,
#               logistic_grad,
#               method="L-BFGS-B",
#               X_train, y_train, lambda)
#  fitted_coefs <- res$par

## ---- eval=RUN_ALL------------------------------------------------------------
#  y_hat_test <- as.numeric(X_test %*% fitted_coefs)
#  MLmetrics::AUC(y_hat_test, y_test)

## ---- eval=RUN_ALL------------------------------------------------------------
#  x0 <- numeric(ncol(X_train))
#  microbenchmark::microbenchmark({
#      res <- optim(x0,
#                   logistic_fun,
#                   logistic_grad,
#                   method="L-BFGS-B",
#                   X_train, y_train, lambda)
#  }, times=10L)
david-cortes/MatrixExtra documentation built on June 23, 2024, 3:34 a.m.