Nothing
## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
fig.path = "figures/fast-big--",
fig.width = 7,
fig.height = 4.5,
dpi = 150,
message = FALSE,
warning = FALSE
)
LOCAL <- identical(Sys.getenv("LOCAL"), "TRUE")
## ----simulate-bigmatrix, cache=TRUE, eval=LOCAL-------------------------------
library(bigPLScox)
set.seed(2024)
n_obs <- 5000
n_pred <- 100
k_true <- 3
Sigma <- diag(n_pred)
for (b in 0:2) {
idx <- (b * 10 + 1):(b * 10 + 10)
Sigma[idx, idx] <- 0.7
diag(Sigma[idx, idx]) <- 1
}
L <- chol(Sigma)
Z <- matrix(rnorm(n_obs * n_pred), nrow = n_obs)
X_dense <- Z %*% L
w1 <- numeric(n_pred); w1[1:4] <- c( 1.0, 0.8, 0.6, -0.5)
w2 <- numeric(n_pred); w2[5:8] <- c(-0.7, 0.9, -0.4, 0.5)
w3 <- numeric(n_pred); w3[9:12] <- c( 0.6, -0.5, 0.7, 0.8)
w1 <- w1 / sqrt(sum(w1^2))
w2 <- w2 / sqrt(sum(w2^2))
w3 <- w3 / sqrt(sum(w3^2))
t1 <- as.numeric(scale(drop(X_dense %*% w1), center = TRUE, scale = TRUE))
t2 <- as.numeric(scale(drop(X_dense %*% w2), center = TRUE, scale = TRUE))
t3 <- as.numeric(scale(drop(X_dense %*% w3), center = TRUE, scale = TRUE))
beta_true <- c(1.0, 0.6, 0.3)
eta <- beta_true[1]*t1 + beta_true[2]*t2 + beta_true[3]*t3
lambda0 <- 0.05
u <- runif(n_obs)
time_event <- -log(u) / (lambda0 * exp(eta))
target_event <- 0.65
f <- function(lc) {
mean(time_event <= rexp(n_obs, rate = lc)) - target_event
}
lambda_c <- uniroot(f, c(1e-4, 1), tol = 1e-4)$root
time_cens <- rexp(n_obs, rate = lambda_c)
time <- pmin(time_event, time_cens)
status <- as.integer(time_event <= time_cens)
## ----bigmatrixbigmatrix, cache=TRUE, eval=LOCAL-------------------------------
if (requireNamespace("bigmemory", quietly = TRUE)) {
library(bigmemory)
big_dir <- tempfile("bigPLScox-")
dir.create(big_dir)
if(file.exists(paste(big_dir,"X.bin",sep="/"))){unlink(paste(big_dir,"X.bin",sep="/"))}
if(file.exists(paste(big_dir,"X.desc",sep="/"))){unlink(paste(big_dir,"X.desc",sep="/"))}
X_big <- bigmemory::as.big.matrix(
X_dense,
backingpath = big_dir,
backingfile = "X.bin",
descriptorfile = "X.desc"
)
X_big[1:6, 1:6]
}
## ----dense-solvers, cache=TRUE, eval=LOCAL------------------------------------
fit_legacy <- big_pls_cox(
X = X_dense,
time = time,
status = status,
ncomp = k_true
)
fit_fast_dense <- big_pls_cox_fast(
X = X_dense,
time = time,
status = status,
ncomp = k_true
)
list(
legacy = head(fit_legacy$scores),
fast = head(fit_fast_dense$scores)
)
list(
legacy = apply(fit_legacy$scores, 2, var),
fast = apply(fit_fast_dense$scores, 2, var)
)
## ----dense-summary, cache=TRUE, eval=LOCAL------------------------------------
summary(fit_fast_dense)
## ----fast-big, cache=TRUE, eval=LOCAL-----------------------------------------
if (exists("X_big")) {
fit_fast_big <- big_pls_cox_fast(
X = X_big,
time = time,
status = status,
ncomp = k_true
)
summary(fit_fast_big)
}
## ----gd-fit, cache=TRUE, eval=LOCAL-------------------------------------------
if (exists("X_big")) {
fit_gd <- big_pls_cox_gd(
X = X_big,
time = time,
status = status,
ncomp = k_true,
max_iter = 2000,
learning_rate = 0.05,
tol = 1e-8
)
str(fit_gd)
}
## ----compare-solvers, cache=TRUE, eval=LOCAL----------------------------------
if (exists("fit_fast_dense") && exists("fit_gd")) {
eta_fast_dense <- drop(fit_fast_dense$scores %*% fit_fast_dense$cox_fit$coefficients)
eta_fast_big <- drop(fit_fast_big$scores %*% fit_fast_big$cox_fit$coefficients)
eta_gd <- drop(fit_gd$scores %*% fit_gd$cox_fit$coefficients)
list(
correlation_fast_dense_big = cor(eta_fast_dense, eta_fast_big),
correlation_fast_dense_gd = cor(eta_fast_dense, eta_gd),
concordance = c(
fast_dense = survival::concordance(survival::Surv(time, status) ~ eta_fast_dense)$concordance,
fast_big = survival::concordance(survival::Surv(time, status) ~ eta_fast_big)$concordance,
gd = survival::concordance(survival::Surv(time, status) ~ eta_gd)$concordance
)
)
}
## ----predict-new, cache=TRUE, eval=LOCAL--------------------------------------
X_new <- matrix(rnorm(10 * n_pred), nrow = 10)
scores_new <- predict(
fit_fast_dense,
newdata = X_new,
type = "components"
)
risk_new <- predict(
fit_fast_dense,
newdata = X_new,
type = "risk"
)
list(scores = scores_new, risk = risk_new)
## ----timing, cache=TRUE, eval=LOCAL-------------------------------------------
if (requireNamespace("bench", quietly = TRUE) && exists("X_big")) {
bench_res <- bench::mark(
big_pls = big_pls_cox(X_dense, time, status, ncomp = k_true),
fast_dense = big_pls_cox_fast(X_dense, time, status, ncomp = k_true),
fast_big = big_pls_cox_fast(X_big, time, status, ncomp = k_true),
gd = big_pls_cox_gd(X_big, time, status, ncomp = k_true, max_iter = 1500),
iterations = 30,
check = FALSE
)
bench_res$expression <- names(bench_res$expression)
bench_res[, c("expression", "median", "itr/sec", "mem_alloc")]
}
## ----timing-plot, cache=TRUE, eval=LOCAL--------------------------------------
if (exists("bench_res")) {
plot(bench_res, type = "jitter")
}
## ----cleanup, cache=TRUE, eval=LOCAL------------------------------------------
if (exists("X_big")) {
rm(X_big)
file.remove(file.path(big_dir, "X.bin"))
file.remove(file.path(big_dir, "X.desc"))
unlink(big_dir, recursive = TRUE)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.