inst/doc/pensim.R

## ----dataprep-----------------------------------------------------------------
library(pensim)
data(beer.exprs)
data(beer.survival)
##select just 100 genes to speed computation, just for the sake of example:
set.seed(1)
beer.exprs.sample <- beer.exprs[sample(1:nrow(beer.exprs), 100),]
#
gene.quant <- apply(beer.exprs.sample, 1, quantile, probs = 0.75)
dat.filt <- beer.exprs.sample[gene.quant > log2(100),]
gene.iqr <- apply(dat.filt, 1, IQR)
dat.filt <- as.matrix(dat.filt[gene.iqr > 0.5,])
dat.filt <- t(dat.filt)
dat.filt <- data.frame(dat.filt)
#
library(survival)
surv.obj <- Surv(beer.survival$os, beer.survival$status)

## ----lassotest----------------------------------------------------------------
library(penalized)
testfit <- optL1(
  response = surv.obj,
  penalized = dat.filt,
  fold = 5,
  maxlambda1 = 5,
  positive = FALSE,
  standardize = TRUE,
  trace = FALSE
)

## ----opt.nested.crossval------------------------------------------------------
set.seed(1)
preds <-
  opt.nested.crossval(
    outerfold = 5,
    nprocessors = 1,
    #opt.nested.crossval arguments
    optFUN = "opt1D",
    scaling = FALSE,
    #opt.splitval arguments
    setpen = "L1",
    nsim = 1,
    #opt1D arguments
    response = surv.obj,
    #rest are penalized::optl1 arguments
    penalized = dat.filt,
    fold = 5,
    positive = FALSE,
    standardize = TRUE,
    trace = FALSE
  )

## ----coxfit-------------------------------------------------------------------
coxfit.continuous <- coxph(surv.obj~preds)
summary(coxfit.continuous)

## ----dichot-------------------------------------------------------------------
preds.dichot <- preds > median(preds)

## ----ROCplot, fig.cap="**Figure 1: ROC plot of cross-validated continuous risk predictions at 12 months.** Note that the predictions are better if you don't randomly select 250 genes to start with!  We only did this to ease the load on the CRAN checking servers."----
nobs <- length(preds)
cutoff <- 12
if (requireNamespace("survivalROC", quietly = TRUE)) {
 preds.roc <-
  survivalROC::survivalROC(
    Stime = beer.survival$os,
    status = beer.survival$status,
    marker = preds,
    predict.time = cutoff,
    span = 0.01 * nobs ^ (-0.20)
  )
 plot(
  preds.roc$FP,
  preds.roc$TP,
  type = "l",
  xlim = c(0, 1),
  ylim = c(0, 1),
  xlab = paste("FP", "\n", "AUC = ", round(preds.roc$AUC, 3)),
  lty = 2,
  ylab = "TP",
  main = "LASSO predictions\n ROC curve at 12 months"
 )
 abline(0, 1)
}

## ----full.model---------------------------------------------------------------
beer.coefs <- opt1D(
  setpen = "L1",
  nsim = 1,
  response = surv.obj,
  penalized = dat.filt,
  fold = 5,
  maxlambda1 = 5,
  positive = FALSE,
  standardize = TRUE,
  trace = FALSE
)

## ----unpenalized.eg-----------------------------------------------------------
beer.coefs.unpen <-
  opt1D(
    setpen = "L1",
    nsim = 1,
    response = surv.obj,
    penalized = dat.filt[-1],
    # This is equivalent to dat.filt[,-1]
    unpenalized = dat.filt[1],
    fold = 5,
    maxlambda1 = 5,
    positive = FALSE,
    standardize = TRUE,
    trace = FALSE
  )

## ----lookatcoefs--------------------------------------------------------------
beer.coefs[1, 1:5]        #example output with no unpenalized covariates
beer.coefs.unpen[1, 1:5]  #example output with first covariate unpenalized

## ----genbinary----------------------------------------------------------------
set.seed(9)
x <- create.data(
  nvars = c(15, 5),
  cors = c(0, 0.8),
  associations = c(0, 2),
  firstonly = c(TRUE, TRUE),
  nsamples = 50,
  response = "binary",
  logisticintercept = 0.5
)

## ----lookbinary---------------------------------------------------------------
summary(x)
x$summary

## ----fitmodel-----------------------------------------------------------------
simplemodel <- glm(outcome ~ ., data = x$data, family = binomial)
summary(simplemodel)

## ----binarylassodemo----------------------------------------------------------
lassofit <-
  opt1D(
    nsim = 3,
    nprocessors = 1,
    setpen = "L1",
    penalized = x$data[1:20],
    response = x$data[, "outcome"],
    trace = FALSE,
    fold = 10
  )
print(lassofit)

## ----heatmap, fig.cap = "**Figure 2: Heatmap of simulated data with binary response.**"----
dat <- t(as.matrix(x$data[,-match("outcome", colnames(x$data))]))
heatmap(dat, ColSideColors = ifelse(x$data$outcome == 0, "black", "white"))

## ----survoutcome--------------------------------------------------------------
set.seed(1)
x <- create.data(
  nvars = c(15, 5),
  cors = c(0, 0.8),
  associations = c(0, 0.5),
  firstonly = c(TRUE, TRUE),
  nsamples = 50,
  censoring = c(2, 10),
  response = "timetoevent"
)

## ----howmanycensored----------------------------------------------------------
sum(x$data$cens == 0) / nrow(x$data)

## ----simulatedKM, fig.cap = "**Figure 3: Kaplan-Meier plot of survival of simulated cohort.**"----
library(survival)
surv.obj <- Surv(x$data$time, x$data$cens)
plot(survfit(surv.obj ~ 1), ylab = "Survival probability", xlab = "time")

## -----------------------------------------------------------------------------
sessionInfo()

Try the pensim package in your browser

Any scripts or data that you put into this service are public.

pensim documentation built on Dec. 9, 2022, 1:10 a.m.