plsmselect.R
In plsmselect: Linear and Smooth Predictor Modelling with Penalisation and Variable Selection

## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(message=F, warning = F, cache=T, echo=T, eval=T,
                      fig.pos="H", fig.align="center", comment='#>')

## ---- echo=FALSE--------------------------------------------------------------
tab <- matrix(c("\u2713","",rep("\u2713",4)),2,3)
rownames(tab) = c("$\\beta$","$f_j$'s")
colnames(tab) = c("None","$\\ell_1$","$\\ell_2$")
knitr::kable(tab, align = "rccc")

## -----------------------------------------------------------------------------
library(plsmselect)
library(purrr)

data(simData)

## ---- echo=FALSE--------------------------------------------------------------
knitr::kable(head(simData), format = "html", digits=2,
             caption="Table 1. First 6 samples of the simulated data set: simData.") %>% 
  kableExtra::kable_styling(bootstrap_options = "striped", font_size = 11.5)

## ----normalfit----------------------------------------------------------------
## Create model matrix X corresponding to linear terms
## (necessary for the formula option of gamlasso below)
simData$X = model.matrix(~x1+x2+x3+x4+x5+x6+x7+x8+x9+x10, data=simData)[,-1]

## The formula approach
gfit = gamlasso(Yg ~ X +
                  s(z1, k=5, bs="ts") +
                  s(z2, k=5, bs="ts") +
                  s(z3, k=5, bs="ts") +
                  s(z4, k=5, bs="ts"),
                data = simData,
                seed = 1)

## ----normalfitalt, eval = F---------------------------------------------------
#  ## The term specification approach
#  gfit = gamlasso(response = "Yg",
#                  linear.terms = paste0("x",1:10),
#                  smooth.terms = paste0("z",1:4),
#                  data = simData,
#                  linear.penalty = "l1",
#                  smooth.penalty = "l1",
#                  num.knots = 5,
#                  seed = 1)

## -----------------------------------------------------------------------------
# mgcv::gam object:
class(gfit$gam)
# glmnet::cv.glmnet object
class(gfit$cv.glmnet)

## -----------------------------------------------------------------------------
summary(gfit)

## ----plotgam, fig.width=6, fig.height=6---------------------------------------
## Plot the estimates of the smooth effects:
plot(gfit$gam, pages=1)

## ----fittedvsobserved, fig.width=6, fig.height=6------------------------------
## Plot fitted versus observed values:
plot(simData$Yg, predict(gfit), xlab = "Observed values", ylab = "Fitted Values")

## ----poifit-------------------------------------------------------------------
## Create model matrix X corresponding to linear terms
## (necessary for the formula option of gamlasso below)
simData$X = model.matrix(~x1+x2+x3+x4+x5+x6+x7+x8+x9+x10, data=simData)[,-1]

## Poisson response. Formula approach.
pfit = gamlasso(Yp ~ X + 
                  s(z1, bs="ts", k=5) + 
                  s(z2, bs="ts", k=5) + 
                  s(z3, bs="ts", k=5) + 
                  s(z4, bs="ts", k=5),
                data = simData,
                family = "poisson",
                seed = 1)

## ----poifitalt, eval=FALSE----------------------------------------------------
#  ## Poisson response. Term-specification approach.
#  pfit = gamlasso(response = "Yp",
#                  linear.terms = paste0("x",1:10),
#                  smooth.terms = paste0("z",1:4),
#                  data = simData,
#                  linear.penalty = "l1",
#                  smooth.penalty = "l1",
#                  family = "poisson",
#                  num.knots = 5,
#                  seed = 1)

## -----------------------------------------------------------------------------
coef(pfit$cv.glmnet, s="lambda.min")

## ---- fig.width=6, fig.height=3-----------------------------------------------
par(mfrow=c(1,2))
plot(pfit$gam, select=1) # estimate of smooth term z1
plot(pfit$gam, select=2) # estimate of smooth term z2

## ---- fig.width=6, fig.height=6-----------------------------------------------
plot(predict(pfit, type="response"), exp(simData$lp), xlab="predicted count", ylab="true expected count")

## ----binfit, eval=FALSE-------------------------------------------------------
#  ## Create model matrix X corresponding to linear terms
#  ## (necessary for the formula option of gamlasso below)
#  simData$X = model.matrix(~x1+x2+x3+x4+x5+x6+x7+x8+x9+x10, data=simData)[,-1]
#  
#  ## Bernoulli trials response
#  bfit = gamlasso(Yb ~ X +
#                    s(z1, bs="ts", k=5) +
#                    s(z2, bs="ts", k=5) +
#                    s(z3, bs="ts", k=5) +
#                    s(z4, bs="ts", k=5),
#                  data = simData,
#                  family = "binomial",
#                  seed = 1)

## ----binfitalt, eval = F------------------------------------------------------
#  ## The term specification approach
#  bfit = gamlasso(response = "Yb",
#                  linear.terms = paste0("x",1:10),
#                  smooth.terms = paste0("z",1:4),
#                  data = simData,
#                  family="binomial",
#                  linear.penalty = "l1",
#                  smooth.penalty = "l1",
#                  num.knots = 5,
#                  seed = 1)

## ---- eval=FALSE--------------------------------------------------------------
#  summary(bfit)
#  plot(bfit$gam, pages=1)
#  pred.prob <- predict(bfit, type="response")
#  true.prob <- exp(simData$lp)/(1+exp(simData$lp))
#  plot(pred.prob, true.prob, xlab="predicted probability", ylab="true probability")

## ----binomfit, eval=FALSE-----------------------------------------------------
#  ## Create model matrix X corresponding to linear terms
#  ## (necessary for the formula option of gamlasso below)
#  simData$X = model.matrix(~x1+x2+x3+x4+x5+x6+x7+x8+x9+x10, data=simData)[,-1]
#  
#  ## Binomial counts response. Formula approach.
#  bfit2 = gamlasso(cbind(success,failure) ~ X +
#                     s(z1, bs="ts", k=5) +
#                     s(z2, bs="ts", k=5) +
#                     s(z3, bs="ts", k=5) +
#                     s(z4, bs="ts", k=5),
#                   data = simData,
#                   family = "binomial",
#                   seed = 1)

## ----binomfitalt, eval = F----------------------------------------------------
#  ## Binomial counts response. Term specification approach
#  bfit2 = gamlasso(c("success","failure"),
#                   linear.terms=paste0("x",1:10),
#                   smooth.terms=paste0("z",1:4),
#                   data=simData,
#                   family = "binomial",
#                   linear.penalty = "l1",
#                   smooth.penalty = "l1",
#                   num.knots = 5,
#                   seed=1)

## ---- eval=FALSE--------------------------------------------------------------
#  summary(bfit2)
#  plot(bfit2$gam, pages=1)
#  pred.prob <- predict(bfit2, type="response")
#  true.prob <- exp(simData$lp)/(1+exp(simData$lp))
#  plot(pred.prob, true.prob, xlab="predicted probability", ylab="true probability")

## ----coxfit-------------------------------------------------------------------
## Create model matrix X corresponding to linear terms
## (necessary for the formula option of gamlasso below)
simData$X = model.matrix(~x1+x2+x3+x4+x5+x6+x7+x8+x9+x10, data=simData)[,-1]

# Censored time-to-event response. Formula approach.
cfit = gamlasso(time ~ X +
                  s(z1, bs="ts", k=5) +
                  s(z2, bs="ts", k=5) +
                  s(z3, bs="ts", k=5) +
                  s(z4, bs="ts", k=5),
                data = simData,
                family = "cox",
                weights = "status",
                seed = 1)

## ----coxfitalt, eval=FALSE----------------------------------------------------
#  # Censored time-to-event response. Term specification approach.
#  cfit = gamlasso(response = "time",
#                  linear.terms = paste0("x",1:10),
#                  smooth.terms = paste0("z",1:4),
#                  data = simData,
#                  linear.penalty = "l1",
#                  smooth.penalty = "l1",
#                  family = "cox",
#                  weights="status",
#                  num.knots = 5,
#                  seed = 1)

## ----coxdiagnos, fig.width=6, fig.height=6------------------------------------
## Obtain and plot predicted cumulative baseline hazard:
H0.pred <- cumbasehaz(cfit)

time.seq <- seq(0, 60, by=1)
plot(time.seq, H0.pred(time.seq), type="l", xlab = "Time", ylab="",
     main = "Predicted Cumulative \nBaseline Hazard")

## ----coxpredict, fig.width=6, fig.height=6------------------------------------
## Obtain predicted survival at days 1,2,3,...,60:
S.pred <- predict(cfit, type="response", new.event.times=1:60)

## Plot the survival curve for sample (subject) 17:
plot(1:60, S.pred[17,], xlab="time (in days)", ylab="Survival probability", type="l")

## ----simdata, eval=FALSE------------------------------------------------------
#  generate.inputdata <- function(N) {
#  
#    id <- paste0("i",1:N)
#    ## Define linear predictors
#    linear.pred <- matrix(c(rbinom(N*3,size=1,prob=0.2),
#                            rbinom(N*5,size=1,prob=0.5),
#                            rbinom(N*2,size=1,prob=0.9)),
#                          nrow=N)
#    colnames(linear.pred) = paste0("x", 1:ncol(linear.pred))
#  
#    ## Define smooth predictors
#    smooth.pred = as.data.frame(matrix(runif(N*4),nrow=N))
#    colnames(smooth.pred) = paste0("z", 1:ncol(smooth.pred))
#  
#    ## Coalesce the predictors
#    dat = cbind.data.frame(id, linear.pred, smooth.pred)
#  
#    return(dat)
#  
#  }
#  
#  ## Simulate N input data observations:
#  N <- 100
#  simData2 <- generate.inputdata(N)

## ---- eval=FALSE--------------------------------------------------------------
#  ## "True" coefficients of linear terms (last 7 are zero):
#  beta <- c(1, -0.6, 0.5, rep(0,7))
#  
#  ## "True" nonlinear smooth terms:
#  f1 <- function(x) x^3
#  f2 <- function(x) sin(x*pi)
#  
#  ## Calculate "True" linear predictor (lp) based on above data (simData2)
#  simData2$lp <- as.numeric(as.matrix(simData2[,paste0("x",1:10)]) %*% beta + f1(simData2$z1) + f2(simData2$z2))

## ---- eval=FALSE--------------------------------------------------------------
#  ## Simulate gaussian response with mean lp:
#  simData2$Yg = simData2$lp + rnorm(N, sd = 0.1)
#  
#  ## Simulate bernoulli trials with success probability exp(lp)/(1+exp(lp))
#  simData2$Yb = map_int(exp(simData2$lp), ~ rbinom(1, 1, p = ( ./(1+.) ) ) )
#  
#  ## Simulate Poisson response with log(mean) = lp
#  simData2$Yp = map_int(exp(simData2$lp), ~rpois(1, .) )

## ---- eval=FALSE--------------------------------------------------------------
#  ## Simulate binomial counts with success probability exp(lp)/(1+exp(lp))
#  sizes = sample(10, nrow(simData2), replace = TRUE)
#  # Simulate success:
#  simData2$success = map2_int(exp(simData2$lp), sizes, ~rbinom(1, .y, p = ( .x/(1+.x) )))
#  # Calculate failure:
#  simData2$failure = sizes - simData2$success

## ---- eval=FALSE--------------------------------------------------------------
#  ## Function that simulates N samples of censored event times (time, status)
#  ## Event times ~ Weibull(lambda0, rho) with linear predictor lp
#  ## rho=1 corresponds to exponential distribution
#  simulWeib <- function(N, lambda0, rho, lp)
#  {
#  
#    # Censoring times ~ Exponential(lambdaC)
#    lambdaC=0.0005 # very mild censoring
#  
#    # Simulate Weibull latent event times
#    v <- runif(n=N)
#    Tlat <- (- log(v) / (lambda0 * exp(lp)))^(1 / rho)
#  
#    # Simulate censoring times
#    C <- rexp(n=N, rate=lambdaC)
#  
#    # Calculate follow-up times and event indicators
#    time <- pmin(Tlat, C)
#    status <- as.numeric(Tlat <= C)
#  
#    data.frame(time=time, status=status)
#  
#  }
#  
#  ## Set parameters of Weibull baseline hazard h0(t) = lambda*rho*t^(rho-1):
#  lambda0 <- 0.01; rho <- 1;
#  
#  ## Simulate (times, status) from above censored Weibull model and cbind to our data:
#  simData2 <- cbind.data.frame(simData2, simulWeib(N, lambda0, rho, simData2$lp))

Any scripts or data that you put into this service are public.

plsmselect documentation built on Dec. 1, 2019, 1:11 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

plsmselect
Linear and Smooth Predictor Modelling with Penalisation and Variable Selection

inst/doc/plsmselect.R
In plsmselect: Linear and Smooth Predictor Modelling with Penalisation and Variable Selection

Try the plsmselect package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

plsmselect Linear and Smooth Predictor Modelling with Penalisation and Variable Selection

inst/doc/plsmselect.R In plsmselect: Linear and Smooth Predictor Modelling with Penalisation and Variable Selection

Try the plsmselect package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

plsmselect
Linear and Smooth Predictor Modelling with Penalisation and Variable Selection

inst/doc/plsmselect.R
In plsmselect: Linear and Smooth Predictor Modelling with Penalisation and Variable Selection