###################################################
### R code from vignette source 'SurvivalEnsembles.Rnw'
###################################################
### code chunk number 1: setup
###################################################
source("setup.R")
## Attach every package the vignette needs; abort with a clear
## message as soon as one of them cannot be attached.
for (pkg in c("TH.data", "rpart", "survival", "partykit")) {
    if (!require(pkg, character.only = TRUE))
        stop("cannot attach package ", sQuote(pkg))
}
## Reproducibility and compact printed output.
set.seed(290875)
CEX <- 0.85          # common character expansion used in all plots below
options(digits = 3)
### mean difference plots
## Tukey mean-difference (Bland-Altman style) plot: the average of
## observed and predicted values on the x-axis against their
## difference on the y-axis, plus a dotted zero reference line.
## Additional graphical parameters are forwarded to plot() via `...`.
## Relies on the file-global CEX set in the setup chunk.
mdplot <- function(obs, pred, main = "", ...) {
    avg <- (obs + pred) / 2
    dev <- obs - pred
    plot(avg, dev,
         xlab = "(Observed + Predicted) / 2",
         ylab = "Observed - Predicted",
         main = main,
         cex.axis = CEX, cex.main = CEX, cex.lab = CEX, ...)
    abline(h = 0, lty = 3)
}
###################################################
### code chunk number 2: loaddata
###################################################
## Load the preprocessed AML data shipped with TH.data; the code below
## uses the objects `clinical`, `iexpressions` and `selgenes` from this
## file.  See `mboost/inst/readAML_Bullinger.R' for how the data were
## generated from the raw data.
load(file.path(path.package(package = "TH.data"),
               "rda", "AML_Bullinger.rda"))
###################################################
### code chunk number 3: AML-dpp
###################################################
### inverse probability of censoring (IPC) weights
AMLw <- IPCweights(Surv(clinical$time, clinical$event))

### derive a three-level cytogenetic risk score
### NOTE(review): the level indices below are hard-coded against the
### factor levels of `Cytogenetic.group` in this particular data set --
### verify against levels(clinical$Cytogenetic.group) if the data change.
cyto <- clinical[, "Cytogenetic.group"]
cyto_lev <- levels(cyto)
risk <- rep(0, nrow(clinical))
risk[cyto %in% cyto_lev[c(4, 7, 8)]] <- "low"
risk[cyto %in% cyto_lev[c(5, 9)]] <- "intermediate"
risk[cyto %in% cyto_lev[-c(4, 5, 7, 8, 9)]] <- "high"
risk <- as.factor(risk)

### learning sample: clinical covariates, the risk score and the
### expression values of the preselected genes
AMLlearn <- cbind(clinical[, c("time", "Sex", "Age", "LDH", "WBC",
                               "FLT3.aberration.", "MLL.PTD", "Tx.Group.")],
                  risk = risk,
                  iexpressions[, colnames(iexpressions) %in%
                                   selgenes[["Clone.ID"]]])

### keep complete cases with positive IPC weight only
keep <- AMLw > 0 & complete.cases(AMLlearn)
AMLlearn <- AMLlearn[keep, ]
AMLw <- AMLw[keep]
###################################################
### code chunk number 4: AML-RF
###################################################
### controls for growing the individual trees of the forest
aml_ctrl <- ctree_control(testtype = "Teststatistic",
                          teststat = "maximum",
                          mincriterion = .1, minsplit = 5)
### was: cforest_control(mincriterion = 0.1, mtry = 5, minsplit = 5, ntree = 250)
### random forest for the censored response, using the IPC weights
### (warnings are OK here)
AMLrf <- cforest(log(time) ~ ., data = AMLlearn,
                 control = aml_ctrl, weights = AMLw,
                 mtry = 5, ntree = 250,
                 perturb = list(replace = TRUE, fraction = 0.632))
###################################################
### code chunk number 5: AML-boost
###################################################
### componentwise L2 boosting of the log survival time with IPC
### weights; deliberately run long (mstop = 5000) and pruned to the
### AIC-optimal iteration in the chunks below
AMLl2b <- glmboost(I(log(time)) ~ ., data = AMLlearn,
                   weights = AMLw,
                   control = boost_control(mstop = 5000))
###################################################
### code chunk number 6: AML-AIC
###################################################
### AIC as a function of the number of boosting iterations
aic <- AIC(AMLl2b)
plot(aic)
###################################################
### code chunk number 7: AML-fitted
###################################################
### stop at the AIC-optimal number of boosting iterations and
### display the selected (non-zero) coefficients
AMLl2b <- AMLl2b[mstop(aic)]
cf <- coef(AMLl2b)
cf[abs(cf) > 0]
### in-sample predictions from both ensembles
AMLprf <- predict(AMLrf, newdata = AMLlearn)
AMLpb <- predict(AMLl2b, newdata = AMLlearn)
###################################################
### code chunk number 8: Figure1
###################################################
### weighted mean of the observed log times (dashed reference line)
Mmod <- sum(AMLw * log(AMLlearn$time)) / sum(AMLw)
par(mai = par("mai") * c(0.7, 0.8, 0.7, 0.6))
layout(matrix(1:4, ncol = 2))
obs <- log(AMLlearn$time)
## random forest: mean-difference plot and observed vs. predicted;
## point size is proportional to the IPC weight
mdplot(obs, AMLprf, main = "Random Forest",
       cex = AMLw / 4, ylim = c(-4, 4), xlim = c(0, 7))
plot(obs, AMLprf, cex = AMLw / 4, ylim = range(obs),
     ylab = "Predicted", xlab = "Observed", main = "Random Forest",
     cex.axis = CEX, cex.main = CEX, cex.lab = CEX)
abline(h = Mmod, lty = 2)
## boosting: the same two displays
mdplot(obs, AMLpb, cex = AMLw / 4, main = "Boosting",
       ylim = c(-4, 4), xlim = c(0, 7))
plot(obs, AMLpb, cex = AMLw / 4, ylim = range(obs),
     ylab = "Predicted", xlab = "Observed", main = "Boosting",
     cex.axis = CEX, cex.main = CEX, cex.lab = CEX)
abline(h = Mmod, lty = 2)
###################################################
### code chunk number 9: GBSG2-dpp
###################################################
### attach the German Breast Cancer Study Group 2 data
data("GBSG2", package = "TH.data")
### inverse probability of censoring weights
GBSG2w <- IPCweights(Surv(GBSG2$time, GBSG2$cens))
### learning sample: every covariate plus the log survival time; the
### raw time/censoring columns are dropped from the predictors
GBSG2learn <- cbind(GBSG2[, -which(names(GBSG2) %in% c("time", "cens"))],
                    ltime = log(GBSG2$time))
n <- nrow(GBSG2learn)
###################################################
### code chunk number 10: GBSG2-models
###################################################
### weighted linear model and its weighted empirical risk
LMmod <- lm(ltime ~ . , data = GBSG2learn, weights = GBSG2w)
LMerisk <- sum((GBSG2learn$ltime - predict(LMmod))^2 * GBSG2w) / n

### regression tree, fitted on the positive-weight observations only
pos <- GBSG2w > 0   # reused by the plotting chunks below
TRmod <- rpart(ltime ~ . , data = GBSG2learn, weights = GBSG2w,
               subset = pos)
TRerisk <- sum((GBSG2learn$ltime[pos] - predict(TRmod))^2 * GBSG2w[pos]) / n

### controls for the trees of the random forest
ctrl <- ctree_control(testtype = "Teststatistic",
                      teststat = "maximum",
                      mincriterion = qnorm(.95),
                      minsplit = 5)
### was: cforest_control(mincriterion = qnorm(0.95), mtry = 5,
###                      minsplit = 5, ntree = 100)
### random forest for censored data (warnings are OK here)
### NOTE(review): `fraction` here is 0.632 times the NUMBER of positive
### weights, not a value in (0, 1) as in the AML chunk -- confirm this
### is intended for partykit::cforest's `perturb` argument.
RFmod <- cforest(ltime ~ . , data = GBSG2learn, weights = GBSG2w,
                 control = ctrl, mtry = 5, ntree = 100,
                 perturb = list(replace = TRUE,
                                fraction = 0.632 * sum(GBSG2w > 0)))

### componentwise L2 boosting for the censored response
L2Bmod <- glmboost(ltime ~ ., data = GBSG2learn, weights = GBSG2w,
                   control = boost_control(mstop = 250))
### the same with the robust Huber loss (default number of iterations)
L2BHubermod <- glmboost(ltime ~ ., data = GBSG2learn, weights = GBSG2w,
                        family = Huber(d = log(2)))
###################################################
### code chunk number 11: GBSG2-AIC
###################################################
### AIC curve over the boosting iterations of the quadratic-loss model
aic <- AIC(L2Bmod)
plot(aic)
###################################################
### code chunk number 12: GBSG2-fitted
###################################################
### in-sample predictions and weighted empirical risks
GBSG2Hp <- predict(L2BHubermod, newdata = GBSG2learn)
L2Bp <- predict(L2Bmod, newdata = GBSG2learn)
RFp <- predict(RFmod, newdata = GBSG2learn)
L2Berisk <- sum((GBSG2learn$ltime - L2Bp)^2 * GBSG2w) / n
RFerisk <- sum((GBSG2learn$ltime - RFp)^2 * GBSG2w) / n
###################################################
### code chunk number 13: Figure3
###################################################
### Mean-difference plots of observed vs. fitted log survival times
### for all four models; point size is proportional to the IPC weight.
### NOTE(review): `lim` and `Mmod` are computed but not used in this
### chunk -- possibly leftovers (e.g. a dropped reference line);
### confirm against the original vignette.
lim <- c(4, 9)
mylwd <- 0.5
par(mai = par("mai") * c(0.7, 0.8, 0.7, 0.6))
layout(matrix(1:4, ncol = 2))
Mmod <- sum(GBSG2w * GBSG2learn$ltime) / sum(GBSG2w)
mdplot(GBSG2learn$ltime, predict(LMmod), cex = GBSG2w / 4,
       main = "Linear Model", ylim = c(-3, 3), xlim = c(5, 8))
### The tree was fitted on the positive-weight subset (`subset = pos`),
### so both the observations AND the weights must be subset here.
### Bug fix: the weights were previously NOT subset by `pos`, so the
### point sizes were misaligned with the plotted observations.
mdplot(GBSG2learn$ltime[pos], predict(TRmod), cex = GBSG2w[pos] / 4,
       main = "Tree", ylim = c(-3, 3), xlim = c(5, 8))
mdplot(GBSG2learn$ltime, predict(RFmod, newdata = GBSG2learn),
       cex = GBSG2w / 4, main = "Random Forest",
       ylim = c(-3, 3), xlim = c(5, 8))
mdplot(GBSG2learn$ltime, predict(L2Bmod, newdata = GBSG2learn),
       cex = GBSG2w / 4, main = "Boosting",
       ylim = c(-3, 3), xlim = c(5, 8))
###################################################
### code chunk number 14: Figure5
###################################################
### Random forest predictions against four prognostic covariates,
### each overlaid with a weighted smoothing spline (df = mydf);
### point size is proportional to the IPC weight.  The third
### positional argument of smooth.spline() is its weight vector `w`.
RFpr <- predict(RFmod, newdata = GBSG2learn)
L2Bpr <- predict(L2Bmod, newdata = GBSG2learn)
### common y-range over the positive-weight observations of both models
ylim <- range(c(RFpr[GBSG2w > 0], L2Bpr[GBSG2w > 0]))
mydf <- 4
par(mai = par("mai") * c(0.7, 0.8, 0.4, 0.6))
layout(matrix(1:4, ncol = 2))
### number of positive lymph nodes
### (bug fix: removed a stray trailing comma that passed an empty
### extra argument to plot())
plot(GBSG2learn$pnodes, RFpr, cex = GBSG2w / 4, xlim = c(0, 40),
     lwd = mylwd, xlab = "Nr. positive lymph nodes", ylim = ylim,
     ylab = expression(hat(Y)),
     cex.axis = CEX, cex.main = CEX, cex.lab = CEX)
lines(smooth.spline(GBSG2learn$pnodes, RFpr, GBSG2w / 4, df = mydf))
### age
plot(GBSG2learn$age, RFpr, cex = GBSG2w / 4, xlab = "Age",
     ylab = expression(hat(Y)), ylim = ylim, lwd = mylwd,
     cex.axis = CEX, cex.main = CEX, cex.lab = CEX)
lines(smooth.spline(GBSG2learn$age, RFpr, GBSG2w / 4, df = mydf))
### estrogen receptor
plot(GBSG2learn$estrec, RFpr, cex = GBSG2w / 4,
     xlab = "Estrogen receptor",
     ylab = expression(hat(Y)), ylim = ylim, lwd = mylwd,
     cex.axis = CEX, cex.main = CEX, cex.lab = CEX)
lines(smooth.spline(GBSG2learn$estrec, RFpr, GBSG2w / 4, df = mydf))
### progesterone receptor, restricted to values below 100 fmol/l
indx <- which(GBSG2learn$progrec < 100)
plot(GBSG2learn$progrec[indx], RFpr[indx], cex = GBSG2w[indx] / 4,
     xlab = "Progesterone receptor (< 100 fmol / l)",
     ylab = expression(hat(Y)), ylim = ylim, lwd = mylwd,
     cex.axis = CEX, cex.main = CEX, cex.lab = CEX)
lines(smooth.spline(GBSG2learn$progrec[indx], RFpr[indx],
                    GBSG2w[indx] / 4, df = mydf))
###################################################
### code chunk number 15: Figure6
###################################################
### Boosting predictions against the same four covariates as in
### Figure5, again with weighted smoothing splines (df = mydf) and
### IPC-weight-proportional point sizes.
par(mai = par("mai") * c(0.7, 0.8, 0.4, 0.6))
layout(matrix(1:4, ncol = 2))
### number of positive lymph nodes
plot(GBSG2learn$pnodes, L2Bpr, cex = GBSG2w / 4, xlim = c(0, 40),
     ylab = expression(hat(Y)), xlab = "Nr. positive lymph nodes",
     ylim = ylim, lwd = mylwd,
     cex.axis = CEX, cex.main = CEX, cex.lab = CEX)
lines(smooth.spline(GBSG2learn$pnodes, L2Bpr, GBSG2w / 4, df = mydf))
### age
plot(GBSG2learn$age, L2Bpr, cex = GBSG2w / 4, xlab = "Age",
     ylab = expression(hat(Y)), ylim = ylim, lwd = mylwd,
     cex.axis = CEX, cex.main = CEX, cex.lab = CEX)
lines(smooth.spline(GBSG2learn$age, L2Bpr, GBSG2w / 4, df = mydf))
### estrogen receptor
plot(GBSG2learn$estrec, L2Bpr, cex = GBSG2w / 4,
     xlab = "Estrogen receptor",
     ylab = expression(hat(Y)), ylim = ylim, lwd = mylwd,
     cex.axis = CEX, cex.main = CEX, cex.lab = CEX)
lines(smooth.spline(GBSG2learn$estrec, L2Bpr, GBSG2w / 4, df = mydf))
### progesterone receptor, restricted to values below 100 fmol/l
indx <- which(GBSG2learn$progrec < 100)
plot(GBSG2learn$progrec[indx], L2Bpr[indx], cex = GBSG2w[indx] / 4,
     xlab = "Progesterone receptor (< 100 fmol / l)",
     ylab = expression(hat(Y)), ylim = ylim, lwd = mylwd,
     cex.axis = CEX, cex.main = CEX, cex.lab = CEX)
lines(smooth.spline(GBSG2learn$progrec[indx], L2Bpr[indx],
                    GBSG2w[indx] / 4, df = mydf))
###################################################
### code chunk number 16: Figure7
###################################################
### Compare Huber-loss and quadratic-loss boosting fits via
### mean-difference and observed-vs-predicted displays.
### NOTE(review): `Mmod` is recomputed here but not used in this
### chunk -- possibly a leftover reference line; confirm against
### the original vignette.
Mmod <- sum(GBSG2w * GBSG2learn$ltime) / sum(GBSG2w)
par(mai = par("mai") * c(0.7, 0.8, 0.7, 0.6))
layout(matrix(1:4, ncol = 2))
### common y-range over the positive-weight observations
yl <- range(c(GBSG2Hp[GBSG2w > 0], L2Bpr[GBSG2w > 0]))
mdplot(GBSG2learn$ltime, GBSG2Hp, main = "Huber Loss",
       cex = GBSG2w / 4, ylim = c(-3, 3), xlim = c(5, 8))
plot(GBSG2learn$ltime, GBSG2Hp, cex = GBSG2w / 4,
     xlim = range(GBSG2learn$ltime[GBSG2w > 0]), ylim = yl,
     ylab = "Predicted", xlab = "Observed", main = "Huber Loss",
     cex.axis = CEX, cex.main = CEX, cex.lab = CEX)
mdplot(GBSG2learn$ltime, L2Bpr, cex = GBSG2w / 4,
       main = "Quadratic Loss", ylim = c(-3, 3), xlim = c(5, 8))
plot(GBSG2learn$ltime, L2Bpr, cex = GBSG2w / 4,
     xlim = range(GBSG2learn$ltime[GBSG2w > 0]), ylim = yl,
     ylab = "Predicted", xlab = "Observed", main = "Quadratic Loss",
     cex.axis = CEX, cex.main = CEX, cex.lab = CEX)
## Hosting-service boilerplate (not part of the vignette code):
## "Any scripts or data that you put into this service are public."
## "Add the following code to your website."
## "For more information on customizing the embed code, read Embedding Snippets."