hazsurv.plot <- function(x, eq, newdata, type = "surv", t.range = NULL, t.vec = NULL, intervals = TRUE, n.sim = 100, prob.lev = 0.05,
shade = FALSE, bars = FALSE, ylim, ylab, xlab, pch, ls = 100, baseline = FALSE,
min.dn = 1e-200, min.pr = 1e-200, max.pr = 1,
plot.out = TRUE, ...){
pr <- h <- hs <- prs <- CIpr <- CIh <- poe <- poet <- NULL
pr.avg <- h.avg <- ch.avg <- CIpr.avg <- CIh.avg <- CIch.avg <- NULL
toleps <- 1e-04
if(x$univar.gamlss == FALSE && x$surv.flex == TRUE && x$margins[1] %in% c(x$VC$m2,x$VC$m3) && x$margins[2] %in% c(x$bl) ) eq <- 2
if(missing(eq) && x$univar.gamlss == FALSE) stop("You must provide the equation number (either 1 or 2).")
if(x$univar.gamlss == TRUE) eq <- 1
if(missing(newdata)) stop("You have to provide a new data frame.")
if(!is.data.frame(newdata)) stop("You have to provide a new data frame.")
if(!(type %in% c("surv", "hazard", "cumhaz"))) stop("The type argument can either be surv, hazard or cumhaz")
if(x$surv.flex == FALSE) stop("This function is only suitable for flexible survival models.")
if(missing(ylim)) ylim <- NULL
# Checks on the dataset definition
if(nrow(newdata) < 1) stop("The data frame needs to have at least one row.")
# Check on new time variable t.vec (do not allow for t.range and t.vec to both be given)
if(length(t.range) > 2) stop("When using t.range only provide min and max of the time range to be considered.")
if(!is.null(t.range) & !is.null(t.vec)) stop("You cannot provide both t.range and t.vec. See help for more details.")
if(shade == TRUE & bars == TRUE) stop("shade and bars are mutually exclusive, they cannot both be TRUE.")
#################################################################################################
if(eq == 1){
ntv <- as.character(x$formula[[1]][2])
#rlb <- range(x$y1)[1]
if(!is.null(t.range)){
rlb <- t.range[1]
} else if(!is.null(t.vec)){
rlb <- t.vec #check on rlb works anyway
} else{
if(x$univar.gamlss == TRUE) rlb <- x$rangeSurv[1] else rlb <- range(x$y1)[1]
}
rlb <- ifelse(rlb < toleps, toleps, rlb)
if(!is.null(t.range)){
tv <- seq(rlb, t.range[2], length.out = ls)
} else if(!is.null(t.vec)){
tv <- rlb
} else{
if(x$univar.gamlss == TRUE) tv <- seq(rlb, x$rangeSurv[2], length.out = ls) else tv <- seq(rlb, range(x$y1)[2], length.out = ls)
}
indp <- 1:x$VC$X1.d2
gob <- x$gam1
}
if(eq == 2){
ntv <- as.character(x$formula[[2]][2])
if(!is.null(t.range)){
rlb <- t.range[1]
} else if(!is.null(t.vec)){
rlb <- t.vec
} else{
rlb <- range(x$y2)[1]
}
rlb <- ifelse(rlb < toleps, toleps, rlb)
if(!is.null(t.range)){
tv <- seq(rlb, t.range[2], length.out = ls)
} else if(!is.null(t.vec)){
tv <- rlb
} else{
tv <- seq(rlb, range(x$y2)[2], length.out = ls)
}
indp <- (x$X1.d2 + 1):(x$X1.d2 + x$X2.d2)
gob <- x$gam2
}
ti <- data.frame(tv)
names(ti) <- ntv
#############################################
# Cumulative quantities which we will use at the end of the 'for cycle' for the plot
# Note1: if we are plotting individual surv/hazard plot this will just be the individual curve (so we can think of these
# cumulative quantities as more general versions of the base ones)
# Note2: we define three seperate couples of quantities for clarity. Only one out of three couples will actually be used
# (depending on type chosen)
pr.cumul <- h.cumul <- ch.cumul <- rep(0, length(tv))
if(intervals == TRUE) CIpr.cumul <- CIh.cumul <- CIch.cumul <- matrix(0, length(tv), n.sim)
#############################################
if(!is.null(x$VC$mono.sm.pos)) mono.sm.pos <- x$VC$mono.sm.pos else mono.sm.pos <- c(x$VC$mono.sm.pos1, x$VC$mono.sm.pos2 + x$VC$X1.d2)
# we only sample once
if(intervals == TRUE){
bs <- rMVN(n.sim, mean = x$coef.t, sigma = x$Vb.t)
bs[, mono.sm.pos] <- ifelse(bs[, mono.sm.pos] < 0, 0, bs[, mono.sm.pos])
}
#############################################
for(obs in 1:dim(newdata)[1]){
if(obs %% 30 == 0) print(paste(round(obs/dim(newdata)[1]*100), '% of iterations complete', sep = ''))
newdata.temp = as.data.frame(newdata[obs,])
row.names(newdata.temp) = NULL
names(newdata.temp) <- names(newdata)
newdata.temp <- data.frame(ti, newdata.temp)
Xpred <- predict(x, newdata.temp, eq = eq, type = "lpmatrix")
if(baseline == TRUE){ # should be general enough but it may need checking in future ...
Xd <- Xdpred(gob, newdata.temp, ntv)
ind0 <- (colSums(Xd == 0) == dim(Xpred)[1])
ind0[1] <- FALSE # intercept must stay to have surv prob properly scaled
Xpred[, ind0] <- 0
}
params1 <- x$coef.t[indp]
eta1 <- Xpred%*%params1
pd <- probmS(eta1, x$VC$margins[eq], min.dn = min.dn, min.pr = min.pr, max.pr = max.pr)
pr <- pd$pr
if(intervals == TRUE){
eta1s <- Xpred%*%t(bs[,indp])
pds <- probmS(eta1s, x$VC$margins[eq], min.dn = min.dn, min.pr = min.pr, max.pr = max.pr)
prs <- pds$pr
}
#################################################################################################
if(type == "surv"){
if(intervals == TRUE){
# safety check
for(i in 1:length(tv)){
poe <- which(prs[i,] %in% boxplot.stats(prs[i,])$out)
prs[, poe] <- NA
poe <- union(poe, poet)
poet <- poe
}
CIpr <- prs
}
###########################################################
# Keeping track of confidence intervals and survival curves
pr.cumul = pr.cumul + pr
if(intervals == TRUE) CIpr.cumul = CIpr.cumul + CIpr
###########################################################
if(obs == dim(newdata)[1]){ # i.e. if we have arrived to the end of the for cycle, then produce plot
# Obtain average quantities from the cumulative ones
pr.avg = pr.cumul/dim(newdata)[1]
if(intervals == TRUE){ CIpr.avg <- CIpr.cumul/dim(newdata)[1]
CIpr.avg <- rowQuantiles(CIpr.avg, probs = c(prob.lev/2, 1-prob.lev/2), na.rm = TRUE)
}
if(plot.out){
if(intervals == TRUE) CIpr.avg.temp = matrix(CIpr.avg, nrow = length(tv)) # this trick is needed in case only one time point is entered
if(is.null(ylim) && intervals == TRUE) ylim <- c(min(CIpr.avg.temp[,1]), max(CIpr.avg.temp[,2]) )
if(is.null(ylim) && intervals == FALSE) ylim <- c(min(pr.avg), max(pr.avg) )
if(missing(ylab)) ylab <- "Survival function"
if(missing(xlab)) xlab <- "Time"
if(missing(pch)) pch <- 19
if(length(tv) == 1 | bars == TRUE) type.plot = 'p' else type.plot = 'l' # to account for the case in which just 1 time point is given (and for user choice of bars)
plot(tv, pr.avg, type = type.plot, ylab = ylab, xlab = xlab, ylim = ylim, pch = pch, ...)
if(intervals == TRUE){
if(length(tv) == 1 | bars == TRUE){
for(i in 1:length(tv)) lines(c(tv[i], tv[i]), CIpr.avg.temp[i,])
} else{
if(shade == FALSE){
lines(tv, CIpr.avg.temp[,1], lty = 2)
lines(tv, CIpr.avg.temp[,2], lty = 2)
}else{
polygon(c(tv, rev(tv)), c(CIpr.avg.temp[,1],rev(CIpr.avg.temp[,2])), col= "gray80", border = NA)
lines(tv, pr.avg, type = "l")
}
}
}
}
}
}
#################################################################################################
if(type == "hazard"){
if(baseline == FALSE) Xd <- Xdpred(gob, newdata.temp, ntv) # if true, already calculated above
Xthe <- Xd%*%params1
Gp <- pd$dS
h <- -Gp/pr*Xthe
if(intervals == TRUE){
Gps <- pds$dS
Xthes <- Xd%*%t(bs[,indp])
hs <- -Gps/prs*Xthes
# safety check
for(i in 1:length(tv)){
poe <- which(hs[i,] %in% boxplot.stats(hs[i,])$out)
hs[, poe] <- NA
poe <- union(poe, poet)
poet <- poe
}
CIh <- hs
CIh <- ifelse(CIh < 0, 0, CIh)
}
###########################################################
# Keeping track of confidence intervals and hazard curves
h.cumul = h.cumul + h
if(intervals == TRUE) CIh.cumul = CIh.cumul + CIh
###########################################################
if(obs == dim(newdata)[1]){ # i.e. if we have arrived to the end of the for cycle, then produce plot
h.avg <- h.cumul/dim(newdata)[1]
if(intervals == TRUE){ CIh.avg <- CIh.cumul/dim(newdata)[1]
CIh.avg <- rowQuantiles(CIh.avg, probs = c(prob.lev/2,1-prob.lev/2), na.rm = TRUE)
}
if(plot.out){
if(intervals == TRUE) CIh.avg.temp = matrix(CIh.avg, nrow = length(tv)) # this trick is needed in case only one time point is entered
if(is.null(ylim) && intervals == TRUE) ylim <- c(min(CIh.avg.temp[,1]), max(CIh.avg.temp[,2]) )
if(is.null(ylim) && intervals == FALSE) ylim <- c(min(h.avg), max(h.avg) )
if(missing(ylab)) ylab <- "Hazard"
if(missing(xlab)) xlab <- "Time"
if(missing(pch)) pch = 19
if(length(tv) == 1 | bars == TRUE) type.plot = 'p' else type.plot = 'l'
plot(tv, h.avg, type = type.plot, ylab = ylab, xlab = xlab, ylim = ylim, pch = pch, ...)
if(intervals == TRUE){
if(length(tv) == 1 | bars == TRUE){
for(i in 1:length(tv)) lines(c(tv[i], tv[i]), CIh.avg.temp[i,])
} else {
if(shade == FALSE){
lines(tv, CIh.avg.temp[,1], lty = 2)
lines(tv, CIh.avg.temp[,2], lty = 2)
}else{
polygon(c(tv,rev(tv)), c(CIh.avg.temp[,1],rev(CIh.avg.temp[,2])), col = "gray80", border = NA)
lines(tv, h.avg, type = "l")
}
}
}
}
}
}
#################################################################################################
if(type == "cumhaz"){
ch <- -log(pr)
if(intervals == TRUE){
prs <- -log(prs)
for(i in 1:length(tv)){
poe <- which(prs[i,] %in% boxplot.stats(prs[i,])$out)
prs[, poe] <- NA
poe <- union(poe, poet)
poet <- poe
}
CIch <- prs
}
###########################################################
# Keeping track of confidence intervals and hazard curves
ch.cumul = ch.cumul + ch
if(intervals == TRUE) CIch.cumul = CIch.cumul + CIch
###########################################################
if(obs == dim(newdata)[1]){ # i.e. if we have arrived to the end of the for cycle, then produce plot
ch.avg = ch.cumul/dim(newdata)[1]
if(intervals == TRUE){ CIch.avg = CIch.cumul/dim(newdata)[1]
CIch.avg <- rowQuantiles(CIch.avg, probs = c(prob.lev/2,1-prob.lev/2), na.rm = TRUE)
}
if(plot.out){
# if(is.null(ylim) && intervals == TRUE) ylim <- c(min(CIch.cumul[,1]),max(CIch.cumul[,2]) )
if(intervals == TRUE) CIch.avg.temp = matrix(CIch.avg, nrow = length(tv)) # this trick is needed in case only one time point is entered
if(is.null(ylim) && intervals == TRUE) ylim <- c(min(CIch.avg.temp[,1]), max(CIch.avg.temp[,2]) )
if(is.null(ylim) && intervals == FALSE) ylim <- c(min(ch.avg),max(ch.avg) )
if(missing(ylab)) ylab <- "Cumulative Hazard"
if(missing(xlab)) xlab <- "Time"
if(length(tv) == 1 | bars == TRUE) type.plot = 'p' else type.plot = 'l'
plot(tv, ch.avg, type = type.plot, ylab = ylab, xlab = xlab, ylim = ylim, pch = pch, ...)
if(intervals == TRUE){
if(length(tv) == 1 | bars == TRUE){
for(i in 1:length(tv)) lines(c(tv[i], tv[i]), CIch.avg.temp[i,])
} else{
if(shade == FALSE){
lines(tv, CIch.avg.temp[,1], lty = 2)
lines(tv, CIch.avg.temp[,2], lty = 2)
}else{
polygon(c(tv,rev(tv)), c(CIch.avg.temp[,1],rev(CIch.avg.temp[,2])), col= "gray80", border = NA)
lines(tv, ch.avg, type = "l")
}
}
}
}
}
}
}
#################################################################################################
#################################################################################################
out.r <- list(s = pr.avg, h = h.avg, ch = ch.avg, h.sim = hs, s.sim = prs, l.poe = length(poe),
CIs = CIpr.avg, CIh = CIh.avg, CIch = CIch.avg)
invisible(out.r)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.