Nothing
#' Compute crude probability of death
#'
#' Estimates the crude probability of death due to disease and due to
#' population reasons
#'
#' NOTE: The follow-up time must be specified in days. The \code{ratetable}
#' being used may have different variable names and formats than the user's
#' data set, this is dealt with by the \code{rmap} argument. For example, if
#' age is in years in the data set but in days in the \code{ratetable} object,
#' age=age*365.241 should be used. The calendar year can be in any date format
#' (date, Date and POSIXt are allowed), the date formats in the
#' \code{ratetable} and in the data may differ.
#'
#' Note that numerical integration is required to calculate the variance
#' estimator. The integration precision is set with argument \code{precision},
#' which defaults to daily intervals, a default that should give enough
#' precision for any practical purpose.
#'
#' The area under the curve is calculated on the interval [0,\code{tau}].
#'
#' Function \code{summary} may be used to get the output at specific points in
#' time.
#'
#' @aliases cmp.rel print.cmp.rel
#' @param formula a formula object, with the response as a \code{Surv} object
#' on the left of a \code{~} operator, and, if desired, terms separated by the
#' \code{+} operator on the right. If no strata are used, \code{~1} should be
#' specified.
#'
#' NOTE: The follow-up time must be in days.
#' @param data a data.frame in which to interpret the variables named in the
#' \code{formula}.
#' @param ratetable a table of event rates, organized as a \code{ratetable}
#' object, such as \code{slopop}.
#' @param na.action a missing-data filter function, applied to the model.frame,
#' after any subset argument has been used. Default is
#' \code{options()$na.action}.
#' @param tau the maximum follow-up time of interest, all times larger than
#' \code{tau} shall be censored. Equals maximum observed time by default
#' @param conf.int the level for a two-sided confidence interval on the
#' survival curve(s). Default is 0.95.
#' @param precision the level of precision used in the numerical integration of
#' variance. Default is 1, which means that daily intervals are taken, the
#' value may be decreased to get a higher precision or increased to achieve a
#' faster calculation. The calculation intervals always include at least all
#' times of event and censoring as border points.
#' @param add.times specific times at which the value of estimator and its
#' variance should be evaluated. Default is all the event and censoring times.
#' @param rmap an optional list to be used if the variables are not organized
#' and named in the same way as in the \code{ratetable} object. See details
#' below.
#' @return An object of class \code{cmp.rel}. Objects of this class have
#' methods for the functions \code{print} and \code{plot}. The \code{summary}
#' function can be used for printing output at required time points. An object
#' of class \code{cmp.rel} is composed of several lists, each pertaining the
#' cumulative hazard function for one risk and one strata. Each of the lists
#' contains the following objects: \item{time}{the time-points at which the
#' curves are estimated} \item{est}{the estimate} \item{var}{the variance of
#' the estimate} \item{lower}{the lower limit of the confidence interval}
#' \item{upper}{the upper limit of the confidence interval} \item{area}{the
#' area under the curve calculated on the interval [0,\code{tau}]}
#' \item{index}{indicator of event and censoring times among all the times in
#' the output. The times added via paramater \code{add.times} are also
#' included} \item{add.times}{the times added via parameter \code{add.times}}
#' @seealso \code{rs.surv}, \code{summary.cmp.rel}
#' @references Package: Pohar Perme, M., Pavlic, K. (2018) "Nonparametric
#' Relative Survival Analysis with the R Package relsurv". Journal of
#' Statistical Software. 87(8), 1-27, doi: "10.18637/jss.v087.i08"
#' @keywords survival
#' @examples
#'
#'
#' data(slopop)
#' data(rdata)
#' #calculate the crude probability of death
#' #note that the variable year must be given in a date format and that
#' #age must be multiplied by 365.241 in order to be expressed in days.
#' fit <- cmp.rel(Surv(time,cens)~sex,rmap=list(age=age*365.241),
#' ratetable=slopop,data=rdata,tau=3652.41)
#' fit
#' plot(fit,col=c(1,1,2,2),xscale=365.241,xlab="Time (years)")
#' #if no strata are desired:
#' fit <- cmp.rel(Surv(time,cens)~1,rmap=list(age=age*365.241),
#' ratetable=slopop,data=rdata,tau=3652.41)
#'
#'
#'
cmp.rel <- function (formula = formula(data), data = parent.frame(), ratetable = relsurv::slopop,
na.action,tau,conf.int=0.95,precision=1,add.times,rmap)
#formula: for example Surv(time,cens)~1 #not implemented for subgroups - DO IT!
#data: the observed data set
#ratetable: the population mortality tables
#conf.type: confidence interval calculation (plain, log or log-log)
#conf.int: confidence interval
#tau: max. cas do katerega racuna
{
call <- match.call()
if (!missing(rmap)) {
rmap <- substitute(rmap)
}
rform <- rformulate(formula, data, ratetable, na.action,rmap) #get the data ready
data <- rform$data #the data set
se.fac <- sqrt(qchisq(conf.int, 1)) #factor needed for confidence interval
if(missing(tau)) tau<-max(rform$Y)
p <- rform$m #number of covariates
if (p > 0) #if covariates
data$Xs <- strata(rform$X[, ,drop=FALSE ]) #make strata according to covariates
else data$Xs <- rep(1, nrow(data)) #if no covariates, just put 1
tab.strata <- table(data$Xs) #unique strata values
ntab.strata <- length(tab.strata) #number of strata
dtemp <- list(NULL)
out <- as.list(rep(dtemp,ntab.strata*2))
for (kt in 1:ntab.strata) { #for each stratum
inx <- which(data$Xs == names(tab.strata)[kt]) #individuals within this stratum
extra <- as.numeric(seq(1,max(rform$Y[inx]),by=precision))
if(!missing(add.times)) extra <- c(extra,as.numeric(add.times))
tis <- sort(unique(pmin(tau,union(rform$Y[inx],extra))) ) #1-day long intervals used - to take into the account the continuity of the pop. part
#if(!all.times)tis <- sort(unique(pmin(rform$Y[inx],tau))) #unique times
#else{
# tis <- sort(union(rform$Y[inx], as.numeric(1:floor(max(rform$Y[inx]))))) #1-day long intervals used - to take into the account the continuity of the pop. part
# tis <- unique(pmin(tis,tau))
#}
k <- length(tis)
out[[2*kt-1]]$time <- out[[2*kt]]$time <- c(0,tis)
temp <- exp.prep(rform$R[inx,,drop=FALSE],rform$Y[inx],rform$ratetable,rform$status[inx],times=tis,fast=TRUE,cmp=T) #calculate the values for each interval of time
areae <- sum(temp$areae)/365.241 # sum(diff(c(0,tis))*temp$cumince)/365.241
areap <- sum(temp$areap)/365.241 #sum(diff(c(0,tis))*temp$cumincp)/365.241
options(warn=-1)
out[[2*kt-1]]$est <- c(0,temp$cumince)
out[[2*kt-1]]$var <- c(0,temp$ve)
out[[2*kt-1]]$lower <- temp$cumince-se.fac*sqrt(temp$ve)
out[[2*kt-1]]$upper <- temp$cumince+se.fac*sqrt(temp$ve)
out[[2*kt-1]]$area <- areae
out[[2*kt]]$est <- c(0,temp$cumincp)
out[[2*kt]]$var <- c(0,temp$vp)
out[[2*kt]]$lower <- temp$cumincp-se.fac*sqrt(temp$vp)
out[[2*kt]]$upper <- temp$cumincp+se.fac*sqrt(temp$vp)
out[[2*kt]]$area <- areap
options(warn=0)
ne <- sum(temp$ve<0)
if(ne>0) warning(paste(names(tab.strata)[kt],": The estimated variance of crude mortality is negative in ", ne, " out of ", length(temp$ve)," intervals"), call. = FALSE)
if(!missing(add.times)){
out[[2*kt-1]]$index <- out[[2*kt]]$index <- unique(c(1,which(tis %in% c(rform$Y[inx],add.times,tau))))
out[[2*kt-1]]$add.times <- out[[2*kt]]$add.times <- add.times
}
else out[[2*kt-1]]$index <- out[[2*kt]]$index <- unique(c(1,which(tis %in% c(rform$Y[inx],tau))))
}
if(p>0)names(out) <- paste(rep(c("causeSpec","population"),ntab.strata),rep(names(tab.strata),each=2))
else names(out) <- c("causeSpec","population")
out$tau <- tau
class(out) <- "cmp.rel"
out
}
#' Plot the crude probability of death
#'
#' Plot method for cmp.rel. Plots the cumulative probability of death due to
#' disease and due to population reasons
#'
#' By default, the graph is plotted as a step function for the cause specific
#' mortality and as a piecewise linear function for the population mortality.
#' It is evaluated at all event and censoring times even though it constantly
#' changes also between these time points.
#'
#' If the argument \code{all.times} is set to \code{TRUE}, the plot is
#' evaluated at all times that were used for numerical integration in the
#' \code{cmp.rel} function (there, the default is set to daily intervals). If
#' only specific time points are to be added, this should be done via argument
#' \code{add.times} in \code{cmp.rel}.
#'
#' @param x a list, with each component representing one curve in the plot,
#' output of the function \code{cmp.rel}.
#' @param main the main title for the plot.
#' @param curvlab Curve labels for the plot. Default is \code{names(x)}, or if
#' that is missing, \code{1:nc}, where \code{nc} is the number of curves in
#' \code{x}.
#' @param ylim yaxis limits for plot.
#' @param xlim xaxis limits for plot (default is 0 to the largest time in any
#' of the curves).
#' @param wh if a vector of length 2, then the upper right coordinates of the
#' legend; otherwise the legend is placed in the upper right corner of the
#' plot.
#' @param xlab X axis label.
#' @param ylab y axis label.
#' @param lty vector of line types. Default \code{1:nc} (\code{nc} is the
#' number of curves in \code{x}). For color displays, \code{lty=1},
#' \code{color=1:nc}, might be more appropriate. If \code{length(lty)<nc}, then
#' \code{lty[1]} is used for all.
#' @param xscale Scale of the X axis. Default is in days (1).
#' @param col vector of colors. If \code{length(col)<nc}, then the
#' \code{col[1]} is used for all.
#' @param lwd vector of line widths. If \code{length(lwd)<nc}, then
#' \code{lwd[1]} is used for all.
#' @param curves Vector if integers, specifies which curves should be plotted.
#' May take values \code{1:nc}, where \code{nc} is the number of curves in
#' \code{x}. By default, all of the curves are plotted.
#' @param conf.int Vector if integers, specifies which confidence intervals
#' should be plotted. May take values \code{1:nc}, where \code{nc} is the
#' number of curves in \code{x}. By default, no confidence intervals are
#' plotted.
#' @param all.times By default, the disease specific mortality estimate is
#' plotted as a step function between event or censoring times. If set to
#' \code{TRUE}, the graph is evaluated at all estimated times.
#' @param ... additional arguments passed to the initial call of the plot
#' function.
#' @return No value is returned.
#' @seealso \code{rs.surv}
#' @keywords survival
#' @examples
#'
#' data(slopop)
#' data(rdata)
#' fit <- cmp.rel(Surv(time,cens)~sex,rmap=list(age=age*365.241),
#' ratetable=slopop,data=rdata,tau=3652.41)
#' plot(fit,col=c(1,1,2,2),xscale=365.241,conf.int=c(1,3))
#'
plot.cmp.rel <- function (x, main = " ", curvlab, ylim = c(0, 1), xlim, wh = 2,
xlab = "Time (days)", ylab = "Probability", lty = 1:length(x), xscale=1,
col = 1, lwd = par("lwd"), curves, conf.int, all.times=FALSE,...)
{
#wh= upper left coordinates of the legend, if of length 1, the legend is placed top left.
tau <- x$tau
x$tau <- NULL
nc <- length(x) #number of curves
if (length(lty) < nc) #if not enough different line types
lty <- rep(lty[1], nc)
else lty <- lty[1:nc]
if (length(lwd) < nc) #if not enough different line widths
lwd <- rep(lwd[1], nc)
else lwd <- lwd[1:nc]
if (length(col) < nc) #if not enough different colors
col <- rep(col[1], nc)
else col <- col[1:nc]
if (missing(curvlab)) { #if no curve labels desired
if (mode(names(x)) == "NULL") { #and no curve labels prespecified
curvlab <- as.character(1:nc)
}
else curvlab <- names(x)[1:nc] #use prespecified if they exist
}
if (missing(xlim)) { #if no limits desired
xmax <- 0
for (i in 1:nc) {
xmax <- max(c(xmax, x[[i]][[1]]/xscale)) #take max time over all strata
}
xlim <- c(0, xmax)
}
if(all.times){
for(it in 1:nc){
x[[it]]$index <- 1:length(x[[it]][[1]])
}
}
if(missing(curves))curves <- 1:nc
if(missing(conf.int))conf.int <- NULL
curves <- unique(curves)
conf.int <- unique(conf.int)
if(any((curves %in% 1:nc)==FALSE)) stop(paste("The curves argument should be specified as a vector of integers from 1 to", nc,sep=" "))
if(any((conf.int %in% 1:nc)==FALSE)) stop(paste("The conf.int argument should be specified as a vector of integers from 1 to", nc,sep=" "))
if(any((conf.int %in% curves)==FALSE)) stop("Confidence interval may only be plotted if the curve is plotted, see argument curves")
col_nums <- floor(seq(from=95,to=50,length.out=length(conf.int)+2))
col.conf.temp <- sapply(col_nums,function(x)paste("gray",as.character(x),sep=""))
col.conf.int <- rep("white",nc)
col.conf.int[conf.int] <- col.conf.temp[-c(1,length(col.conf.temp))]
plot((x[[1]][[1]]/xscale)[x[[1]]$index], (x[[1]][[2]])[x[[1]]$index], type = "n", ylim = ylim, xlim = xlim,
main = main, xlab = xlab, ylab = ylab, bty = "l", ...) #plot estimates [[1]]=time, [[2]]=est
if (length(wh) != 2) {
wh <- c(xlim[1], ylim[2])
}
u <- list(...)
if (length(u) > 0) {
i <- pmatch(names(u), names(formals(legend)), 0)
do.call("legend", c(list(x = wh[1], y = wh[2], legend = curvlab[curves],
col = col[curves], lty = lty[curves], lwd = lwd[curves], bty = "n", bg = -999999),
u[i > 0]))
}
else {
do.call("legend", list(x = wh[1], y = wh[2], legend = curvlab[curves],
col = col[curves], lty = lty[curves], lwd = lwd[curves], bty = "n", bg = -999999))
}
for(i in conf.int){
if(i%%2==0)with(x[[i]],polygon(c(time[index][!is.na(lower[index])],rev(time[index][!is.na(upper[index])]))/xscale,c(lower[index][!is.na(lower[index])],rev(upper[index][!is.na(upper[index])])),col = col.conf.int[i] , border = FALSE))
else with(x[[i]],my.poly(time[index][!is.na(lower[index])]/xscale,time[index][!is.na(upper[index])]/xscale,lower[index][!is.na(lower[index])],upper[index][!is.na(upper[index])],col = col.conf.int[i] , border = FALSE))
}
for (i in curves) {
tip <- "s"
if(i%%2==0)tip <- "l"
lines((x[[i]][[1]]/xscale)[x[[i]]$index], (x[[i]][[2]])[x[[i]]$index], lty = lty[i], col = col[i],
lwd = lwd[i], type=tip, ...)
}
}
my.poly <- function(x1,x2,y1,y2,...){
x1 <- rep(x1,each=2)[-1]
y1 <- rep(y1,each=2)[-(2*length(y1))]
x2 <- rep(x2,each=2)[-1]
y2 <- rep(y2,each=2)[-(2*length(y2))]
polygon(c(x1,rev(x2)),c(y1,rev(y2)),...)
}
print.cmp.rel <- function (x, ntp = 4, maxtime,scale=365.241, ...)
{
tau <- x$tau
x$tau <- NULL
nc <- length(x)
if (missing(maxtime)) {
maxtime <- 0
for (i in 1:nc) maxtime <- max(maxtime, x[[i]]$time)
}
tp <- pretty(c(0, maxtime/scale), ntp + 1)
tp <- tp[-c(1, length(tp))]
if(length(x[[1]]$add.times)>0 & length(x[[1]]$add.times)<5){
tp <- sort(unique(c(tp,round(x[[1]]$add.times/scale,1))))
}
cat("Estimates, variances and area under the curves:\n")
x$tau <- tau
print(summary(x, tp,scale,area=TRUE), ...)
invisible()
}
#' Summary of the crude probability of death
#'
#' Returns a list containing the estimated values at required times.
#'
#' The variance is calculated using numerical integration. If the required time
#' is not a time at which the value was estimated, the value at the last time
#' before it is reported. The density of the time points is set by the
#' \code{precision} argument in the \code{cmp.rel} function.
#'
#' @param object output of the function \code{cmp.rel}.
#' @param times the times at which the output is required.
#' @param scale The time scale in which the times are specified. The default
#' value is \code{1}, i.e. days.
#' @param area Should area under the curves at time \code{tau} be printed out?
#' Default is \code{FALSE}.
#' @param ... Additional arguments, currently not implemented
#' @return A list of values is returned.
#' @seealso \code{cmp.rel}
#' @keywords survival
#' @examples
#'
#' data(slopop)
#' data(rdata)
#' #calculate the crude probability of death and summarize it
#' fit <- cmp.rel(Surv(time,cens)~sex,rmap=list(age=age*365),
#' ratetable=slopop,data=rdata,tau=3652.41)
#' summary(fit,c(1,3),scale=365.241)
#'
summary.cmp.rel <- function (object, times,scale=365.241,area=FALSE,...)
{
tau <- object$tau
object$tau <- NULL
ng <- length(object)
times <- sort(unique(times))*scale
nt <- length(times)
storage.mode(times) <- "double"
storage.mode(nt) <- "integer"
ind <- matrix(0, ncol = nt, nrow = ng)
oute <- matrix(NA, ncol = nt, nrow = ng)
outv <- oute
outa <- matrix(NA,ncol=1,nrow=ng)
storage.mode(ind) <- "integer"
slct <- rep(TRUE, ng)
for (i in 1:ng) {
if (is.null((object[[i]])$est)) {
slct[i] <- FALSE
}
else {
z <- rep(NA,nt)
for(kt in 1:nt)z[kt] <- rev(which(object[[i]][[1]]<=times[kt]))[1]
ind[i, ] <- z
oute[i, ind[i, ] > 0] <- object[[i]][[2]][z]
outa[i,] <- object[[i]][[6]]
if (length(object[[i]]) > 2)
outv[i, ind[i, ] > 0] <- object[[i]][[3]][z]
}
}
dimnames(oute) <- list(names(object)[1:ng], as.character(times/scale))
dimnames(outv) <- dimnames(oute)
rownames(outa) <- rownames(oute)
colnames(outa) <- paste("Area at tau =",tau/scale)
if(area)list(est = oute[slct, , drop = FALSE], var = outv[slct, ,
drop = FALSE], area=outa[slct,,drop=FALSE])
else list(est = oute[slct, , drop = FALSE], var = outv[slct, ,
drop = FALSE])
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.