R/pseudo.R

Defines functions pseudo

Documented in pseudo

#
# Function for pseudo-values
#
pseudo <- function(fit, times, type, addNA=TRUE, data.frame=FALSE,
                   minus1=FALSE, ...) {
    if (!inherits(fit, "survfit"))
        stop("fit argument must be a survfit object")
    if (inherits(fit, "survfit.coxph"))
        stop("psuedo values not defined for a coxph survival curve")

    if (missing(type)) type <- "pstate"
    else {
        # allow various aliases for the type
        temp <- c("pstate", "cumhaz", "sojourn", "survival",
                              "chaz", "rmst", "rmts", "auc")
        type <- match.arg(casefold(type), temp)
        itemp <-  c(1,2,3,1,2,3,3,3)[match(type, temp)]
        type <- c("pstate", "cumhaz", "auc")[itemp]
    }

    # all the real work is done by the residuals function
    rfit <- survresid.fit(fit, times=times, type=type, ...)
    resid <- rfit$residuals
    curve <- rfit$curve
    id    <- rfit$id
    idname <- rfit$idname

    ddr <- dim(resid)
    ddf <- dim(fit)

    if (is.null(fit$strata)) ncurve <- 1L  
    else ncurve <- length(fit$strata)  # ncurve will also = max(curve)

    ntime <- length(times)
    nstate <- length(fit$states)

    minus <- ifelse(minus1, 1, 0)  # use n-1 or n to multiply

    # get the estimates
    # retain the dimension of the residuals, which will have subjects first,
    #  timepoints second and (state or cumhaz) last.
    # The residuals have a row for each subject.  To recenter them for a
    #  pseudovalue, however, we need to match each one to the curve from
    #  whence it came.
    # If the data has been collapsed, n is the number of subjects after
    #  the collapse, otherwise the number of observations.
    nn <- as.vector(table(curve)) - minus

    if (type == "pstate") {
        temp <- summary(fit, times=times, extend=TRUE)
        if (!is.null(temp$pstate)) { # multi-state
            # pstate will have times, curve, state,  as the order
            # resid will have subject, times, state
            yhat <-  array(temp$pstate, dim=c(ntime, ncurve, nstate))
            yhat <-  aperm(yhat, c(2,1,3))
            # yhat[curve,,] is the trick: resid has first dimension of n =
            #  number of subjects, yhat has 1 row per curve, yhat[curve,,]
            #  will have n rows, each pulled from the correct curve
            ptemp <- yhat[curve,,] + nn[curve]*resid
        }
        else {
            yhat <- matrix(temp$surv, nrow = ncurve, byrow=TRUE)
            ptemp <- yhat[curve,] + nn[curve]*resid
        }
    }       
    else if (type=="cumhaz") {
        temp <- summary(fit, times=times, extend=TRUE)$cumhaz
        if (is.matrix(temp)) { # multi-state
            nhaz <- ncol(fit$cumhaz)
            yhat <- array(temp, dim=c(ntime, ncurve, nhaz))
            yhat <- aperm(yhat, c(2,1,3)) 
            ptemp <- yhat[curve,,] + nn[curve]*resid
        }
         else {
            yhat <- matrix(temp, nrow=ncurve, byrow=TRUE)
            ptemp <- yhat[curve,] + nn[curve]*resid
        }
    }

    else { #auc
        fit <- survfit0(fit) # add in time 0
        aucfun <- function(x, y, times) {
            # the nuisance is allowing for in-between values  
            # the solution is to expand the list of times, and fill in y for
            #  those insertions.
            alltime <- unique(sort(c(x, times)))
            alltime <- alltime[alltime <= max(times)]
            index <- findInterval(alltime, x, left.open=FALSE)
            delta <- c(diff(alltime), 0)
            i2 <- match(times, alltime)
            if (is.matrix(y)) {
               temp <- apply(y[index,,drop=FALSE], 2, 
                             function(y) cumsum(delta*y))
               temp[i2-1,,drop=FALSE]
            }
            else {
                temp <- cumsum(y[index]*delta)
                temp[i2-1]
            }       
        }                  

        if (inherits(fit, "survfitms")) { # multi-state
            if (is.null(fit$strata)) {
                auc <- aucfun(fit$time, fit$pstate, times)
                yhat <- array(auc, dim=c(1, ntime, nstate))
                ptemp <- yhat[curve,,] + nn[curve]*resid
            } else {
                yhat <- array(0, dim=c(ncurve, ntime, nstate))
                for (i in 1:ncurve) {
                    temp <- fit[i,]
                    yhat[i,,] <- aucfun(temp$time, temp$pstate, times)
                }
                ptemp <- yhat[curve,,] + nn[curve]*resid
            }
         } else { # simple survival
             if (is.null(fit$strata)) {
                 yhat <- aucfun(fit$time, fit$surv, times) # will be a vector
                 ptemp <- yhat[col(resid)] + nn[curve]*resid
             } else {
                 yhat <- matrix(0, ncurve, ntime)
                 for (i in 1:ncurve) {
                     temp <- fit[i]
                     yhat[i,] <- aucfun(temp$time, temp$surv, times)
                 }
             ptemp <- yhat[curve,] + nn[curve]*resid
             } 
         }
    }
        
    if (missing(addNA) && !is.null(id) && 
               any(duplicated(id))) addNA <- FALSE
    if (addNA && length(fit$na.action) >0) {
        if (!is.null(id)) {
            if (any(duplicated(id))) {
                # the data set was collapsed over id, we can't do it
                warning("data collapsed on id, addNA option ignored")
            } else {
                ptemp <- pexpand(fit$na.action, ptemp)
                if (data.frame) {
                    omit <- fit$na.action  #observations tossed out
                    n2 <- dim(ptemp)[1]
                    keep <- rep.int(NA, n2)
                    keep[-omit] <- seq(along.with =id)
                    temp <- id
                    id <- vector(class(id), n2)
                    id[keep] <- temp
                }
            }
        }
        else ptemp <- pexpand(fit$na.action, ptemp)
    }

    if (data.frame) { # return this as a data.frame
        # warning: expand.grid defaults to stringsAsFactors = TRUE
        if (is.null(id)) id <- seq.int(1, dim(ptemp)[1])
        else if (length(id) > length(cluster)) id <- unique(id)
        if (length(fit$states) >0)
             temp <- expand.grid(id=id, times=times, state=fit$states,
                                 stringsAsFactors=FALSE)
        else temp <-  expand.grid(id =id, time=times, stringsAsFactors=FALSE)
        ptemp <- data.frame(temp, pseudo= as.vector(ptemp))
        # if there is no id variable we want to return a column labeled as (Id).
        #  like (Intercept) a convention that made up names not conflict with
        #  standard names.  But anything you do to a dataframe with such a
        #  name will cause it to be replaced with X.Id.  So we have to do this
        #  at the very last.
        if (!is.null(idname)) names(ptemp)[1] <- idname
        else names(ptemp)[1] <- "(Id)"
    }

    ptemp
}        

# this is a copy of the naomit.exclude function
pexpand <- function (omit, x) 
{
    if (is.null(x)) 
        return(x)
    n <- NROW(x)
    keep <- rep.int(NA, n + length(omit))
    keep[-omit] <- 1:n
        
    if (is.matrix(x)) {
        x <- x[keep, , drop = FALSE]
        temp <- rownames(x)
        if (length(temp)) {
            temp[omit] <- names(omit)
            rownames(x) <- temp
        }
    }
    else if (is.array(x) && length(d <- dim(x)) > 2L) {
        x <- x[keep, , , drop = FALSE]
        temp <- (dn <- dimnames(x))[[1L]]
        if (!is.null(temp)) {
            temp[omit] <- names(omit)
            dimnames(x)[[1L]] <- temp
        }
    }
    else {
        x <- x[keep]
        temp <- names(x)
        if (length(temp)) {
            temp[omit] <- names(omit)
            names(x) <- temp
        }
    }
    x
}

Try the survival package in your browser

Any scripts or data that you put into this service are public.

survival documentation built on Aug. 24, 2021, 5:06 p.m.