R/confints.R
In shinyCox: Create 'shiny' Applications for Cox Proportional Hazards Models

Documented in get_confint predict_se surv_pred_info

#' Obtains information for standard errors of predictions
#'
#' @description Computes necessary information to calculate standard errors and
#'   confidence intervals in shiny app. This is adapted from parts of
#'   [survival::survfit.coxph()]. This function is meant to be used in conjunction with
#'   [predict_se()].
#'
#' @param model a `coxph` object
#' @param ctype whether the cumulative hazard computation should have a
#'   correction for ties, 1=no, 2=yes.
#' @param individual deprecated argument, replaced by `id`
#' @param id optional variable name of subject identifiers. Not supported in app
#' @param se.fit a logical value indicating whether standard errors should be
#'   computed. Default is TRUE for standard models, FALSE for multi-state (code
#'   not yet present for that case.)
#' @param stype computation of the survival curve, 1=direct, 2=exponential of
#'   the cumulative hazard. Default is 2.
#' @returns A list of information needed for computing predicted standard
#'   errors.
#' @examplesIf interactive()
#' library(survival)
#'
#' colondeaths <- colon[colon$etype == 2, ]
#' split_colon <- split(colondeaths, colondeaths$rx)
#' colon_arm1 <- split_colon$Obs
#'
#' colon1ph <- coxph(Surv(time, status) ~ factor(extent) + nodes + strata(surg)
#'                    + factor(differ),
#'                    colon_arm1,
#'                    x = TRUE, model = TRUE)
#' surv_pred_info(colon1ph)
#'
#' @import stats
#' @import survival
#' @export
surv_pred_info = function(model, ctype, individual = FALSE, id, se.fit = TRUE, stype = 2) {
  object <- model

  Terms  <- terms(object)
  robust <- !is.null(object$naive.var)

  if (!is.null(attr(object$terms, "specials")$tt)) {
    stop("The survfit function can not process coxph models with a tt term")
  }


  if (missing(ctype)) {
    # Use the appropriate one from the model
    temp1 <- match(object$method, c("exact", "breslow", "efron"))
    ctype <- c(1,1,2)[temp1]
  } else if (!(ctype %in% 1:2)) {stop ("ctype must be 1 or 2")}
  if (!(stype %in% 1:2)) {stop("stype must be 1 or 2")}

  tfac <- attr(Terms, 'factors')
  temp <- attr(Terms, 'specials')$strata
  has.strata <- !is.null(temp)
  if (has.strata) {
    stangle = untangle.specials(Terms, "strata")  #used multiple times, later
    # Toss out strata terms in tfac before doing the test 1 line below, as
    #  strata end up in the model with age:strat(grp) terms or *strata() terms
    #  (There might be more than one strata term)
    for (i in temp) tfac <- tfac[,tfac[i,] ==0]  # toss out strata terms
  } else stangle = NULL
  if (any(tfac >1)) {
    stop("not able to create a curve for models that contain an interaction without the lower order effect")
  }

  Terms <- object$terms
  n <- object$n[1]
  if (!has.strata) {strata <- NULL} else {strata <- object$strata}

  if (!missing(individual)) {warning("the `id' option supersedes `individual'")}
  # This part will need addressing, for now missid will just be set to true
  missid <- missing(id) # I need this later, and setting id below makes
  # "missing(id)" always false
  #missid = TRUE

  if (!missid) {individual <- TRUE
  } else if (missid && individual) {id <- rep(0L,n)
  }else {id <- NULL}
  #id = NULL
  # Maybe this is needed? Don't know yet
  # if (individual & missing(newdata)) {
  #   stop("the id option only makes sense with new data")
  # }

  if (has.strata) {
    temp <- attr(Terms, "specials")$strata
    factors <- attr(Terms, "factors")[temp,]
    strata.interaction <- any(t(factors)*attr(Terms, "order") >1)
  }

  # We don't allow multistate at this point, so maybe remove this
  coxms <- inherits(object, "coxphms")
  if (coxms || is.null(object$y) || is.null(object[['x']]) ||
      !is.null(object$call$weights) || !is.null(object$call$id) ||
      (has.strata && is.null(object$strata)) ||
      !is.null(attr(object$terms, 'offset'))) {

    mf <- stats::model.frame(object)
  } else mf <- NULL

  position <- NULL
  Y <- object[['y']]
  if (is.null(mf)) {
    weights <- object$weights  # let offsets/weights be NULL until needed
    offset <- NULL
    offset.mean <- 0
    X <- object[['x']]
  } else {
    weights <- model.weights(mf)
    offset <- model.offset(mf)
    if (is.null(offset)) offset.mean <- 0
    else {
      if (is.null(weights)) offset.mean <- mean(offset)
      else offset.mean <- sum(offset * (weights/sum(weights)))
    }
    X <- model.matrix.coxph(object, data=mf)
    if (is.null(Y) || coxms) {
      Y <- model.response(mf)
      if (is.null(object$timefix) || object$timefix) Y <- aeqSurv(Y)
    }
    oldid <- model.extract(mf, "id")
    if (length(oldid) && ncol(Y)==3) position <- survflag(Y, oldid)
    else position <- NULL
    if (!coxms && (nrow(Y) != object$n[1]))
      stop("Failed to reconstruct the original data set")
    if (has.strata) {
      if (length(strata)==0) {
        if (length(stangle$vars) ==1) strata <- mf[[stangle$vars]]
        else strata <- strata(mf[, stangle$vars], shortlabel=TRUE)
      }
    }

  }

  varmat <- object$var
  beta <- ifelse(is.na(object$coefficients), 0, object$coefficients)
  xcenter <- sum(object$means * beta) + offset.mean
  if (!is.null(object$frail)) {
    keep <- !grepl("frailty(", dimnames(X)[[2]], fixed=TRUE)
    X <- X[,keep, drop=F]
  }

  if (is.null(offset)) {risk <- c(exp(X%*% beta - xcenter))
  }else   {risk <- c(exp(X%*% beta + offset - xcenter))}

  wt <- weights
  if (missing(strata) || length(strata)==0) {strata <- rep(0L, nrow(Y))}

  if (is.factor(strata)) {ustrata <- levels(strata)
  } else {ustrata <- sort(unique(strata))}
  nstrata <- length(ustrata)
  survlist <- vector('list', nstrata)
  names(survlist) <- ustrata
  survtype <- if (stype==1) {1
  }else {ctype+1}
  vartype <- survtype
  if (is.null(wt)) wt <- rep(1.0, nrow(Y))
  if (is.null(strata)) strata <- rep(1L, nrow(Y))
  for (i in 1:nstrata) {
    indx <- which(strata== ustrata[i])
    survlist[[i]] <- agsurv(Y[indx,,drop=F], X[indx,,drop=F],
                                       wt[indx], risk[indx],
                                       survtype, vartype)
  }

  stuff_for_later <- list(survlist = survlist, Terms = Terms, has.strata = has.strata,
                         stangle = stangle, xlevels = object$xlevels, means = object$means,
                         beta = beta, xcenter = xcenter, se.fit = se.fit, varmat = varmat,
                         survtype = survtype)

  return(stuff_for_later)
}

#' Creates predicted survival and standard errors for confidence intervals
#'
#' @description Adapted from parts of [survival::survfit.coxph()], computes predictions for
#'   standard errors based on [surv_pred_info()] output and `newdata` from the
#'   shiny app.
#'
#' @param listsurv Output from [surv_pred_info()] function
#' @param coxfit `coxfit` object created for predictions. Used to find strata
#' @param newdata Data used to make predicted standard errors
#'
#' @returns a list of number of subjects for each curve, times at which the
#'   curve has a step, number at risk for each time, number of events at each
#'   time, number censored at each time (no event but exit risk set), estimated
#'   survival, cumulative hazard at each transition, and standard error of the
#'   cumulative hazard.
#'
#' @examplesIf interactive()
#'   library(survival)
#'   library(shinyCox)
#'   colondeaths <- colon[colon$etype == 2, ]
#'   split_colon <- split(colondeaths, colondeaths$rx)
#'
#'   colon_arm1 <- split_colon$Obs
#'   colon1ph <- coxph(Surv(time, status) ~
#'   factor(extent) + nodes + strata(surg) + factor(differ), colon_arm1, x =
#'   TRUE, model = TRUE)
#'
#'   new.data = cbind.data.frame(`factor(extent)` = 3, `surg` =
#'   "surg=0",`factor(differ)` = 2,`nodes` = 5)
#'
#'
#'   coxfit = prep_coxfit(colon1ph)
#'   coxlist = surv_pred_info(colon1ph)
#'
#'   predict_se(coxlist, coxfit, new.data)
#'
#' @import survival
#' @import stats
#' @export
predict_se = function(listsurv, coxfit, newdata) {
  object <- list()
  survlist <- listsurv[[1]]
  Terms <- listsurv[[2]]
  has.strata <- listsurv[[3]]
  stangle <- listsurv[[4]]
  object$xlevels <- listsurv[[5]]
  object$means <- listsurv[[6]]
  beta <- listsurv[[7]]
  xcenter <- listsurv[[8]]
  se.fit <- listsurv[[9]]
  varmat <- listsurv[[10]]
  #strata <- listsurv[[11]]
  survtype <- listsurv[[11]]
  found.strata <- FALSE
  ustrata <- NULL

  strt.col.name <- names(coxfit$bl.surv[, !names(coxfit$bl.surv) %in% c("time", "surv"), drop = FALSE])
  if(length(strt.col.name) > 0) {
  strt.var <- newdata[, strt.col.name]
  #strata.new = paste0(strt.col.name, '=', strt.var)

  survlist <- list(survlist[[strt.var]])
  strt.val <- strsplit(newdata[[strt.col.name]], "=")[[1]][2]
  newdata[[strt.col.name]] <- as.numeric(strt.val)
  }
  step_1 <- gsub("factor", "", names(newdata))
  step_2 <- gsub("\\(", "", step_1)
  step_3 <- gsub("\\)", "", step_2)
  names(newdata) <- step_3
  Call <- match.call()
  Call[[1]] <- as.name("survfit")
  individual = FALSE
  if (!individual)  {
    Terms2 <- delete.response(Terms)
    y2 <- NULL  # a dummy to carry along, for the call to coxsurv.fit
  }

  if (is.vector(newdata, "numeric")) {
    if (individual) stop("newdata must be a data frame")
    if (is.null(names(newdata))) {
      stop("Newdata argument must be a data frame")
    }
    newdata <- data.frame(as.list(newdata), stringsAsFactors=FALSE)
  }  else if (is.list(newdata)) newdata <- as.data.frame(newdata)
  if (has.strata) {
    found.strata <- TRUE
    tempenv <- new.env(, parent=emptyenv())
    assign("strata", function(..., na.group, shortlabel, sep)
      list(...), envir=tempenv)
    assign("list", list, envir=tempenv)
    for (svar in stangle$vars) {
      temp <- try(eval(parse(text=svar), newdata, tempenv),
                  silent=TRUE)
      if (!is.list(temp) ||
          any(unlist(lapply(temp, class))== "function"))
        found.strata <- FALSE
    }

    if (!found.strata) {
      ss <- untangle.specials(Terms2, "strata")
      Terms2 <- Terms2[-ss$terms]
    }
  }

  tcall <- Call[c(1, match(c('id', "na.action"),
                           names(Call), nomatch=0))]
  tcall$data <- newdata
  tcall$formula <- Terms2
  tcall$xlev <- object$xlevels[match(attr(Terms2,'term.labels'),
                                     names(object$xlevels), nomatch=0)]
  tcall[[1L]] <- quote(stats::model.frame)
  mf2 <- eval(tcall)

  if (has.strata && found.strata) { #pull them off
    temp <- untangle.specials(Terms2, 'strata')

    if (length(temp$terms) >0) Terms2 <- Terms2[-temp$terms]
  } else strata2 <- factor(rep(0, nrow(mf2)))

  offset2 <- model.offset(mf2)
  if (length(offset2)==0 ) offset2 <- 0
  # a model with only an offset, but newdata containing a value for it
  if (length(object$means)==0) {x2 <- 0
  } else x2 <- model.matrix(Terms2, mf2)[,-1, drop=FALSE]

  if (length(object$means)) {
    risk2 <- exp(c(x2 %*% beta) + offset2 - xcenter)
  } else risk2 <- exp(offset2 - xcenter)

  expand <- function(fit, x2, varmat, se.fit) {
    if (survtype==1) {
      surv <- cumprod(fit$surv)
    } else surv <- exp(-fit$cumhaz)

    if (is.matrix(x2) && nrow(x2) >1) {  #more than 1 row in newdata
      fit$surv <- outer(surv, risk2, '^')
      dimnames(fit$surv) <- list(NULL, row.names(x2))
      if (se.fit) {
        varh <- matrix(0., nrow=length(fit$varhaz), ncol=nrow(x2))
        for (i in 1:nrow(x2)) {
          dt <- outer(fit$cumhaz, x2[i,], '*') - fit$xbar
          varh[,i] <- (cumsum(fit$varhaz) + rowSums((dt %*% varmat)* dt))*
            risk2[i]^2
        }
        fit$std.err <- sqrt(varh)
      }
      fit$cumhaz <- outer(fit$cumhaz, risk2, '*')
    }
    else {
      fit$surv <- surv^risk2
      if (se.fit) {
        dt <-  outer(fit$cumhaz, c(x2)) - fit$xbar
        varh <- (cumsum(fit$varhaz) + rowSums((dt %*% varmat)* dt)) *
          risk2^2
        fit$std.err <- sqrt(varh)
      }
      fit$cumhaz <- fit$cumhaz * risk2
    }
    fit
  }
  result <- lapply(survlist, expand, x2, varmat, se.fit)
  unlist = TRUE
  if (unlist) {
    if (length(result)==1) { # the no strata case
      if (se.fit) {
        result = result[[1]][c("n", "time", "n.risk", "n.event", "n.censor",
                               "surv", "cumhaz", "std.err")]
        return(result)
      } else {result[[1]][c("n", "time", "n.risk", "n.event", "n.censor",
                            "surv", "cumhaz")]}
    }
    else {
      temp <-list(n   =    unlist(lapply(result, function(x) x$n),
                                  use.names=FALSE),
                  time=    unlist(lapply(result, function(x) x$time),
                                  use.names=FALSE),
                  n.risk=  unlist(lapply(result, function(x) x$n.risk),
                                  use.names=FALSE),
                  n.event= unlist(lapply(result, function(x) x$n.event),
                                  use.names=FALSE),
                  n.censor=unlist(lapply(result, function(x) x$n.censor),
                                  use.names=FALSE),
                  strata = sapply(result, function(x) length(x$time)))
      names(temp$strata) <- names(result)


        temp$surv <- t(matrix(unlist(lapply(result,
                                            function(x) t(x$surv)), use.names=FALSE),
                              nrow= nrow(x2)))
        dimnames(temp$surv) <- list(NULL, row.names(x2))
        temp$cumhaz <- t(matrix(unlist(lapply(result,
                                              function(x) t(x$cumhaz)), use.names=FALSE),
                                nrow= nrow(x2)))
        if (se.fit)
          temp$std.err <- t(matrix(unlist(lapply(result,
                                                 function(x) t(x$std.err)), use.names=FALSE),
                                   nrow= nrow(x2)))

      return(temp)
    }
  }
  else {
    names(result) <- ustrata
    return(result)
  }
}

#' Get confidence intervals for predicted survival curves
#'
#' Creates confidence levels for plotting predicted survival curves.
#'
#' @param p Vector of survival probabilities
#' @param se Vector of standard errors
#' @param conf.type Type of confidence interval, includes 'plain', 'log',
#' 'log-log', 'logit', and 'arcsin'.
#' @param conf.int The level for two-sided confidence interval on the predicted
#' survival curve, default is 0.95.
#' @param ulimit Should upper bound be limited to 1, default is 'TRUE'
#' @returns list of length two, containing the lower and upper confidence levels
#'
#' @examplesIf interactive()
#' library(survival)
#' library(shinyCox)
#' colondeaths <- colon[colon$etype == 2, ]
#' split_colon <- split(colondeaths, colondeaths$rx)
#'
#' colon_arm1 <- split_colon$Obs
#' colon1ph <- coxph(Surv(time, status) ~ factor(extent) + nodes + strata(surg)
#'                   + factor(differ),
#'                   colon_arm1,
#'                   x = TRUE, model = TRUE)
#'
#' new.data = cbind.data.frame(`factor(extent)` = 3,
#'                          `surg` = "surg=0",`factor(differ)` = 2,`nodes` = 5)
#'
#'
#' coxfit = prep_coxfit(colon1ph)
#' coxlist = surv_pred_info(colon1ph)
#'
#' for_ci = predict_se(coxlist, coxfit, new.data)
#'
#' get_confint(for_ci$surv, for_ci$std.err, conf.int = 0.95,
#'             conf.type = "log-log")
#'
#' @export
get_confint <- function(p, se, conf.type, conf.int, ulimit=TRUE) {
  zval <- qnorm(1- (1-conf.int)/2, 0,1)
  scale <- 1.0

  if (conf.type=='plain') {
    se2 <- se* p * zval  # matches equation 4.3.1 in Klein & Moeschberger
    if (ulimit) list(lower= pmax(p -se2*scale, 0), upper = pmin(p + se2, 1))
    else  list(lower= pmax(p -se2*scale, 0), upper = p + se2)
  }
  else if (conf.type=='log') {
    #avoid some "log(0)" messages
    xx <- ifelse(p==0, NA, p)
    se2 <- zval* se
    temp1 <- exp(log(xx) - se2*scale)
    temp2 <- exp(log(xx) + se2)
    if (ulimit) list(lower= temp1, upper= pmin(temp2, 1))
    else  list(lower= temp1, upper= temp2)
  }
  else if (conf.type=='log-log') {
    xx <- ifelse(p==0 | p==1, NA, p)
    se2 <- zval * se/log(xx)
    temp1 <- exp(-exp(log(-log(xx)) - se2*scale))
    temp2 <- exp(-exp(log(-log(xx)) + se2))
    list(lower = temp1 , upper = temp2)
  }
  else if (conf.type=='logit') {
    xx <- ifelse(p==0, NA, p)  # avoid log(0) messages
    se2 <- zval * se *(1 + xx/(1-xx))

    temp1 <- 1- 1/(1+exp(log(p/(1-p)) - se2*scale))
    temp2 <- 1- 1/(1+exp(log(p/(1-p)) + se2))
    list(lower = temp1, upper=temp2)
  }
  else if (conf.type=="arcsin") {
    xx <- ifelse(p==0, NA, p)
    se2 <- .5 *zval*se * sqrt(xx/(1-xx))
    list(lower= (sin(pmax(0, asin(sqrt(xx)) - se2*scale)))^2,
         upper= (sin(pmin(pi/2, asin(sqrt(xx)) + se2)))^2)
  }
  else stop("invalid conf.int type")
}

agsurv <- utils::getFromNamespace("agsurv", "survival")

#' For  use in internal [surv_pred_info()] function, borrowed from `survival`
#' package
#'
#' @param y Surv object
#' @param id vector of ids coinciding with y.
#' @noRd
survflag <- function(y, id) {
  if (!inherits(y, "Surv")) stop("y must be a Surv object")
  if (nrow(y) != length(id)) stop("length mismatch")
  if (ncol(y) != 3) stop("y needs to be of (tstart, tstop) form")

  n <- nrow(y)
  indx <- order(id, y[,2])  # sort the data by time within id
  y2 <- y[indx,]
  id2 <- id[indx]

  newid <- (id2[-n] != id2[-1])
  gap <-  (y2[-n,2] < y2[-1,1])

  flag <- 1L*c(TRUE, newid | gap) + 2L*c(newid | gap, TRUE)
  flag[indx] <- flag   # return it to data order
  flag
}



#' Model.matrix method for coxph models
#'
#' Reconstruct the model matrix for a cox model
#' @noRd
model.matrix.coxph <- function(object, data=NULL,
                               contrast.arg=object$contrasts, ...) {
  #
  # If the object has an "x" component, return it, unless a new
  #   data set is given
  if (is.null(data) && !is.null(object[['x']]))
    return(object[['x']]) #don't match "xlevels"

  Terms <- delete.response(object$terms)
  if (is.null(data)) mf <- stats::model.frame(object)
  else {
    if (is.null(attr(data, "terms")))
      mf <- stats::model.frame(Terms, data, xlev=object$xlevels)
    else mf <- data  #assume "data" is already a model frame
  }

  cluster <- attr(Terms, "specials")$cluster
  if (length(cluster)) {
    temp <- untangle.specials(Terms, "cluster")
    dropterms <- temp$terms
  }
  else dropterms <- NULL

  strats <- attr(Terms, "specials")$strata
  hasinteractions <- FALSE
  if (length(strats)) {
    stemp <- untangle.specials(Terms, 'strata', 1)
    if (length(stemp$vars)==1) strata.keep <- mf[[stemp$vars]]
    else strata.keep <- strata(mf[,stemp$vars], shortlabel=TRUE)
    istrat <- as.integer(strata.keep)

    for (i in stemp$vars) {  #multiple strata terms are allowed
      # The factors attr has one row for each variable in the frame, one
      #   col for each term in the model.  Pick rows for each strata
      #   var, and find if it participates in any interactions.
      if (any(attr(Terms, 'order')[attr(Terms, "factors")[i,] >0] >1))
        hasinteractions <- TRUE
    }
    if (!hasinteractions) dropterms <- c(dropterms, stemp$terms)
  } else istrat <- NULL


  if (length(dropterms)) {
    Terms2 <- Terms[-dropterms]
    X <- model.matrix(Terms2, mf, constrasts.arg=contrast.arg)
    # we want to number the terms wrt the original model matrix
    temp <- attr(X, "assign")
    shift <- sort(dropterms)
    for (i in seq(along.with=shift))
      temp <- temp + 1*(shift[i] <= temp)
    attr(X, "assign") <- temp
  }
  else X <- model.matrix(Terms, mf, contrasts.arg=contrast.arg)

  # drop the intercept after the fact, and also drop strata if necessary
  Xatt <- attributes(X)
  if (hasinteractions) adrop <- c(0, untangle.specials(Terms, "strata")$terms)
  else adrop <- 0
  xdrop <- Xatt$assign %in% adrop  #columns to drop (always the intercept)
  X <- X[, !xdrop, drop=FALSE]
  attr(X, "assign") <- Xatt$assign[!xdrop]
  attr(X, "contrasts") <- Xatt$contrasts
  X
}
Any scripts or data that you put into this service are public.
shinyCox documentation built on April 3, 2025, 9:35 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
shinyCox
Create 'shiny' Applications for Cox Proportional Hazards Models

R/confints.R
In shinyCox: Create 'shiny' Applications for Cox Proportional Hazards Models

Defines functions model.matrix.coxph survflag get_confint predict_se surv_pred_info

Documented in get_confint predict_se surv_pred_info

Try the shinyCox package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

shinyCox Create 'shiny' Applications for Cox Proportional Hazards Models

R/confints.R In shinyCox: Create 'shiny' Applications for Cox Proportional Hazards Models

Defines functions model.matrix.coxph survflag get_confint predict_se surv_pred_info

Documented in get_confint predict_se surv_pred_info

Try the shinyCox package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

shinyCox
Create 'shiny' Applications for Cox Proportional Hazards Models

R/confints.R
In shinyCox: Create 'shiny' Applications for Cox Proportional Hazards Models