R/semiParametric.R

Defines functions getMaxEndpointTime extractCumHazData

Documented in getMaxEndpointTime

##' @include survivalData.R
NULL

setOldClass("coxph")
setOldClass("survfit")
setClassUnion("S4coxph", members=c("coxph","NULL"))

##' SemiParametric class definition
##' @details Class storing the fitted Cox semi-parametric model and the
##' Kaplan Meier estimate (from survfit) for a given subgroup and endpoint
##' for a SurvivalData object.
##' @slot cox (coxph or NULL) The Cox model fit for the given subgroup and endpoint fitted using \code{coxph}
##' and no stratifiation or covariate variables. NULL if single arm trial 
##' @slot coxWithStrata (coxph or Null) The Cox model fit for the given subgroup and endpoint fitted 
##' using \code{coxph} and the selected stratifiation and covariate variable. 
##' If no strata included then this is the same as slot cox. NULL if single arm trial and no covariates/strata
##' @slot km (survfit) The Kaplan-Meier estimator associated with the given subgroup and endpoint
##' generated by calling \code{survfit}. Note this estimator does NOT include the covariates
##' and strata included with the Cox model fit
##' @slot survData (SurvivalData) object used when fitting the models. NOTE: if a subgroup
##' was selected when creating this object then only subjects in the given subgroup who have data for the
##' given endpoint will be included in this object
##' @slot subgroup (character) which subgroup the SemiParametricModel is associated with (or \code{NA}
##' if not associated with any)
##' @slot endPoint (character) which endpoint the SemiParametricModel is associated with
##' @slot covariates (character vector) names of covariates used when fitting the Cox model
##' @slot strata (character vector) names of covariates to be used to stratify the Cox model fit
##' @slot endPointDef (list) defines name, time and censor column for the fitted
##'       endpoint
##' @seealso \code{fitSemiParametric} for further information
##' @export
setClass("SemiParametricModel",
         slots = list(cox="S4coxph",
                      coxWithStrata="S4coxph",
                      km="survfit",
                      survData = "SurvivalData",
                      subgroup = "character",
                      endPoint = "character",
                      covariates="character",
                      strata="character",
                      endPointDef="list"))

##' Method to fit semi-parametric model (Cox) and Kaplan-Meier
##' estimator to a SurvivalData object
##' @rdname fitSemiParametric-methods
##' @name fitSemiParametric
##' @param object (SurvivalData) The object to fit the semi-parametric model
##' and calculate the KM estimator
##' @param ... additional argument
##' @export
setGeneric( "fitSemiParametric", function(object,...)
  standardGeneric("fitSemiParametric"))


##' @name fitSemiParametric
##' @aliases fitSemiParametric,SurvivalData-method
##' @rdname fitSemiParametric-methods
##' @param endPoint (character) which endpoint the SemiParametricModel is associated with
##' @param subgroup (character) which subgroup the SemiParametricModel is associated with (or \code{NA}
##' if not associated with any)
##' @param covariates (character vector) names of covariates used when fitting the Cox model if any
##' @param strata (character vector) names of covariates to be stratified when fitting the Cox model
##' @param conf.type ("none", "plain", "log" [default], "log-log") argument passed to survfit
##' @param ties ("efron", "breslow" (default), "exact") argument passed to Coxph
##' @export
setMethod("fitSemiParametric", signature(object="SurvivalData"),
  function(object, endPoint, subgroup = as.character(NA), covariates=character(0), strata=character(0),
           conf.type=c("none", "plain", "log", "log-log")[3], ties=c("efron", "breslow", "exact")[2]){

    if(length(conf.type)!=1 || !conf.type %in% c("none", "plain", "log", "log-log")){
      stop("Invalid conf.type argument")
    }
    
    if(length(ties)!=1 || !ties %in% c("efron", "breslow", "exact")){
      stop("Invalid ties argument")
    }
    
    
    if (!all(vapply(covariates, is.character, FUN.VALUE = TRUE))){
      stop("covariates must be a vector of character strings")
    }
    hasCovariates(object, covariates)

    if (!all(vapply(strata, is.character, FUN.VALUE = TRUE))){
      stop("strata must be a vector of character strings")
    }
    hasCovariates(object, strata)


    #validate endpoint and subgroup
    if (!is.character(endPoint) || length(endPoint) != 1){
      stop("endPoint must be a character string")
    }

    #endPoint matches one of the known endpoints (object@endPoints)
    # Look up relevant column names for the specified endpoint
    if(!endPoint %in% names(object@endPoints)){
      stop("Invalid endpoint argument, must be one of: ", paste(names(object@endPoints), collapse=", "))
    }

    if (!is.na(subgroup) && (!is.character(subgroup) || length(subgroup) != 1)){
      stop("subgroup must be either NA or a character string")
    }


    allowedSubgroups <- listColumnDefSlot(object@subgroupDef,"columnName")
    if(!is.na(subgroup) && !subgroup %in% allowedSubgroups){
      stop(paste("subgroup argument must be one of: ",paste(allowedSubgroups, collapse = ", ")))
    }

    # Look up relevant column names for the specified endpoint
    endPointDef <- object@endPoints[[endPoint]]

     # Create new SurvivalData object with data for this subgroup only
    survData <- object
    survData@subject.data <- extractSubgroup(object@subject.data, subgroup)

    #remove subjects who have missing endpoint data for the given endpoint
    haveData <- !is.na(survData@subject.data[, endPointDef$timeCol])
    survData@subject.data <- survData@subject.data[haveData, ] 
    
    
    # Check there is data on all arms, after subsetting
    armCol <- survData@armDef@columnName
    armValues <- levels(survData@armDef@categories)
    hasData <- vapply(armValues,
                      function(a){any(survData@subject.data[, armCol] == a)},
                      FUN.VALUE = FALSE)
    if (!all(hasData)){
      stop(paste0("The following arms contain no data in subgroup '", subgroup, "': ",
                  paste0(armValues[!hasData], collapse = ", ")))
    }

    # Check each arm has events
    censorCol <- endPointDef[["censorCol"]]
    hasEvents <- vapply(armValues,
                        function(a){any(!survData@subject.data[survData@subject.data[, armCol] == a, censorCol])},
                        FUN.VALUE = FALSE)
    if (!all(hasEvents)){
      stop(paste0("The following arms have no events in subgroup '", subgroup, "': ",
                  paste0(armValues[!hasEvents], collapse = ", ")))
    }

    # Create formula for Kaplan-Meier estimator
    formulaToFit <- survivalFormula(armAsFactor=!isSingleArm(object),
                                    covariates=character(0),
                                    strata=character(0),
                                    timeCol = endPointDef[["timeCol"]],
                                    censorCol = endPointDef[["censorCol"]])

    km <- survfit(formulaToFit,data=survData@subject.data, conf.type=conf.type)
    cox <- coxph(formulaToFit,
                 data=survData@subject.data, 
                 ties=ties, model=TRUE)

    #fit Cox with strata
    #see note in code of coxphLogRankTest for possible refactoring 
    coxWithStrata <- cox
    if(length(strata)!=0 || length(covariates)!=0){
      formulaToFit <- survivalFormula(armAsFactor=!isSingleArm(object),
                                      covariates=covariates,
                                      strata=strata,
                                      timeCol = endPointDef[["timeCol"]],
                                      censorCol = endPointDef[["censorCol"]])
      coxWithStrata <- coxph(formulaToFit,
                             data=survData@subject.data, 
                             ties=ties, model=TRUE)
    }
    
    #current coxph.null is not exported by survival so 
    #cannot store single arm coxph models
    if("coxph.null" %in% class(cox)) cox <- NULL
    if("coxph.null" %in% class(coxWithStrata)) coxWithStrata <- NULL

    new("SemiParametricModel",
        cox=cox,
        coxWithStrata=coxWithStrata,
        km=km,
        survData=survData,
        subgroup=subgroup,
        covariates=covariates,
        strata=strata,
        endPoint=endPoint,
        endPointDef = endPointDef)
  }
)



##' @name isSingleArm
##' @aliases isSingleArm,SemiParametricModel-method
##' @rdname isSingleArm-methods
##' @export
setMethod("isSingleArm", signature(object="SemiParametricModel"),
  function(object){
    isSingleArm(object@survData)
  }
)


##' @rdname getEndpointUnits-methods
##' @aliases getEndpointUnits,SemiParametricModel-methods
##' @export
setMethod("getEndpointUnits", signature(object="SemiParametricModel"),
  function(object){
    getEndpointUnits(object@survData)  
  }
)


##' Method to extract the logrank test from the Cox model
##' fit for the SemiParametricModel 
##' @details This uses the strata and
##' covariates parameters used when creating the SemiParametricModel object.
##' The "ties" argument used by coxph is the value used when creating the SemiParametric object
##' @name coxphLogRankTest
##' @rdname coxphLogRankTest-methods
##' @param object (SemiParametricModel) The object which was created when
##' fitting the Cox model
##' @return A dataframe containing the log rank test results
##' @export
setGeneric("coxphLogRankTest", function(object){
  standardGeneric("coxphLogRankTest")
})

##' @name coxphLogRankTest
##' @aliases coxphLogRankTest,SemiParametricModel-method
##' @rdname coxphLogRankTest-methods
##' @export
setMethod("coxphLogRankTest", "SemiParametricModel",function(object){
  #at the moment the only options needed are no strata or a fixed chosen
  #set of stratification variables therefore the slight technical debt gained
  #by changing the previous code to add coxWithStrata slot is deemed acceptable. 
  #If  an additional requirement of being able to select multiple sets of 
  #stratification variables then cox and coxWith strata should be removed
  #from the object and a strata parameter included in this function
  if(isSingleArm(object)){
    stop("Cannot calculate logrank test for a single arm trial")
  }
  
  if(length(object@strata)==0){
    return(as.data.frame(t(data.frame(summary(object@cox)$sctest))))
  }
  result <- cbind(data.frame(summary(object@cox)$sctest),
        data.frame(summary(object@coxWithStrata)$sctest))
  
  colnames(result) <- c("No strata/covariates", "With strata/covariates")
  as.data.frame(t(result))
})


##' Method to calculate Cox-Snell residual
##' @details This is calculated using surv(t, e) ~ arm, with no
##' covariates or strata. Data is the subset of data contained in the
##' SemiParametricModel object used and the "ties" argument used by coxph is the value used 
##' when creating the SemiParametric object
##' @name coxSnellRes
##' @rdname coxSnellRes-methods
##' @param object (SemiParametricModel) The object which was created when
##' fitting the Cox model
##' @param ... additional parameters needed for specific instances of this
##' generic
##' @return (survfit object) contains call and residuals
##' @export
setGeneric("coxSnellRes", function(object){
  standardGeneric("coxSnellRes")
})

##' @name coxSnellRes
##' @aliases coxSnellRes,SemiParametricModel-method
##' @rdname coxSnellRes-methods
##' @export
setMethod("coxSnellRes", "SemiParametricModel", function(object){

  if(isSingleArm(object)){
    stop("Cannot calculate Cox-Snell resiudals for a one arm trial!")
  }
  
  #Extract event indicators
  censorCol    <- object@endPointDef[["censorCol"]]
  censorValues <- object@survData@subject.data[, censorCol]
  hasEvent     <- !censorValues

  coxsnell.res <- hasEvent - resid(object@cox, type="martingale")

  survfit(coxph(Surv(coxsnell.res, hasEvent) ~ 1,
                ties=object@cox$method),
                type="aalen")
})

##' Method to calculate max endpoint time for semiparametric data
##' @name getMaxEndpointTime
##' @aliases getMaxEndpointTime,SemiParametricModel-method
##' @rdname getMaxEndpointTime-methods
##' @export
getMaxEndpointTime <- function(object){
  
  timeCol    <- object@endPointDef[["timeCol"]]
  timeValues <- object@survData@subject.data[, timeCol]
  
  round(max(timeValues), digits=2)  
  }


# For each arm, extract the data frame of time v probability of survival from
# the survfit object km and rename the "Arm" column of the data frames
# as the armNames (as survfit mangles these)
#
# If outputCI is TRUE then confidence intervals are output
# in the data frames, note these will use the confidence
# level calculated by the model fit (the conf.int argument to survfit
# when km was created, which by default is 0.95)
# Return as a list of data frames, one per arm
extractCumHazData <- function(km, armNames, outputCI=FALSE, isSingleArm){

  #create the KM data for a single arm - named armName,
  #where indexes is a logical vector of which rows of the km object
  #are required
  createOneArmData <- function(indexes, armName){
    #cumulative hazard
    S <- km$surv[indexes]
    #time
    t <- km$time[indexes]
    
    #confidence intervals
    lower <- km$lower[indexes]
    upper <- km$upper[indexes]
    
    # Curve must start at t = 0, S = 1
    if (length(t) < 1 || t[1] != 0){
      t <- c(0, t)
      S <- c(1, S)
      upper <- c(1, upper)
      lower <- c(1, lower)
    }
    
    data <- data.frame(t=t,
                       S=S,
                       Arm=rep(armName, length(S)))
    if(outputCI){
      data <- cbind(data,lower=lower,upper=upper)
    }
    
    data  
  }
  
  #handle the single arm case here 
  if(isSingleArm){
    return(list(createOneArmData(indexes=TRUE, armName=armNames[1])))
  }
  
  #Have to convert km$strata (a named vector saying arm 1 is first
  #i rows of data table, arm 2 is next y rows) to a vector with x
  #elements of "arm1" followed by y elements of "arm2"
  strataIndex <- unlist(lapply(seq_along(km$strata),function(i){
    rep(names(km$strata)[i],km$strata[i])
  }))

  #using strataIndex, split the km$surv, km$time vectors
  #into separate arms
  #
  # Result is a list of data frames (one per stratum)
  lapply(seq_along(names(km$strata)), function(stratCounter){
    strat <- names(km$strata)[stratCounter]
    createOneArmData(index=(strataIndex==strat),  armName=armNames[stratCounter])
  })
}
scientific-computing-solutions/sibyl documentation built on May 21, 2019, 8:40 a.m.