R/VerifyModels.R

Defines functions commonFormula rmCov

# Verify input 'models'
#
# method is not exported and is for internal convenience only
#
# ensures that 'models' is provided as a formula or a list of formula,
# that the provided models can be generated by the data, and extracts the
# survival response variable. 
#
# successful methods return a list containing the formula(s) and
# the survival response variable(s)
#
setGeneric(name = ".VerifyModels",
           def = function(models, ...) { standardGeneric(".VerifyModels") })

#-------------------------------------------------------------------------------
# the default method generates an error
#-------------------------------------------------------------------------------
setMethod(f = ".VerifyModels",
          signature = c(models = "ANY"),
          definition = function(models, ...) { 
              stop("models must be a formula object or a list of formula objects")
            })

#-------------------------------------------------------------------------------
# method returns a list containing "models", the models for each decision,  
# "response", the survival response as a single column matrix, and
# "delta", the event status as a single column matrix
#-------------------------------------------------------------------------------
#' @importFrom stats as.formula
setMethod(f = ".VerifyModels",
          signature = c(models = "formula"),
          definition = function(models, ..., 
                                nDP, 
                                data, 
                                txName, 
                                stageLabel, 
                                usePrevTime) { 

              # ensure that model frame can be generated from provided data
              mf <- tryCatch(expr = model.frame(formula = models, 
                                                data = data, 
                                                na.action = na.pass),
                             error = function(e){
                                       message("unable to create model frame for model ",
                                               deparse(expr = models), "\n", e$message)
                                       return( NULL )
                                     })

              if (is.null(x = mf) && nDP > 1L) {

                # assume that a failure indicates a common formula
                models <- commonFormula(models = models, 
                          nDP = nDP, 
                          data = data, 
                          txName = txName, 
                          stageLabel = stageLabel, 
                          usePrevTime = usePrevTime) 

                # call method for list of models
                return( .VerifyModels(models = models, 
                                      nDP = nDP, 
                                      data = data, 
                                      txName = txName) )

              }

              # formula objects can only be provided for single decision point
              # or for common formula
              if (nDP != 1L) {
                stop("insufficient number of models provided", call. = FALSE)
              }

              # ensure that model frame can be generated from provided data
              mf <- tryCatch(expr = model.frame(formula = models, 
                                                data = data, 
                                                na.action = na.pass),
                             error = function(e){
                                       stop("unable to create model frame for model ",
                                            deparse(expr = models), "\n", e$message,
                                            call. = FALSE)
                                     })

              if (!is(object = mf[,1], class2 = "Surv")) {
                stop("models must be survival models; use Surv()", 
                     call. = FALSE)
              }

              # extract survival response
              resp <- matrix(data = model.response(data = mf)[,1L], ncol = 1L)

              # extract event
              del <- matrix(data = model.response(data = mf)[,2L], ncol = 1L)

              return( list("models" = models, "response" = resp, "delta" = del) )
            })

# internal function identifies element of dataNames that include the provided
# label and removes all instances of that label from the dataNames object
rmCov <- function(dataNames, label, stageLabel, nDP) {

  # split remaining data names at stageLabel
  # a list is created, each element contains 1 or more elements
  # the first is the covariate name before stageLabel
  # the second, if present, is the first component after the
  # first stageLabel
  # the third, if present, means that there are multiple stageLabels
  #   in the covariate name -- this is no longer allowed
  cov <- strsplit(x = dataNames, split = stageLabel, fixed = TRUE)

  tst <- sapply(X = cov, FUN = length) > 2L
  if (any(tst)) {
    stop("data headers cannot contain multiple instances of the stage label", 
         call. = FALSE)
  }

  # identify the label
  areLabel <- sapply(X = cov, FUN = function(x){x[1L] == label})

  if (sum(areLabel) < nDP) {
    stop("insufficient number of ", label, "variables in data",
         call. = FALSE)
  }

  # remove label's from dataNames
  dataNames <- dataNames[!areLabel]

  return( dataNames )
}


commonFormula <- function(models, ..., 
                          nDP, 
                          data, 
                          txName, 
                          stageLabel, 
                          usePrevTime) { 

  message("assuming a common formula")

  # extract y and delta labels from lhs of formula
  yLabel <- deparse(expr = models[[ 2L ]][[ 2L ]])
  dLabel <- deparse(expr = models[[ 2L ]][[ 3L ]])

  # create lhs of models
  resp <- paste0("Surv(", yLabel, stageLabel, 1L:nDP, ",", 
                          dLabel, stageLabel, 1L:nDP, ")~")

  # extract covariate labels from rhs of formula
  xLabels <- attr(x = terms(x = models), which = "term.labels")

  # extract tx variable label from txName vector
  txLabel <- strsplit(x = txName, split = stageLabel, fixed = TRUE)
  txLabel <- sapply(X = txLabel, function(x){ x[1L] })
  if (!all(txLabel %in% txLabel[1L])) {
    stop("tx names do not have a common label", call. = FALSE)
  }
  txLabel <- txLabel[1L]

  # check to see if  treatment is in formula
  txInXLabels <- sum(xLabels %in% txLabel)

  # if more than 1 label in xLabels matches treatment, there is a problem
  if (txInXLabels > 1L) {
    stop("unable to interpret model", call. = FALSE)
  }

  # if tx variable in model, remove from covariate label vector
  if (txInXLabels == 1L) {
    xLabels <- xLabels[!(xLabels %in% txLabel)]
  }

  # extract headers from data
  dataNames <- colnames(x = data)

  # remove txNames from dataNames
  dataNames <- rmCov(dataNames = dataNames, 
                     label = txLabel, 
                     stageLabel = stageLabel, 
                     nDP = nDP)

  # remove response from dataNames
  dataNames <- rmCov(dataNames = dataNames, 
                     label = yLabel, 
                     stageLabel = stageLabel, 
                     nDP = nDP)

  # remove event indicators from dataNames
  dataNames <- rmCov(dataNames = dataNames, 
                     label = dLabel, 
                     stageLabel = stageLabel, 
                     nDP = nDP)

  # dataNames now only contains covariate names

  # if tx label in formula, add back dp component and include in
  # rhs of models
  if (txInXLabels) {
    mod <- as.list(x = paste0(txLabel, stageLabel, 1L:nDP))
  } else {
    mod <- vector(mode = "list", length = nDP)
  }

  # if non-tx covariates in rhs, extract, add decision point if appropriate 
  # and add to rhs
  if (length(x = xLabels) > 0L) {

    # split remaining data names at stageLabel
    cov <- strsplit(x = dataNames, split = stageLabel, fixed = TRUE)

    # extract label component
    covLabels <- sapply(X = cov, FUN = function(x){x[1L]})

    # identify which covLabels are in model
    covInModel <- covLabels %in% xLabels

    # if data is (X1, X2, X3) as baseline covariates xLabels will be
    # ("X1","X2","X3")
    # if data is (X.1, X.2, X.3) as dp covariates, xLabels will be "X"
    if (sum(covInModel) > length(x = xLabels)) {

      # implies that there are decision point values concatenated
      for (i in 1L:nDP) {
        stageCovs <- paste0(xLabels, stageLabel, i)
        # this allows for covariates that are not defined in other stages
        useCovs <- stageCovs %in% dataNames
        mod[[ i ]] <- c(mod[[ i ]], stageCovs[useCovs])
      }
                    
    } else {
      # implies that covariates are baseline
      for (i in 1L:nDP) {
        mod[[ i ]] <- c(mod[[ i ]], xLabels)
      }
    }
  }

  if (usePrevTime) {
    # if previous times are to be included in model, add to rhs
    for (i in 2L:nDP) {
      pTime = paste(paste0(yLabel, stageLabel, 1L:{i-1}), collapse = "+")
      pTime = paste0("I(",pTime,")")
      mod[[ i ]] <- c(mod[[ i ]], pTime)
    }
  }

  message("models identified as ")

  mods <- list()
  for (i in 1L:nDP) {
    message("\t", paste0(resp[i], paste(mod[[ i ]], collapse="+")))
    mods[[ i ]] <- stats::as.formula(paste0(resp[i], 
                                     paste(mod[[ i ]], collapse="+")))

  }

  return( mods )

}

#-------------------------------------------------------------------------------
# method to ensure that the number of models provided is appropriate, that
# each model can be generated from the provided data, and to extract the
# survival response variables.
#-------------------------------------------------------------------------------
# method returns a list containing "models", the unaltered model, 
# "response", the survival response as a nDP column matrix, and
# "delta", the event status as a nDP column matrix
#-------------------------------------------------------------------------------
setMethod(f = ".VerifyModels",
          signature = c(models = "list"),
          definition = function(models, ..., nDP, data) { 

              # a model must be provided for each decision point
              if (length(x = models) != nDP) {
                stop("insufficient number of models", call. = FALSE)
              }

              # ensure that each element of the list is a formula and extract
              # the response variable
              resp <- NULL
              del <- NULL
              for (i in 1L:nDP) {
                tst <- .VerifyModels(models = models[[ i ]], 
                                     nDP = 1L,  
                                     data = data)
                models[[ i ]] <- tst$models
                resp <- cbind(resp, tst$resp)
                del <- cbind(del, tst$del)
              }

              return( list("models" = models, "response" = resp, "delta" = del) )
            })

Try the dtrSurv package in your browser

Any scripts or data that you put into this service are public.

dtrSurv documentation built on June 7, 2022, 5:07 p.m.