Nothing
#' Cure Rate Model with pseudo-observation approach
#'
#' Fits either a mixture cure model or a bounded cumulative hazard (promotion time) model
#' with pseudo-observation approach.
#'
#' @param formula1 A formula object starting with \code{~} for the model formula.
#' This specifies the covariates in the incidence component and the long-term component under
#' the mixture cure model and the bounded cumulative model, respectively.
#' @param formula2 A formula object starting with \code{~} for the model formula.
#' This specifies the covariates in the latency component and the short-term component under
#' the mixture cure model and the bounded cumulative model, respectively.
#' @param time A numeric vector for the observed survival times.
#' @param status A numeric vector for the event indicator;
#' 0 indicates right-censoring and 1 indicates events.
#' @param data An optional data frame that contains the covariates and response variables
#' (\code{time} and \code{event}).
#' @param subset An optional logical vector specifying a subset of
#' observations to be used in the fitting process.
#' @param model A character string specifying the underlying model.
#' The available functional form are \code{"mixture"} and \code{"promotion"}
#' correspond to the mixture cure model and the bounded cumulative model, respectively.
#' @param t0 A vector of times, where the pseudo-observations are constructed.
#' When not specified, the default values are the 10, 20, ..., 90th percentiles of
#' uncensored event times.
#' @param lambda1,lambda2 An option for specifying the tuning parameter used in penalization.
#' When this is unspecified or has a \code{NULL} value,
#' penalization will not be applied and \code{pCure()} will uses all covariates
#' specified in the formulas.
#' Alternatively, this can be specified as a vector numeric vector of non-negative values
#' or "auto" for auto selection.
#' @param penalty1,penalty2 A character string specifying the penalty function.
#' The available options are \code{"lasso"} and \code{"scad"}.
#' @param exclude1,exclude2 A character string specifying which variables to exclude from variable selection.
#' Variables matching elements in this string will not be penalized during the variable selection process.
#' in variable selection.
#' @param nfolds An optional integer value specifying the number of folds.
#' The default value is 5.
#' @param control A list of control parameters. See detail.
#'
#' @return An object of class \code{"pCure"} representing a cure model fit.
#'
#' @references Su, C.-L., Chiou, S., Lin, F.-C., and Platt, R. W. (2022)
#' Analysis of survival data with cure fraction and variable selection: A pseudo-observations approach
#' \emph{Statistical Methods in Medical Research}, \bold{31}(11): 2037--2053.
#'
#' @importFrom stats model.frame model.matrix model.extract as.formula terms
#' @importFrom stats .getXlevels formula pnorm quantile runif sd symnum coef
#'
#' @example inst/examples/ex_pCure.R
#' @export
pCure <- function(formula1, formula2, time, status, data, subset, t0,
model = c("mixture", "promotion"), nfolds = 5,
lambda1 = NULL, exclude1 = NULL, penalty1 = c("lasso", "scad"),
lambda2 = NULL, exclude2 = NULL, penalty2 = c("lasso", "scad"),
control = list()) {
model <- match.arg(model)
penalty1 <- match.arg(penalty1)
penalty2 <- match.arg(penalty2)
## terms to exclude if ~. is used
fExcl <- c(deparse(substitute(time)), deparse(substitute(status)))
## Checks and define control
if (is.null.missing(formula1) & is.null.missing(formula2))
stop("At least one 'formula' needs to be specified.")
if (missing(time)) stop("Argument 'time' is required.")
if (missing(status)) stop("Argument 'status' is required.")
if (!is.null(lambda1) && !is.character(lambda1) && any(lambda1 < 0))
stop("Positive tuning parameters ('lambda1') is required.")
if (!is.null(lambda2) && !is.character(lambda2) && any(lambda2 < 0))
stop("Positive tuning parameters ('lambda2') is required.")
if (missing(data)) {
if (is.null.missing(formula1)) data <- environment(formula1)
else data <- environment(formula2)
}
if (!missing(subset)) {
sSubset <- substitute(subset)
subIdx <- eval(sSubset, data, parent.frame())
if (!is.logical(subIdx)) stop("'subset' must be logical")
subIdx <- subIdx & !is.na(subIdx)
data <- data[subIdx, ]
}
ctrl <- pCure.control()
namc <- names(control)
if (!all(namc %in% names(ctrl)))
stop("unknown names in control: ", namc[!(namc %in% names(ctrl))])
ctrl[namc] <- control
ctrl$model <- model
ctrl$lambda1 <- lambda1
ctrl$lambda2 <- lambda2
ctrl$penalty1 <- penalty1
ctrl$penalty2 <- penalty2
ctrl$nfolds <- nfolds
if (is.null.missing(formula1)) ctrl$formula1 <- NULL
else ctrl$formula1 <- formula1
if (is.null.missing(formula2)) ctrl$formula2 <- NULL
else ctrl$formula2 <- formula2
if (is.character(lambda1) && lambda1 != "auto")
stop("Only 'auto' is allowed when 'lambda1' is a character string.")
if (is.character(lambda2) && lambda2 != "auto")
stop("Only 'auto' is allowed when 'lambda2' is a character string.")
mf <- match.call(expand.dots = FALSE)
if (is.null.missing(formula1))
mf <- mf[match(c("formula2", "data", "time", "status"), names(mf), 0L)]
else
mf <- mf[match(c("formula1", "data", "time", "status"), names(mf), 0L)]
mf$data <- data
mf$drop.unused.levels <- TRUE
mf[[1L]] <- quote(stats::model.frame)
mf <- eval(mf, parent.frame())
xlevel1 <- xlevel2 <- .getXlevels(attr(mf, "terms"), mf)
time <- as.numeric(model.extract(mf, time))
status <- as.numeric(model.extract(mf, status))
varInfo <- NULL # to prepare exclude
if (is.null.missing(formula1)) {
mm1 <- NULL
mm2 <- stats::model.matrix(formula2, data = mf)
varInfo$var2 <- attr(terms(formula2, data = mf), "term.labels")
varInfo$assign2 <- attr(mm2, "assign")
mm2 <- mm2[, colnames(mm2) != "(Intercept)", drop = FALSE]
} else {
mm1 <- mm2 <- stats::model.matrix(formula1, data = mf)
varInfo$var1 <- varInfo$var2 <- attr(terms(formula1, data = mf), "term.labels")
varInfo$assign1 <- varInfo$assign2 <- attr(mm1, "assign")
if (is.null.missing(formula2)) mm2 <- NULL
}
if (!is.null.missing(formula1) && !is.null.missing(formula2)) {
mf <- match.call(expand.dots = FALSE)
mf <- mf[match(c("formula2", "data"), names(mf), 0L)]
mf$data <- data
mf$drop.unused.levels <- TRUE
mf[[1L]] <- quote(stats::model.frame)
mf <- eval(mf, parent.frame())
xlevel2 <- .getXlevels(attr(mf, "terms"), mf)
mm2 <- stats::model.matrix(formula2, data = mf)
varInfo$var2 <- attr(terms(formula2, data = mf), "term.labels")
varInfo$assign2 <- attr(mm2, "assign")
varInfo$assign2 <- varInfo$assign2[colnames(mm2) != "(Intercept)"]
mm2 <- mm2[, colnames(mm2) != "(Intercept)", drop = FALSE]
}
## remove cluster of excluded variables
exclude1 <- 1 * (varInfo$assign1 %in% match(exclude1, varInfo$var1))
exclude2 <- 1 * (varInfo$assign2 %in% match(exclude2, varInfo$var2))
# exclude2[names(mm2) == "(Intercept)"] <- 1
if (!is.null.missing(formula1) &&
(any(grepl("\\. ", as.character(formula1))) | formula1 == as.formula("~."))) {
kick <- colnames(mm1) %in% c(fExcl, "`(time)`", "`(status)`")
exclude1 <- exclude1[!kick]
mm1 <- mm1[, !kick, drop = FALSE]
exclude1[colnames(mm1) == "(Intercept)"] <- 1
}
if (!is.null.missing(formula2) &&
(any(grepl("\\. ", as.character(formula2))) | formula2 == as.formula("~."))) {
kick <- colnames(mm2) %in% c(fExcl, "`(time)`", "`(status)`")
exclude2 <- exclude2[!kick]
mm2 <- mm2[, !kick, drop = FALSE]
exclude2[colnames(mm2) == "(Intercept)"] <- 1
}
ctrl$exclude1 <- exclude1
ctrl$exclude2 <- exclude2
tmax <- max(time[status > 0])
if (missing(t0)) t0 <- quantile(time[status > 0], c(1:9 / 10, .95))
## auto choose lambda; still under development
if (is.character(lambda1) || is.character(lambda2))
tmp <- auto.lambda(mm1, mm2, time, status, t0, ctrl)
if (is.character(lambda1) && lambda1 == "auto") ctrl$lambda1 <- tmp$lambda1
if (is.character(lambda2) && lambda2 == "auto") ctrl$lambda2 <- tmp$lambda2
## Fit models
if (model == "mixture")
out <- fitPHMC(mm1, mm2, time, status, t0, ctrl)
if (model == "promotion")
out <- fitPHPH(mm1, mm2, time, status, t0, ctrl)
out$t0 <- t0
out$call <- match.call()
out$xlevel1 <- xlevel1
out$xlevel2 <- xlevel2
out$control <- ctrl
out <- out[order(names(out))]
class(out) <- "pCure"
return(out)
}
#' Package options for pseudoCure
#'
#' This function provides the fitting options for the \code{pCure()} function.
#'
#' @param binit1 Initial value for the first component.
#' A zero vector will be used if not specified.
#' @param binit2 Initial value for the second component
#' A zero vector will be used if not specified.
#' @param corstr A character string specifying the correlation structure.
#' The following are permitted: \code{"independence"}, \code{"exchangeable"},
#' and \code{"ar1"}.
#' @param nlambda1,nlambda2 An integer value specifying the number of lambda.
#' This is only evoked when \code{lambda1 = "auto"} or \code{lambda2 = "auto"}.
#' @param tol A positive numerical value specifying the absolute error tolerance in GEE algorithms.
#' @param maxit An integer value specifying the maximum number of iteration.
#'
#' @return A list with control parameters.
#'
#' @seealso \code{\link{pCure}}
#' @export
pCure.control <- function(binit1 = NULL, binit2 = NULL,
corstr = c("independence", "exchangeable", "ar1"),
nlambda1 = 100, nlambda2 = 100,
tol = 1e-7, maxit = 100) {
corstr <- match.arg(corstr)
list(nlambda1 = 200, nlambda2 = 200,
binit1 = binit1, binit2 = binit2, corstr = corstr,
eps = 1e-6, tol = tol, maxit = maxit)
}
#' Auto select lambda to apply CV
#'
#' Still under development
#'
#' @noRd
auto.lambda <- function(mm1, mm2, time, status, t0, ctrl) {
trys <- exp(-7:10)
lambda1.max <- lambda2.max <- max(trys)
for (i in 1:length(trys)) {
ctrl$lambda1 <- ctrl$lambda2 <- trys[i]
if (ctrl$model == "mixture")
tmp <- fitPHMC(mm1, mm2, time, status, t0, ctrl)
if (ctrl$model == "promotion")
tmp <- fitPHPH(mm1, mm2, time, status, t0, ctrl)
if (all(abs(tmp$fit1$b) < 1e-6)) lambda1.max <- trys[i]
if (all(abs(tmp$fit2$b) < 1e-6)) lambda2.max <- trys[i]
if (max(lambda1.max, lambda2.max) < max(trys)) break
}
list(lambda1 = exp(seq(log(1e-04), log(lambda1.max), length.out = ctrl$nlambda1)),
lambda2 = exp(seq(log(1e-04), log(lambda2.max), length.out = ctrl$nlambda2)))
}
is.null.missing <- function(x) missing(x) || is.null(x)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.