Nothing
#' Boostrapping algorithm for \code{coxed}
#'
#' This function uses bootstrapping to create standard errors and
#' confidence intervals for the quantities produced by the coxed()
#' function. It is adapted from the \code{\link[rms]{bootcov}} function
#' in the \code{\link[rms]{rms}} package. It is called by the \code{\link[coxed]{coxed}}
#' function and is not intended to be used by itself. Please refer to
#' the original \code{\link[rms]{bootcov}} function for general bootstrapping
#' applications.
#' @param fit an estimated Cox proportional hazards model object with
#' class "coxph" or "cph"
#' @param cluster a variable indicating groupings. \code{cluster} may
#' be any type of vector (factor, character, integer). Unique values
#' of \code{cluster} indicate possibly correlated groupings of observations.
#' Note the data used in the fit and stored in \code{fit$x} and \code{fit$y}
#' may have had observations containing missing values deleted. It is assumed
#' that if there were any \code{NA}s, an \code{naresid} function exists for
#' the class of fit. This function restores \code{NA}s so that the rows of
#' the design matrix coincide with \code{cluster}
#' @param B Number of bootstrap simulation iterations
#' @param fitter the name of a function with arguments \code{(x,y)} that will fit
#' bootstrap samples. Default is taken from the class of fit if it is \code{ols},
#' \code{lrm}, \code{cph}, \code{psm}, \code{Rq}. If \code{fitter="tvc"} the
#' function employs \code{\link[survival]{agreg.fit}}
#' @param coef.reps set to \code{TRUE} if you want to store a matrix of all
#' bootstrap regression coefficient estimates in the returned component \code{boot.Coef}.
#' @param loglik set to \code{TRUE} to store -2 log likelihoods for each bootstrap model,
#' evaluated against the original \code{x} and \code{y} data. The default is to do this when
#' \code{coef.reps} is specified as \code{TRUE}. The use of \code{loglik=TRUE} assumes
#' that an \code{oos.loglik} method exists for the type of model being analyzed, to calculate
#' out-of-sample -2 log likelihoods (see \code{rmsMisc}). After the B -2 log likelihoods
#' (stored in the element named \code{boot.loglik} in the returned fit object), the \code{B+1}
#' element is the -2 log likelihood for the original model fit
#' @param pr set to \code{TRUE} to print the current sample number to monitor progress
#' @param maxit maximum number of iterations, to pass to \code{fitter}
#' @param group a grouping variable used to stratify the sample upon bootstrapping. This
#' allows one to handle \code{k}-sample problems, i.e., each bootstrap sample will be forced
#' to select the same number of observations from each level of group as the number appearing
#' in the original dataset. You may specify both \code{group} and \code{cluster}
#' @param stat a single character string specifying the name of a \code{stats} element produced
#' by the fitting function to save over the bootstrap repetitions. The vector of saved statistics
#' will be in the \code{boot.stats} part of the list returned by \code{bootcov}
#'
#' @author Jonathan Kropko <jkropko@@virginia.edu> and Jeffrey J. Harden <jharden@@nd.edu>, based
#' on the code for the \code{\link[rms]{bootcov}} function in the \code{\link[rms]{rms}} package
#' by Frank Harrell and Bill Pikounis
#'
#' @details This function contains the same code as the \code{\link[rms]{bootcov}} function in
#' the \code{\link[rms]{rms}} package, with a few alterations to work better with the \code{\link[coxed]{coxed}}
#' function. First, we output a result attribute \code{b.ind}, which contains the observation numbers from the estimation sample
#' that are drawn with replacement to produce the bootstrap sample and takes into account clustering.
#' Second, we program a new class, \code{tvc}, for
#' \code{fitter} to use \code{\link[survival]{agreg.fit}} instead of \code{\link[survival:agreg.fit]{coxph.fit}}
#' when the data contain time-varying covariates.
#'
#' @seealso \code{\link[coxed]{coxed}}, \code{\link[survival]{coxph}}, \code{\link[rms]{cph}}, \code{\link[rms]{bootcov}}
#' @export
bootcov2 <- function (fit, cluster, B = 200, fitter, coef.reps = TRUE, loglik = FALSE,
pr = FALSE, maxit = 15, group = NULL, stat = NULL)
{
if(missing(cluster)) b.ind <- matrix(NA, nrow = nrow(fit$x), ncol = B)
else b.ind <- list()
coxcph <- inherits(fit, "coxph") || inherits(fit, "cph") ||
(length(fit$fitFunction) && any(c("cph", "coxph") %in%
fit$fitFunction))
nfit <- fit$fitFunction[1]
if (!length(nfit))
nfit <- setdiff(oldClass(fit), "Design")[1]
if (length(fit$weights) && (coxcph || nfit[1] == "Rq"))
stop("does not handle weights")
if (!length(X <- fit$x) | !length(Y <- fit$y))
stop("you did not specify x=TRUE and y=TRUE in the fit")
sc.pres <- match("scale", names(fit), 0) > 0
ns <- fit$non.slopes
if (nfit == "psm") {
fixed <- fit$fixed
fixed <- if (length(fixed) == 1 && is.logical(fixed) &&
!fixed)
list()
else list(scale = TRUE)
fixed <- NULL
dist <- fit$dist
parms <- fit$parms
}
if (nfit == "Glm")
fitFamily <- fit$family
penalty.matrix <- fit$penalty.matrix
if (missing(fitter)) {
fitter <- switch(nfit, ols = if (length(penalty.matrix)) {
function(x, y, penalty.matrix, ...) {
lm.pfit(x, y, penalty.matrix = penalty.matrix,
tol = 1e-11, regcoef.only = TRUE)
}
} else function(x, y, ...) {
lm.fit.qr.bare(x, y, tolerance = 1e-11, intercept = FALSE)
}, lrm = function(x, y, maxit = 15, penalty.matrix, ...) {
lrm.fit(x, y, maxit = maxit, tol = 1e-11, penalty.matrix = penalty.matrix)
}, cph = function(x, y, strata = NULL, maxit = 15, ...) {
coxph.fit(x, y,
strata = strata,
method = "efron",
rownames = rownames(x),
control = coxph.control(eps = 1e-04,
toler.chol = 1e-11,
toler.inf=1,
iter.max = maxit))
}, tvc = function(x, y, strata = NULL, init=NULL, maxit = 15, ...) {
agreg.fit(x, y,
strata = strata,
init = init,
method = "efron",
rownames = rownames(x),
control = coxph.control(eps = 1e-04,
toler.chol = 1e-11,
toler.inf=1,
iter.max = maxit))
}, psm = function(x, y, maxit = 15, ...) {
survreg.fit2(x, y, dist = dist, parms = parms, fixed = fixed,
offset = NULL, init = NULL, maxiter = maxit)
}, bj = function(x, y, maxit = 15, eps = 1e-04, ...) {
bj.fit(x, y, control = list(iter.max = maxit, eps = 1e-04))
}, Glm = function(x, y, ...) {
glm.fit(x, as.vector(y), family = fitFamily)
}, Rq = RqFit(fit, wallow = FALSE))
}
if (!length(fitter))
stop("fitter not valid")
if (loglik) {
oosl <- switch(nfit, ols = oos.loglik.ols, lrm = oos.loglik.lrm,
cph = oos.loglik.cph, psm = oos.loglik.psm, Glm = oos.loglik.Glm)
if (!length(oosl))
stop("loglik=TRUE but no oos.loglik method for model in rmsMisc")
Loglik <- double(B + 1)
Loglik[B + 1] <- oosl(fit)
}
else Loglik <- NULL
n <- nrow(X)
p <- length(fit$coef)
vname <- names(fit$coef)
if (sc.pres) {
p <- p + 1
vname <- c(vname, "log scale")
}
fillInTheBlanks <- function(S) {
L <- !is.na(S)
c(S[L][1], S[L])[cumsum(L) + 1]
}
fill <- function(cof, vn, ns) {
p <- length(vn)
if (length(cof) == p)
return(cof)
nc <- ns - (p - length(cof))
cints <- cof[1:nc]
ints <- rep(NA, ns)
names(ints) <- vn[1:ns]
ints[names(cints)] <- cints
if (is.na(ints[ns])) {
l <- ns
if (ns > 1) {
for (j in (ns - 1):1) {
if (!is.na(ints[j]))
break
l <- j
}
}
ints[l:ns] <- -Inf
}
c(rev(fillInTheBlanks(rev(ints))), cof[-(1:nc)])
}
bar <- rep(0, p)
cov <- matrix(0, nrow = p, ncol = p, dimnames = list(vname,
vname))
if (coef.reps)
coefs <- matrix(NA, nrow = B, ncol = p, dimnames = list(NULL,
vname))
if (length(stat))
stats <- numeric(B)
#Y <- as.matrix(if (is.category(Y))
# oldUnclass(Y)
#else Y)
ny <- ncol(Y)
Strata <- fit$Strata
nac <- fit$na.action
if (length(group)) {
if (length(group) > n) {
if (length(nac)) {
j <- !is.na(naresid(nac, Y) %*% rep(1, ny))
group <- group[j]
}
}
if (length(group) != n)
stop("length of group does not match # rows used in fit")
group.inds <- split(1:n, group)
ngroup <- length(group.inds)
}
else ngroup <- 0
anyinf <- FALSE
if (missing(cluster)) {
b <- 0
for (i in 1:B) {
if (pr)
cat(i, "\r")
if (ngroup) {
j <- integer(n)
for (si in 1:ngroup) {
gi <- group.inds[[si]]
j[gi] <- sample(gi, length(gi), replace = TRUE)
}
}
else j <- sample(1:n, n, replace = TRUE); b.ind[ , i] <- j
f <- tryCatch(fitter(X[j, , drop = FALSE], Y[j, ,
drop = FALSE], maxit = maxit, penalty.matrix = penalty.matrix,
strata = Strata[j]), error = function(...) list(fail = TRUE))
if (length(f$fail) && f$fail)
next
cof <- f$coefficients
if (any(is.na(cof)))
next
b <- b + 1
if (sc.pres)
cof <- c(cof, `log scale` = log(f$scale))
cof <- fill(cof, vname, ns)
if (any(is.infinite(cof)))
anyinf <- TRUE
if (coef.reps)
coefs[b, ] <- cof
if (length(stat))
stats[b] <- f$stats[stat]
bar <- bar + cof
cof <- as.matrix(cof)
cov <- cov + cof %*% t(cof)
if (loglik)
Loglik[b] <- oosl(f, matxv(X, cof), Y)
}
if (pr)
cat("\n")
}
else {
if (length(cluster) > n) {
if (length(nac)) {
j <- !is.na(naresid(nac, Y) %*% rep(1, ny))
cluster <- cluster[j]
}
}
if (length(cluster) != n)
stop("length of cluster does not match # rows used in fit")
if (any(is.na(cluster)))
stop("cluster contains NAs")
cluster <- as.character(cluster)
clusters <- unique(cluster)
nc <- length(clusters)
Obsno <- split(1:n, cluster)
b <- 0
for (i in 1:B) {
if (pr)
cat(i, "\r")
if (ngroup) {
j <- integer(0)
for (si in 1:ngroup) {
gi <- group.inds[[si]]
cluster.gi <- cluster[gi]
clusters.gi <- unique(cluster.gi)
nc.gi <- length(clusters.gi)
Obsno.gci <- split(gi, cluster.gi)
j.gci <- sample(clusters.gi, nc.gi, replace = TRUE)
obs.gci <- unlist(Obsno.gci[j.gci])
j <- c(j, obs.gci)
}
obs <- j
}
else {
j <- sample(clusters, nc, replace = TRUE)
obs <- unlist(Obsno[j])
b.ind[[i]] <- obs
}
f <- tryCatch(fitter(X[obs, , drop = FALSE], Y[obs,
, drop = FALSE], maxit = maxit, penalty.matrix = penalty.matrix,
strata = Strata[obs]), error = function(...) list(fail = TRUE))
if (length(f$fail) && f$fail)
next
cof <- f$coefficients
if (any(is.na(cof)))
next
b <- b + 1
if (sc.pres)
cof <- c(cof, `log scale` = log(f$scale))
cof <- fill(cof, vname, ns)
if (any(is.infinite(cof)))
anyinf <- TRUE
if (coef.reps)
coefs[b, ] <- cof
if (length(stat))
stats[b] <- f$stats[stat]
bar <- bar + cof
cof <- as.matrix(cof)
cov <- cov + cof %*% t(cof)
if (loglik)
Loglik[b] <- oosl(f, matxv(X, cof), Y)
}
if (pr)
cat("\n")
}
if (b < B) {
warning(paste("fit failure in", B - b, "resamples. Might try increasing maxit"))
if (coef.reps)
coefs <- coefs[1:b, , drop = FALSE]
Loglik <- Loglik[1:b]
}
if (anyinf)
warning("at least one resample excluded highest Y values, invalidating bootstrap covariance matrix estimate")
bar <- bar/b
fit$B <- b
names(bar) <- vname
fit$boot.coef <- bar
if (coef.reps)
fit$boot.Coef <- coefs
bar <- as.matrix(bar)
cov <- (cov - b * bar %*% t(bar))/(b - 1)
fit$orig.var <- fit$var
fit$var <- cov
fit$boot.loglik <- Loglik
if(missing(cluster)) fit$b.ind <- b.ind
else fit$b.ind <- as.matrix(unname(do.call(cbind, lapply(b.ind, ts))))
if (length(stat))
fit$boot.stats <- stats
if (nfit == "Rq") {
newse <- sqrt(diag(cov))
newt <- fit$summary[, 1]/newse
newp <- 2 * (1 - pt(abs(newt), fit$stats["n"] - fit$stats["p"]))
fit$summary[, 2:4] <- cbind(newse, newt, newp)
}
fit
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.