#' Nonparametrically estimate the survival function using B-spline sieves (for use with cross-validation)
#'
#' @param time Column name for follow-up time
#' @param event Column name for event indicators
#' @param covar (Optional) column name(s) for additional fully-observed covariates. Default is \code{covar=NULL}, which estimates unconditional survival.
#' @param bspline Column names for B-spline basis.
#' @param data Dataframe or matrix containing (at least) named columns \code{time}, \code{event}, \code{covar}, and \code{bspline}.
#' @param tol Tolerance to define convergence. Default is \code{tol=1E-4}.
#' @param max_iter Maximum number of iterations allowed for the EM algorithm. Default is \code{max_iter = 1000}.
#' @param assume_last Assume last observed \code{time} is an event (for use when the integration of the survival function is desired). Default is \code{FALSE}.
#'
#' @return A list with the following elements:
#' \item{bspline_coeff}{the values of the B-spline coefficients at convergence}
#' \item{conv}{indicator of convergence for the EM algorithn}
#' \item{conv_msg}{description of nonconvergence if \code{conv = FALSE}}
cv_sieveSurv <- function(time, event, covar = NULL, bspline, data,
tol = 1E-4, max_iter = 1000, assume_last = FALSE) {
# Create censored variable
data$x <- data[, time]
## Make NA for censored subjects
data$x[data[, event] == 0] <- NA
# Assume largest or smallest observed t is an event regardless
## For stability of the integral estimate
if (assume_last) {
# Assume largest observed t is an event regardless
max_t <- max(data[, time])
max_t_at <- which.max(data[, time])
data[max_t_at, c("x", event)] <- c(max_t, 1)
}
# Subset data to valid times
## Remove censored subjects with T > max(X)
data <- data[data[, time] <= max(data[data[, event] == 1, time]), ]
### Also remove uncensored subjects with X < min(T_cens)
#data <- data[data[, event] == 0 | data$x >= min(data[data[, event] == 0, time]), ]
# Order by x
## This leads to the uncensored subjects coming first
data <- data[order(data$x), ]
dist_x <- unique(data[data[, event] == 1, "x"])
m <- length(dist_x)
merge_k <- data.frame(k = 1:m, x = dist_x)
data <- merge(data, merge_k, all.x = TRUE)
# Create indicators of being uncensored
uncens <- data[, event] == 1
# Create ID variable for grouping/summing
n <- nrow(data)
data$id <- 1:n
# Save B-spline basis separately
B <- data.matrix(data[, bspline])
# Initialize B-spline coefficients, p_{kj}
p_val_num <- rowsum(x = B[uncens, ], group = data$k[uncens], reorder = TRUE)
if (any(colSums(p_val_num) == 0)) {
return(list(bspline_coeff = NA,
conv = FALSE,
conv_msg = "empty B-spline among censored subjects"))
}
prev_p <- t(t(p_val_num) / colSums(p_val_num))
# Create complete dataset
## Censored subjects
cdat <- data[!uncens, c(time, event, bspline, "id")]
cdat <- cbind(cdat[rep(1:nrow(cdat), times = m), ], x = rep(dist_x, each = nrow(cdat)),
k = rep(1:m, each = nrow(cdat)), row.names = NULL)
## Uncensored subjects
cdat <- rbind(data[uncens, c(time, event, bspline, "id", "x", "k")], cdat, row.names = NULL)
### Subset to possible values of x (i.e., x > T)
cdat <- cdat[cdat[, "x"] >= cdat[, time], ]
# Prepare for EM algorithm
it <- 1
conv <- FALSE
# EM algorithm
while(!conv & it < max_iter) {
# E step
pXU_num <- data.matrix(prev_p[cdat$k, ] * B[cdat$id, ])
pXU_denom <- rowSums(rowsum(x = pXU_num, group = cdat$id, reorder = TRUE))
pXU_denom[pXU_denom == 0] <- 1
pXU <- pXU_num / pXU_denom[cdat$id]
# M step
add_to_p <- rowsum(x = pXU[cdat[, event] == 0, ], group = cdat$k[cdat[, event] == 0], reorder = TRUE)
## Check for values of xk < min(Ti among uncensored)
pad_rows <- setdiff(x = 1:m, y = unique(cdat$k[cdat[, event] == 0]))
## For these rows, add an empty tow to add_to_p
empty_rows <- matrix(data = 0, nrow = length(pad_rows), ncol = ncol(add_to_p))
## Update b-spline coefficients with contribution from censored subjects
new_p_num <- p_val_num + rbind(empty_rows, add_to_p)
new_p <- t(t(new_p_num) / colSums(new_p_num))
# Check for convergence
if (!any(abs(new_p - prev_p) > tol)) {
conv <- TRUE
} else {
it <- it + 1
prev_p <- new_p
}
}
if (conv) {
return(list(bspline_coeff = cbind(x = dist_x, new_p),
conv = conv,
conv_msg = ""))
} else {
if (it >= max_iter) {
return(list(bspline_coeff = NA,
conv = conv,
conv_msg = "max_iter reached"))
}
return(list(bspline_coeff = NA,
conv = conv,
conv_msg = "unknown"))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.