sieveSurv: Nonparametric Survival Estimator with B-spline Sieves

Documented in loglik_p

#' Observed-data log-likelihood for B-spline coefficients
#'
#' @param bspline_coeff Matrix of B-spline coefficients returned from \code{sieveSurv()}.
#' @param time Column name for follow-up time.
#' @param event Column name for event indicators.
#' @param bspline Column names for B-spline basis.
#' @param data Dataframe or matrix containing (at least) named columns \code{time}, \code{event}, \code{covar}, and \code{bspline}.
#' @param try_event_times (Optional) Vector or dataframe of observed values of \code{time} (for events) to be used to create the complete dataset. If \code{NULL} (DEFAULT), then the unique event times from \code{data} will be used.
#'
#' @return Scalar
#'
#' @export
#'
loglik_p <- function(bspline_coeff, time, event, bspline, data, try_event_times = NULL) {
  # (If applicable) Remove existing column "k"
  data[, "k"] <- NULL

  # Create ID variable -------------------------------------------------------------
  n <- nrow(data)
  data$id <- 1:n

  # Create covariate censored variable ---------------------------------------------
  data$x <- data[, time]
  ## Make NA for censored subjects -------------------------------------------------
  data$x[data[, event] == 0] <- NA
  ## Order by x --------------------------------------------------------------------
  data <- data[order(data$x), ]

  # Use user-specified event times to create complete data -------------------------
  # If none supplied, use unique observed event times from data  -------------------
  if (!is.null(try_event_times)) {
    dist_x <- unique(try_event_times)
    dist_x <- dist_x[order(dist_x)]
  } else {
    dist_x <- unique(data[data[, event] == 1, "x"])
  }
  m <- length(dist_x)
  merge_k <- data.frame(k = 1:m, x = dist_x)

  # Create complete dataset --------------------------------------------------------
  ## Create indicators of being uncensored -----------------------------------------
  uncens <- data[, event] == 1
  ## Merge the m dist_x into the complete data from uncensored subjects ------------
  cdat_uncens <- merge(data[uncens, ], merge_k, all.x = TRUE)
  ## Try all x with each observed, censored t --------------------------------------
  cdat_cens <- merge(data[!uncens, -which(x = colnames(data) %in% c("x"))], merge_k, all.x = TRUE, all.y = TRUE)
  ## Filter to only possible x for observed t and censoring type -------------------
  cdat_cens <- cdat_cens[cdat_cens[, "x"] > cdat_cens[, time], ]
  ## Uncensored subjects
  cdat <- rbind(cdat_uncens[, c(time, event, bspline, "id", "x", "k")],
                cdat_cens[, c(time, event, bspline, "id", "x", "k")], row.names = NULL)
  # Calculate the observed-data log-likelihood for the B-spine coefficients p_{kj}
  bc <- bspline_coeff[cdat$k, ]
  p <- rowSums(rowsum(x = ifelse(is.na(bc), 0, bc) * cdat[, bspline], group = cdat$id))
  p[p == 0] <- 1 # Prevent log(p) = infinity
  log_p <- log(p)
  return(sum(log_p))
}