#' Fit a parametric risk curve
#' @param object a fitted model object
#' @param dataSample a \code{data.frame} on which the curve is to be calibrated against. Must contain all predictors of \code{object}
#' @param timeInvariant if not \code{NULL}, a 1 row \code{data.frame} setting time invariant variables used of the fit.
#' @param timeVarying if not \code{NULL}, a \code{data.frame} with a column indicating years as in \code{object}
#' as well as other time-varying covariates.
#' @param weights optional vector of weights of length \code{nrow(dataSample)} (useful if \code{object} was fitted using a matched sample)
#' @param level if not \code{NULL} levels for confidence intervals.
#' @param nSamples if \code{level} is not \code{NULL}, number of Monte Carlo samples to be drawn
#' to compute the confidence intervals around predictions.
#' @details A risk curve is the cumulative probability of an event up to some time point. For instance, it would be the probability of having been fired
#' up to year 2010. Confidence intervals around each event denote the probability of this event as opposed to other events. As such, confidence
#' intervals for all events do not sum to 1. The risk curve is calibrated at mean sample values, using the data in \code{dataSample}.
#' @return If \code{level} is \code{NULL}, a \code{length(years)} rows, 3 columns \code{tibble} with year, and probability for 2 of the three outcomes. If
#' \code{level} is not \code{NULL}, a \code{2 * length(years)} rows \code{tibble} with year and probability for 2 of the three outcomes, and as many columns
#' as necessary for confidence intervals.
#' @export
riskCurve <- function(object, dataSample, timeInvariant = NULL, timeVarying = NULL, weights = NULL, level = c(.9, .95), nSamples = 1e3) {
if(is.null(weights) & (!is.null(object$formulaMatch))) {
warning(
"The model was estimated using a matched sample, but you're not using weights to fit the survival curve. Are you sure? (weights are in object$MatchIt$weights)"
)
}
yearCol <- object$yearCol
years <- dplyr::pull(dataSample, !!yearCol) %>% unique()
minYear <- min(as.numeric(as.character(years)))
if(is.null(timeVarying)) timeVarying <- tibble::tibble(!!yearCol := years)
if(!all(years %in% dplyr::pull(timeVarying, !!yearCol))) stop("timeVarying does not contain all years")
if(nrow(timeVarying) != length(years)) stop("timeVarying does not contain one row per year")
mf <- dplyr::filter(dataSample, as.character(!!yearCol) == as.character(minYear))
y <- model.response(model.frame(formula(object), mf))
if(!is.null(weights)) y <- y * weights
mf <- model.frame(delete.response(terms(formula(object))), mf)
mf <- cbind(y = rowSums(y), mf)
cInvariant <- colnames(timeInvariant)
cVarying <- colnames(timeVarying)
cols <- c(cInvariant, cVarying)
mf <- dplyr::select(mf, -cols)
mf <- dplyr::group_by_at(mf, -1) %>%
dplyr::summarize(y = sum(y)) %>%
dplyr::ungroup()
mf <- dplyr::bind_cols(mf, timeInvariant[rep(1,nrow(mf)),])
mf <- dplyr::inner_join(
dplyr::mutate(mf, joinCol = 1),
dplyr::mutate(timeVarying, joinCol = 1),
by = "joinCol") %>%
dplyr::select(-joinCol)
probs <- predict(object, mf, type = "response")
pe <- tibble::as_tibble(makeSurv(mf, probs, yearCol)) %>%
dplyr::mutate(year = sort(as.numeric(as.character(years)))) %>%
dplyr::select(year, outcome_1, outcome_2)
if(!is.null(level)) {
alpha <- (1-level)/2
alpha <- sort(unique(c(alpha, 1-alpha)))
betas <- MASS::mvrnorm(nSamples, coef(object), vcov(object))
x <- model.matrix(delete.response(terms(formula(object))), mf)
probs <- as.numeric(apply(betas, 1, predictSurv, x = x))
probs <- array(probs, c(nrow(x), 3, nrow(betas)))
survs <- as.numeric(apply(probs, 3, makeSurv, mf = mf, yearCol = yearCol))
survs <- array(survs, c(length(years), 2, nrow(betas)))
ci <- dplyr::bind_rows(
tibble::as_tibble(t(apply(survs[,1,], 1, quantile, probs = alpha))),
tibble::as_tibble(t(apply(survs[,2,], 1, quantile, probs = alpha)))
)
pe <- tidyr::gather(pe, outcome, value, -year) %>%
dplyr::bind_cols(ci)
}
pe
}
predictSurv <- function(x, vBeta) {
vBeta <- cbind(0, matrix(vBeta, ncol = 2))
probs <- exp(x %*% vBeta)
probs / rowSums(probs)
}
makeSurv <- function(mf, probs, yearCol) {
colnames(probs) <- sprintf("outcome_%s", 0:2)
probs <- tibble::as_tibble(probs)
mf <- dplyr::bind_cols(
dplyr::select(mf, year = !!yearCol, y),
probs
) %>% dplyr::group_by(year) %>%
dplyr::summarize(
outcome_0 = stats::weighted.mean(outcome_0, y),
outcome_1 = stats::weighted.mean(outcome_1, y),
outcome_2 = stats::weighted.mean(outcome_2, y)
) %>%
dplyr::ungroup() %>%
dplyr::mutate(year = as.numeric(as.character(year))) %>%
dplyr::arrange(year)
mf %>%
dplyr::mutate(
outcome_0 = cumprod(outcome_0),
S = dplyr::lag(outcome_0)
) %>%
tidyr::replace_na(list(S = 1)) %>%
dplyr::mutate(
outcome_1 = cumsum(outcome_1 * S),
outcome_2 = cumsum(outcome_2 * S)
) %>%
dplyr::select(outcome_1, outcome_2) %>%
as.matrix()
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.