#' Fitting dynamic frailty models with the EM algorithm
#'
#'
#' @importFrom survival Surv survSplit
#' @importFrom magrittr "%>%"
#' @importFrom Rcpp evalCpp
#' @importFrom tibble rownames_to_column
#' @importFrom tidyr separate
#' @importFrom dplyr arrange_
#' @importFrom stats nlm drop.terms model.frame quantile terms model.matrix
#' @useDynLib dynfrail, .registration=TRUE
#' @include dynfrail_aux.R
#' @include dynfrail_fit.R
#'
#' @param formula A formula that contains on the left hand side an object of the type \code{Surv}
#' and on the right hand side a \code{+cluster(id)} statement.
#' @param data A data frame in which the formula argument can be evaluated
#' @param distribution An object as created by \code{\link{dynfrail_dist}}
#' @param control An object as created by \code{\link{dynfrail_control}}
#' @param ... Other arguments, currently used to warn about deprecated argument names
#' @export
#'
#' @return A \code{dynfrail} object that contains the following fields:
#' \item{coefficients}{A named vector of the estimated regression coefficients}
#' \item{hazard}{The breslow estimate of the baseline hazard at each event time point, in chronological order}
#' \item{imat}{Fisher's information matrix corresponding to the coefficients and hazard, assuming \eqn{\theta, \lambda} constant}
#' \item{logtheta}{The point estimate of the logarithm of the frailty parameter \eqn{\theta}. See details.}
#' \item{loglambda}{The point estimate of the logarithm of the autocorrelation parameter \eqn{\lambda}. See details.}
#' \item{frail}{A \code{data.frame} containing the variables: \code{id} (cluster id), \code{interval} (for piecewise constant frailty, the label of the interval
#' on which the frailty is constant), \code{Y} (a \code{Surv} object which determines a starting and a stopping time for each row), \code{frail} (the empirical
#' Bayes estimates of the piecewise constant frailty corresponding to that specific cluster and that specific time period)}
#' \item{tev}{The time points of the events in the data set, this is the same length as hazard}
#' \item{loglik}{A vector of length two with the log-likelihood of the starting Cox model
#' and the maximized log-likelihood}
#' \item{formula}{The original formula argument}
#' \item{distribution}{The original distribution argument}
#' \item{control}{The original control argument}
#' @export
#'
#' @references Putter, H., & Van Houwelingen, H. C. (2015). Dynamic frailty models based on compound birth–death processes. Biostatistics, 16(3), 550-564.
#'
#' @details This function fits dynamic frailty models where the intensity of the process is described by
#' \deqn{\lambda(t) = Z(t) \exp(\beta^\top x) \lambda_0(t).} As in regular frailty models, the random effect is
#' shared by observations from a cluster, or by recurrent event episodes within an individual.
#' This implementation generally follows the lines of Putter & van Houwelingen (2015). The maximum likelihood
#' estimates are obtained with an exact E step.
#'
#' \eqn{Z(t)} has two parameters: \eqn{\theta} plays the role of the spread of the frailty distribution. For the frailty distributons with finite
#' variance (all except the positive stable) this is the inverse of the variance, so that 0 corresponds to infinite variance and infinity to
#' variance 0. The second parameter \eqn{\lambda} determines how much variation in time is in \eqn{Z(t)}, so that
#' \deqn{cor(Z(t_1), Z(t_2)) = exp(-\lambda (t_2 - t_1)).} Note that this heavily depends on the time scale, so
#' the starting value in the \code{distribution} should reflect that.
#'
#' By default, the program must calculate \eqn{Z(t)} for each cluster and for each event time point in the data.
#' This is computationally challenging. An option is to use the \code{nints} argument in the \code{control} argument.
#' This considers \eqn{Z(t)} to be piecewise constant over \code{nints + 1} intervals. These intervals are determined
#' automatically so that there are roughly an equal number of observations for each interval. Using \code{nints = 0}
#' is equivalent to fitting a shared frailty model with the \code{frailtyEM} package.
#'
#' It is recommended that the user starts with \code{nints = 0} and then slowly increase the number of intervals.
#' Other options for performance may be set within the \code{control} argument. Also, this could be tried out first
#' on a subset of the data.
#'
#' For computational reasons, the standard errors of \eqn{\theta} and \eqn{\lambda} are not calculated, and the standard errors of
#' the regression coefficients are obtained under the assumption that the frailty distribution is fixed.
#'
#' @examples
#' # 5 piecewise constant intervals
#' m2 <- dynfrail(Surv(time, status) ~ rx + sex + cluster(litter),
#' data = rats,
#' distribution = dynfrail_dist(n_ints = 4))
#'
#' \dontrun{
#' #' # essentially a gamma frailty fit
#' m1 <- dynfrail(Surv(time, status) ~ rx + sex + cluster(litter),
#' data = rats,
#' distribution = dynfrail_dist(n_ints = 0))
#'
#' # completely semiparametric gamma frailty
#' m2 <- dynfrail(Surv(time, status) ~ rx + sex + cluster(litter),
#' data = rats)
#' }
dynfrail <- function(formula, data,
distribution = dynfrail_dist(),
control = dynfrail_control(),
...)
{
if(!inherits(distribution, "dynfrail_dist"))
stop("distribution argument misspecified; see ?dynfrail_dist()")
if(!inherits(control, "dynfrail_control"))
stop("control argument misspecified; see ?dynfrail_control()")
Call <- match.call()
# Here checking inputs
if(missing(formula) | missing(data)) stop("Missing arguments")
cluster <- function(x) x
Y <- model.frame(formula, data)[[1]]
if(!inherits(Y, "Surv")) stop("left hand side not a survival object")
if(ncol(Y) != 3) {
Y <- Surv(rep(0, nrow(Y)), Y[,1], Y[,2])
}
tev_unique_ord <- sort(unique(Y[,2][Y[,3]==1]))
# browser()
if(!is.null(distribution$n_ints)) {
quants <- quantile(tev_unique_ord,
probs = seq(from = 0, to = 1,
length.out = distribution$n_ints + 2), type = 1)
cut <- quants[-c(1, length(quants))]
# this is a bit dodgy but this is only needed further to
# assign time points to intervals of the frailty
tev_unique_ord <- quants[-1]
} else
if(!is.null(distribution$times)) cut <- distribution$times else
cut <- tev_unique_ord[-length(tev_unique_ord)]
df_dynfrail <- survSplit(formula, data = data, cut = cut, episode = "interval_")
names(df_dynfrail)[grep("cluster", names(df_dynfrail))] <- "id_"
pos_id <- grep("cluster", attr(terms(formula), "term.labels"))
terms2 <- drop.terms(terms(formula), dropx = pos_id, keep.response = TRUE)
mf <- model.frame(terms2, df_dynfrail)
Y <- mf[[1]]
if(ncol(Y) != 3) {
Y <- Surv(df_dynfrail$tstart, Y[,1], Y[,2])
}
# get the model matrix
X1 <- model.matrix(terms2, df_dynfrail)
X <- X1[,-c(1), drop=FALSE]
# note: X has no attributes, in coxph it does.
mcox <- survival::agreg.fit(x = X,
y = Y,
strata = NULL, offset = NULL, init = NULL,
control = survival::coxph.control(),
weights = NULL,
method = "breslow",
rownames = NULL)
# Now we need to get the hazard out of this for every row
if(length(X) == 0) {
newrisk <- 1
exp_g_x <- matrix(rep(1, length(mcox$linear.predictors)), nrow = 1)
g <- 0
g_x <- t(matrix(rep(0, length(mcox$linear.predictors)), nrow = 1))
} else {
x2 <- matrix(rep(0, ncol(X)), nrow = 1, dimnames = list(123, dimnames(X)[[2]]))
x2 <- (scale(x2, center = mcox$means, scale = FALSE))
newrisk <- exp(c(x2 %*% mcox$coefficients) + 0)
exp_g_x <- exp(mcox$coefficients %*% t(X))
g <- mcox$coefficients
g_x <- t(mcox$coefficients %*% t(X))
}
# what I need here is
explp <- exp(mcox$linear.predictors) # these are with centered covariates
#nev_id <- rowsum(Y[,3], id) # nevent per id or am I going crazy
nrisk <- rev(cumsum(rev(rowsum(explp, Y[, ncol(Y) - 1]))))
esum <- rev(cumsum(rev(rowsum(explp, Y[, 1]))))
# the stuff that won't change
death <- (Y[, ncol(Y)] == 1)
nevent <- as.vector(rowsum(1 * death, Y[, ncol(Y) - 1])) # per time point
time <- sort(unique(Y[,2])) # unique tstops incl. censoring times
# this gives the next entry time for each unique tstop (not only event)
etime <- c(0, sort(unique(Y[, 1])), max(Y[, 1]) + min(diff(time)))
indx <- findInterval(time, etime, left.open = TRUE) # left.open = TRUE is very important
# this gives for every tstart (line variable) after which event time did it come
# indx2 <- findInterval(Y[,1], time, left.open = FALSE, rightmost.closed = TRUE)
indx2 <- findInterval(Y[,1], time)
time_to_stop <- match(Y[,2], time)
# order_id <- findInterval(id, unique(id))
atrisk <- list(death = death, nevent = nevent,
#nev_id = nev_id,
#order_id = order_id,
time = time, indx = indx, indx2 = indx2,
time_to_stop = time_to_stop)
atrisk$x2 <- x2
# Some stuff that won't change
atrisk$id_interval <- paste0(df_dynfrail$id_, "_",df_dynfrail$interval_)
# events / interval for each id
death_id_interval <- rowsum(Y[,3], group = atrisk$id_interval, reorder = TRUE) %>%
as.data.frame() %>%
tibble::rownames_to_column() %>%
tidyr::separate("rowname", into = c("id", "interval"), sep = "_", convert = TRUE) %>%
dplyr::arrange_("id", "interval")
delta <- split(death_id_interval$V1, death_id_interval$id)
intervals <- split(death_id_interval$interval, death_id_interval$id)
atrisk$times_incluster <- lapply(intervals, function(x)
tev_unique_ord[x]
)
# for each cluster, in which interval do the events fall
atrisk$events_incluster <- lapply(delta, function(x) {
rep(1:length(x), x)
})
# for each row, to which frailty interval it belongs
atrisk$interval_incluster <-
split(df_dynfrail$interval_, df_dynfrail$id_)
# First calculation of the cumulative hazard
nrisk <- nrisk - c(esum, 0,0)[indx]
haz <- nevent/nrisk * newrisk
basehaz_line <- haz[atrisk$time_to_stop]
cumhaz <- cumsum(haz)
cumhaz_0_line <- cumhaz[atrisk$time_to_stop]
cumhaz_tstart <- c(0, cumhaz)[atrisk$indx2 + 1]
cumhaz_line <- (cumhaz_0_line - cumhaz_tstart) * explp / newrisk
chz_id_interval <- rowsum(cumhaz_line,
group = atrisk$id_interval,
reorder = TRUE) %>%
as.data.frame() %>%
tibble::rownames_to_column() %>%
tidyr::separate("rowname", into = c("id", "interval"), sep = "_", convert = TRUE) %>%
arrange_("id", "interval")
c_vecs <- split(chz_id_interval$V1, chz_id_interval$id)
#
# browser()
#
# poia<-nlm(f = dynfrail_fit, p =log(c(distribution$theta, distribution$lambda)),
# dist = distribution$dist,
# pvfm = distribution$pvfm, Y = Y, Xmat = X,
# atrisk = atrisk, basehaz_line = basehaz_line,
# mcox =list(coefficients = g, loglik = mcox$loglik),
# c_vecs = c_vecs,
# inner_control = control$inner_control)
#
# optim(par = log(c(distribution$theta, distribution$lambda)), fn = dynfrail_fit,
# dist = distribution$dist,
# pvfm = distribution$pvfm, Y = Y, Xmat = X,
# atrisk = atrisk, basehaz_line = basehaz_line,
# mcox =list(coefficients = g, loglik = mcox$loglik),
# c_vecs = c_vecs,
# inner_control = control$inner_control)
outer_m <- do.call(nlm,
args = c(list(f = dynfrail_fit, p = log(c(distribution$theta, distribution$lambda)),
dist = distribution$dist,
pvfm = distribution$pvfm, Y = Y, Xmat = X,
atrisk = atrisk, basehaz_line = basehaz_line,
mcox =list(coefficients = g, loglik = mcox$loglik),
c_vecs = c_vecs,
inner_control = control$inner_control), control$nlm_control))
#
# outer_m <- poia
inner_m <- dynfrail_fit(logfrailtypar = c(outer_m$estimate[1], outer_m$estimate[2]),
dist = distribution$dist,
pvfm = distribution$pvfm, Y = Y, Xmat = X, atrisk = atrisk, basehaz_line = basehaz_line,
mcox =list(coefficients = g, loglik = mcox$loglik),
c_vecs = c_vecs,
inner_control = control$inner_control,
return_loglik = FALSE)
res <- list(coefficients = inner_m$coef,
hazard = inner_m$haz,
imat = inner_m$Imat,
logtheta = outer_m$estimate[1],
loglambda = outer_m$estimate[2],
# var_logtheta = NA,
# ci_logtheta = NA,
# var_loglambda = NA,
# ci_loglambda = NA,
frail_id = data.frame(id = df_dynfrail$id_,
interval = df_dynfrail$interval_,
frail = inner_m$frail,
tstart = Y[,1],
tstop = Y[,2],
status = Y[,3]),
# residuals = NA,
tev = inner_m$tev,
loglik = c(mcox$loglik[length(mcox$loglik)], -outer_m$minimum),
formula = formula,
distribution = distribution,
control = control)
# nobs = NA,
# fitted = NA,
# mf = NA,
# mm = NA)
attr(res, "call") <- Call
attr(res, "class") <- "dynfrail"
res
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.