R/AFTfit.R

Defines functions survreg.aft

Documented in survreg.aft

##############################################
#' Fit a parametric AFT model to time-to-event data
#' @description Fit a parametric AFT model for right censored data using the maximum likelihood method. 
#'     It includes both standard (non-recurrent) survival data analysis and recurrent event data analysis.
#' @keywords AFT model, Recurrent event data analysis
#' @importFrom Rmpfr mpfr
#' @importFrom numDeriv hessian
#' @param Formula a formula of the form \code{response ~ model}. The \code{response} is 
#'        \code{c(st, status)} for standard survival analysis and \code{c(start, stop, status)} for recurrent event
#'        data analysis, where
#'        \itemize{ 
#'            \item \code{st}: survival time for right censored data,
#'            \item \code{status}: censoring indicator,
#'            \item \code{start} and \code{stop}: the start and end time of each time interval for recurrent event data. 
#'          }
#'        The linear predictor is speficified by the \code{model} argument of the Formula expression. 
#'        It consists of a series of terms (covariates) 
#'        separated by + operators. 
#'        It also allows \code{:} and \code{*} operators to define interaction terms. 
#'        \itemize{
#'           \item Example 1: \code{c(st, status) ~ z1 + z2 + z3}
#'           \item Example 2: \code{c(start, stop, status) ~ z1 + z2 * z3}
#'        }
#' @param init initial values for the parameters (optional). It is a matrix with each row has one set
#'       of initial values. If there are \eqn{p} 
#'       regression coefficients \eqn{\beta_1}, \eqn{\beta_2}, ...,  
#'       \eqn{\beta_p}, then each row has initial values with the following sequence:
#'       \eqn{\beta_1}, \eqn{\beta_2}, ..., 
#'       \eqn{\beta_p}, log(\eqn{\kappa}), log(\eqn{\gamma}), log(\eqn{\rho})
#'       for the exponentiated Weibull and generalized 
#'       gamma models, and \eqn{\beta_1}, \eqn{\beta_2}, ..., 
#'       \eqn{\beta_p}, log(\eqn{\kappa}), log(\eqn{\rho})
#'       for the Weibull, log-logistic and log-normal models, where \eqn{\kappa} and 
#'       \eqn{\gamma} are the
#'       shape parameters and \eqn{\rho} is the rate parameter (see below). That is, multiple sets of
#'       initial values can be given. For example, for the exponentiated Weibull model, 
#'       \code{init = rbind(c(rep(0, p), rep(0, 3)), c(rep(0, p), rep(0.5,} \code{3)))} gives two sets of 
#'       initial values.
#' @param Data a data frame in which to interpret the variables named in the Formula.
#' @param model assumed distribution for survival times. Available options are
#'       \code{"weibull"}, \code{"lnormal"}, \code{"llogistic"}, \code{"eweibull"} and \code{"ggamma"} for Weibull,
#'       log-normal, log-logistic, exponentiated Weibull and generalized gamma
#'       distributions, respectively. Default is \code{"weibull"}. The probability density functions of these distributions are:  
#'        \itemize{ 
#'           \item Weibull: \eqn{f(t)=\kappa\rho(\rho}\eqn{t)^{\kappa-1}} exp[\eqn{-(\rho}\eqn{t)^\kappa}] 
#'           \item Log-logistic: \eqn{f(t) = \kappa\rho(\rho}\eqn{t)^{\kappa-1}/[1+(\rho}\eqn{t)^\kappa]^2}
#'           \item Log-normal: \eqn{f(t) = \kappa}
#'                              exp\eqn{[-(\kappa}log\eqn{(\rho}\eqn{t))^2/2]/[t(2\pi)^{1/2}]}
#'            \item Generalized gamma: \eqn{f(t) = \kappa\rho(\rho}\eqn{t)^{\kappa\gamma-1}} 
#'                              exp\eqn{[-(\rho}\eqn{t)^\kappa]/\Gamma(\gamma)}
#'            \item Exponentiated Weibull: \eqn{f(t) =\kappa\gamma\rho(\rho}\eqn{t)^{\kappa-1}
#'                       (1-} exp\eqn{[-(\rho}\eqn{t)^\kappa])^{\gamma-1}} exp[\eqn{-(\rho}\eqn{t)^\kappa}] 
#'         }
#' @param iter.max maximum number of iterations for optimization (default is 150).
#' @details The AFT model is specified using the hazard function: 
#'       \eqn{h(t; z) = h_0[t} exp\eqn{(-z'\beta)]} exp\eqn{(-z'\beta)},
#'       where \eqn{h_0(.)} is the baseline hazard function of the assumed distribution, 
#'      \eqn{z} is the \eqn{p} x 1 vector of covariates, and \eqn{\beta} is the corresponding vector
#'       of regression coefficients (see Section 2.3 of Kalbfleisch and 
#'       Prentice, and Section 3.2 of Cook and Lawless). The hazard functions of the 
#'        distributions are:
#'        \itemize{ 
#'           \item Weibull: \eqn{h(t) = \kappa\rho(\rho}\eqn{t)^{\kappa-1}}
#'           \item Log-logistic: \eqn{h(t) = \kappa\rho(\rho}\eqn{t)^{\kappa-1}/[1+(\rho}\eqn{t)^\kappa]}
#'           \item Log-normal: \eqn{h(t) = f(t)/S(t)}, whete \eqn{S(t)} is the survivor function
#'           \item Generalized gamma: \eqn{h(t) = f(t)/S(t)}, whete \eqn{S(t)} is the survivor function
#'            \item Exponentiated Weibull: \eqn{h(t) =\kappa\gamma\rho(\rho}\eqn{t)^{\kappa-1}
#'                       (1-} exp\eqn{[-(\rho}\eqn{t)^\kappa])^{\gamma-1}} exp[\eqn{-(\rho}\eqn{t)^\kappa]/
#'                       [1-(1-} exp\eqn{[-(\rho}\eqn{t)^\kappa])^\gamma]}
#'          }
#'         For recurrent event analysis, each line of data for a given subject must include the start time and 
#'         stop time for each interval of follow-up.
#' @return \code{Model:} the survival model.
#' @return \code{Data summary:} number of observations, number of events, and number of predictors.
#' @return \code{Fit:} estimate, standard error, z, p value, and 95\% confidence interval.
#' @return \code{exp(est):} exp(regression coefficient), and 95\% confidence interval.
#' @return \code{exp(-est):} exp(- regression coefficient), and 95\% confidence interval.
#' @return \code{Fit criteria:} log-likelihood, deviance, and AIC.
#' @return \code{optimizer:} nlminb or optim.
#' @return \code{cov:} covariance matrix.
#' @return \code{st:} survival times (for recurrent event, an n x 2 matrix for start and stop times).
#' @return \code{design.mat:} design matrix.
#' @references Cook RJ and Lawless J, The statistical analysis of recurrent events, Springer, 2007.
#' @references Kalbfleisch JD and Prentice RL, The statistical analysis of failure time data, Wiley, 2002.
#' @author Shahedul Khan <khan@math.usask.ca>
#' @seealso \code{\link{LR.test}}, \code{\link{surv.resid}}, 
#'          \code{\link[survival]{survreg}}, \code{\link[survival]{coxph}}.
#' @examples 
#' # Example 1: Recurrent Event Analysis
#'   library(frailtypack)
#'   data(readmission)
#' # Recoding sex and chemo
#'   sex<-factor(2-as.numeric(readmission$sex),
#'        labels=c("Female","Male"))
#'   chemo<-factor(as.numeric(readmission$chemo)-1,
#'          labels=c("NonTreated","Treated"))
#'   Data<-readmission
#'   Data$sex<-sex
#'   Data$chemo<-chemo
#'   fit.gg<-survreg.aft(c(t.start,t.stop,event)~sex+chemo+charlson,
#'       Data=Data,model="ggamma")
#'   fit.gg
#'   fit.gg$cov
#'
#' # Example 2: Non-recurrent Data (Right Censored)
#'   library(survival)
#'   fit.ll<-survreg.aft(c(time,status)~karno+diagtime+age+prior+celltype+trt,
#'       Data=veteran,model="llogistic")
#'   fit.ll
#'   fit.ll$design.mat
#' @export

survreg.aft<-function(Formula,init=NULL,Data,model="weibull",iter.max=150){
allvar<-all.vars(Formula)
if(isFALSE(all(allvar %in% colnames(Data)))){
stop("variable names in 'Formula' did not match
     with the variable names in 'Data'")
}
if(isFALSE(model=="weibull" || model=="eweibull" || model=="ggamma" ||
   model=="llogistic" || model=="lnormal")){
stop("available options for the time-to-event model are
     'weibull', 'llogistic', 'lnormal', 
     'eweibull' and 'ggamma'")
}
ind<-length(all.vars(Formula[1:2]))
if(model=="eweibull"){
if(ind>2){
fit<-Rewaft.fit(Formula,init=init,data=Data,iter.max=iter.max)
} else{
fit<-ewaft.fit(Formula,init=init,data=Data,iter.max=iter.max)
}
}
if(model=="ggamma"){
if(ind>2){
fit<-Rggamma.fit(Formula,init=init,data=Data,iter.max=iter.max)
} else{
fit<-ggamma.fit(Formula,init=init,data=Data,iter.max=iter.max)
}
}
if(model=="weibull"){
if(ind>2){
fit<-Rwaft.fit(Formula,init=init,data=Data,iter.max=iter.max)
} else{
fit<-waft.fit(Formula,init=init,data=Data,iter.max=iter.max)
}
}
if(model=="llogistic"){
if(ind>2){
fit<-Rllaft.fit(Formula,init=init,data=Data,iter.max=iter.max)
} else{
fit<-llaft.fit(Formula,init=init,data=Data,iter.max=iter.max)
}
}
if(model=="lnormal"){
if(ind>2){
fit<-Rlnaft.fit(Formula,init=init,data=Data,iter.max=iter.max)
} else{
fit<-lnaft.fit(Formula,init=init,data=Data,iter.max=iter.max)
}
}
class(fit)<-"AFTjmr"
attr(fit, "hidden") <-c("optimizer","cov","st","status","design.mat","surv.model","code")
return(fit)
}
sa4khan/AFTjmr documentation built on March 12, 2020, 1:24 a.m.