Nothing
#' @title xhaz function
#'
#' @description Fits the excess hazard models proposed by Esteve et al. (1990) <doi:10.1002/sim.4780090506>,
#' with the possibility to account for time dependent covariates. Fits also the
#' non-proportional excess hazard model proposed by Giorgi et al. (2005) <doi:10.1002/sim.2400>.
#' In addition, fits excess hazard models with possibility to rescale
#' (Goungounga et al. (2019) <doi:10.1186/s12874-019-0747-3>) or to correct the background mortality with a
#' proportional (Touraine et al. (2020) <doi:10.1177/0962280218823234>) or non-proportional (Mba et al. (2020) <doi:10.1186/s12874-020-01139-z>)
#' effect.
#'
#' @param formula a formula object of the function with the response on the left
#' of a \code{~} operator and the terms on the right. The response must be a
#' survival object as returned by the \code{Surv} function (time in first and status
#' in second).
#'
#' @note `time` is OBLIGATORY in YEARS.
#'
#'
#' @param data a data frame in which to interpret the variables named in the
#' formula
#'
#' @param ratetable a rate table stratified by age, sex, year (if missing,
#' `ratedata` is used)
#'
#' @param rmap a list that maps data set names to the ratetable names.
#'
#' @param baseline an argument to specify the baseline hazard: if it follows a
#' piecewise constant, \code{baseline = "constant"} is used and corresponds to
#' the baseline in Esteve et al. model; if the baseline follows a quadratic b-splines,
#' \code{baseline = "bsplines"} is used, corresponding to the baseline excess
#' hazard in Giorgi et al model.
#'
#' @param pophaz indicates three possibles arguments in character: classic or
#' rescaled and corrected. If \code{pophaz = "classic"} chosen, fits the model
#' that do not require to rescale or to correct the background mortality (i.e. the Esteve
#' et al. model or Giorgi et al. model); if \code{pophaz = "rescaled"} or
#' \code{pophaz = "corrected"} chosen, fits the models that require to rescale
#' or to correct the background mortality.
#'
#' @param only_ehazard a boolean argument (by default, \code{only_ehazard=FALSE}).
#' If \code{only_ehazard = TRUE}, \code{pophaz = "classic"} must be provided and
#' the total value of the log-likelihood will not account for the cumulative population hazard.
#'
#' @param add.rmap character that indicates the name in character of the additional
#' demographic variable from `data` to be used for correction of the life table,
#' in particular when one is in the presence of an insufficiently stratified life
#' table (see Touraine et al. model). This argument is not used if
#' \code{pophaz = "classic"} or \code{pophaz = "rescaled"}.
#'
#'
#' @param add.rmap.cut a list containing arguments to specify the modeling
#' strategy for breakpoint positions, which allows a non-proportional effect of
#' the correction term acting on the background mortality. By default
#' \code{list(breakpoint = FALSE)}, i.e. a proportional effect of the correction
#' term acting on the background mortality is needed; in this case, all the other
#' argument of the list are not working for the model specification;
#'
#' if \code{list(breakpoint = TRUE, cut = c(70))}, the chosen cut-point(s) is (are)
#' the numeric value(s) proposed. If \code{list(breakpoint = TRUE, cut = NA)},
#' there is the same number of breakpoints as the number of NA, with their possible
#' positions specified as here by \code{probs},
#' i.e. \code{list(breakpoint = TRUE, cut = NA, probs = seq(0, 1, 0.25))}.
#' That corresponds to a numeric vector of probabilities with values between 0 and 1
#' as in \code{quantile} function.
#' \code{criterion} is used to choose the best model, using the AIC or the
#' BIC (the default criterion). If needed, all the fitted models are printed
#' by the user by adding in the list \code{print_stepwise = FALSE}.
#'
#'
#' @param interval a vector indicating either the location of the year-scale time
#' intervals for models with piecewise constant function, or the location of the
#' knots for models with B-splines functions for their baseline hazard (see the
#' appropriate specification in \code{baseline} argument). The first component of
#' the vector is 0, and the last one corresponds to the maximum time fellow-up of the study.
#'
#' @param ratedata a data frame of the hazards mortality in general population.
#'
#' @param subset an expression indicating which subset of the data should be used
#' in the modeling. All observations are included by default
#'
#' @param na.action as in the \code{coxph} function, a missing-data filter function.
#'
#'
#' @param init a list of initial values for the parameters to estimate. For each
#' elements of the list, give the name of the covariate followed by the vector
#' of the fixed initials values
#' @param control a list of control values used to control the optimization
#' process. In this list, `eps`, is a convergence criteria (by default, \code{eps=10^-4}),
#' `iter.max` is the maximum number of iteration (by default, \code{iter.max=15}),
#' and \code{level}, is the level used for the confidence intervals (by default, \code{level=0.95}).
#'
#'
#' @param optim a Boolean argument (by default, \code{optim = FALSE}).
#' If \code{optim = TRUE}), the maximization algorithm uses the \code{optim} function
#'
#' @param scale a numeric argument to specify whether the life table contains death rates
#' per day (default \code{scale = 365.2425}) or death rates per year (\code{scale = 1}).
#'
#' @param trace a Boolean argument, if \code{trace = TRUE}), tracing information
#' on the progress of the optimization is produced
#'
#'
#' @param speedy a Boolean argument, if \code{speedy = TRUE}, optimization is done in a
#' parallel mode
#'
#'
#' @param nghq number of nodes and weights for Gaussian quadrature
#'
#' @param method corresponds to \code{optim} function argument.
#'
#' @param ... other parameters used with the \code{xhaz} function
#'
#'
#' @details Use the \code{Surv(time_start, time_stop, status)} notation for time
#' dependent covariate with the appropriate organization of the data set (see
#' the help page of the \code{Surv} function)
#'
#' Only two interior knots are possible for the model with B-splines functions
#' to fit the baseline (excess) hazard. Determination of the intervals might be
#' user's defined or automatically computed according to the quantile of the
#' distribution of deaths. Use NA for an automatic determination (for example,
#' \code{interval = c(0, NA, NA, 5)}).
#'
#'
#' @keywords xhaz
#'
#'
#' @return An object of class \code{constant} or \code{bsplines},
#' according to the type of functions chosen to fit the baseline hazard of
#' model (see details for argument \code{baseline}). This object is a list containing
#' the following components:
#'
#'
#' \item{coefficients}{estimates found for the model}
#'
#' \item{varcov}{the variance-covariance matrix}
#'
#' \item{loglik}{for the Estève et al. model: the log-likelihood of the null
#' model, i.e without covariate, and the log-likelihood of the full model,
#' i.e with all the covariates declared in the formula; for the Giorgi et al.
#' model: the log-likelihood of the full model}
#'
#' \item{cov.test}{for the Esteve et al.model: the log-likelihood of the null
#' model, i.e without covariate, and the log-likelihood of the full model,
#' i.e with all the covariates declared in the formula; for the Giorgi et al.
#' model: the log-likelihood of the full model}
#'
#' \item{message}{a character string returned by the optimizer
#' see details in \code{optim} help page}
#'
#' \item{convergence}{an integer code as in \code{optim} when `"L-BFGS-B"` method
#' is used.}
#'
#' \item{n}{the number of individuals in the dataset}
#'
#' \item{n.events}{the number of events in the dataset. Event are considered
#' as death whatever the cause}
#'
#' \item{level}{the confidence level used}
#'
#' \item{interval}{the intervals used to split time for piecewise baseline excess
#' hazard, or knots positions for Bsplines baseline}
#'
#' \item{terms}{the representation of the terms in the model}
#'
#' \item{call}{the function `call` based on model}
#'
#' \item{pophaz}{the assumption considered for the life table used in the
#' excess hazard model}
#'
#' \item{add.rmap}{the additional variable for which the life table is not
#' stratified}
#'
#' \item{ehazardInt}{the cumulative hazard of each individuals calculated from
#' the ratetable used in the model}
#'
#' \item{ehazard}{the individual expected hazard values from the ratetable
#' used to fit the model}
#'
#' \item{data}{the dataset used to run the model}
#'
#' \item{time_elapsed}{the time to run the model}
#'
#'
#'
#'
#' @author Juste Goungounga, Darlin Robert Mba, Nathalie Graffeo, Roch Giorgi
#'
#' @references Goungounga JA, Touraine C, Graff\'eo N, Giorgi R; CENSUR working
#' survival group. Correcting for misclassification and selection effects in
#' estimating net survival in clinical trials. BMC Med Res Methodol. 2019 May
#' 16;19(1):104. doi: 10.1186/s12874-019-0747-3. PMID: 31096911; PMCID:
#' PMC6524224. (\href{https://pubmed.ncbi.nlm.nih.gov/31096911/}{PubMed})
#'
#' Touraine C, Graff\'eo N, Giorgi R; CENSUR working survival group. More
#' accurate cancer-related excess mortality through correcting background
#' mortality for extra variables. Stat Methods Med Res. 2020 Jan;29(1):122-136.
#' doi: 10.1177/0962280218823234. Epub 2019 Jan 23. PMID: 30674229.
#' (\href{https://pubmed.ncbi.nlm.nih.gov/30674229/}{PubMed})
#'
#' Mba RD, Goungounga JA, Graff\'eo N, Giorgi R; CENSUR working survival group.
#' Correcting inaccurate background mortality in excess hazard models through
#' breakpoints. BMC Med Res Methodol. 2020 Oct 29;20(1):268. doi:
#' 10.1186/s12874-020-01139-z. PMID: 33121436; PMCID: PMC7596976.
#' (\href{https://pubmed.ncbi.nlm.nih.gov/33121436/}{PubMed})
#'
#' Giorgi R, Abrahamowicz M, Quantin C, Bolard P, Esteve J, Gouvernet J, Faivre
#' J. A relative survival regression model using B-spline functions to model
#' non-proportional hazards. Statistics in Medicine 2003; 22: 2767-84.
#' (\href{https://pubmed.ncbi.nlm.nih.gov/12939785/}{PubMed})
#'
#'
#' @examples
#'\donttest{
#' library("numDeriv")
#' library("survexp.fr")
#' library("splines")
#' library("statmod")
#' data("simuData","rescaledData", "dataCancer")
#' # load the data sets 'simuData', 'rescaledData' and 'dataCancer'.
#'
#'# Esteve et al. model: baseline excess hazard is a piecewise function
#'# linear and proportional effects for the covariates on
#'# baseline excess hazard.
#'
#'
#' fit.estv1 <- xhaz(formula = Surv(time_year, status) ~ agec + race,
#' data = simuData,
#' ratetable = survexp.us,
#' interval = c(0, NA, NA, NA, NA, NA, 6),
#' rmap = list(age = 'age', sex = 'sex', year = 'date'),
#' baseline = "constant", pophaz = "classic")
#'
#'
#' fit.estv1
#'
#'
#' # Touraine et al. model: baseline excess hazard is a piecewise function
#' # with a linear and proportional effects for the
#' # covariates on the baseline excess hazard.
#' # An additionnal cavariate (here race) missing in the life table is
#' # considered by the model.
#'
#'
#' fit.corrected1 <- xhaz(formula = Surv(time_year, status) ~ agec + race,
#' data = simuData,
#' ratetable = survexp.us,
#' interval = c(0, NA, NA, NA, NA, NA, 6),
#' rmap = list(age = 'age', sex = 'sex', year = 'date'),
#' baseline = "constant", pophaz = "corrected",
#' add.rmap = "race")
#'
#'
#'
#' fit.corrected1
#'
#' # extension of Touraine et al model: baseline excess hazard is a piecewise
#' # constant function with a linear and proportional effects for the covariates
#' # on the baseline excess hazard.
#'
#' # An additionnal cavariate (here race) missing in the life table is
#' # considered by the model with a breakpoint at 75 years
#'
#' fit.corrected2 <- xhaz(formula = Surv(time_year, status) ~ agec + race,
#' data = simuData,
#' ratetable = survexp.us,
#' interval = c(0, NA, NA, NA, NA, NA, 6),
#' rmap = list(age = 'age', sex = 'sex', year = 'date'),
#' baseline = "constant", pophaz = "corrected",
#' add.rmap = "race",
#' add.rmap.cut = list(breakpoint = TRUE, cut = 75))
#'
#'
#'
#' fit.corrected2
#'
#'
#' #Giorgi et al model: baseline excess hazard is a quadratic Bsplines
#' # function with two interior knots and allow here a
#' # linear and proportional effects for the covariates on
#' # baseline excess hazard.
#'
#'
#' fitphBS <- xhaz(formula = Surv(time_year, status) ~ agec + race,
#' data = simuData,
#' ratetable = survexp.us,
#' interval = c(0, NA, NA, 6),
#' rmap = list(age = 'age', sex = 'sex', year = 'date'),
#' baseline = "bsplines", pophaz = "classic")
#'
#' fitphBS
#'
#'
#'
#'
#'
#' # Application on `dataCancer`.
#' #Giorgi et al model: baseline excess hazard is a quadratic Bspline
#' # function with two interior knots and allow here a
#' # linear and proportional effect for the variable
#' # "immuno_trt" plus a non-proportional effect
#' # for the variable "ageCentre" on baseline excess hazard.
#'
#'
#' fittdphBS <- xhaz(formula = Surv(obs_time_year, event) ~ qbs(ageCentre) + immuno_trt,
#' data = dataCancer,
#' ratetable = survexp.fr,
#' interval = c(0, 0.5, 12, 15),
#' rmap = list(age = 'age', sex = 'sexx', year = 'year_date'),
#' baseline = "bsplines", pophaz = "classic")
#'
#' fittdphBS
#'
#'
#'
#'
#' # Application on `rescaledData`.
#' # rescaled model: baseline excess hazard is a piecewise function with a
#' # linear and proportional effects for the covariates on baseline excess hazard.
#'
#' # A scale parameter on the expected mortality of general population is
#' # considered to account for the non-comparability source of bias.
#'
#' rescaledData$timeyear <- rescaledData$time/12
#' rescaledData$agecr <- scale(rescaledData$age, TRUE, TRUE)
#'
#' fit.res <- xhaz(formula = Surv(timeyear, status) ~ agecr + hormTh,
#' data = rescaledData,
#' ratetable = survexp.fr,
#' interval = c(0, NA, NA, NA, NA, NA, max(rescaledData$timeyear)),
#' rmap = list(age = 'age', sex = 'sex', year = 'date'),
#' baseline = "constant", pophaz = "rescaled")
#'
#' fit.res
#' }
#'
#' @import survival
#' @import stats
#' @import parallel
#' @import optimParallel
#' @import statmod
#' @import splines
#' @import survexp.fr
#'
#' @export
xhaz <- function(formula = formula(data),
data = sys.parent(),
ratetable,
rmap = list(age = NULL, sex = NULL, year = NULL),
baseline = c("constant", "bsplines"),
pophaz = c("classic", "rescaled", "corrected"),
only_ehazard = FALSE,
add.rmap = NULL,
add.rmap.cut = list(
breakpoint = FALSE,
cut = NA,
probs = NULL,
criterion = "BIC",
print_stepwise = FALSE
),
interval,
ratedata = sys.parent(),
subset,
na.action,
init,
control = list(eps = 1e-4,
iter.max = 800,
level = 0.95),
optim = TRUE,
scale = 365.2425,
trace = 0,
speedy = FALSE,
nghq = 12,
method = "L-BFGS-B",
...) {
m_int <- match.call(expand.dots = FALSE)
if ((
add.rmap.cut$breakpoint == TRUE &
!is.na(add.rmap.cut$cut[1]) & is.null(add.rmap.cut$probs)
)) {
if (!missing(rmap)) {
rcall <- substitute(rmap)
if (!is.call(rcall) || rcall[[1]] != as.name("list"))
stop("Invalid rcall argument")
}
else
rcall <- NULL
breakpoint_with_cut(
formula = formula,
data = data,
ratetable = ratetable,
rmap = rmap,
baseline = baseline,
pophaz = pophaz,
only_ehazard = only_ehazard,
add.rmap = add.rmap,
add.rmap.cut = add.rmap.cut,
interval = interval,
ratedata ,
subset ,
na.action ,
init ,
control ,
optim ,
scale ,
trace ,
speedy ,
nghq,
m_int,
rcall,
method,
...
)
} else if ((
add.rmap.cut$breakpoint == TRUE &
is.na(add.rmap.cut$cut[1]) & !is.null(add.rmap.cut$probs)
)) {
if (!missing(rmap)) {
rcall <- substitute(rmap)
if (!is.call(rcall) || rcall[[1]] != as.name("list"))
stop("Invalid rcall argument")
}
else
rcall <- NULL
xhaz2(
formula = formula,
data = data,
ratetable = ratetable,
rmap = rmap,
baseline = baseline,
pophaz = pophaz,
only_ehazard = only_ehazard,
add.rmap = add.rmap,
add.rmap.cut = add.rmap.cut,
splitting = TRUE,
interval = interval,
ratedata = ratedata,
subset = subset,
na.action = na.action,
init = init,
control = control,
optim = optim,
scale = scale,
trace = trace,
speedy = speedy,
nghq = nghq,
m_int = m_int,
rcall = rcall,
method = method,
...
)
} else if (add.rmap.cut$breakpoint == FALSE) {
if (!missing(rmap)) {
rcall <- substitute(rmap)
if (!is.call(rcall) || rcall[[1]] != as.name("list"))
stop("Invalid rcall argument")
}
else
rcall <- NULL
without_breakpoint_without_cut(
formula = formula,
data = data,
ratetable = ratetable,
rmap = rmap,
baseline = baseline,
pophaz = pophaz,
only_ehazard = only_ehazard,
add.rmap = add.rmap,
add.rmap.cut = add.rmap.cut,
interval = interval,
splitting = FALSE,
ratedata = ratedata,
subset = subset,
na.action = na.action,
init = init,
control = control,
optim = optim,
scale = scale,
trace = trace,
speedy = speedy,
nghq = nghq,
m_int = m_int,
rcall = rcall,
method = method,
...
)
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.