## Nothing   <- stray token commented out: a bare symbol at top level would error at source time
## data structure:
## id time event + columns with covariate values
## NOTE: time must take positive integer values (1 being the unit of measurement for time)
#' Fit of a tvcure model.
#' @description Fit of a double additive cure survival model with exogenous time-varying covariates.
#' @usage tvcure(formula1, formula2, data,
#' model0=NULL, noestimation=FALSE,
#' baseline=c("S0","F0"), K0=20, pen.order0=2,
#' K1=10, pen.order1=2, K2=10, pen.order2=2,
#' phi.0=NULL, beta.0=NULL, gamma.0=NULL,
#' a.tau=1, b.tau=1e-6, a.pen=1, b.pen=1e-4,
#' tau.0=NULL, tau.min=1, tau.method = c("LPS","LPS2","Schall","grid","none"),
#' psi.method = c("LM","NR","none"),
#' lambda1.0=NULL, lambda1.min=1, lambda2.0=NULL, lambda2.min=1,
#' lambda.method=c("LPS","LPS2","LPS3","nlminb","none"),
#' logscale=FALSE,
#' observed.hessian=TRUE, use.Rfast=TRUE, Wood.test=FALSE,
#' ci.level=.95,
#' criterion=c("logEvid","deviance","lpen","AIC","BIC","gradient"),
#' criterion.tol=1e-2, grad.tol=1e-2,
#' RDM.tol=1e-4, fun.tol=1e-3, Lnorm=c("Linf","L2"),
#' iterlim=50, iter.verbose=FALSE, verbose=FALSE)
#' @param formula1 A formula describing the linear predictor in the long-term (cure) survival (or quantum) submodel.
#' @param formula2 A formula describing the linear predictor in the short-term (cure) survival (or timing) submodel.
#' @param data A data frame with survival data in a counting process format. It should always contain at least the following entries:
#' \itemize{
#' \item \code{id} : the <id> of the unit associated to the data in a given line in the data frame.
#' \item \code{time} : the integer time at which the observations are reported. For a given unit, it should be a sequence of CONSECUTIVE integers starting at 1 for the first observation.
#' \item \code{event} : a sequence of 0-1 event indicators. For the lines corresponding to a given unit, it starts with 0 values concluded by a 0 in case of right-censoring or by a 1 if the event is observed at the end of the follow-up.
#' }
#' @param model0 (Optional) tvcure object from which starting values for the regression parameters, spline and penalty parameters are extracted. Make sure that it corresponds to the same model specification. The values of its components are overridden by \code{phi.0}, \code{beta.0}, \code{gamma.0}, \code{tau.0}, \code{lambda1.0}, \code{lambda2.0} when they are not NULL. That possibility can be found useful to accelerate the fit of the same model on other data or for specific changes in model parameters (such as penalty parameters). (Default: NULL).
#' @param noestimation Logical specifying that regression, spline and penalty parameters should not be estimated, but rather be fixed at their initial values (as for example provided by \code{model0} or other entries). (Default: FALSE).
#' @param baseline Baseline ("S0" or "F0") used to specify the dependence of the cumulative hazard dynamics on covariates (Default: "S0"):
#' Baseline S0: \eqn{S(t|x) = S_0(t)^{\exp(\gamma'x)}} ; Baseline F0: \eqn{F(t|x) = F_0(t)^{\exp(\gamma'x)}}
#' @param K0 Number of B-splines used to specify \eqn{\log f_0(t)} (Default: 20).
#' @param pen.order0 Penalty order for the P-splines used to specify \eqn{\log f_0(t)} (Default: 2).
#' @param K1 Number of P-splines for a given additive term in the long-term (or quantum) survival submodel (Default: 10).
#' @param pen.order1 Penalty order for the P-splines in the long-term survival (or quantum) submodel (Default: 2).
#' @param K2 Number of P-splines for a given additive term in the short-term (or timing) survival submodel (Default: 10).
#' @param pen.order2 Penalty order for the P-splines in the short-term survival (or timing) submodel (Default: 2).
#' @param phi.0 (Optional) vector of length \code{K0} with starting values for the P-spline parameters in \eqn{\log f_0(t)}.
#' @param beta.0 (Optional) starting value for the regression and spline parameters in the long-term survival (or quantum) submodel.
#' @param gamma.0 (Optional) starting value for the regression and spline parameters in the short-term survival (or timing) submodel.
#' @param a.tau Hyperprior parameter in the Gamma(a.tau,b.tau) prior for the penalty parameter \eqn{\tau} tuning the smoothness of \eqn{\log f_0(t)} (Default: 1.0).
#' @param b.tau Hyperprior parameter in the Gamma(a.tau,b.tau) prior for the penalty parameter \eqn{\tau} tuning the smoothness of \eqn{\log f_0(t)} (Default: 1e-6).
#' @param a.pen Hyperprior parameter in the Gamma(a.pen,b.pen) priors for the penalty parameters \eqn{\lambda_1} and \eqn{\lambda_2} tuning the smoothness of the additive terms in the long-term (quantum) and short-term (timing) survival submodels. (Default: 1.0).
#' @param b.pen Hyperprior parameter in the Gamma(a.pen,b.pen) priors for the penalty parameters \eqn{\lambda_1} and \eqn{\lambda_2} tuning the smoothness of the additive terms in the long-term (quantum) and short-term (timing) survival submodels. (Default: 1e-4).
#' @param tau.0 Starting value for \eqn{\tau}.
#' @param tau.min Minimal value for the penalty parameter \eqn{\tau}. (Default: 1.0).
#' @param tau.method Method used to calculate the posterior mode of \eqn{p(\tau|{\cal D})}: "LPS", "LPS2", "Schall" (Fellner-Schall algorithm), "grid" (best choice in a regular grid on the log-scale) or "none" (stick to the initial value tau.0). LPS and LPS2, based on Laplace P-splines, both maximize the marginal posterior of the penalty parameter \eqn{\tau} using a fixed-point method, with LPS relying on the prior calculation of eigenvalues. (Default: "LPS").
#' @param psi.method Algorithm used for the computation of the conditional posterior mode of the regression and splines parameters. Possible choices are Levenberg-Marquardt ("LM"), Newton-Raphson ("NR") or "none" (when the coefficients remain fixed at their initial values).
#' @param lambda1.0 (Optional) J1-vector with starting values for the penalty parameters of the additive terms in the long-term survival (or quantum) submodel.
#' @param lambda1.min Minimal value for the J1 penalty parameters \eqn{\lambda_1} of the additive terms in the long-term survival (or quantum) submodel. (Default: 1.0).
#' @param lambda2.0 (Optional) J2-vector with starting values for the penalty parameters of the additive terms in the short-term survival (or timing) submodel.
#' @param lambda2.min Minimal value for the J2 penalty parameters \eqn{\lambda_2} of the additive terms in the short-term survival (or timing) submodel. (Default: 1.0).
#' @param lambda.method Method used ("LPS", "LPS2", "LPS3", "nlminb" or "none") to select the penalty parameters of the additive terms in the long-term survival (or quantum) submodel:
#' \itemize{
#' \item \code{LPS}, \code{LPS2}, or \code{LPS3} : based on Laplace P-splines where the marginal posterior of the penalty parameters is maximized using a fixed-point method. LPS is based on the prior calculation of eigenvalues (unlike LPS2) and delivers results of comparable quality to those of nlminb, but much more quickly. LPS3 works sequentially and separately on long- and short-term parameters with potential convergence issues ;
#' \item \code{Schall} : Fellner-Schall method ;
#' \item \code{nlminb} : nonlinear maximization of the marginal posterior of the penalty parameters using the R function <nlminb> ;
#' \item \code{none} : penalty parameters are set at their initial values. }
#'@param logscale Logical: when TRUE, select \eqn{\lambda_1} or \eqn{\lambda_2} by maximizing \eqn{p(\log(\lambda_k)|D)}, maximize \eqn{p(\lambda_k|D)} otherwise. (Default= FALSE).
#' @param observed.hessian Logical indicating if a fast approximation of the Hessian matrix based on cross-products is preferred over its expected value. (Default: TRUE).
#' @param use.Rfast Logical indicating if matrix functions from the Rfast package should be used to fasten computation. (Default: TRUE).
#' @param Wood.test Logical indicating if P-values based on Wood's test (Biometrika 2013) of the significance of additive terms should be preferred over basic Chi-square tests. (Default: FALSE).
#' @param ci.level Default value for the levels of the credible intervals. (Default: 0.95).
#' @param criterion Criterion used to assess convergence of the estimation procedure (Default: "logEvid"):
#' \itemize{
#' \item \code{logEvid} : log of the evidence (also named \emph{marginal likelihood} or \emph{model likelihood}), i.e. the log of the marginal posterior of the penalty parameters at their selected values ;
#' \item \code{deviance} : deviance or -2 log(Likelihood) ;
#' \item \code{AIC} : Akaike information criterion ;
#' \item \code{BIC} : Bayesian (or Schwarz) information criterion ;
#' \item \code{gradient} : Lp-norm of the gradient of the log of the joint posterior w.r.t. the regression and spline parameters.}
#'
#' @param criterion.tol Maximum absolute difference between the successive values of the \code{criterion} values (when different from "gradient") to declare convergence. (Default: 1e-2).
#' @param grad.tol Tolerance threshold for the absolute value of each gradient component when monitoring convergence. (default: 1e-2).
#' @param RDM.tol Tolerance threshold for the Relative Damping Measure (= RDM) when monitoring convergence (default: 1e-4).
#' @param fun.tol Tolerance threshold for variations in the maximized function during the final iterations of posterior mode computation and convergence monitoring (default: 1e-3).
#' @param Lnorm Lp norm used to evaluate the gradient for convergence assessment. Options are "Linf" (default) or "L2".
#' @param iterlim Maximum number of iterations. (Default: 50).
#' @param iter.verbose Logical indicating if the values of the convergence criteria should be printed after each iteration. (Default: FALSE).
#' @param verbose Logical indicating if additional output based on gradients should be printed at the end of each iteration. (Default: FALSE).
#'
#' @return An object of type \code{\link{tvcure.object}}.
#'
#' @author Philippe Lambert \email{p.lambert@uliege.be}
#' @references Lambert, P. and Kreyenfeld, M. (2025).
#' Time-varying exogenous covariates with frequently changing values in double additive cure survival model: an application to fertility.
#' \emph{Journal of the Royal Statistical Society, Series A}. <doi:10.1093/jrsssa/qnaf035>
#'
#' @examples
#' \donttest{
#' require(tvcure)
#' ## Simulated data generation
#' beta = c(beta0=.4, beta1=-.2, beta2=.15) ; gam = c(gam1=.2, gam2=.2)
#' data = simulateTVcureData(n=500, seed=123, beta=beta, gam=gam,
#' RC.dist="exponential",mu.cens=550)$rawdata
#' ## TVcure model fitting
#' tau.0 = 2.7 ; lambda1.0 = c(40,15) ; lambda2.0 = c(25,70) ## Optional
#' model = tvcure(~z1+z2+s(x1)+s(x2), ~z3+z4+s(x3)+s(x4), data=data,
#' tau.0=tau.0, lambda1.0=lambda1.0, lambda2.0=lambda2.0)
#' print(model)
#' plot(model, pages=1)
#' }
#'
#' @export
#'
tvcure = function(formula1, formula2, data,
model0=NULL, noestimation=FALSE,
baseline=c("S0","F0"),
K0=20, pen.order0=2,
K1=10, pen.order1=2,
K2=10, pen.order2=2,
phi.0=NULL, beta.0=NULL, gamma.0=NULL,
a.tau=1, b.tau=1e-6, ## Prior on penalty parameter <tau> for log f0(t): Gamma(a.tau,b.tau)
a.pen=1, b.pen=1e-4, ## Prior on penalty parameters for the additive terms: Gamma(a.pen,b.pen)
tau.0=NULL, tau.min=1,
tau.method = c("LPS","LPS2","Schall","grid","none"),
psi.method = c("LM","NR","none"), ## Estimation method for the regression and spline parameters
lambda1.0=NULL, lambda1.min=1, lambda2.0=NULL, lambda2.min=1,
lambda.method=c("LPS","LPS2","LPS3","nlminb","none"), ## Penalty selection method for additive terms
logscale=FALSE, ## Maximize p(log(lambda)|D) when TRUE, p(lambda|D) otherwise
observed.hessian=TRUE, ## TRUE: X[event==1,]'X[event==1,] ; FALSE: X'diag(mu.ij)X
use.Rfast=TRUE,
Wood.test=FALSE,
ci.level=.95,
criterion=c("logEvid","deviance","lpen","AIC","BIC","gradient"),
criterion.tol=1e-2,
grad.tol=1e-2, RDM.tol=1e-4, fun.tol=1e-3, Lnorm=c("Linf","L2"),
iterlim=50,iter.verbose=FALSE,verbose=FALSE){
##
cl <- match.call()
aa = a.pen ; bb = b.pen
baseline = match.arg(baseline) ## "S0": S(t|x) = S0(t)^exp(gamma'x) ; "F0": F(t|x) = F0(t)^exp(gamma'x)
if (!is.null(model0)) baseline = model0$baseline
criterion = match.arg(criterion) ## Criterion to assess convergence of the global algorithm
tau.method = match.arg(tau.method)
lambda.method = match.arg(lambda.method)
psi.method = match.arg(psi.method)
if (missing(formula1)) {warning("Missing model formula <formula1> for long-term survival !") ; return(NULL)}
if (missing(formula2)) {warning("Missing model formula <formula2> for short-term survival !") ; return(NULL)}
if (verbose) iter.verbose = verbose
## Lp-norm function
## ----------------
Lnorm = match.arg(Lnorm)
if (Lnorm == "Linf"){
Lp <- function(x) max(abs(x))
} else if (Lnorm == "L2"){
Lp <- function(x) sqrt(sum(x^2))
}
## Make sure that the 's' function for additive terms in formulas is tvcure::s
## ---------------------------------------------------------------------------
s <- tvcure::s
environment(formula1) <- environment(formula2) <- environment()
## Check that <id>, <time> and <event> entries in <data>
## ---------------------------------------------------
if (missing(data)) {warning("Missing data frame <data> with <id>, <time>, event indicator <event>, and covariate values !") ; return(NULL)}
if (!"id" %in% colnames(data)) stop("Missing <id> column in the data frame <data> !!")
if (!"time" %in% colnames(data)) stop("Missing <time> column in the data frame <data> !!")
if (!"event" %in% colnames(data)) stop("Missing <event> column in the data frame <data> !!")
## Some check on data$time values
## ----------------------------
## Check 0: for a given <id>, <event> can only contain 0's finished by 0 or 1
if (!all(sort(unique(data$event)) %in% c(0,1))) stop("<event> can only contain 0 and 1 values !!")
fun0 = function(x) { ## Valid event history: final value in {0,1}, earlier values summing to 0
    n = length(x)
    (x[n] %in% c(0,1)) && (sum(x[-n]) == 0)
}
check0 = all(unlist(by(data$event, data$id, fun0)))
if (!check0) stop("<event> can only contain 0's (with 0 or 1 at the end) for a given <id> !!")
## tapply2: same as <tapply>, but keep original order of appearance of 'group' values
tapply2 = function(x, group, FUN) { ## tapply() with groups kept in order of first appearance
    ordered.groups = factor(group, levels = unique(group))
    tapply(x, ordered.groups, FUN)
}
##
## Check 1: <time> starts at 1 for a given <id>
fun1 = function(x) { ## Does the <time> sequence start at 1 ?
    x[1] == 1
}
check1 = all(as.vector(by(data$time, data$id, fun1))) ## Should be true
if (!check1) stop("<time> should start at 1 for a given <id> !!")
## Check 2: Make sure that <time> takes integer values !!
temp = as.integer(abs(data$time))
check2 = all(temp == data$time)
if (!check2) stop("<time> must take positive integer values !!")
data$time = temp
T = max(data$time) ## Maximum follow-up time (that should be RC)
## Check 3: <time> should be a sequence of consecutive integers for a given <id>
fun3 = function(x) { ## Are the values consecutive integers ?
    steps = diff(x)
    all(steps == 1)
}
check3 = tapply2(data$time, data$id, fun3) ## Should be true for a given <id>
## check3 = c(by(data$time, data$id, fun3)) ## Should be true for a given <id>
id.discarded = names(check3)[!check3]
n.id = length(check3)
if (sum(check3) < .1*n.id) stop("<time> should be a sequence of consecutive integers for a given <id> !!")
if (!all(check3)){
word = ifelse(sum(!check3)>1, " units", " unit")
message(sum(!check3),word," (out of ",length(unique(data$id)),") with non-consecutive integer values for <time> detected\n",sep="")
fun4 = function(x) { ## TRUE up to (and including) the last value in the initial consecutive run
    consec = c(TRUE, diff(x) == 1)
    as.logical(cumprod(consec))
}
## Rows with consecutive <time> values within a given <id>
rows.ok = as.vector(unlist(tapply2(data$time, data$id, fun4)))
## NOTE: the 'by' function does not keep the original order of appearance of data$id values !! --> correction using 'tapply2' on 2024.11.30
## rows.ok = as.vector(unlist(by(data$time, data$id, fun4))) ## Rows with consecutive <time> values within a given <id>
##
temp = tapply2(data$id[!rows.ok],data$id[!rows.ok],function(x) length(x))
## temp = c(by(data$id[!rows.ok],data$id[!rows.ok],function(x) length(x)))
temp = c(temp,sum(temp)) ; names(temp)[length(temp)] = "Total"
message("Number of discarded time entries (out of ",nrow(data),") per problematic unit <id>:\n",sep="")
print(temp)
data = data[rows.ok,] ## Only keep rows with consecutive <time> values for a given <id>
}
## Number of units and their id's
## ------------------------------
id = unique(data$id) ## Unit ids
n = length(id) ## Number of units
## Preliminary NP estimation of S(t)
## ---------------------------------
tab = with(data, table(time,event))
n.risk = rowSums(tab) ; n.event = tab[,2]
h0.hat = n.event / n.risk ; H0.hat = cumsum(h0.hat) ; lS0.hat = -H0.hat
## Significance level
## ------------------
alpha = 1-ci.level
z.alpha = qnorm(1-.5*alpha)
## Initial values
## --------------
Hes.xi1 = Hes.xi2 = Hes.lam1 = Hes.lam2 = NULL
se.lambda1 = se.loglambda1 = se.lambda2 = se.loglambda2 = NULL
## When provided, use estimates in <model0> as initial values in the absence of specific initial values
if (!is.null(model0)){
K1 = model0$regr1$K ; K2 = model0$regr2$K
if (is.null(phi.0)) phi.0 = c(model0$fit$phi[,1])
if (is.null(beta.0)) beta.0 = c(model0$fit$beta[,1])
if (is.null(gamma.0)) gamma.0 = c(model0$fit$gamma[,1])
if (is.null(tau.0)) tau.0 = c(model0$fit$tau)
if (is.null(lambda1.0)) lambda1.0 = model0$fit$lambda1
if (is.null(lambda2.0)) lambda2.0 = model0$fit$lambda2
if (lambda.method!="none"){
se.lambda1 = model0$fit$se.lambda1
se.loglambda1 = model0$fit$se.loglambda1
se.lambda2 = model0$fit$se.lambda2
se.loglambda2 = model0$fit$se.loglambda2
Hes.xi1 = model0$fit$Hes.xi1
Hes.xi2 = model0$fit$Hes.xi2
Hes.lam1 = model0$fit$Hes.lam1
Hes.lam2 = model0$fit$Hes.lam2
}
}
## Length of spline vectors
## -------------------------
if (!is.null(phi.0)) K0 = length(phi.0)
## if (!is.null(beta.0)) K1 = length(beta.0)
## if (!is.null(gamma.0)) K2 = length(gamma.0)
##
## Regression models for long-term (formula1) & short-term (formula2) survival
## ---------------------------------------------------------------------------
environment(formula1) <- asNamespace("tvcure") ## To ensure DALSM::s function used
environment(formula2) <- asNamespace("tvcure") ## To ensure DALSM::s function used
regr1 = DesignFormula(formula1, data=data, K=K1, pen.order=pen.order1) ##, n=n)
regr2 = DesignFormula(formula2, data=data, K=K2, pen.order=pen.order2, nointercept=TRUE) ##, n=n)
q1 = ncol(regr1$Xcal) ## Total number of regression and spline parameters in long-term survival
q2 = ncol(regr2$Xcal) ## Total number of regression and spline parameters in short-term survival
if (is.null(q2)){
nogamma = TRUE ## Check whether covariates are absent in <formula2> --> gamma=0
} else {
nogamma = FALSE
}
## Extract key stuff from regr1 & regr2
nobs = nrow(regr1$Xcal) ## Number of data entries
J1 = regr1$J ; J2 = regr2$J ## Number of additive terms
K1 = regr1$K ; K2 = regr2$K ## Number of B-splines per additive term
nfixed1 = regr1$nfixed ; nfixed2 = regr2$nfixed ## Number of 'non-spline' regression parameters
lambda1.lab = regr1$lambda.lab ; lambda2.lab = regr2$lambda.lab ## Penalty labels
Dd1.x = regr1$Dd.x ; Dd2.x = regr2$Dd.x ## Basic penalty matrix for an additive term
Pd1.x = regr1$Pd.x ; Pd2.x = regr2$Pd.x ## Basic penalty matrix for an additive term
Z1 = regr1$Z ; Z2 = regr2$Z ## Design matrices associated to 'non-spline' regression parameters
regr1.lab = colnames(regr1$Xcal) ; regr2.lab = colnames(regr2$Xcal) ## Labels of the regression parms
addregr1.lab = regr1$additive.lab ; addregr2.lab = regr2$additive.lab ## Labels of the additive terms
##
## Log-determinant of a positive definite matrix based on determinant(.)
## Note: faster than methods based on functions cholevski, svd or qr
## ---------------------------------------------------------------------
ldet.fun = function(x) as.vector(determinant(x, logarithm = TRUE)$modulus) ## log|x| for positive definite x
## ldet.fun = function(x) 2*sum(log(diag(chol(x))))
## ldet.fun = function(x) sum(log(svd(A,nu=0,nv=0)$d))
## ldet.fun = function(x) sum(log(abs(diag(qr(x)$qr))))
##
## Fast calculation of log |B'WB + lambda*Pd| using pre-computed eigenvalues:
## log |B'WB + lambda*Pd| = ldet0 + sum(log(lambda1)) + sum_j(log(tau+dj))
## --------------------------------------------------------------------------
## Starting from B'WB and Pd
ev.fun = function(BwB, Pd){ ## BwB = t(B) %*% diag(w) %*% B
    nc = ncol(Pd) ; rk = qr(Pd)$rank
    pen.idx = 1:rk                ## Directions with non-zero penalty eigenvalues
    null.idx = (1:nc)[-pen.idx]   ## Penalty null-space directions
    sv = svd(Pd)
    ev.pen = sv$d[pen.idx]        ## Non-zero eigenvalues of Pd
    rot = t(sv$u) %*% (BwB %*% sv$u) ## BwB expressed in the eigenbasis of Pd
    ## Schur complement wrt the null-space block of <rot>
    M = rot[pen.idx,pen.idx] -
        rot[pen.idx,null.idx,drop=FALSE] %*% solve(rot[null.idx,null.idx,drop=FALSE]) %*% rot[null.idx,pen.idx,drop=FALSE]
    MM = sqrt(1/ev.pen) * t(sqrt(1/ev.pen)*t(M)) ## Two-sided rescaling by 1/sqrt(ev.pen)
    ## log |B'WB + lambda*Pd| = ldet0 + sum(log(lambda1)) + sum_j(log(lambda+dj))
    return(list(ldet0 = ldet.fun(rot[null.idx,null.idx,drop=FALSE]),
                lambda1 = ev.pen, dj = svd(MM,nu=0,nv=0)$d))
}
## Starting from B, W and Pd
ev.fun2 = function(B, w=NULL, Pd){ ## Same decomposition as ev.fun, but starting from B, w and Pd
    if (is.null(w)) w = rep(1, nrow(B)) ## Unit weights by default
    nc = ncol(Pd) ; rk = qr(Pd)$rank
    pen.idx = 1:rk ; null.idx = (1:nc)[-pen.idx]
    sv = svd(Pd)
    ev.pen = sv$d[pen.idx]          ## Non-zero eigenvalues of Pd
    Brot = (sqrt(w) * B) %*% sv$u   ## Weighted basis rotated to the eigenbasis of Pd
    B.pen = Brot[,pen.idx] ; B.null = Brot[,-pen.idx,drop=FALSE]
    cross.np = t(B.null) %*% B.pen
    gram.null = t(B.null) %*% B.null
    ## Schur complement wrt the penalty null-space block
    M = t(B.pen) %*% B.pen - t(cross.np) %*% solve(gram.null) %*% cross.np
    MM = sqrt(1/ev.pen) * t(sqrt(1/ev.pen)*t(M)) ## Two-sided rescaling by 1/sqrt(ev.pen)
    ## log |B'WB + lambda*Pd| = ldet0 + sum(log(lambda1)) + sum_j(log(lambda+dj))
    return(list(ldet0 = ldet.fun(gram.null), lambda1 = ev.pen, dj = svd(MM,nu=0,nv=0)$d))
}
## Eigenvalues associated to additive terms
id1 = as.logical(data$event) ##which(event==1)
ev1.lst = ev2.lst = list()
Hes.beta0 = NULL
## ... in long-term survival submodel
## Non-penalized Hessian for <beta>
if (use.Rfast){
Hes.beta0 = -Rfast::Crossprod(regr1$Xcal[id1,,drop=FALSE], regr1$Xcal[id1,,drop=FALSE])
} else {
Hes.beta0 = -crossprod(regr1$Xcal[id1,,drop=FALSE], regr1$Xcal[id1,,drop=FALSE])
}
if (J1 > 0){
rk1 = qr(regr1$Pd.x)$rank
for (j in 1:J1){ ## Loop over additive terms in the long-term survival sub-model
idx = nfixed1 + (j-1)*K1 + (1:K1)
Hes.betaj = Hes.beta0[idx,idx,drop=FALSE]
## if (use.Rfast){
## Hes.betaj = -Rfast::Crossprod(regr1$Xcal[id1,idx,drop=FALSE], regr1$Xcal[id1,idx,drop=FALSE])
## } else {
## Hes.betaj = -crossprod(regr1$Xcal[id1,idx,drop=FALSE], regr1$Xcal[id1,idx,drop=FALSE])
## }
ev1.lst[[j]] = ev.fun(BwB=Hes.betaj,Pd=regr1$Pd.x)$dj
}
}
## (Approximate) Non-penalized Hessian for <gamma>:
Hes.gamma0 = ev2.lst = NULL
if (!nogamma){
if (use.Rfast){
Hes.gamma0 = -Rfast::Crossprod(regr2$Xcal[id1,,drop=FALSE], regr2$Xcal[id1,,drop=FALSE])
} else {
Hes.gamma0 = -crossprod(regr2$Xcal[id1,,drop=FALSE], regr2$Xcal[id1,,drop=FALSE])
}
if (J2 > 0){
rk2 = qr(regr2$Pd.x)$rank
for (j in 1:J2){ ## Loop over additive terms in the long-term survival sub-model
idx = nfixed2 + (j-1)*K2 + (1:K2)
Hes.gamj = Hes.gamma0[idx,idx,drop=FALSE]
ev2.lst[[j]] = ev.fun(BwB=Hes.gamj,Pd=regr2$Pd.x)$dj
}
}
}
## Initial values
## --------------
## ... for the regression parameters
if (is.null(beta.0)){
beta.0 = rep(0,q1)
beta.0[1] = log(H0.hat[length(H0.hat)])
}
names(beta.0) = regr1.lab
if (!nogamma){
if (is.null(gamma.0)) gamma.0 = rep(0,q2)
names(gamma.0) = regr2.lab
}
## ... for the penalty parameters
if (is.null(tau.0)) tau.0 = 100
if ((J1 > 0) & is.null(lambda1.0)) lambda1.0 = rep(100,J1)
if ((J2 > 0) & is.null(lambda2.0)) lambda2.0 = rep(100,J2)
##
## Pre-evaluated IB-splines basis for F0(t)
## ----------------------------------------
obj.knots = qknots(1:T, xmin=0, xmax=T+1, equid.knots = TRUE, pen.order=pen.order0, K=K0)
knots = obj.knots$knots ## Knots
Pd = obj.knots$Pd ## Penalty matrix
##
B0.grid = Bsplines(1:T,knots=knots) ## TxS matrix [b0_s(t)]
if (is.null(phi.0)) phi.0 = rep(0,ncol(B0.grid)) ## c(solve(t(B0.grid)%*%B0.grid + Pd)%*%t(B0.grid)%*%dnorm(1:T,.5*T,T/7))
names(phi.0) = paste("phi",1:length(phi.0),sep="")
k.ref = ceiling(K0/2)
phi.0 = phi.0 - phi.0[k.ref]
colnames(B0.grid) = names(phi.0)
tB0B0 = t(B0.grid[data$time,-k.ref]) %*% B0.grid[data$time,-k.ref]
##
## -----------------------------------------------------------------------------------------
## ff: key function computing llik, lpen, gradients, Hessians, etc.
## -----------------------------------------------------------------------------------------
ff = function(phi, beta, gamma,
tau, lambda1, lambda2,
Dbeta=FALSE, Dgamma=FALSE, Dphi=FALSE, D2phi=FALSE,
Dlambda=FALSE, hessian=TRUE, get.logEvid=FALSE){
## -----------------------------------------------------------------------------------------
if (get.logEvid){
hessian = Dphi = D2phi = Dbeta = TRUE
Dgamma = !nogamma
}
Delta = 1 ## Unit of measurement for data$time
eta.grid = c(B0.grid %*% phi)
pi.grid = exp(eta.grid) / sum(exp(eta.grid))
B.tilde = t(t(B0.grid) - c(t(B0.grid)%*%pi.grid)) ## TxS matrix
##
f0.grid = pi.grid / Delta
F0.grid = cumsum(pi.grid)
S0.grid = pmax(1e-6, 1 - F0.grid)
##
PdPhi = c(Pd%*%phi)
quad.phi = sum(phi*PdPhi)
##
eta.1 = c(regr1$Xcal %*% beta) ## Linear predictors in long-term survival
if (nogamma){ ## Absence of covariate (and no intercept for identification reasons)
eta.2 = 0.0
} else {
eta.2 = c(regr2$Xcal %*% gamma) ## Linear predictors in short-term survival
}
time = data$time ; event = data$event ; id1 = as.logical(event) ##which(event==1)
##
switch(baseline,
"F0" = {
lhp = (eta.1 + eta.2) + ((exp(eta.2)-1.0) * log(F0.grid[time])) + log(f0.grid[time])
},
"S0" = {
lhp = (eta.1 + eta.2) + ((exp(eta.2)-1.0) * log(S0.grid[time])) + log(f0.grid[time])
}
)
## Penalty matrices
## ----------------
lambda1.cur = lambda1 ; lambda2.cur = lambda2
if (J1 > 0) P1.cur = Pcal.fun(nfixed1,lambda1.cur,Pd1.x) ## Penalty matrix for location
else P1.cur = 0
##
if (J2 > 0) P2.cur = Pcal.fun(nfixed2,lambda2.cur,Pd2.x) ## Penalty matrix for dispersion
else P2.cur = 0
## Log-likelihood
## --------------
mu.ij = exp(lhp)*Delta
llik = -sum(mu.ij) + sum(log(mu.ij[id1]))
llik.y = -sum(event) + 0 ## ... as only 0-1 values for the Poisson counts <-- y*log(y)=0
dev = -2*(llik - llik.y)
## Penalized llik
## --------------
lpen = llik
lpen = lpen + dgamma(tau,a.tau,b.tau,log=TRUE) ## Prior on <tau>
lpen = lpen + .5*(K0-pen.order0)*log(tau) - .5*tau*quad.phi ## Prior on (phi|tau)
eps.ev = 1e-10
if (J1 > 0){
## Prior on <lambda1>
lpen = lpen + sum(dgamma(lambda1.cur,aa,bb,log=TRUE)) ## Prior on <lambda1>
## Prior on (beta|lambda1)
ev.beta = svd(P1.cur,nu=0,nv=0)$d
lpen = lpen + .5*sum(log(ev.beta[ev.beta>eps.ev])) - .5*sum(beta*c(P1.cur%*%beta))
}
if (J2 > 0){
## Prior on <lambda2>
lpen = lpen + sum(dgamma(lambda2.cur,aa,bb,log=TRUE))
## Prior on (gamma|lambda2)
ev.gam = svd(P2.cur,nu=0,nv=0)$d
lpen = lpen + .5*sum(log(ev.gam[ev.gam>eps.ev])) - .5*sum(gamma*c(P2.cur%*%gamma))
}
##
## Derivatives of lpen wrt <beta>
## ------------------------------
## Gradient wrt <beta>
grad.beta = NULL
Hes.beta = Mcal.1 = NULL
## Hes.beta = Hes.beta0 = Mcal.1 = NULL
if (Dbeta){
if (use.Rfast){
grad.beta = Rfast::colsums(regr1$Xcal*(event-mu.ij))
} else {
grad.beta = colSums(regr1$Xcal*(event-mu.ij))
}
## Additive terms
if (J1 > 0) grad.beta = grad.beta - c(P1.cur%*%beta)
}
## Hessian wrt <beta>
if (Dbeta & hessian){
if (!observed.hessian){ ## Note: already computed when observed.hessian=TRUE
W1 = mu.ij
if (use.Rfast){
Hes.beta0 = -Rfast::Crossprod(regr1$Xcal, W1*regr1$Xcal)
} else {
Hes.beta0 = -crossprod(regr1$Xcal, W1*regr1$Xcal)
## Hes.beta0 = -t(regr1$Xcal) %*% (W1*regr1$Xcal)
}
}
## Additive terms
Hes.beta = Hes.beta0 ## Non-penalized Hessian Hes.beta0 for <beta>
if (J1 > 0){
Hes.beta = Hes.beta - P1.cur ## Penalized Hessian for <beta>
## Useful quantites for <lambda1> update
if (Dlambda){
if (observed.hessian){
if (use.Rfast){
tZWB.1 = Rfast::Crossprod(Z1[id1,,drop=FALSE], regr1$Bcal[id1,,drop=FALSE])
tZWZ.1 = crossprod(Z1[id1,,drop=FALSE], Z1[id1,,drop=FALSE])
} else {
tZWB.1 = crossprod(Z1[id1,,drop=FALSE], regr1$Bcal[id1,,drop=FALSE])
tZWZ.1 = crossprod(Z1[id1,], Z1[id1,,drop=FALSE])
}
} else {
if (use.Rfast){
tZWB.1 = Rfast::Crossprod(Z1, W1*regr1$Bcal) ; tZWZ.1 = crossprod(Z1, W1*Z1)
} else {
tZWB.1 = crossprod(Z1, W1*regr1$Bcal) ; tZWZ.1 = crossprod(Z1, W1*Z1)
## tZWB.1 = t(Z1)%*%(W1*regr1$Bcal) ; tZWZ.1 = t(Z1)%*%(W1*Z1)
}
}
inv.tZWZ.1 = try(solve(tZWZ.1),TRUE)
##
## Mcal.1 required to update <lambda1>
if (observed.hessian){
if (use.Rfast){
Mcal.1 = Rfast::Crossprod(regr1$Bcal[id1,,drop=FALSE], regr1$Bcal[id1,,drop=FALSE])
} else {
Mcal.1 = crossprod(regr1$Bcal[id1,,drop=FALSE], regr1$Bcal[id1,,drop=FALSE])
}
} else {
if (use.Rfast){
Mcal.1 = Rfast::Crossprod(regr1$Bcal, W1*regr1$Bcal)
} else {
Mcal.1 = crossprod(regr1$Bcal, W1*regr1$Bcal)
}
}
if (use.Rfast){
Mcal.1 = Mcal.1 - Rfast::Crossprod(tZWB.1, inv.tZWZ.1%*%tZWB.1)
} else {
Mcal.1 = Mcal.1 - crossprod(tZWB.1, inv.tZWZ.1%*%tZWB.1)
}
}
}
}
## Derivatives of lpen wrt <gamma>
## -------------------------------
## Gradient wrt <gamma>
grad.gamma = NULL
Hes.gamma = Hes.gamma0 = Mcal.2 = NULL
if ((Dgamma) & (!nogamma)){
switch(baseline,
"F0" = {
tempo = exp(eta.2)*log(F0.grid[time])
},
"S0" = {
tempo = exp(eta.2)*log(S0.grid[time]) ## <-- Pack_v2
}
)
XcalTempo = (1+tempo) * regr2$Xcal
##
if (use.Rfast){
grad.gamma = Rfast::colsums((event-mu.ij)*XcalTempo)
} else {
grad.gamma = colSums((event-mu.ij)*XcalTempo)
}
## Additive terms
if (J2 > 0) grad.gamma = grad.gamma - c(P2.cur%*%gamma) ## Penalized gradient for gamma
}
## Hessian wrt <gamma>
if ((Dgamma) & (!nogamma) & (hessian)){
W2 = (event-mu.ij)*tempo - mu.ij*(1+tempo)^2
if (observed.hessian){
if (use.Rfast){
Hes.gamma0 = -Rfast::Crossprod(XcalTempo[id1,,drop=FALSE], XcalTempo[id1,,drop=FALSE])
} else {
Hes.gamma0 = -crossprod(XcalTempo[id1,,drop=FALSE], XcalTempo[id1,,drop=FALSE])
}
} else {
if (use.Rfast){
Hes.gamma0 = Rfast::Crossprod(XcalTempo, W2*XcalTempo)
## Hes.gamma0 = Rfast::Crossprod(regr2$Xcal, W2*regr2$Xcal)
} else {
Hes.gamma0 = crossprod(XcalTempo, W2*XcalTempo)
## Hes.gamma0 = crossprod(regr2$Xcal, W2*regr2$Xcal)
}
}
## Additive terms
Hes.gamma = Hes.gamma0 ## Non-penalized Hessian Hes.gamma0 for <gamma>
if (J2 > 0){
Hes.gamma = Hes.gamma - P2.cur ## Penalized Hessian for <gamma>
## Useful quantites for <lambda2> update
if (Dlambda){
if (nfixed2 > 0){
if (use.Rfast){
tZWB.2 = Rfast::Crossprod(Z2, W2*regr2$Bcal) ; tZWZ.2 = Rfast::Crossprod(Z2, W2*Z2)
} else {
tZWB.2 = crossprod(Z2, W2*regr2$Bcal) ; tZWZ.2 = crossprod(Z2, W2*Z2)
## tZWB.2 = t(Z2)%*%(W2*regr2$Bcal) ; tZWZ.2 = t(Z2)%*%(W2*Z2)
}
inv.tZWZ.2 = try(solve(tZWZ.2),TRUE)
## if (!is.matrix(inv.tZWZ.2)) return(stopthis())
##
if (use.Rfast){
Mcal.2 = Rfast::Crossprod(regr2$Bcal, W2*regr2$Bcal) - Rfast::Crossprod(tZWB.2, inv.tZWZ.2%*%tZWB.2)
} else {
Mcal.2 = crossprod(regr2$Bcal, W2*regr2$Bcal) - crossprod(tZWB.2 , inv.tZWZ.2%*%tZWB.2)
## Mcal.2 = t(regr2$Bcal)%*%(W2*regr2$Bcal) -t(tZWB.2)%*%inv.tZWZ.2%*%tZWB.2
}
} else {
if (use.Rfast){
Mcal.2 = Rfast::Crossprod(regr2$Bcal, W2*regr2$Bcal)
} else {
Mcal.2 = crossprod(regr2$Bcal, W2*regr2$Bcal)
}
}
}
}
}
## Cross-derivatives of lpen wrt <beta> & <gamma>
## ----------------------------------------------
if (nogamma){
grad.regr = grad.beta
Hes.regr = Hes.beta
Hes.regr0 = Hes.beta0
Hes.betgam = NULL
} else {
grad.regr = c(grad.beta,grad.gamma)
Hes.regr = Hes.regr0 = Hes.betgam = NULL
if ((Dbeta) & (Dgamma) & (hessian)){
if (use.Rfast){
Hes.betgam = -Rfast::Crossprod(regr1$Xcal, (mu.ij*(1+tempo))*regr2$Xcal)
} else {
Hes.betgam = -crossprod(regr1$Xcal, (mu.ij*(1+tempo))*regr2$Xcal)
}
Hes.regr = rbind(cbind(Hes.beta, Hes.betgam),
cbind(t(Hes.betgam), Hes.gamma))
Hes.regr0 = rbind(cbind(Hes.beta0, Hes.betgam),
cbind(t(Hes.betgam), Hes.gamma0))
}
}
## Derivatives of lpen wrt <phi>
## -----------------------------
grad.phi = Hes.phi = Hes.phi0 = ed.phi = NULL
dlf0.grid = dlF0.grid = dlS0.grid = NULL
if (Dphi){
S = K0
## Gradient
dlf0.grid = B.tilde ## TxS matrix
if (use.Rfast){
temp = Rfast::colCumSums(pi.grid*B.tilde) ## TxS matrix
} else {
temp = apply(pi.grid*B.tilde,2,cumsum) ## TxS matrix
}
dlF0.grid = temp / F0.grid ## TxS matrix
dlS0.grid = -temp / S0.grid ## TxS matrix
##
switch(baseline,
"F0" = {
dlhp = (exp(eta.2)-1.0) * dlF0.grid[time,] + dlf0.grid[time,] ## (nobs x S) matrix
},
"S0" = {
dlhp = (exp(eta.2)-1.0) * dlS0.grid[time,] + dlf0.grid[time,] ## (nobs x S) matrix
}
)
if (use.Rfast){
grad.phi = Rfast::colsums(dlhp[id1,,drop=FALSE]) - Rfast::colsums(mu.ij * dlhp)
} else {
grad.phi = colSums(dlhp[id1,,drop=FALSE]) - colSums(mu.ij * dlhp)
}
grad.phi = grad.phi - tau * PdPhi ## Roughness penalty correction
## Hessian
if (D2phi){
## Hessian without roughness penalty
if (use.Rfast){
if (observed.hessian){
Hes.phi0 = - Rfast::Crossprod(dlhp[id1,,drop=FALSE], dlhp[id1,,drop=FALSE])
} else {
Hes.phi0 = - Rfast::Crossprod(dlhp, mu.ij * dlhp)
}
} else {
if (observed.hessian){
Hes.phi0 = - crossprod(dlhp[id1,,drop=FALSE], dlhp[id1,,drop=FALSE])
} else {
Hes.phi0 = - crossprod(dlhp, mu.ij * dlhp)
}
}
## Hessian with roughness penalty
Hes.phi = Hes.phi0 - tau * Pd
## Effective dimension wrt <phi>
ed.phi = sum(t(solve(-Hes.phi[-k.ref,-k.ref])) * (-Hes.phi0[-k.ref,-k.ref]))
}
}
attr(phi,"ed.phi") = ed.phi ## Effective dimension for <phi> in F0(t) estimation
## Log-evidence
logEvid = NA
if (get.logEvid){
ev.psi = svd(-Hes.phi[-k.ref,-k.ref],nu=0,nv=0)$d
ev.beta = svd(-Hes.beta,nu=0,nv=0)$d
ev.gamma = 0
if (!nogamma) ev.gamma = svd(-Hes.gamma,nu=0,nv=0)$d
logEvid = lpen -.5*sum(log(ev.beta[ev.beta>eps.ev]))
if (!nogamma) logEvid = logEvid -.5*sum(log(ev.gamma[ev.gamma>eps.ev]))
logEvid = logEvid -.5*sum(log(ev.psi[ev.psi>eps.ev]))
}
ans = list(llik=llik, lpen=lpen, dev=dev, logEvid=logEvid,
mu.ij=mu.ij,res=(event-mu.ij)/sqrt(mu.ij),
phi=phi, beta=beta, gamma=gamma,
nbeta=length(beta), ngamma=length(gamma),
grad.beta=grad.beta, Hes.beta=Hes.beta, Hes.beta0=Hes.beta0,
grad.gamma=grad.gamma, Hes.gamma=Hes.gamma, Hes.gamma0=Hes.gamma0,
Mcal.1=Mcal.1, Mcal.2=Mcal.2,
Hes.betgam=Hes.betgam,
grad.regr=grad.regr, Hes.regr=Hes.regr, Hes.regr0=Hes.regr0,
grad.phi=grad.phi, Hes.phi=Hes.phi, Hes.phi0=Hes.phi0,
nfixed1=nfixed1, nfixed2=nfixed2, nogamma=nogamma,
J1=J1, J2=J2, K1=K1, K2=K2,
knots1.x=regr1$knots.x, knots2.x=regr2$knots.x,
Dd1.x=Dd1.x, Dd2.x=Dd2.x, Pd1.x=Pd1.x, Pd2.x=Pd2.x,
pen.order1 = regr1$pen.order, pen.order2 = regr2$pen.order,
additive.lab1 = regr1$additive.lab, additive.lab2 = regr2$additive.lab,
knots=knots, T=T, t.grid=1:T, f0.grid=f0.grid, F0.grid=F0.grid, S0.grid=S0.grid,
dlf0.grid=dlf0.grid, dlF0.grid=dlF0.grid, dlS0.grid=dlS0.grid, k.ref=k.ref,
a=aa, b=bb, a.tau=a.tau, b.tau=b.tau)
return(ans)
} ## End ff
##
## Function 1: Penalty parameter selection using Laplace approximation to p(lambda|data)
## with an underlying computation of |B'WB + lamdba Pd| using precomputed eigenvalues
## and the fixed point method to find lambda.MAP
## --------------------------------------------------------------------------------------
## Penalty parameter selection for the additive terms using a Laplace
## approximation to p(lambda|data), with |B'WB + lambda*Pd| evaluated from
## PREcomputed eigenvalues (<ev1.lst>, <ev2.lst>) and a fixed-point scheme
## to locate the posterior mode of lambda (or of log(lambda) if <logscale>).
## INPUT:
##  - lambda1, lambda2: current penalty vectors for the additive terms in the
##      long- and short-term survival submodels.
##  - itermax: maximum number of fixed-point iterations per penalty parameter.
## OUTPUT: list with updated <lambda1>, <lambda2> and their standard errors
##  (se.lambda1, se.lambda2, se.loglambda1, se.loglambda2); the se entries are
##  NULL for a submodel without additive term.
## NOTE: reads aa, bb, logscale, J1, J2, nfixed1, nfixed2, K1, K2, beta.cur,
##  gamma.cur, Pd1.x, Pd2.x, ev1.lst, ev2.lst, rk1, rk2 from the enclosing
##  environment.
select.lambda.LPS = function(lambda1, lambda2, itermax=50){
    ## Generic fixed-point update of a single penalty parameter <lambda>,
    ## given the quadratic form <quad> = theta' Pd theta, the eigenvalues
    ## <ev> of the penalty-free curvature, and the penalty rank <rk>.
    update.lambda.fun = function(lambda, quad, ev, rk){
        ok.lam = FALSE ; iter.lam = 0
        while(!ok.lam){
            iter.lam = iter.lam + 1
            lambda.old = lambda
            ttr = sum(1 / (lambda+ev)) ## tr{(lambda*I + diag(ev))^(-1)}
            if (!logscale){
                ## Posterior mode of lambda
                lambda = ((aa-1) + .5*rk) / (bb + .5*(quad + ttr))
            } else {
                ## Exponential of the posterior mode of log(lambda)
                lambda = (aa + .5*rk) / (bb + .5*(quad + ttr))
            }
            lam.dif = abs((lambda - lambda.old)/lambda)
            ok.lam = (lam.dif < .001) | (iter.lam >= itermax)
        }
        ## Curvature of -log p(lambda|D) at the mode --> standard errors
        curv = (bb+.5*quad)/lambda + .5*sum(ev/lambda/(lambda+ev)^2)
        se.lambda = 1 / sqrt(curv)
        se.loglambda = 1 / sqrt(lambda^2 * curv) ## Delta method on log scale
        return(list(lambda=lambda, se.lambda=se.lambda, se.loglambda=se.loglambda))
    }
    ## FIX: initialize the se's so that the returned list is well-defined even
    ## when one submodel has no additive term (J1==0 or J2==0). Previously
    ## <se.lambda1>/<se.loglambda1> (resp. the lambda2 versions) were left
    ## undefined in that case, making the final return(list(...)) fail.
    se.lambda1 = se.loglambda1 = NULL
    se.lambda2 = se.loglambda2 = NULL
    ## Update <lambda1> if (J1 > 0)
    if (J1 > 0){
        se.lambda1 = se.loglambda1 = 0*lambda1
        for (j in 1:J1){ ## Loop over additive terms in the long-term survival sub-model
            idx = nfixed1 + (j-1)*K1 + (1:K1) ## Spline coefs of the jth additive term
            theta.j = beta.cur[idx]
            quad.j = sum(theta.j*c(Pd1.x%*%theta.j)) ## theta_j' Pd theta_j
            temp = update.lambda.fun(lambda1[j], quad.j, ev1.lst[[j]], rk1)
            lambda1[j] = temp$lambda
            se.lambda1[j] = temp$se.lambda
            se.loglambda1[j] = temp$se.loglambda
        }
    }
    ## Update <lambda2> if (J2 > 0)
    if (J2 > 0){
        se.lambda2 = se.loglambda2 = 0*lambda2
        for (j in 1:J2){ ## Loop over additive terms in the short-term survival sub-model
            idx = nfixed2 + (j-1)*K2 + (1:K2)
            theta.j = gamma.cur[idx]
            quad.j = sum(theta.j*c(Pd2.x%*%theta.j))
            temp = update.lambda.fun(lambda2[j], quad.j, ev2.lst[[j]], rk2)
            lambda2[j] = temp$lambda
            se.lambda2[j] = temp$se.lambda
            se.loglambda2[j] = temp$se.loglambda
        }
    }
    return(list(lambda1=lambda1, lambda2=lambda2,
                se.lambda1=se.lambda1, se.lambda2=se.lambda2,
                se.loglambda1=se.loglambda1, se.loglambda2=se.loglambda2))
} ## End select.lambda.LPS
##
## Function 2: Penalty parameter selection using Laplace approximation to p(lambda|data)
## with the fixed point method to find lambda.MAP (without precomputation of eigenvalues)
## ----------------------------------------------------------------------------------------
## Penalty parameter selection using a Laplace approximation to p(lambda|data):
## JOINT fixed-point updates for <lambda1> and <lambda2>, without precomputed
## eigenvalues (contrast with select.lambda.LPS).
## INPUT:
##  - Hes.regr0: unpenalized Hessian of lpen wrt (beta,gamma), as in ff().
## OUTPUT: list with updated <lambda1>, <lambda2>, the block-diagonal posterior
##  covariance <Sigma> of (beta,gamma), the iteration count and a flag.
## NOTE: reads J1, J2, nfixed1, nfixed2, K1, K2, q1, aa, bb, pen.order1,
##  pen.order2, Pd1.x, Pd2.x, beta.cur, gamma.cur, lambda1.cur, lambda2.cur and
##  Pcal.fun() from the enclosing environment; local (sub)assignments below
##  create LOCAL copies of lambda1.cur/lambda2.cur.
select.lambda.LPS2 = function(Hes.regr0){
ok.lam = FALSE ; iter.lam = 0
##
## Block-diagonal approximation of the posterior covariance of (beta,gamma)
## given the current penalty parameters (one block per submodel).
update.Sigma = function(){
if (J1 > 0) Pcal.1 = Pcal.fun(nfixed1,lambda1.cur,Pd1.x)
else Pcal.1 = diag(0,nfixed1)
if (J2 > 0) Pcal.2 = Pcal.fun(nfixed2,lambda2.cur,Pd2.x)
else Pcal.2 = diag(0,nfixed2)
##
## Pcal = bdiag(Pcal.1,Pcal.2)
## Sigma = solve(-Hes.regr0 + Pcal)
## NOTE(review): 1:ncol(Pcal.1) yields c(1,0) when ncol(Pcal.1)==0
## (nfixed1==0 with J1==0) -- presumably that case never occurs here; verify.
idx.1 = 1:ncol(Pcal.1)
idx.2 = ncol(Pcal.1) + (1:ncol(Pcal.2))
Sigma = as.matrix(bdiag(solve(-Hes.regr0[idx.1,idx.1] + Pcal.1), solve(-Hes.regr0[idx.2,idx.2] + Pcal.2)))
##
return(Sigma)
} ## End update.Sigma
##
Sigma = update.Sigma() ## Update Sigma
while(!ok.lam){
iter.lam = iter.lam + 1
## Update <lambda1>
lam1.dif = 0 ## Will be updated if (J1 > 0)
if (J1 > 0){
lambda1.old = lambda1.cur
for (j in 1:J1){ ## Loop over additive terms in the long-term survival sub-model
## Update lambda1.cur
idx = nfixed1 + (j-1)*K1 + (1:K1)
theta.j = beta.cur[idx]
quad.j = sum(theta.j*c(Pd1.x%*%theta.j)) ## theta_j' Pd theta_j
## tr(Sigma[idx,idx] %*% Pd1.x) via sum(t(A)*B), floored at 1e-6
ttr = max(sum(t(Sigma[idx,idx]) * Pd1.x), 1e-6) ## max(sum(t(Sigma[idx,idx]) * Pd1.x),1)
## Note: rank(Pd1.x) = (K1-pen.order1+1)
lambda1.cur[j] = ((aa-1) + K1-pen.order1+1) / (bb + quad.j + ttr)
}
lam1.dif = lambda1.cur-lambda1.old
}
## Update <lambda2>
lam2.dif = 0 ## Will be updated if (J2 > 0)
if (J2 > 0){
lambda2.old = lambda2.cur
for (j in 1:J2){ ## Loop over additive terms in the short-term survival sub-model
## Update lambda2.cur
idx = nfixed2 + (j-1)*K2 + (1:K2)
theta.j = gamma.cur[idx]
quad.j = sum(theta.j*c(Pd2.x%*%theta.j))
## Offset q1 skips the (beta) block of Sigma to reach the gamma block
ttr = max(sum(t(Sigma[q1+idx, q1+idx]) * Pd2.x), 1e-6) ## max(sum(t(Sigma[q1+idx, q1+idx]) * Pd2.x),1)
## Note: rank(Pd2.x) = (K2-pen.order2+1)
lambda2.cur[j] = ((aa-1) + K2-pen.order2+1) / (bb + quad.j + ttr)
}
lam2.dif = lambda2.cur-lambda2.old
}
## Update Sigma
Sigma = update.Sigma()
##
## Stopping rule: largest absolute change in any penalty < 1, or 20 iterations
ok.lam = max(abs(c(lam1.dif,lam2.dif))) < 1 | (iter.lam > 20)
} ## End while
##
return(list(lambda1=lambda1.cur,lambda2=lambda2.cur,Sigma=Sigma,
niter=iter.lam,converged=ok.lam))
} ## End select.lambda.LPS2
##
## Function 3: Penalty parameter selection using Laplace approximation to p(lambda|data)
## (separately for <lambda1> & <lambda2>
## --------------------------------------------------------------------------------------
## Penalty parameter selection using a Laplace approximation to p(lambda|data),
## SEPARATELY per submodel: Newton-Raphson on xi = log(lambda - lambda.min),
## with step-halving to keep exp(xi) finite.
## INPUT:
##  - coef: current (fixed + spline) coefficients of the submodel.
##  - nfixed: number of non-penalized (fixed) coefficients.
##  - lambda: current penalty vector (one entry per additive term).
##  - Pd: penalty matrix of a single additive term (K x K).
##  - Mcal: conditional precision of the spline coefficients (from ff()).
##  - pen.order: penalty order; rank(Pd) = K - pen.order + 1.
##  - lambda.min: lower bound for the penalty parameters.
##  - bb: rate hyperparameter of the Gamma prior on lambda.
## OUTPUT: list with the updated <lambda>, score and Hessian on both the
##  lambda and xi scales, and a convergence flag.
## NOTE: reads Pcal.fun(), grad.tol and verbose from the enclosing environment.
select.lambda.LPS3 = function(coef, nfixed, lambda, Pd, Mcal, pen.order, lambda.min, bb){
## Rename the arguments with submodel-1 style names used throughout the body
Mcal.1 = Mcal
lambda1.cur = lambda ; J1 = length(lambda1.cur)
nfixed1 = nfixed
Pd1.x = Pd ; K1 = ncol(Pd1.x)
theta.cur = coef
pen.order1 = pen.order
lambda1.min = lambda.min
## Home-made Newton-Raphson
ok.xi1 = FALSE ; iter.lam1 = 0
while(!ok.xi1){
iter.lam1 = iter.lam1 + 1
P1.cur = Pcal.fun(nfixed1,lambda1.cur,Pd1.x) ## Update penalty matrix
## Inverse of the penalized precision of the spline coefficients only
if (nfixed1 > 0){
iMpen.1 = solve(Mcal.1 + P1.cur[-(1:nfixed1),-(1:nfixed1)])
} else {
iMpen.1 = solve(Mcal.1 + P1.cur)
}
## Score.lambda1
U.lam1 = U.xi1 = rep(0,J1)
Rj.1 = list()
##
for (j in 1:J1){
## Penalty block of the jth term alone (all other lambdas set to 0)
lam1.j = rep(0,J1) ; lam1.j[j] = lambda1.cur[j]
if (nfixed1 > 0){
Pj.1 = Pcal.fun(nfixed1,lam1.j,Pd1.x)[-(1:nfixed1),-(1:nfixed1)] ## Update penalty matrix
} else {
Pj.1 = Pcal.fun(nfixed1,lam1.j,Pd1.x)
}
## Rj = (Mcal + Pcal)^-1 %*% dPcal/dlambda_j
Rj.1[[j]] = iMpen.1%*%(Pj.1/lam1.j[j])
idx = nfixed1 + (j-1)*K1 + (1:K1) ## Spline coefs of the jth term
theta.j = theta.cur[idx]
quad1.cur = sum(theta.j*c(Pd1.x%*%theta.j)) + bb ## <----- quadratic form + prior rate
## Score of log p(lambda|D) wrt lambda_j
U.lam1[j] = .5*(K1-pen.order1+1)/lam1.j[j] -.5*quad1.cur -.5*sum(diag(Rj.1[[j]]))
}
## Score.xi1 where lambda1 = lambda1.min + exp(xi1) (chain rule)
U.xi1 = U.lam1 * (lambda1.cur - lambda1.min)
##
## Hessian.lambda1
Hes.lam1 = diag(0,J1)
Only.diagHes1 = TRUE ## Diagonal approximation of the Hessian (cheaper)
if (!Only.diagHes1){
for (j in 1:J1){
for (k in j:J1){
temp = 0
if (j==k) temp = temp -.5*(K1-pen.order1+1)/lambda1.cur[j]^2
temp = temp + .5*sum(t(Rj.1[[j]])*Rj.1[[k]]) ## tr(XY) = sum(t(X)*Y)
Hes.lam1[j,k] = Hes.lam1[k,j] = temp
}
}
} else {
for (j in 1:J1) Hes.lam1[j,j] = -.5*(K1-pen.order1+1)/lambda1.cur[j]^2 +.5*sum(t(Rj.1[[j]])*Rj.1[[j]])
}
## Hessian.xi1 where lambda1 = lambda1.min + exp(xi1)
Hes.xi1 = t((lambda1.cur-lambda1.min) * t((lambda1.cur-lambda1.min) * Hes.lam1))
diag(Hes.xi1) = diag(Hes.xi1) + U.lam1 * (lambda1.cur-lambda1.min)
## Update xi1 where lambda1 = lambda1.min + exp(xi1)
xi1.cur = log(lambda1.cur-lambda1.min) ## ; names(xi1.cur) = addregr1.lab ## paste("f.mu.",1:J1,sep="")
## ginv: Moore-Penrose inverse guards against a singular Hessian
dxi1 = -c(MASS::ginv(Hes.xi1) %*% U.xi1) ## Increment.xi1
##
## Step-halving: shrink the step until exp(xi1.prop) cannot overflow
step = 1
accept = FALSE
while(!accept){
xi1.prop = xi1.cur + step*dxi1
accept = all(xi1.prop < 10) ## all(is.finite(exp(xi1.prop)))
if (!accept) step = .5*step
}
if ((step !=1)&verbose) cat("Step-halving for lambda1: step=",step,"\n")
xi1.cur = xi1.prop
lambda1.cur = lambda1.min + exp(xi1.cur)
##
## Convergence ? (small score on the xi scale, or iteration cap)
ok.xi1 = (max(abs(U.xi1)) < grad.tol) | (iter.lam1 > 20)
}
##
return(list(lambda=lambda1.cur, U.lambda=U.lam1, Hes.lam=Hes.lam1,
xi=xi1.cur, U.xi=U.xi1, Hes.xi=Hes.xi1,
ok.xi=ok.xi1))
} ## End select.lambda.LPS3
##
## Function Tr.test implements Wood (2013) Biometrika 100(1), 221-228
## Goal: evaluate H0: fj = Xt %*% beta = 0 when (beta | D) ~ N(p,V)
## The type argument specifies the type of truncation to use.
## on entry `rank' should be an edf estimate
## 0. Default using the fractionally truncated pinv.
## 1. Round down to k if k<= rank < k+0.05, otherwise up.
## res.df is residual dof used to estimate scale. <=0 implies
## fixed scale.
## -------------------------------------------------------------------
## Significance test of H0: f_j = Xt %*% beta = 0 when (beta | D) ~ N(p, V),
## following Wood (2013), Biometrika 100(1), 221-228.
## The rank passed to testStat is the edf estimate capped at ncol(Xt);
## type = 0 requests the fractionally truncated pseudo-inverse and
## res.df = -1 a fixed scale (see header comments above).
Tr.test = function(p, Xt, V, edf){
    rank.used <- min(ncol(Xt), edf)
    tvcure::testStat(p, Xt, V, rank.used, type = 0, res.df = -1)
    ## mgcv:::testStat(p, Xt, V, rank.used, type = 0, res.df = -1) would be equivalent
}
## End Tr.test
##
## #############
## ESTIMATION ##
## #############
##
## ---------------------------------------
## Generic Levenberg-Marquardt algorithm
## ---------------------------------------
## GOAL: maximize function using its gradient and Hessian with Levenberg-Marquardt
## INPUT:
## * g: function returning a list:
## - g: function value to be maximized
## - grad: gradient of the function
## - Hes: Hessian matrix of the function
## - RDM: grad' (-H)^-1 grad / ntheta
## * theta0: starting value
## * grad.tol: tolerance value for the Lp-norm of the gradient
## * RDM.tol: tolerance value for the RDM (= Relative Damping Measure)
## * max_iter: maximum number of iterations
## * lambda_init: initial value of the damping parameter in Levenberg-Marquardt
## * lambda_factor: adaptative factor for the damping parameter
##
## OUTPUT:
## List containing:
## * val : function value at <theta>
## * val.start: starting value of the function at theta0
## * theta: value of theta maximizing the function (if convergence occured)
## * grad: gradient at <theta>
## * RDM: Relative Damping Measure at <theta>
## * iter: number of iterations
## * convergence: logical indicating if convergence occured: Lp(grad) < grad.tol
## Generic Levenberg-Marquardt MAXIMIZATION of a function g(theta).
## INPUT:
##  - g: function(theta, Dtheta) returning a list with entries
##      g (value), grad (gradient), Hes (Hessian) and theta (echoed back);
##      the returned <theta> is what ends up in the output list.
##  - theta0: starting value.
##  - grad.tol / RDM.tol / fun.tol: convergence tolerances (gradient norm,
##      relative damping measure, function improvement).
##  - max_iter: maximum number of outer iterations.
##  - lambda_init / lambda_factor: initial damping and its adaptation factor.
## OUTPUT: list(val, val.start, theta, grad, RDM, iter, converged).
## NOTE: relies on Lp() (gradient norm) from the enclosing environment.
myLevMarq <- function(g, theta0, grad.tol=1e-2, RDM.tol=1e-4, fun.tol=1e-3,
max_iter=25, lambda_init=1e-3, lambda_factor=10,
verbose=FALSE){
theta <- theta0
ntheta <- length(theta)
lambda <- lambda_init ## Damping parameter
obj.cur <- g(theta0, Dtheta=TRUE)
g.start = obj.cur$g ## Function value at the starting point
##
nreject = 0 ## Consecutive rejected proposals
for (i in 1:max_iter) {
grad <- obj.cur$grad
hess <- -obj.cur$Hes ## hess = NEGATIVE Hessian !!
## Hessian regularisation: add the damping to the diagonal
hess_reg <- hess ; diag(hess_reg) <- diag(hess_reg) + lambda
## hess_reg <- hess + lambda * diag(nrow(hess))
##
## Check positive definite
is_positive_definite <- FALSE
while (!is_positive_definite) {
tryCatch({
chol(hess_reg)
is_positive_definite <- TRUE
}, error = function(e) {
## Increase lambda if not positive-definite
## (<<- updates <lambda>/<hess_reg> in the myLevMarq frame)
lambda <<- lambda * lambda_factor
hess_reg <<- hess ; diag(hess_reg) <<- diag(hess_reg) + lambda
## hess_reg <<- hess + lambda * diag(nrow(hess))
})
}
## Compute dtheta and update theta
dtheta <- solve(hess_reg, grad)
theta.prop <- theta + dtheta ## Update theta
##
## Accept update ?
obj.prop <- g(theta.prop, Dtheta=TRUE)
g.cur = obj.cur$g ; g.prop = obj.prop$g
g.dif = g.prop - g.cur
if (verbose) cat(i,"> ",lambda, g.cur, g.prop, "--> g.dif =",g.dif,"\n")
if (!is.na(g.dif) && (g.dif > 0)) {
## Improvement: accept the step and relax the damping
nreject = 0
theta <- theta.prop
lambda <- lambda / lambda_factor
obj.cur <- obj.prop
} else {
## No improvement: reject the step and strengthen the damping;
## give up after more than 5 consecutive rejections
nreject = nreject + 1
if (nreject > 5) break
lambda <- lambda * lambda_factor
}
if (verbose) cat("Lp(grad):",Lp(obj.cur$grad),"\n")
## Compute RDM (Relative Damping Measure)
grad = obj.cur$grad
RDM = sum(grad * dtheta) / ntheta
## Check convergence: small gradient AND a small non-negative improvement
converged = (Lp(obj.cur$grad) < grad.tol) && (g.dif >= 0) && (g.dif < fun.tol)
if ((converged) || (abs(g.dif) < fun.tol)) break
}
ans = list(val=obj.cur$g, val.start=g.start,
theta=obj.cur$theta,
grad=obj.cur$grad, RDM=RDM,
iter=i, converged=converged)
return(ans)
} ## End myLevMarq
##
## --------------------------------
## Generic Newton-Raphson algorithm
## --------------------------------
## Generic Newton-Raphson MAXIMIZATION with step-halving.
## INPUT:
##  - g: function(theta, Dtheta) returning a list with entries
##      g (value), theta (echoed back), dtheta (Newton step) and grad.
##  - theta: starting value.
##  - grad.tol / RDM.tol / fun.tol: convergence tolerances.
##  - step_factor: step division factor during step-halving.
##  - itermax: cap on both the outer iterations and the halving attempts.
## OUTPUT: list(val, val.start, theta, grad, RDM, iter, converged).
## NOTE: relies on Lp() (gradient norm) from the enclosing environment.
## NOTE(review): if the step-halving loop exhausts <itermax> attempts with
##  g() still raising an error, <obj.prop> is an error object and the
##  subsequent obj.cur = obj.prop would fail -- confirm callers cannot hit this.
NewtonRaphson = function(g, theta, grad.tol=1e-2, RDM.tol=1e-4, fun.tol=1e-3,
step_factor=2, itermax=25, verbose=FALSE){
ntheta = length(theta)
theta.cur = theta
obj.cur = g(theta.cur,Dtheta=TRUE)
g.start = obj.cur$g ## Function at the iteration start
grad.Lp = Lp(obj.cur$grad)
## Convergence criterion using Lp(grad) and RDM (see e.g. Prague et al, 2013)
RDM = with(obj.cur, sum(grad * dtheta)) / ntheta
ok = (grad.Lp < grad.tol) && (abs(RDM) < RDM.tol) ## grad' (-H)^-1 grad / ntheta < tol^2 ?
iter = 0
step = 1 ## NOTE: the step size carries over between outer iterations
while(!ok){
iter = iter + 1
## step = 1
theta.cur = obj.cur$theta
dtheta = c(obj.cur$dtheta)
nrep = 0
repeat { ## Repeat step-halving directly till improve target function
nrep = nrep + 1
if (nrep > itermax) break ## if (nrep > 20) break
theta.prop = theta.cur + step*dtheta ## Update.theta
## An error in g() (e.g. non-finite lpen) also triggers step-halving
obj.prop = tryCatch(expr=g(theta.prop,Dtheta=TRUE), error=function(e) e)
if (inherits(obj.prop, "error")){
step = step / step_factor
} else {
if (obj.prop$g >= obj.cur$g){
break
}
step = step / step_factor
}
}
g.cur = obj.cur$g ; g.prop = obj.prop$g
g.dif = g.prop - g.cur
##
obj.cur = obj.prop
grad.Lp = Lp(obj.cur$grad)
if (verbose) cat(iter,"> ",step, g.cur, g.prop, "--> g.dif =",g.dif," -- Lp(grad) = ",grad.Lp,"\n")
## Compute RDM (see e.g. Prague et al, 2013):
RDM = with(obj.cur, sum(grad * dtheta)) / ntheta ## grad' (-H)^-1 grad / ntheta < tol^2 ?
ok = (abs(RDM) < RDM.tol) && (grad.Lp < grad.tol) && (g.dif >= 0) && (g.dif < fun.tol)
if (iter > itermax) break
}
if (verbose) cat(obj.cur$g," (niter = ",iter,") - Lp(grad) = ",grad.Lp," - RDM = ",RDM,"\n",sep="")
## if (verbose) cat(obj.cur$g," (niter = ",iter,") - grad = ",Lp(obj.cur$grad),"\n",sep="")
ans = list(val=obj.cur$g, val.start=g.start, theta=obj.cur$theta, grad=obj.cur$grad,
RDM=RDM, iter=iter, converged=ok)
return(ans)
} ## End NewtonRaphson
##
## Starting values
## ---------------
## Initialize the current parameter values from the (possibly user-supplied)
## initial values prepared earlier: spline coefs <phi> for F0, regression
## coefs <beta> (long-term) and <gamma> (short-term).
phi.cur = phi.0 ; beta.cur = beta.0 ; gamma.cur = gamma.0
## ... and the penalty parameters: <tau> for F0, <lambda1>/<lambda2> for the
## additive terms of the two submodels.
tau.cur = tau.0 ; lambda1.cur = lambda1.0 ; lambda2.cur = lambda2.0
##
tau.iter = c() ## Trace of the selected <tau> values across iterations
## Functions to handle identification issue in polytomial logistic estimation of <f0>
## ----------------------------------------------------------------------------------
## Map the identifiable coefficients <psi> back to the full vector <phi> by
## reinserting the reference coefficient (fixed at 0) at position <k.ref>,
## and restoring the labels of the full vector (taken from <phi.0>).
psi2phi = function(psi){
    full = append(psi, values=0, after=k.ref-1)
    names(full) = names(phi.0)
    full
}
## Inverse map: center <phi> on its reference component <k.ref> and drop it,
## yielding the identifiable coefficient vector <psi>.
phi2psi = function(phi){
    centered = phi - phi[k.ref]
    centered[-k.ref]
}
##
## Global loop bookkeeping: <converged> flags the end of the estimation
## process; <final.iteration> forces one extra pass after the convergence
## criterion first turns green.
converged = FALSE ; iter = 0
final.iteration = FALSE ## Idea: when the EDs of the additive terms stabilize,
##
ptm <- proc.time() ## Start timer
##
if (tau.method == "grid") tau.stable = FALSE ## Initiate the algorithm with the selection of <tau> for F0(t)
tau.iter = tau.cur ## Necessary when <tau.method> == "grid"
## Sentinel values so the first iteration never looks converged
dev.old = AIC.old = BIC.old = 1e12 ## Necessary for convergence criterion based on <deviance>
lpen.old = logEvid.old = -1e12 ## ... or <AIC> or <BIC> or <lpen> or <log(evidence)>
##
## ------------------------------------------------------------------------------------
## START of the estimation process
## ------------------------------------------------------------------------------------
## When no estimation is requested, disable every selection/update method
if (noestimation){
lambda.method = "none" ; tau.method = "none" ; psi.method = "none"
}
while(!converged){ ## Global estimation loop
iter = iter + 1
if (verbose){
cat("\n--------------\n")
cat("Iteration ",iter,"\n")
cat("--------------\n")
}
## =====================
## -1- Estimation of F0 (through <phi> and <psi>)
## =====================
if (!noestimation){
nlm.version = FALSE
##
if (nlm.version){
## <nlm> estimation of <phi> (and <psi>) (F0 estimation)
## -------------------------------------
## Objective for nlm(): MINUS the penalized log-likelihood as a function of
## the identifiable baseline coefficients <psi>, with its gradient attached
## as the "gradient" attribute (nlm convention).
## NOTE: reads beta.cur, gamma.cur, lambda1.cur, lambda2.cur, k.ref,
## psi2phi() and ff() from the enclosing environment.
g.psi.nlm = function(psi, tau){
phi = psi2phi(psi) ## Reinsert the reference coefficient
obj.cur = ff(phi=phi, beta=beta.cur, gamma=gamma.cur,
tau=tau,
lambda1=lambda1.cur, lambda2=lambda2.cur,
Dphi=TRUE,D2phi=FALSE)
ans = -obj.cur$lpen ## nlm minimizes, hence the sign flip
attr(ans, "gradient") = -obj.cur$grad.phi[-k.ref]
return(ans)
} ## End g.psi.nlm
psi.cur = phi2psi(phi.cur)
psi.nlm = nlm(f=g.psi.nlm,psi.cur,tau=tau.cur)
psi.cur = psi.nlm$est ; phi.cur = psi2phi(psi.cur)
}
##
if (!nlm.version){
## Newton-Raphson for <phi> (F0 estimation)
## ------------------------
## Objective for NewtonRaphson(): penalized log-likelihood as a function of
## the identifiable baseline coefficients <psi> (= theta here), together with
## the gradient and the proposed Newton step.
## OUTPUT: list(g, theta, dtheta, grad) as expected by NewtonRaphson().
## NOTE: reads beta.cur, gamma.cur, tau.cur, lambda1.cur, lambda2.cur, k.ref,
## psi2phi() and ff() from the enclosing environment.
g.psi = function(theta,Dtheta=TRUE){
phi = psi2phi(theta)
obj.cur = ff(phi=phi, beta=beta.cur, gamma=gamma.cur,
tau=tau.cur, lambda1=lambda1.cur, lambda2=lambda2.cur,
Dphi=Dtheta, D2phi=Dtheta, Dbeta=FALSE, Dgamma=FALSE)
grad = Sigma = dtheta = NULL
if (Dtheta){
grad = obj.cur$grad.phi[-k.ref]
## A = Matrix::nearPD(-obj.cur$Hes.phi[-k.ref,-k.ref])$mat
## dtheta = solve(A, grad)
## Slightly ridged negative Hessian to guarantee invertibility
A = -obj.cur$Hes.phi[-k.ref,-k.ref] + diag(1e-6,length(grad))
dtheta = solve(A, grad) ## Proposed Newton step
## Carry the effective dimension of <phi> along with <theta>
attr(theta,"ed.phi") = attr(obj.cur$phi,"ed.phi")
}
ans = list(g=obj.cur$lpen, theta=theta, dtheta=dtheta, grad=grad)
return(ans)
} ## End g.psi
##
psi.cur = phi2psi(phi.cur)
psi.NR = NewtonRaphson(g=g.psi,theta=psi.cur,
grad.tol=grad.tol, RDM.tol=RDM.tol, fun.tol=fun.tol,
verbose=FALSE)
psi.cur = psi.NR$theta ; phi.cur = psi2phi(psi.cur)
## ed.phi = attr(psi.cur,"ed.phi") ## Effective dim of <phi> in the estimation of F0(t)
}
}
##
## Selection of <tau> (Penalty parameter to estimate <phi> in F0)
## ------------------
switch(tau.method,
"LPS" = {
## Generic update of <tau>
update.tau.fun = function(tau,quad,ev,rk,itermax=50){
ok = FALSE ; iter = 0
while(!ok){
iter = iter + 1
tau.old = tau
ttr = sum(1 / (tau+ev))
tau = (2*(a.tau-1) + rk) / (2*b.tau + quad + ttr)
tau.dif = abs(tau - tau.old)
ok = (tau.dif < 1) | (iter >= itermax)
}
return(tau)
}
## Update tau.cur
obj.cur = ff(phi=phi.cur, beta=beta.cur, gamma=gamma.cur,
tau=tau.cur, lambda1=lambda1.cur, lambda2=lambda2.cur,
Dphi=TRUE, D2phi=TRUE, Dbeta=FALSE, Dgamma=FALSE)
BwB = -obj.cur$Hes.phi0[-k.ref,-k.ref]
psi.cur = phi2psi(phi.cur) ; quad = sum(psi.cur * c(Pd[-k.ref,-k.ref] %*% psi.cur))
ev0 = ev.fun(BwB=BwB,Pd=Pd[-k.ref,-k.ref])$dj ## Eigenvalues for update of <tau>
rk0 = qr(Pd[-k.ref,-k.ref])$rank ## Rank of Penalty matrix
tau.cur = update.tau.fun(tau.cur,quad,ev0,rk0) ## <tau> udpate
},
"LPS2" = {
obj.cur = ff(phi=phi.cur, beta=beta.cur, gamma=gamma.cur,
tau=tau.cur, lambda1=lambda1.cur, lambda2=lambda2.cur,
Dphi=TRUE, D2phi=TRUE, Dbeta=FALSE, Dgamma=FALSE)
BWB = -obj.cur$Hes.phi0[-k.ref,-k.ref]
ok.tau = FALSE
while(!ok.tau){
Sigma = solve(BWB+tau.cur*Pd[-k.ref,-k.ref])
ttr = sum(t(Sigma)*Pd[-k.ref,-k.ref])
quad = sum(phi.cur * c(Pd%*%phi.cur))
tau.old = tau.cur
tau.cur = max(tau.min, (K0-pen.order0) / (quad + ttr))
ok.tau = (abs(tau.old-tau.cur) < 1)
}
},
"Schall" = {
obj.cur = ff(phi=phi.cur, beta=beta.cur, gamma=gamma.cur,
tau=tau.cur, lambda1=lambda1.cur, lambda2=lambda2.cur,
Dphi=TRUE, D2phi=TRUE, Dbeta=FALSE, Dgamma=FALSE)
BWB = -obj.cur$Hes.phi0[-k.ref,-k.ref]
ed.phi = sum(t(solve(BWB+tau.cur*Pd[-k.ref,-k.ref]))*BWB) ## = sum(diag(solve(BWB+tau*BWB)%*%Pd))
quad = sum(phi.cur * c(Pd%*%phi.cur))
tau.cur = max(tau.min, (ed.phi-pen.order0)/quad)
},
"grid" = {
if (!tau.stable){
## Starting grid for <tau> = roughness penalty parameter in F0(t)
tau.ref = tau.cur
tau.grid = 2^c((log2(tau.cur)-1):min(15,(log2(tau.cur)+1)))
ngrid = length(tau.grid)
ED.grid = BIC.grid = AIC.grid = numeric(ngrid)
psi.grid = c()
psi.cur = phi2psi(phi.cur)
for (j in 1:ngrid){
## N-R
tau.cur = tau.grid[j]
if (tau.cur == tau.ref){
psi.NR2 = psi.NR
} else {
psi.NR2 = NewtonRaphson(g=g.psi,theta=psi.cur,
grad.tol=grad.tol, RDM.tol=RDM.tol, fun.tol=fun.tol,
verbose=FALSE)
}
psi.prop = psi.NR2$theta ; phi.prop = psi2phi(psi.prop)
psi.grid[[j]] = psi.prop
obj = ff(phi=phi.prop, beta=beta.cur, gamma=gamma.cur,
tau=tau.grid[j], lambda1=lambda1.cur, lambda2=lambda2.cur,
Dphi=TRUE,D2phi=TRUE)
ED.grid[j] = sum(diag(solve(obj$Hes.phi0[-k.ref,-k.ref] - tau.grid[j]*Pd[-k.ref,-k.ref]) %*% obj$Hes.phi0[-k.ref,-k.ref]))
BIC.grid[j] = obj$dev + ED.grid[j]*log(sum(n.event))
AIC.grid[j] = obj$dev + 2*ED.grid[j]
}
##
jj = which.min(AIC.grid)
tau.cur = tau.grid[jj]
tau.iter = c(tau.iter, tau.cur)
tau.stable = (sum(diff(tail(tau.iter,4)) == 0) == 2) ## Stop selecting tau when unchanged 3 iterations in a row
psi.cur = psi.grid[[jj]] ; phi.cur = psi2phi(psi.cur)
}
},
"none"={
## <tau.cur> unchanged
}
) ## End of switch(tau.method)
##
## =======================================
## -2- Newton-Raphson for <beta> & <gamma>
## =======================================
## (i.e. regression and spline parameters in long- and short-term survival submodels
##)
idx1 = 1:length(beta.0)
##
## Select <beta,gamma> using homemade Newton-Raphson
## --------------------------------------------------
## Objective for the joint update of the regression/spline parameters
## <beta,gamma>: penalized log-likelihood with gradient, proposed Newton
## step and Hessian.
## INPUT:
##  - theta: c(beta,gamma), with <beta> in positions <idx1>.
##  - Dtheta: when TRUE, also compute grad / dtheta / Hes.
## OUTPUT: list(g, theta, dtheta, grad, Hes) as expected by myLevMarq()
##  and NewtonRaphson().
## NOTE: reads phi.cur, tau.cur, lambda1.cur, lambda2.cur, idx1 and ff()
##  from the enclosing environment.
g.regr = function(theta,Dtheta=TRUE){
beta = theta[idx1] ; gamma = theta[-idx1]
obj.cur = ff(phi=phi.cur, beta=beta, gamma=gamma,
tau=tau.cur, lambda1=lambda1.cur, lambda2=lambda2.cur,
Dbeta=Dtheta,Dgamma=Dtheta)
## FIX: also initialize <Hes> to NULL; it was previously left undefined when
## Dtheta=FALSE, so the final list(...) call errored ("object 'Hes' not found").
grad = dtheta = Hes = NULL
if (Dtheta){
grad = obj.cur$grad.regr
Hes = obj.cur$Hes.regr
## Slightly ridged negative Hessian to guarantee invertibility
A = -Hes + diag(1e-6,length(grad))
## A = Matrix::nearPD(-obj.cur$Hes.regr)$mat
dtheta = solve(A, grad) ## Proposed Newton step
}
##
ans = list(g=obj.cur$lpen, theta=theta, dtheta=dtheta, grad=grad, Hes=Hes)
return(ans)
} ## End g.regr
##
if (psi.method == "LM"){
regr.LM = myLevMarq(g=g.regr, theta=c(beta.cur,gamma.cur),
grad.tol=grad.tol, RDM.tol=RDM.tol, fun.tol=fun.tol,
lambda_init=1e-3, lambda_factor=10,
verbose=verbose)
beta.cur = regr.LM$theta[idx1] ; gamma.cur = regr.LM$theta[-idx1]
} else if (psi.method == "NR"){
regr.NR = NewtonRaphson(g=g.regr,theta=c(beta.cur,gamma.cur),
grad.tol=grad.tol, RDM.tol=RDM.tol, fun.tol=fun.tol,
verbose=verbose)
beta.cur = regr.NR$theta[idx1] ; gamma.cur = regr.NR$theta[-idx1]
}
##
## ================================
## -3- Update <lambda1> & <lambda2>
## ================================
## (i.e. penalty vectors for additive terms in long- and short-term survival)
##
itermin = 1 ## Only start updating after <itermin> iterations
update.lambda = ifelse(iter <= itermin, FALSE, (J1 > 0)|(J2 > 0))
##
## Evaluate necessary objects for the selection of the penalty parameters
if (!noestimation){
obj.cur = ff(phi.cur, beta.cur, gamma.cur,
tau=tau.cur, lambda1=lambda1.cur, lambda2=lambda2.cur,
Dphi=FALSE, Dbeta=TRUE, Dgamma=!nogamma,
Dlambda=update.lambda & ((lambda.method == "LPS3")))
}
## Update <lambda1> & <lambda2>
## ----------------------------
if (update.lambda & !final.iteration){
## -- Method 1: LPS --
if (lambda.method == "LPS"){ ## Laplace's method with fast determinant computation
## -3- lambda1 & lambda2
if (J2 >0){
Hes.gamma0 = obj.cur$Hes.gamma0 ## Update Hes.gamma0 value
for (j in 1:J2){ ## Loop over additive terms in the long-term survival sub-model
idx = nfixed2 + (j-1)*K2 + (1:K2)
Hes.gamj = Hes.gamma0[idx,idx,drop=FALSE]
## Recompute eigenvalues for update of lambda2 (those for lambda1 remain unchanged!)
ev2.lst[[j]] = ev.fun(BwB=Hes.gamj,Pd=regr2$Pd.x)$dj
}
}
if (J1 > 0 | J2 > 0){
temp = select.lambda.LPS(lambda1=lambda1.cur,lambda2=lambda2.cur)
lambda1.cur = temp$lambda1 ; se.lambda1 = temp$se.lambda1 ; se.loglambda1 = temp$se.loglambda1
lambda2.cur = temp$lambda2 ; se.lambda2 = temp$se.lambda2 ; se.loglambda2 = temp$se.loglambda2
}
} ## Endif lambda.method == "LPS"
##
## -- Method 2: LPS2 --
if (lambda.method == "LPS2"){ ## Laplace's method (jointly for (lambd1,lambda2))
## -3- lambda1 & lambda2
if (J1 > 0 | J2 > 0){
temp = select.lambda.LPS2(Hes.regr0=obj.cur$Hes.regr0)
lambda1.cur = temp$lambda1
lambda2.cur = temp$lambda2
}
} ## Endif lambda.method == "LPS2"
##
## -- Method 3: LPS3 --
if (lambda.method == "LPS3"){ ## Laplace's method (separately for <lambda1> & <lambda2>
## -3a- lambda1 (long-term survival)
if (J1 > 0){
temp = select.lambda.LPS3(coef=beta.cur, nfixed=nfixed1,
lambda=lambda1.cur, Pd=Pd1.x, Mcal=obj.cur$Mcal.1,
pen.order=pen.order1, lambda.min=lambda1.min, bb=bb)
lambda1.cur = temp$lambda ## Vector of penalty parameters for the additive terms
Hes.xi1 = temp$Hes.xi
Hes.lam1 = temp$Hes.lam
}
## -3b- lambda2 (short-term survival)
if (J2 > 0){
temp = select.lambda.LPS3(coef=gamma.cur, nfixed=nfixed2,
lambda=lambda2.cur, Pd=Pd2.x, Mcal=obj.cur$Mcal.2,
pen.order=pen.order2, lambda.min=lambda2.min, bb=bb)
lambda2.cur = temp$lambda ## Vector of penalty parameters for the additive terms
Hes.xi2 = temp$Hes.xi
Hes.lam2 = temp$Hes.lam
}
} ## Endif lambda.method == "LPS3"
##
##
## ## -- Method 4: Schall --
## if (lambda.method == "Schall"){ ## Schall's method
## ##
## ED.cur = EDF(list(fit=obj.cur),Wood.test=Wood.test) ## Evaluate current ED for additive terms
## ## -3a- lambda1 (long-term survival)
## if (J1 > 0){
## for (j in 1:J1){
## idx = nfixed1 + (j-1)*K1 + (1:K1)
## beta.j = beta.cur[idx] ## Centered B-splines coefs for jth additive term
## quad = sum(beta.j * c(Pd1.x %*% beta.j))
## ed = ED.cur$ED1[j,1]
## tau2 = quad / (ed-pen.order1)
## sigma2 = sum(obj.cur$res^2) / (length(obj.cur$res)-ED.cur$ED.tot)
## ## sigma2 = obj.cur$dev / (length(obj.cur$mu.ij)-ED.tot)
## lambda1.cur[j] = max(lambda1.min, sigma2/tau2)
## }
## }
## ## -3b- lambda2 (short-term survival)
## if (J2 > 0){
## for (j in 1:J2){
## idx = nfixed2 + (j-1)*K2 + (1:K2)
## gamma.j = gamma.cur[idx] ## Centered B-splines coefs for jth additive term
## quad = sum(gamma.j* c(Pd2.x %*% gamma.j))
## ed = ED.cur$ED2[j,1]
## tau2 = quad / (ed-pen.order2)
## sigma2 = sum(obj.cur$res^2) / (length(obj.cur$res)-ED.cur$ED.tot)
## ## sigma2 = obj.cur$dev / (length(obj.cur$mu.ij)-ED.tot)
## lambda2.cur[j] = max(lambda2.min, sigma2/tau2)
## }
## }
## } ## Endif lambda.method == "Schall"
##
## -- Method 5: nlminb --
if (lambda.method == "nlminb"){ ## Direct optimization of log-evidence
## Loss function to select <lambda>:
## loss.fn(nu) = -log p(lambda=exp(nu) | data)
## Loss function for nlminb(): minus the log-evidence (Laplace approximation)
## as a function of log(lambda) = log(c(lambda1,lambda2)).
## NOTE: reads phi.cur, beta.cur, gamma.cur, tau.cur, J1, J2, nogamma,
## ldet.fun() and ff() from the enclosing environment; <data> is unused.
## NOTE(review): if J1==0 (resp. J2==0), <lambda1> (resp. <lambda2>) is left
## undefined before the ff() call -- presumably this branch is only reached
## with J1>0 and J2>0; verify against the caller.
loglambda.loss = function(loglambda,data){
if (J1 > 0) lambda1 = exp(loglambda[1:J1])
if (J2 > 0) lambda2 = exp(loglambda[(J1+1):(J1+J2)])
obj.cur = ff(phi.cur, beta.cur, gamma.cur,
tau=tau.cur, lambda1=lambda1, lambda2=lambda2,
Dphi=FALSE, Dbeta=TRUE, Dgamma=TRUE, Dlambda=FALSE)
## log-evidence = lpen - .5*log|neg. Hessian| (per coefficient block)
logEvid = obj.cur$lpen -.5*ldet.fun(-obj.cur$Hes.beta)
if (!nogamma){
logEvid = logEvid -.5*ldet.fun(-obj.cur$Hes.gamma)
}
ans = -logEvid ## nlminb minimizes
return(ans)
} ## End loglambda.loss
##
## Minimize the loss function to select <log(lambda)>
obj.ml = nlminb(start=log(c(lambda1.cur,lambda2.cur)),
objective=loglambda.loss,
lower=rep(0,J1+J2),upper=(rep(10,J1+J2)))
lambda.cur = exp(obj.ml$par) ## Selected <lambda> --> lambda.hat
if (J1 > 0) lambda1.cur = lambda.cur[1:J1]
if (J2 > 0) lambda2.cur = lambda.cur[(J1+1):(J1+J2)]
} ## Endif lambda.method == "nlminb"
} ## End Update <lambda1> & <lambda2>
##
## toc() ; cat("\n")
##
## ======================================
## -4- Status at the end of the iteration
## ======================================
##
obj.cur = ff(phi.cur, beta.cur, gamma.cur,
tau=tau.cur, lambda1=lambda1.cur, lambda2=lambda2.cur,
Dphi=TRUE, D2phi=TRUE, Dbeta=TRUE, Dgamma=!nogamma,
get.logEvid=TRUE)
## Gradient for <beta,gamma>
## -------------------------
if (nogamma) obj.cur$grad.gamma = 0
grad.cur = with(obj.cur, grad.regr) ##c(grad.beta,grad.gamma))
## grad.Lp = with(obj.cur, c(beta=Lp(grad.beta),gamma=Lp(grad.gamma)))
##
## Penalized logLik
## ----------------
lpen.final = obj.cur$lpen ## Value of the penalized llik at the end of the global iteration
##
## Effective dims & Information criteria
## -------------------------------------
ED.cur = EDF(list(fit=obj.cur),Wood.test=Wood.test)
AIC = obj.cur$dev + 2*ED.cur$ED.tot
BIC = obj.cur$dev + log(sum(n.event))*ED.cur$ED.tot
logEvid = obj.cur$logEvid
##
## Some output at the end of an iteration
## --------------------------------------
if (iter.verbose){
cat(iter,
": logEvid:",round(logEvid,2),
"; BIC:",round(BIC,2),
"; AIC:",round(AIC,2),
"; Dev:",round(obj.cur$dev,2),
"; lpen:",round(obj.cur$lpen,2),
"\n")
}
## green == TRUE: necessary condition to complete the estimation process
## Set <green> according to the user-selected convergence criterion:
## absolute change in the monitored quantity below <criterion.tol>
## (or a small gradient norm for criterion == "gradient").
switch(criterion, ## CONVERGENCE criterion
"logEvid" = {
green = (abs(logEvid-logEvid.old) < criterion.tol)
logEvid.old = logEvid
},
"deviance" = {
dev.cur = obj.cur$dev
green = (abs(dev.old-dev.cur) < criterion.tol)
dev.old = dev.cur
},
"lpen" = {
green = (abs(lpen.old-obj.cur$lpen) < criterion.tol)
lpen.old = obj.cur$lpen
},
"AIC" = {
green = (abs(AIC.old-AIC) < criterion.tol)
AIC.old = AIC
},
"BIC" = {
green = (abs(BIC.old-BIC) < criterion.tol)
BIC.old = BIC
},
"gradient" = {
## Require at least 6 iterations before declaring gradient convergence
green = (Lp(grad.cur) < grad.tol) & (iter > 5)
## green = all(grad.Lp < 10*grad.tol) & (iter > 5)
}
)
## Necessary & sufficient condition to complete the estimation process:
## green light followed by one last iteration
if (final.iteration & green) converged = TRUE
final.iteration = green ## One final iteration required after the 1st green light to completeestimation
##
if (iter >= iterlim) break
if (noestimation) break
} ## End While (global estimation loop)
## ------------------------------------------------------------------------------------
## END the estimation process
## ------------------------------------------------------------------------------------
##
## Prepare final output (after convergence)
## ========================================
fit = obj.cur
fit$criterion = criterion
##
## Gradients, Hessians and se's
## ----------------------------
fit$grad.phi = obj.cur$grad.phi
fit$grad.psi = obj.cur$grad.phi[-k.ref]
fit$Hes.phi0 = obj.cur$Hes.phi0 ; fit$Hes.phi = obj.cur$Hes.phi
fit$Hes.psi0 = obj.cur$Hes.phi0[-k.ref,-k.ref] ; fit$Hes.psi = obj.cur$Hes.phi[-k.ref,-k.ref]
se.psi = sqrt(diag(solve(-fit$Hes.psi)))
se.phi = psi2phi(se.psi)
##
se.beta = with(fit, sqrt(diag(solve(-Hes.beta+1e-6*diag(ncol(Hes.beta))))))
if (nogamma){
se.gamma = 0
} else {
se.gamma = with(fit, sqrt(diag(solve(-Hes.gamma+1e-6*diag(ncol(Hes.gamma))))))
}
## Regression parameter estimates with their se's, z-score, Pval
## -------------------------------------------------------------
fit$marginalized = FALSE ## Marginalization indicator (over penaly parameters) for regression and spline coefs
## Summarize estimates: point estimate, standard error, <ci.level> credible
## bounds, z-score and chi-square(1) p-value. The credible level is attached
## as the "ci.level" attribute of the returned matrix.
fun = function(est, se){
    z = est / se
    out = cbind(est = est, se = se,
                low = est - z.alpha * se, up = est + z.alpha * se,
                "Z" = z,
                "Pval" = 1 - pchisq(z^2, 1))
    attr(out, "ci.level") = ci.level
    out
} ## End fun
fit$ci.level = ci.level
fit$gam = fit$gamma
fit$phi = with(fit, fun(est=phi,se=se.phi))
fit$beta = with(fit, fun(est=beta,se=se.beta))
fit$gamma = with(fit, fun(est=gamma,se=se.gamma))
fit$psi.method = psi.method
##
## Penalty parameters
## ------------------
fit$logscale = logscale
if (J1 > 0){
fit$lambda1 = lambda1.cur ; fit$se.lambda1 = se.lambda1 ; fit$se.loglambda1 = se.loglambda1
if ((lambda.method == "Schall") || (lambda.method == "LPS2")){
fit$xi1 = log(lambda1.cur-lambda1.min)
fit$Hes.xi1 = Hes.xi1
}
fit$pen.order1 = pen.order1
}
if (J2 > 0){
fit$lambda2 = lambda2.cur ; fit$se.lambda2 = se.lambda2 ; fit$se.loglambda2 = se.loglambda2
if ((lambda.method == "Schall") || (lambda.method == "LPS2")){
fit$xi2 = log(lambda2.cur-lambda2.min)
fit$Hes.xi2 = Hes.xi2
}
fit$pen.order2 = pen.order2
}
fit$lambda.method = lambda.method
##
fit$tau = tau.cur ; fit$pen.order0 = pen.order0
fit$tau.method = tau.method
##
## Evaluate EDF, Chi2 and Pval for the estimated additive terms
## and effective total number of parameters in regression submodels
## -----------------------------------------------------------------
temp = EDF(list(fit=fit),Wood.test=Wood.test)
fit = c(fit,temp)
##
## AIC and BIC
## -----------
fit$nobs = nrow(regr1$Xcal)
fit$n = n ## Number of units (not to be confused with the number <nobs> of longitudinal binary observations)
fit$d = sum(n.event) ## Total number of events observed on the <n> units
##
fit$AIC = AIC ## fit$dev + 2*ED.tot
fit$BIC = BIC ## fit$dev + log(fit$d)*ED.tot
##
## Iterations and Computation time
## -------------------------------
fit$converged = converged ## Also save <converged> in fit
fit$iter = iter
fit$elapsed.time <- (proc.time()-ptm)[1] ## Elapsed time
##
## Returned list
## -------------
ans = list(formula1=formula1, formula2=formula2, baseline=baseline,
id=data$id, time=data$time, event=data$event,
regr1=regr1,regr2=regr2, K0=K0,
fit=fit,
call=cl,
converged=converged ## Also save <converged> in root
)
##
ans$logLik = ans$fit$llik
class(ans) = "tvcure"
return(ans)
}
## End tvcure
## NOTE(review): the three lines below are rdrr.io page-footer text captured
## when this file was scraped; they are not part of the tvcure source and are
## commented out here so the file remains parseable R.
## Any scripts or data that you put into this service are public.
## Add the following code to your website.
## For more information on customizing the embed code, read Embedding Snippets.