#' @title Fit Dependent Censoring Models
#'
#' @description This function allows the user to estimate the dependency parameter along with all other model parameters. At the first stage it estimates the cumulative hazard function, and
#' at the second stage it estimates the remaining model parameters assuming that the cumulative hazard function is known. The details of
#' the dependent censoring methodology can be found in Deresa and Van Keilegom (2024).
#'
#' @references Deresa and Van Keilegom (2024). Copula based Cox proportional hazards models for dependent censoring, Journal of the American Statistical Association, 119:1044-1054.
#'
#' @param start Initial values for the finite-dimensional parameters. If \code{start} is NULL, the initial values will be obtained
#' by fitting a Cox model for the survival time T and a Weibull model for the dependent censoring time C.
#'
#' @param resData Data matrix with three columns; Z = the observed survival time, d1 = the censoring indicator of T
#' and d2 = the censoring indicator of C.
#' @param X Data matrix with covariates related to T.
#' @param W Data matrix with covariates related to C. First column of W should be a vector of ones.
#' @param cop The copula family used to model the dependency between T and C. This argument can take
#' one of the values in \code{c("Gumbel", "Frank", "Normal")}.
#' @param dist The distribution to be used for the censoring time C. Only two distributions are allowed, i.e.,
#' the Weibull and lognormal distributions, with \code{"Weibull"} as the default.
#' @param eps Convergence error. This is set by the user in such a way that the desired convergence is met; the default is \code{eps = 1e-4}.
#' @param n.iter Number of iterations; the default is \code{n.iter = 50}. The larger the number of iterations, the longer the computational time.
#' @param bootstrap A boolean indicating whether to compute bootstrap standard errors for making inferences.
#' @param n.boot Number of bootstrap samples used to estimate the bootstrap standard errors if \code{bootstrap = TRUE}. The default is \code{n.boot = 150}, but higher
#' values of \code{n.boot} are recommended for obtaining good estimates of the bootstrap standard errors.
#' @param ncore The number of cores to use for parallel computation in bootstrapping, with the default \code{ncore = 7}.
#' @importFrom copula pCopula frankCopula gumbelCopula tau
#' @importFrom stats nlminb pnorm qnorm
#' @importFrom survival coxph survreg Surv
#'
#' @return This function returns a fit of the dependent censoring model: the parameter estimates, the nonparametric estimate of the cumulative hazard function, the bootstrap standard
#' errors of the finite-dimensional parameters, etc.
#'
#'
#'
#' @examples
#' \donttest{
#' # Toy data example to illustrate implementation
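#' set.seed(2024) # arbitrary seed; makes the simulated toy data reproducible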
#' n = 300
#' beta = c(0.5)
#' lambd = 0.35
#' eta = c(0.9,0.4)
#' X = cbind(rbinom(n,1,0.5))
#' W = cbind(rep(1,n),rbinom(n,1,0.5))
#' # generate dependency structure from Frank
#' frank.cop <- copula::frankCopula(param = 5,dim = 2)
#' U = copula::rCopula(n,frank.cop)
#' T1 = (-log(1-U[,1]))/(lambd*exp(X*beta)) # Survival time
#' T2 = (-log(1-U[,2]))^(1.1)*exp(W%*%eta) # Censoring time
#' A = runif(n,0,15) # administrative censoring time
#' Z = pmin(T1,T2,A)
#' d1 = as.numeric(Z==T1)
#' d2 = as.numeric(Z==T2)
#' resData = data.frame("Z" = Z,"d1" = d1, "d2" = d2) # should be data frame
#' colnames(W) <- c("ones","cov1")
#' colnames(X) <- "cov.surv"
#'
#' # Fit dependent censoring model
#'
#' fit <- fitDepCens(resData = resData, X = X, W = W, bootstrap = FALSE)
#'
#' # parameter estimates
#'
#' fit$parameterEstimates
#'
#' # summary fit results
#' summary(fit)
#'
#' # plot cumulative hazard vs time
#'
#' plot(fit$observedTime, fit$cumhazardFunction, type = "l",xlab = "Time",
#' ylab = "Estimated cumulative hazard function")
#'}
#'
#' @export
fitDepCens = function(resData,X,W,
cop = c("Frank","Gumbel", "Normal"),
dist = c("Weibull", "lognormal"), start = NULL, n.iter = 50, bootstrap = TRUE, n.boot = 150, ncore = 7, eps = 1e-4){
cop <- match.arg(cop)
dist <- match.arg(dist)
# Verify column names of input dataframe resData
if (!all(c("Z", "d1", "d2") %in% colnames(resData)))
stop("Z, d1 and d2 arguments must be column names of resData")
id <- match(c("Z", "d1", "d2"), colnames(resData))
colnames(resData)[id] <- c("Z", "d1", "d2")
Z = resData$Z
d1 = resData$d1
d2 = resData$d2
if(!is.matrix(W)) W = as.matrix(W)
if(!is.matrix(X)) X = as.matrix(X)
k = dim(X)[2]
l = dim(W)[2]
if(l==1)
stop("W should have more than one column: a column of ones plus at least one covariate")
if(!is.null(start)){
if(start[k+l+1]<=0 | start[k+l+2]<=0)
stop("Scale or dependency parameter cannot be negative")
}
if(is.null(start)){
fit1 = coxph(Surv(Z,d1)~X)
fit2 = survreg(Surv(Z,d2)~W-1)
}
if(is.null(start) & (cop == "Frank" | cop == "Gumbel")){ # obtain initial values by fitting standard models
start = c(fit1$coefficients,fit2$coefficients,fit2$scale,2)
}else if (is.null(start) & (cop == "Normal")){
start = c(fit1$coefficients,fit2$coefficients,fit2$scale,0.2)}
# lb = lower boundary, ub = upper boundary
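# The copula parameter is restricted to its admissible range: Frank theta > 0,
# Gumbel theta >= 1, Normal rho in (-0.99, 0.99); the scale of the censoring
# distribution is kept nonnegative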
if(cop == "Frank") # Frank copula
{
lb = c(rep(-Inf,k+l),0,1e-5)
ub = c(rep(Inf,k+l+2))
}else if(cop == "Gumbel") # Gumbel copula
{lb = c(rep(-Inf,k+l),0,1.0001)
ub = c(rep(Inf,k+l+2))
} else if(cop == "Normal") # Normal
{lb = c(rep(-Inf,k+l),0,-0.99)
ub = c(rep(Inf,k+l+1),0.99)
}else
stop(paste0("The copula function",cop, "is not supported"))
if(length(start)!= k+l+2)
stop("The length of initial values is not equal to the number of parameters need to be estimated")
res <- SolveL(start,resData,X,W,cop,dist) # Estimate the cumulative hazard function
lsml <- res$lambda
Llrge <- res$cumhaz
T1 <- res$times
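# Longfun evaluates the estimated hazard jumps (lhat) and the cumulative hazard (cumL)
# at the observed times Z, using the jump times and values returned by SolveL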
longfrm <- Longfun(Z,T1,lsml,Llrge)
lhat <- longfrm[,1]
cumL <- longfrm[,2]
# initial step of estimation
parhat = nlminb(start = start, PseudoL, resData = resData, X = X, W = W,lhat = lhat,cumL = cumL,cop = cop,dist = dist,lower = lb ,upper = ub, control = list(eval.max = 300,iter.max = 200))$par
a = start
b = parhat
flag = 0
# Then do estimation iteratively until convergence
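# Alternate between updating the cumulative hazard (SolveL) and maximizing the
# pseudo-likelihood over the finite-dimensional parameters (nlminb on PseudoL)
# until the change in the parameter vector falls below eps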
while (Distance(b,a)>eps){ # doing this while loop until the desired convergence criteria are met
a = b
res <- SolveL(a,resData,X,W,cop,dist)
lsml = res$lambda
Llrge = res$cumhaz
T1 = res$times
longfrm = Longfun(Z,T1,lsml,Llrge)
lhat = longfrm[,1]
cumL = longfrm[,2]
parhat = nlminb(start = a,PseudoL,resData = resData,X = X,W = W,lhat = lhat,cumL = cumL,cop = cop,dist = dist,lower = lb ,upper = ub, control = list(eval.max=300,iter.max=200))$par
b = parhat
flag = flag+1;
if (flag>n.iter) # Stop after n.iter iterations; this usually gives sufficiently converged results
{
flag=0;
#warning("The maximum number of iterations reached before convergence criteria is satisified. Better convergence may be obtained by increasing n.iter")
warning(paste0("The current convergence error (eps) is ", Distance(b,a)))
break;
}
}
cumHat <- SolveL(b,resData,X,W,cop,dist)
# nonparametric bootstrap for making inference
paramsBootstrap = list("init" = parhat,"resData" = resData, "X" = X, "W" = W,"lhat" = lhat,"cumL" = cumL, "dist" = dist,"k" = k, "lb" = lb, "ub" = ub, "Obs.time" = T1, "cop" = cop, "n.boot" = n.boot, "n.iter" = n.iter, "ncore" = ncore, "eps" = eps)
fitObj <- NULL
if(bootstrap) # Obtain bootstrap standard error
fitObj <- do.call(boot.fun, paramsBootstrap)
# Transform the copula parameter to tau scale
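# Frank and Gumbel parameters are converted to Kendall's tau via copula::tau();
# for the Normal copula, tau = (2/pi)*asin(rho)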
nn <- length(parhat)
if (cop == "Frank") {parhat[nn] <- tau(frankCopula(parhat[nn]))
}else if (cop == "Gumbel"){ parhat[nn] <- tau(gumbelCopula(parhat[nn]))
}else parhat[nn] <- 2*asin(parhat[nn])/pi
names(parhat)[nn] <- "tau"
names(parhat)[nn-1] <- "sigma"
depObj <- c(list("parameterEstimates" = parhat,"copula" = cop, "censoringDistribution" = dist, "bootstrap" = bootstrap, "dimX" = k, "dimW" = l,"observedTime" = cumHat$times,"hazardFunction" = cumHat$lambda, "cumhazardFunction" = cumHat$cumhaz),fitObj)
class(depObj) <- append(class(depObj), "depFit") # dependent censoring fit object
return(depObj)
}
#' @title Fit Independent Censoring Models
#'
#' @description This function allows the user to estimate all model parameters under the assumption of independent censoring. At the first stage it estimates the cumulative hazard function, and
#' at the second stage it estimates the remaining model parameters assuming that the cumulative hazard function is known.
#'
#'
#' @param start Initial values for the finite-dimensional parameters. If \code{start} is NULL, the initial values will be obtained
#' by fitting a Cox model for the survival time T and a Weibull model for the censoring time C.
#'
#' @param resData Data matrix with three columns; Z = the observed survival time, d1 = the censoring indicator of T
#' and d2 = the censoring indicator of C.
#' @param X Data matrix with covariates related to T.
#' @param W Data matrix with covariates related to C. First column of W should be a vector of ones.
#' @param dist The distribution to be used for the censoring time C. Only two distributions are allowed, i.e.,
#' the Weibull and lognormal distributions, with \code{"Weibull"} as the default.
#' @param eps Convergence error. This is set by the user in such a way that the desired convergence is met; the default is \code{eps = 1e-4}.
#' @param n.iter Number of iterations; the default is \code{n.iter = 50}. The larger the number of iterations, the longer the computational time.
#' @param bootstrap A boolean indicating whether to compute bootstrap standard errors for making inferences.
#' @param n.boot Number of bootstrap samples used to estimate the bootstrap standard errors if \code{bootstrap = TRUE}. The default is \code{n.boot = 150}, but higher
#' values of \code{n.boot} are recommended for obtaining good estimates of the bootstrap standard errors.
#' @param ncore The number of cores to use for parallel computation in bootstrapping, with the default \code{ncore = 7}.
#' @importFrom stats nlminb pnorm qnorm sd
#' @importFrom survival coxph survreg Surv
#'
#' @return This function returns a fit of the independent censoring model: the parameter estimates, the nonparametric estimate of the cumulative hazard function, the bootstrap standard
#' errors of the finite-dimensional parameters, etc.
#'
#'
#'
#' @examples
#' \donttest{
#'
#' # Toy data example to illustrate implementation
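#' set.seed(2024) # arbitrary seed; makes the simulated toy data reproducible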
#' n = 300
#' beta = c(0.5)
#' lambd = 0.35
#' eta = c(0.9,0.4)
#' X = cbind(rbinom(n,1,0.5))
#' W = cbind(rep(1,n),rbinom(n,1,0.5))
#' # generate dependency structure from Frank
#' frank.cop <- copula::frankCopula(param = 5,dim = 2)
#' U = copula::rCopula(n,frank.cop)
#' T1 = (-log(1-U[,1]))/(lambd*exp(X*beta)) # Survival time
#' T2 = (-log(1-U[,2]))^(1.1)*exp(W%*%eta) # Censoring time
#' A = runif(n,0,15) # administrative censoring time
#' Z = pmin(T1,T2,A)
#' d1 = as.numeric(Z==T1)
#' d2 = as.numeric(Z==T2)
#' resData = data.frame("Z" = Z,"d1" = d1, "d2" = d2) # should be data frame
#'
#' colnames(W) <- c("ones","cov1")
#' colnames(X) <- "cov.surv"
#'
#' # Fit independent censoring model
#'
#' fitI <- fitIndepCens(resData = resData, X = X, W = W, bootstrap = FALSE)
#'
#' # parameter estimates
#'
#' fitI$parameterEstimates
#'
#' # summary fit results
#' summary(fitI)
#'
#' # plot cumulative hazard vs time
#'
#' plot(fitI$observedTime, fitI$cumhazardFunction, type = "l",xlab = "Time",
#' ylab = "Estimated cumulative hazard function")
#'}
#'
#' @export
fitIndepCens = function(resData,X,W,
dist = c("Weibull", "lognormal"), start = NULL,
n.iter = 50, bootstrap = TRUE, n.boot = 150, ncore = 7, eps = 1e-4){
dist <- match.arg(dist)
if (!all(c("Z", "d1", "d2") %in% colnames(resData)))
stop("Z, d1 and d2 arguments must be column names of resData")
id <- match(c("Z", "d1", "d2"), colnames(resData))
colnames(resData)[id] <- c("Z", "d1", "d2")
Z = resData$Z
d1 = resData$d1
d2 = resData$d2
k = if (is.vector(X)) 1 else dim(X)[2]
l = dim(W)[2]
if(!is.null(start)){
if(start[k+l+1]<=0)
stop("Scale parameter cannot be negative")
}
if(is.null(start)){ # obtain initial values by fitting standard models
fit1 = survival::coxph(Surv(Z,d1)~X)
fit2 = survival::survreg(Surv(Z,d2)~W-1)
start = c(fit1$coefficients,fit2$coefficients,fit2$scale)
}
if(length(start)!= k+l+1)
stop("The length of initial values is not equal to the number of parameters need to be estimated")
res <- SolveLI(start,resData,X) # estimate the cumulative hazard function
lsml <- res$lambda
Llrge <- res$cumhaz
T1 <- res$times
longfrm <- Longfun(Z,T1,lsml,Llrge)
lhat <- longfrm[,1]
cumL <- longfrm[,2]
# lb = lower boundary, ub = upper boundary
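# Only the scale parameter of the censoring distribution is bounded below (by zero)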
lb = c(rep(-Inf,k+l),0)
ub = c(rep(Inf,k+l+1))
# Initial estimate of theta
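# LikCopInd is the likelihood under independent censoring, evaluated at the
# estimated cumulative hazard (lhat, cumL)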
parhat = nlminb(start = start,LikCopInd, resData = resData,X = X,W = W,lhat = lhat,cumL = cumL, dist = dist, lower = lb ,upper = ub, control = list(eval.max=300,iter.max=200))$par
a = start
b = parhat
flag = 0
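# Iterate between updating the cumulative hazard (SolveLI) and maximizing the
# likelihood (nlminb on LikCopInd) until the change in the parameter vector falls below eps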
while (Distance(b,a)>eps){ # doing this while loop until the desired convergence criteria are met
a = b
res = SolveLI(a,resData,X)
lsml = res$lambda
Llrge = res$cumhaz
T1 = res$times
longfrm = Longfun(Z,T1,lsml,Llrge)
lhat = longfrm[,1]
cumL = longfrm[,2]
parhat = nlminb(start = a,LikCopInd,resData = resData,X = X,W = W,lhat = lhat,cumL = cumL, dist = dist, lower = lb ,upper = ub, control = list(eval.max=300,iter.max=200))$par
b = parhat
flag = flag+1;
if (flag>n.iter) # Stop after n.iter iterations; this usually gives sufficiently converged results
{
flag=0;
#warning("The maximum number of iterations reached before convergence criteria is satisified. Better convergence may be obtained by increasing n.iter")
warning(paste0("The current convergence error (eps) is", Distance(b,a)))
break;
}
}
cumHat = SolveLI(parhat,resData,X)
# nonparametric bootstrap for making inference
paramsBootstrap = list("init" = parhat,"resData" = resData, "X" = X, "W" = W,"lhat" = lhat,"cumL" = cumL, "dist" = dist,"k" = k, "lb" = lb, "ub" = ub, "Obs.time" = T1, "n.boot" = n.boot, "n.iter" = n.iter,"ncore" = ncore, "eps" = eps)
fitObj <- NULL
if(bootstrap) # Obtain bootstrap standard error
fitObj <- do.call(boot.funI, paramsBootstrap)
nn <- length(parhat)
names(parhat)[nn] <- "sigma"
indObj <- c(list("parameterEstimates" = parhat, "censoringDistribution" = dist, "bootstrap" = bootstrap, "dimX" = k, "dimW" = l,"observedTime" = cumHat$times, "hazardFunction" = cumHat$lambda, "cumhazardFunction" = cumHat$cumhaz),fitObj)
class(indObj) <- append(class(indObj), "indepFit") # independent censoring fit object
return(indObj)
}