#' Estimating survival curves via structural transformation model
#'
#' \code{trSurvfit} estimates survival curves under dependent truncation and independent censoring via a structural transformation model.
#'
#' A structural transformation model assumes there is a latent, quasi-independent truncation time
#' that is associated with the observed dependent truncation time, the event time, and an unknown dependence parameter
#' through a specified funciton.
#' The dependence parameter is chosen to either minimize the absolute value of the restricted inverse probability weighted Kendall's tau or maximize the corresponding \eqn{p}-value.
#' The marginal distribution for the truncation time and the event time are completely left unspecified.
#'
#' The structure of the transformation model is of the form:
#' \deqn{h(U) = (1 + a)^{-1} \times (h(T) + ah(X)),} where \eqn{T} is the truncation time, \eqn{X} is the observed failure time,
#' \eqn{U} is the transformed truncation time that is quasi-independent from \eqn{X} and \eqn{h(\cdot)} is a monotonic transformation function.
#' The condition, \eqn{T < X}, is assumed to be satisfied.
#' The quasi-independent truncation time, \eqn{U}, is obtained by inverting the test for quasi-independence by either minimizing
#' the absolute value of the restricted inverse probability weighted Kendall's tau or maximize the corresponding \eqn{p}-value.
#'
#' At the current version, three transformation structures can be specified. \code{trans = "linear"} corresponds to \deqn{h(X) = 1;}
#' \code{trans = "log"} corresponds to \deqn{h(X) = log(X);}
#' \code{trans = "exp"} corresponds to \deqn{h(X) = exp(X).}
#'
#' @param trun left truncation time satisfying \code{trun} <= \code{obs}.
#' @param obs observed failure time, must be the same length as \code{trun}, might be right-censored.
#' @param delta an optional 0-1 vector of censoring indicator (0 = censored, 1 = event) for \code{obs}.
#' If this vector is not specified, \code{cKendall} assumes no censoring and all observed failure time
#' denote events.
#' @param tFun a character string specifying the transformation function or a user specified function indicating the relationship
#' between \eqn{X}, \eqn{T}, and \eqn{a}.
#' When \code{tFun} is a character, the following are permitted:
#' \describe{
#' \item{linear}{linear transformation structure;}
#' \item{log}{log-linear transformation structure;}
#' \item{exp}{exponential transformation structure.}
#' }
#' @param plots an optional logical value; if TRUE, a series of diagnostic plots as well as the survival curve for the observed failure time will be plotted.
#' @param control controls the lower and upper bounds when \code{trans} is an user specified function.
#' @param ... for future methods.
#'
#' @return The output contains the following components:
#' \describe{
#' \item{\code{surv}}{is a \code{data.frame} contains the survival probabilities estimates.}
#' \item{\code{byTau}}{a list contains the estimator of transformation parameter:
#' \describe{
#' \item{\code{par}}{is the best set of transformation parameter found;}
#' \item{\code{obj}}{is the value of the inverse probability weighted Kendall's tau corresponding to 'par'.}}
#' }
#' \item{\code{byP}}{a list contains the estimator of transformation parameter:
#' \describe{
#' \item{\code{par}}{is the best set of transformation parameter found;}
#' \item{\code{obj}}{is the value of the inverse probability weighted Kendall's tau corresponding to 'par'.}}
#' }
#' \item{\code{qind}}{a data frame consists of two quasi-independent variables:
#' \describe{
#' \item{\code{trun}}{ is the transformed truncation time;}
#' \item{\code{obs}}{ is the corresponding uncensored failure time.}}}
#' }
#'
#' @references Martin E. and Betensky R. A. (2005), Testing quasi-independence of failure and truncation times via conditional Kendall's tau,
#' \emph{Journal of the American Statistical Association}, \bold{100} (470): 484-492.
#' @references Austin, M. D. and Betensky R. A. (2014), Eliminating bias due to censoring in Kendall's tau estimators for quasi-independence of truncation and failure,
#' \emph{Computational Statistics & Data Analysis}, \bold{73}: 16-26.
#' @references Chiou, S., Austin, M., Qian, J. and Betensky R. A. (2018), Transformation model estimation of survival under dependent truncation and independent censoring,
#' \emph{Statistical Methods in Medical Research}, \bold{28} (12): 3785-3798.
#'
#' @export
#' @example inst/examples/ex_trSurvfit.R
trSurvfit <- function(trun, obs, delta = NULL, tFun = "linear", plots = FALSE,
control = trSurv.control(), ...) {
## trun = truncation time
## obs = observed failure time
## delta = censoring indicator
out <- NULL
if (is.null(delta)) delta <- rep(1, length(trun))
if (class(tFun) == "character") {
if (tFun == "linear") FUN <- function(X, T, a) (T + a * X) / (1 + a)
if (tFun == "log") FUN <- function(X, T, a) exp((log(replace(T, 0, 1)) + a * log(X)) / (1 + a))
if (tFun == "log2") FUN <- function(X, T, a) exp((1 + a) * log(replace(T, 0, 1)) - a * log(X))
if (tFun == "exp") FUN <- function(X, T, a) log((exp(T) + a * exp(X)) / (1 + a))
} else {
FUN <- match.fun(tFun)
}
lower <- ifelse(control$lower == -Inf, -.Machine$integer.max, control$lower)
upper <- ifelse(control$upper == Inf, .Machine$integer.max, control$upper)
ini <- cKendall(trun, obs, delta)
ini.ipw <- cKendall(trun, obs, delta, method = "IPW2")
sc <- survfit(Surv(trun, obs, 1 - delta) ~ 1)
S0 <- survfit(Surv(trun, obs, delta) ~ 1)
if (length(table(delta)) > 1 &
sum(head(sc$n.event[sc$n.event > 0]/sc$n.risk[sc$n.event > 0]) == 1) <= 2) {
sc$time <- sc$time[sc$n.event > 0]
sc$surv <- exp(-cumsum(sc$n.event[sc$n.event > 0]/sc$n.risk[sc$n.event > 0]))
}
out$qind <- subset(data.frame(trun = trun, obs = obs, delta = delta), delta == 1)
out$qind <- out$qind[order(out$qind$obs),]
y0 <- sort(obs)
trun1 <- trun[order(obs)][delta[order(obs)] == 1]
obs1 <- sort(obs[delta == 1])
delta1 <- delta[delta == 1]
yi <- unique(obs1)
byTau <- byP <- NULL
p0 <- -as.numeric(coef(lm(trun ~ obs))[2])
byTau$par <- uniroot.all(f = function(x) sapply(x, function(y)
getA(y, trun1, obs1, sc = sc, FUN = FUN)$PE),
interval = c(lower + 1e-5, upper))
if (length(byTau$par) > 0) {
byTau$par <- unique(c(uniroot.all(f = function(x)
sapply(x, function(y) getA(y, trun1, obs1, sc = sc, FUN = FUN)$PE),
interval = c(lower + 1e-5, byTau$par[1])), byTau$par))
byTau$par <- sort(byTau$par)
byTau$obj <- sapply(byTau$par, function(x) getA(x, trun1, obs1, sc = sc, FUN = FUN)$PE)
} else {
grids <- seq(lower + 1e-5, upper, length.out = 30)
tmp <- sapply(1:29, function(y)
optimize(f = function(x) abs(getA(x, trun1, obs1, delta1, sc = sc, FUN = FUN)$PE),
tol = .01, interval = c(grids[y], grids[y + 1])))
byTau$par <- as.numeric(tmp[1, which.min(tmp[2,])])
byTau$obj <- as.numeric(tmp[2, which.min(tmp[2,])])
}
suppressWarnings(tmpP <- optimize(f = function(x) getA(x, trun1, obs1, delta1, sc = sc, FUN = FUN)$p.value, interval = c(lower, 2 * byTau$par[1] + 1), maximum = TRUE))
byP$par <- tmpP$maximum
byP$obj <- tmpP$objective
tmp <- getA(byTau$par[1], trun1, obs1, sc = sc, FUN = FUN)$p.value
byP$par <- ifelse(!is.na(tmp) & tmp > byP$obj, byTau$par[1], byP$par)
byP$obj <- ifelse(!is.na(tmp) & tmp > byP$obj, tmp, byP$obj)
if (abs(byTau$obj[1]) > 0.1)
warning("Best estimate does not achieve tau_c = 0. This model may not be appropriate.", call. = FALSE)
a <- byTau$par[1]
out$qind$ta <- ta <- mapply(FUN, X = obs1, T = trun1, a = a)
out$qind$wgtT <- approx(sc$time, sc$surv, ta, "constant", yleft = 1, yright = min(sc$surv))$y
out$qind$wgtX <- approx(sc$time, sc$surv, obs1, "constant", yleft = 1, yright = min(sc$surv))$y
## Turnbull's algorithm
scX <- approx(sc$time, sc$surv, method = "constant", f = 0,
xout = yi, yleft = 1, yright = min(sc$surv))$y
aij <- sapply(1:length(yi), function(x) 1 * (yi[x] == obs1))
bij <- sapply(1:length(yi), function(x) 1 * (yi[x] >= ta))
f0 <- rep(1 / length(yi), length(yi))
tb <- squarem(par = f0, fixptfn = tauEm, objfn = tauLik, aij = aij, bij = bij,
obs = obs1, ta = ta)
f1 <- tb$par / scX
f1 <- f1 / sum(f1)
Sy <- approx(yi, 1 - cumsum(f1), method = "constant", xout = y0, yleft = 1,
yright = min(1 - cumsum(f1)))$y
## Weighted KM
## s0 <- survfit(Surv(ta, yi, rep(1, length(yi))) ~ 1, weights = 1/scX)
## if (sum(s0$n.risk == 1) > 0) {
## s0 <- survfit(Surv(ta, yi, rep(1, length(yi))) ~ 1, weights = 1/scX,
## subset = -which.min(abs(yi - s0$time[s0$n.risk == 1])))
## }
## Sy <- approx(s0$time, s0$surv, method = "constant", xout = y0, yleft = 1, yright = 0)$y
## Make plots
if (plots) {
op1 <- par(mfrow = c(2,1), oma = c(1,1,1,1) + 0.1, mar = c(3.7,3,1,1) + 0.2)
plot(trun, obs, cex = .4, main = "", xlab = "", ylab = "", pch = 4)
mtext("x: uncensored o: censored", 3, line = .1, cex = .9)
mtext(expression(bold("Before transformation")), 3, line = .8, cex = 1.2)
points(trun[delta == 0], obs[delta == 0], cex = .4, pch = 1)
title(xlab = "Truncation times", ylab = "Failure times", line = 2, cex.lab = 1)
plot(ta, obs1, cex = .4, main = "", xlab = "", ylab = "", pch = 4)
mtext("x: uncensored o: censored", 3, line = .1, cex = .9)
mtext(expression(bold("After transformation (uncensored objects only)")), 3, line = .8, cex = 1.2)
points(trun[delta == 0], obs[delta == 0], cex = .4, pch = 1)
title(xlab = "Truncation times", ylab = "Failure times", line = 2, cex.lab = 1)
par(op1)
op1 <- par(mfrow = c(2,1), oma = c(1,1,1,1) + 0.1, mar = c(3.7,3,1,1.5) + 0.2, ask = TRUE)
if (a > lower + .2)
run <- c(seq(lower, max(lower, a - .2), length = 30),
seq(max(lower, a - .2), a + .2, .01),
seq(a + .2, 2 * a + 1, length = 30)) + 1e-5
if (a <= lower + .2)
run <- sort(unique(c(seq(lower, 2 * a + 1.01, length = 80) + 1e-5, a)))
Ytau <- sapply(run, function(x) getA(x, trun1, obs1, delta1, sc, FUN)$PE)
if (min(abs(Ytau), na.rm = TRUE) < byTau$obj[1]) {
a <- byTau$par[1] <- run[which.min(abs(Ytau))]
byTau$obj[1] <- min(abs(Ytau))
}
plot(run, Ytau, "l", xlab = "", ylab = "", xlim = c(max(min(run),lower), min(max(run), upper)),
ylim = range(Ytau, na.rm = TRUE),
main = paste("Estimate a by restricted IPW Kendall's tau", " (a = ", round(a, 3), ")",
sep = ""))
title(xlab = "a", ylab = expression(tau[c]), line = 2, cex.lab = 1)
abline(v = a, lty = 3)
abline(h = byTau$obj[1], lty = 3)
text(x = max(run), y = byTau$obj[1], labels = format(round(byTau$obj[1], 2), nsmall = 2),
pos = 4, cex = .8, xpd = TRUE, srt = 0, offset = 1.5)
if (byP$par > lower + .2)
run <- c(seq(lower, max(lower, byP$par - .2), length = 30),
seq(max(lower, byP$par - .2), byP$par + .2, .01),
seq(byP$par + .2, 2 * byP$par + 1, length = 30)) + 1e-5
if (byP$par <= lower + .2)
run <- sort(unique(c(seq(lower, 2 * byP$par + 1.01, length = 80) + 1e-5, byP$par)))
Yp <- sapply(run, function(x) getA(x, trun1, obs1, delta1, sc, FUN)$p.value)
if (max(Yp, na.rm = TRUE) > byP$obj) {
byP$par <- run[which.max(Yp)]
byP$obj <- max(Yp, na.rm = TRUE)
}
plot(run, Yp, "l", xlab = "", ylab = "", xlim = c(max(min(run),lower), min(max(run), upper)),
ylim = range(Yp, na.rm = TRUE),
main = paste("Estimate a by p-value", " (a = ", round(byP$par, 3), ")", sep = ""))
title(xlab = "a", ylab = "p-values", line = 2, cex.lab = 1)
abline(v = byP$par, lty = 3)
abline(h = byP$obj, lty = 3)
if (byP$obj >= 0.01)
text(x = max(run), y = byP$obj, labels = format(round(byP$obj, 2), nsmall = 2),
pos = 4, cex = .8, xpd = TRUE, srt = 0, offset = 1.5)
par(op1)
op2 <- par(mar = c(3.5, 3.5, 2.5, 2.5), ask = TRUE)
plot(sort(obs), Sy, xlab = "", ylab = "", "s")
mtext(expression(bold("Survival estimation")), 3, line = .5, cex = 1.2)
title(xlab = "Time", ylab = "Survival probability", line = 2, cex.lab = 1)
lines(S0$time, S0$surv, lty = 2, "s")
legend("topright", c("Transformation", "Kaplan-Meier"), lty = 1:2, bty = "n")
par(op2)
}
out$surv <- data.frame(Time = y0, trSurv = Sy,
kmSurv = approx(S0$time, S0$surv, method = "constant", xout = y0, yleft = 1, yright = min(S0$surv))$y)
out$byTau <- byTau
out$byP <- byP
## out <- list(Sy = Sy, byTau = byTau, byP = byP,
## qind = data.frame(trun = ta, obs = obs1, wgtT = wgtT, wgtX = wgtX))
out$Call <- match.call()
out$iniKendall <- ini$PE
out$iniKendall.ipw <- ini.ipw$PE
out$iniP <- ini$p.value
out$iniP.ipw <- ini.ipw$p.value
out$tFun <- FUN
out$.data <- data.frame(start = trun, stop = obs, status = delta)
class(out) <- "trSurvfit"
out
}
#' Auxiliary for Controlling trSurvfit Fitting
#'
#' Auxiliary function as user interface for \code{trSurvfit} fitting.
#'
#' @param interval a vector containing the end-points of the interval to be searched the transformation parameter.
#' @param lower the lower end-point of the interval to be searched.
#' @param upper the upper end-point of the interval to be searched.
#'
#' @export
#' @seealso \code{\link{trSurvfit}}
trSurv.control <- function(interval = c(-1, 20), lower = min(interval), upper = max(interval)) {
list(lower = lower, upper = upper)
}
getA <- function(a, trun, obs, delta = NULL, sc = NULL, FUN, test = "CK") {
if (is.null(delta)) delta <- rep(1, length(trun))
FUN <- match.fun(FUN)
ta <- mapply(FUN, X = obs, T = trun, a = a)
if (is.null(sc)) {
if (test == "CK") tmp <- cKendall(ta, obs, delta)
if (test == "PC") tmp <- pmcc(ta[delta == 1], obs[delta == 1])
} else {
weights <- approx(sc$time, sc$surv, method = "constant", xout = c(ta, obs),
yleft = 1, yright = min(sc$surv))$y
tmp <- cKendall(ta, obs, delta, method = "IPW2", weights = weights)
}
return(list(PE = tmp$PE, p.value = tmp$p.value))
}
tauEm <- function(f0, aij, bij, obs, ta) {
uij <- t(t(aij) * f0) / colSums(t(aij) * f0)
vij <- t(t(1 - bij) * f0) / colSums(t(bij) * f0)
colSums(uij + vij) / sum(uij + vij)
}
tauLik <- function(f0, aij, bij, obs, ta) {
yi <- unique(obs)
yi <- yi[order(yi)]
de <- rev(cumsum(rev(f0 * c(0, diff(yi)))))
L <- sapply(1:length(f0), function(x) f0[x] / de[min(which(ta[order(obs)][x] <= yi))])
-sum(log(ifelse(L <= 0, 1, L)))
}
gofPlot <- function(trun, obs, delta = NULL) {
n <- length(obs)
if (is.null(delta)) delta <- rep(1, length(trun))
sc <- survfit(Surv(trun, obs, 1 - delta) ~ 1)
if (length(table(delta)) > 1 &
sum(head(sc$n.event[sc$n.event > 0]/sc$n.risk[sc$n.event > 0]) == 1) <= 2) {
sc$time <- sc$time[sc$n.event > 0]
sc$surv <- exp(-cumsum(sc$n.event[sc$n.event > 0]/sc$n.risk[sc$n.event > 0]))
}
fit <- trSurvfit(trun, obs, delta)
ta <- fit$qind$trun
xa <- fit$qind$obs
Fw <- survfit(Surv(-obs, -trun, rep(1, length(obs))) ~ 1, data = fit$qind)
Fwz <- approx(-Fw$time, Fw$surv, method = "constant", xout = unique(sort(xa)), yleft = 0, yright = 1)$y
fx <- diff(c(0, 1 - fit$Sy))
fxz <- approx(sort(obs), fx, method = "constant", yleft = 0, yright = 0, xout = unique(sort(xa)))$y
Scz <- approx(sc$time, sc$surv, method = "constant", yleft = 0, yright = 1, xout=unique(sort(xa)))$y
gof <- fxz * Fwz * Scz
s0 <- survfit(Surv(obs, rep(1, n)) ~ 1)
plot(unique(sort(xa)), cumsum(gof) / sum(gof), "s", xlab = "Time", main = "Goodness of fit")
lines(s0$time, 1 - s0$surv, "s", col = 2)
legend("topleft", c("Transformation GOF", "ECDF"), col = 1:2, lty = 1, bty = "n")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.