# R/gof.R In tranSurv: Transformation Model Based Estimation of Survival and Regression Under Dependent Truncation and Independent Censoring

#### Documented in gof

globalVariables(c("start", "status")) ## global variables for gof()

#' Goodness of fit based on left-truncated regression model
#'
#' Provide goodness-of-fit diagnostics for the transformation model.
#'
#' The googness of fit assessment of the transformation model focus on the structure of the
#' transformation model, which has the form:
#' \deqn{h(U) = (1 + a)^{-1} \times (h(T) + ah(X)),}
#' where \eqn{T} is the truncation time, \eqn{X} is the observed failure time,
#' \eqn{U} is the transformed truncation time that is quasi-independent from \eqn{X} and
#' \eqn{h(\cdot)} is a monotonic transformation function.
#' With the condition, \eqn{T < X}, assumed to be satisfied,
#' the structure of the transformation model implies
#' \deqn{X - T = -(1 + a) E(U) + (1 + a) X - (1 + a) \times [U - E(U)] := \beta_0 + \beta_1X + \epsilon.}
#' The regression estimates can be obtained by the left-truncated regression model (Karlsson and Lindmark, 2014).
#' To evaluate the goodness of fit of the transformation model,
#' the \code{gof()} function directly test the inearity in \eqn{X} by considering larger model that are nonlinear in \eqn{X}.
#' In particular, we expand the covariates \eqn{X} to \code{P} piecewise linearity terms and test for equality of the associated coefficients.
#'
#'
#' @param x an object of class \code{trSurvfit} returned by the \code{trSurvfit()} or the \code{trReg()} function or a survival object returned by the \code{Surv()}.
#' @param B an integer value specifies the bootstrap size for the left-truncated regression model. A value greater than 2 is required.
#' @param P an integer value specifies number of breakpoints to test the linearity of the transformation model.
#' When \code{P > 0}, \eqn{P} breakpoints divides the event times into \eqn{P} equal spaced segments.
#' Piecewise linear function constructed from those segments of event times  are used in the left-truncated regression model,
#' and the overall significance testing if the coefficient estimates are equal is reported.
#' Default value for \code{P} is 1. See \bold{Details} for a description of the goodness of fit procedure.
#'
#' @export
#' @example inst/examples/ex_gof.R
#' @importFrom stats pchisq quantile complete.cases
#' @importFrom utils combn
#' @importFrom graphics boxplot
#' @importFrom methods is
#'
#' @references Karlsson, M., Lindmark, A. (2014) truncSP: An R Package for Estimation of Semi-Parametric Truncated Linear Regression Models, \emph{Journal of Statistical Software}, \bold{57} (14), pp 1--19.
#' @return A list containing the following elements
#' \describe{
#'   \item{coefficients}{the regression coefficients of the left-truncated regression model.}
#'   \item{pval}{the p-value for the equality of the piecewise linearity terms in the expanded model. See \bold{Details}.}
#' }
gof <- function(x, B = 200, P = 1) {
if (all(!is.trReg(x), !is.trSurvfit(x), !is.Surv(x)))
stop("An 'trReg', 'trSurvfit', or 'Surv' xect is required.")
out <- NULL
input <- class(x)
if (is.trReg(x) || is.trSurvfit(x)) {
B <- max(x$B, B, 2) P <- max(x$P, P, 1)
Q <- x$Q } if (is.Surv(x)) { B <- max(B, 2) P <- max(P, 1) Q <- 0 x <- list(.data = as.data.frame(unclass(x))) } ti <- x$.data$stop[x$.data$status > 0] rm <- (ti %in% boxplot(ti, plot = FALSE)$out)
ti <- ti[!rm]
ti <- seq(min(ti), max(ti), length.out = P + 2)
out <- getTL(x$.data$start[!rm], x$.data$stop[!rm], ti[-c(1, P + 2)], B)
out$input <- input out$breaks <- ti
sc <- survfit(Surv(start, stop, 1 - status) ~ 1, data = x$.data) if (min(sc$surv) == 0)
sc$surv <- ifelse(sc$surv == min(sc$surv), sort(unique(sc$surv))[2], sc$surv) if (is.trReg(x)) { out$fitQs <- lapply(split(x$.data, cut(x$.data$stop, ti)), function(d) { tmp <- trReg(Surv(start, stop, status) ~ as.matrix(d[,x$vNames]), data = d,
method = x$method, control = list(sc = list(time = sc$time, surv = sc$surv), Q = Q)) tmp$.data$ta <- with(tmp$.data, x$tFun(stop, start, tmp$a))
tmp$.data$a <- tmp$a return(tmp$.data)
})
out$dat.gof <- do.call(rbind, out$fitQs)
if (x$method == "adjust" & Q > 0) { tq <- quantile(out$dat.gof$ta, 0:(1 + Q) / (1 + Q)) tmp <- model.matrix( ~ cut(out$dat.gof$ta, breaks = tq, include.lowest = TRUE) - 1) nn <- NULL tq <- round(tq, 4) for (i in 1:(Q + 1)) nn[i] <- paste("T'(a) in (", tq[i], ", ", tq[i + 1], "]", sep = "") colnames(tmp) <- nn out$dat.gof <- cbind(out$dat.gof, tmp) ##[,1]) } colnames(out$dat.gof)[4:(3 + length(x$vNames))] <- x$vNames
}
if (is.trSurvfit(x)) {
out$fitQs <- lapply(split(x$.data, cut(x$.data$stop, ti)), function(d) {
tmp <- with(d, trSurvfit(start, stop, status, tFun = x$tFun)) tmp$.data$ta <- with(tmp$.data, x$tFun(stop, start, tmp$byTau$par)) tmp$.data$a <- tmp$byTau$par return(tmp$.data)
})
out$dat.gof <- do.call(rbind, out$fitQs)
}
class(out) <- "trgof"
return(out)
}

#' @importFrom truncSP lt
#'
#' @param tt is the truncation time
#' @param yy is the observed survival times (events only)
#' @param ti is the endpoints of grids
#' @param B is the bootstrap size
#'
#' @noRd
getTL <- function(tt, yy, ti, B) {
dat <- NULL
dat$tt <- tt dat$yy <- yy
dat$xt <- dat$yy - dat$tt dat <- data.frame(dat) nCpt <- length(ti) covr <- matrix(NA, nrow = nrow(dat), ncol = nCpt + 1) covr[,1] <- with(dat, pmin(yy, ti[1])) if (length(ti) > 1) { covr[, ncol(covr)] <- pmax(dat$yy - ti[2], 0)
for (i in 2:nCpt)
covr[,i] <- pmin(pmax(dat$yy - ti[i - 1], 0), ti[i] - ti[i - 1]) } else covr[,2] <- pmax(dat$yy - ti[1], 0)
colnames(covr) <- c(sapply(1:(nCpt + 1), function(x) paste("t", x, sep = "")))
fit <- lt(xt ~ covr, data = dat, covar = TRUE, R = B)
perm <- combn(length(fit$coefficients[-1]), 2) con <- matrix(0, ncol = ncol(perm), nrow = length(fit$coefficients[-1]))
## overall significance
for (i in 1:ncol(perm)) {
con[perm[1,i], i] <- 1
con[perm[2,i], i] <- -1
}
dif.vet <- t(con) %*% fit$coefficients[-1] dif.cov <- (t(con) %*% fit$covariance[-1, -1]) %*% con
k <- qr(dif.cov)$rank if (k < nrow(dif.cov)) { dif.cov <- dif.cov[1:k, 1:k] dif.vet <- dif.vet[1:k] } coefficients <- fit$coefficients
pval <- 1 - pchisq((t(dif.vet) %*% solve(dif.cov)) %*% dif.vet, length(dif.vet))
list(coefficients = coefficients, pval = pval)
}


## Try the tranSurv package in your browser

Any scripts or data that you put into this service are public.

tranSurv documentation built on Jan. 16, 2021, 5:31 p.m.