Nothing
#' Likelihood ratio test for covariates
#'
#' This function fits separate models for each distinct
#' value of the factor \code{covariate} and computes a likelihood ratio test
#' to test whether there are significant differences between
#' groups.
#'
#' @export
#' @inheritParams nll_elife
#' @param covariate vector of factors, logical or integer whose distinct values define groups
#' @return a list with elements
#' \itemize{
#' \item \code{stat} likelihood ratio statistic
#' \item \code{df} degrees of freedom
#' \item \code{pval} the p-value obtained from the asymptotic chi-square approximation.
#' }
#' @examples
#' test <- with(subset(dutch, ndays > 39082),
#' test_elife(
#' time = ndays,
#' thresh = 39082L,
#' covariate = gender,
#' ltrunc = ltrunc,
#' rtrunc = rtrunc,
#' family = "exp"))
#' test
test_elife <- function(time,
time2 = NULL,
event = NULL,
covariate,
thresh = 0,
ltrunc = NULL,
rtrunc = NULL,
type = c("right",
"left",
"interval",
"interval2"),
family = c("exp",
"gp",
"weibull",
"gomp",
"gompmake",
"extgp",
"extweibull",
"perks",
"perksmake",
"beard",
"beardmake"),
weights = rep(1, length(time)),
arguments = NULL,
...) {
if(!is.null(arguments)){
call <- match.call(expand.dots = FALSE)
arguments <- check_arguments(func = "test_elife", call = call, arguments = arguments)
return(do.call(test_elife, args = arguments))
}
family <- match.arg(family)
type <- match.arg(type)
stopifnot("Covariate must be provided" = !missing(covariate),
"Object `covariate` should be of the same length as `time`" = length(covariate) == length(time),
"Provide a single threshold" = length(thresh) == 1L)
npar <- switch(family,
"exp" = 1L,
"gp" = 2L,
"gomp" = 2L,
"gompmake" = 3L,
"extgp" = 3L,
"weibull" = 2L,
"extweibull" = 3L,
"perks" = 2L,
"perksmake" = 3L,
"beard" = 3L,
"beardmake" = 4L)
if(isTRUE(all(is.matrix(ltrunc),
is.matrix(rtrunc),
ncol(ltrunc) == ncol(rtrunc),
ncol(rtrunc) == 2L))){
# Doubly truncated data
stopifnot("Censoring is not currently handled for doubly truncated data." = is.null(event) | isTRUE(all(event == 1L)),
"Argument `time2` not used for doubly truncated data" = is.null(time2)
)
return(
test_ditrunc_elife(time = time,
covariate = covariate,
thresh = thresh,
ltrunc1 = ltrunc[,1],
ltrunc2 = ltrunc[,2],
rtrunc1 = rtrunc[,1],
rtrunc2 = rtrunc[,2],
family = family,
weights = weights)
)
}
survout <- .check_surv(time = time,
time2 = time2,
event = event,
type = type)
time <- survout$time
time2 <- survout$time2
status <- survout$status
# Transform to factor
if(any(is.na(covariate))){
stop("Covariate vector should not include missing values.")
}
# Cast to factor, remove unused levels
covariate <- factor(covariate, exclude = NULL)
# Count occurences
nobs_cov <- table(covariate)
m <- length(nobs_cov)
stopifnot("There should be more than one group in `covariate`." = m > 1,
"There are too few observations (less than 5 times the number of parameters) for some modalities of `covariate`." = min(nobs_cov) >= 5*npar)
# Fit the pooled model
labels <- names(nobs_cov)
fit_null <- try(fit_elife(time = time,
time2 = time2,
event = event,
status = status,
thresh = thresh,
ltrunc = ltrunc,
rtrunc = rtrunc,
type = type,
family = family,
weights = weights))
loglik0 <- ifelse(is.character(fit_null), NA, fit_null$loglik)
fit_alternative <- list()
loglik1 <- rep(0, m)
n_levels <- rep(0L, m)
for(i in seq_len(m)){
fit_alternative[[i]] <-
try(fit_elife(time = time[covariate == labels[i]],
time2 = time2[covariate == labels[i]],
status = status[covariate == labels[i]],
thresh = thresh,
ltrunc = ltrunc[covariate == labels[i]],
rtrunc = rtrunc[covariate == labels[i]],
type = type,
family = family,
weights = weights[covariate == labels[i]]))
loglik1[i] <- ifelse(is.character(fit_alternative[[i]]), NA, fit_alternative[[i]]$loglik)
n_levels[i] <- fit_alternative[[i]]$nexc
}
lrt_stat <- 2*as.numeric((sum(loglik1)-loglik0))
names(n_levels) <- labels
p_value <- pchisq(q = lrt_stat, df = (m - 1) * npar, lower.tail = FALSE)
invisible(structure(list(stat = lrt_stat,
df = (m - 1) * npar,
pval = p_value,
nobs_covariate = n_levels,
thresh = thresh,
family = family),
class = "elife_par_test"))
}
#' @export
print.elife_par_test <- function(x,
digits = min(3, getOption("digits")),
na.print = "", ...){
cat("Model:", switch(x$family,
exp = "exponential",
gomp = "Gompertz",
gompmake = "Gompertz-Makeham",
weibull = "Weibull",
extgp = "extended generalized Pareto",
gp = "generalized Pareto",
gppiece = "piecewise generalized Pareto",
extweibull = "extended Weibull",
perks = "Perks",
perksmake = "Perks-Makeham",
beard = "Beard",
beardmake = "Beard-Makeham"),
"distribution.", "\n")
cat("Threshold:", round(x$thresh, digits), "\n")
cat("Number of exceedances per covariate level:\n")
print.default(x$nobs_covariate)
cat("\nLikelihood ratio statistic:", format(x$stat, digits = digits))
cat(paste0("\nNull distribution: chi-square (", x$df, ")\n"))
cat("Asymptotic p-value: ", format(x$pval, digits = digits),"\n")
invisible(x)
}
#' @importFrom stats anova
#' @export
anova.elife_par <- function(object,
object2,
...,
test = "Chisq"){
if (any(missing(object), missing(object2))){
stop("Two models must be specified.")
}
#test <- match.arg(test)
test <- "Chisq" # only option implemented so far
model1 <- deparse(substitute(object))
model2 <- deparse(substitute(object2))
models <- c(model1, model2)
narg <- 2L
for (i in 1:narg) {
if (!inherits(get(models[i], envir = parent.frame()),
"elife_par")){
stop("Invalid input: use only with objects of class 'elife_par'.")
}
}
p <- length(models)
npar <- nobs <- integer(p)
dev <- thresh <- numeric(p)
conv <- rep(FALSE, 2L)
censt <- trunct <- family <- character(p)
for (i in seq_len(narg)) {
elifemod <- get(models[i], envir = parent.frame())
dev[i] <- deviance(elifemod)
npar[i] <- length(elifemod$par)
thresh[i] <- elifemod$thresh[1]
nobs[i] <- elifemod$nexc
conv[i] <- elifemod$convergence
family[i] <- elifemod$family
censt[i] <- elifemod$cens_type
trunct[i] <- elifemod$trunc_type
}
if(npar[1] < npar[2]){
dev <- dev[2:1]
npar <- npar[2:1]
family <- family[2:1]
}
if(!isTRUE(all.equal(thresh[1], thresh[2]))){
stop("Invalid arguments: the thresholds should be the same.")
}
if(!isTRUE(all(conv))){
stop("At least one of the optimization failed to converge.")
}
if(!isTRUE(all.equal(nobs[1], nobs[2]))){
stop("Invalid arguments: the observations should be the same.")
}
if(family[1] == family[2]){
stop("Models should be of different families.")
}
if(!isTRUE(all(length(unique(censt)) == 1L, length(unique(trunct)) == 1L))){
stop("Models have different truncation or censoring schemes.")
}
# Cases considered
nmods <- rbind(
c("exp", "weibull", "regular"), # chisq(1)
c("exp", "gp", "regular"), # chisq(1)
c("exp", "gppiece", "regular"), # chisq(K)
c("exp", "gomp", "boundary"), # 0.5 chisq(0) + 0.5 chisq(1)
c("exp", "extgp", "boundary"), # 0.5 chisq(1) + 0.5 chisq(2)
c("exp", "gompmake", "invalid"),
c("gp", "extgp", "boundary"), # 0.5 chisq(0) + 0.5 chisq(1)
c("gomp", "extgp", "regular"), # chisq(1)
c("gp", "gppiece", "regular"), # chisq(K-1)
c("gomp", "gompmake", "boundary"), # 0.5 chisq(0) + 0.5 chisq(1)
c("weibull","extweibull","regular"), # chisq(1)
c("gp","extweibull","regular"), # chisq(1)
c("exp","extweibull","regular"), # chisq(2)
c("exp", "perks","boundary"), # 0.5 chisq(0) + 0.5 chisq(1)
c("exp", "beard","boundary"), # 0.5 chisq(1) + 0.5 chisq(2)
c("gomp", "beard","boundary"), # 0.5 chisq(0) + 0.5 chisq(1)
c("perks", "beard","regular"), # chisq(1)
c("perks", "perksmake","boundary"), # 0.5 chisq(0) + 0.5 chisq(1)
c("beard", "beardmake","boundary"), # 0.5 chisq(0) + 0.5 chisq(1)u
c("perks", "beardmake","boundary"), # 0.5 chisq(0) + 0.5 chisq(1)
c("perksmake", "beardmake","regular"),
c("gompmake", "beardmake","boundary"),
c("gomp", "beardmake","boundary2"), # 0.25 chisq(0) + 0.5 chisq(1) + 0.25 chisq(2)
c("exp", "beardmake","invalid"),
c("exp", "perksmake","invalid")
)
match_family <- which(apply(nmods[,1:2], 1,
function(fam){
isTRUE(all(family %in% fam))}))
stopifnot("Invalid input: models are not nested" = length(match_family) == 1L)
if(nmods[match_family,3] == "invalid"){
stop("Models are nested, but parameters are not identifiable.\nThe information matrix is singular.")
}
df <- -diff(npar)
dvdiff <- diff(dev)
if(dvdiff < 0 && dvdiff > -1e-4){
# Numerical tolerance for zero
dvdiff <- 0
}
if(dvdiff < 0){
stop("The alternative model has a lower likelihood value than the null model, indicating convergence problems.")
}
# if(test == "Chisq"){
if(nmods[match_family,3] == "regular"){ #regular model
pval <- pchisq(dvdiff,
df = df,
lower.tail = FALSE)
} else if(nmods[match_family,3] == "boundary"){
pval <- 0.5*pchisq(dvdiff,
df = df,
lower.tail = FALSE) +
0.5*pchisq(dvdiff,
df = df - 1L,
lower.tail = FALSE)
} else if(nmods[match_family,3] == "boundary2"){
pval <- 0.25*pchisq(dvdiff,
df = df,
lower.tail = FALSE) +
0.5*pchisq(dvdiff,
df = df - 1L,
lower.tail = FALSE) +
0.25*pchisq(dvdiff,
df = df - 2L,
lower.tail = FALSE)
}
table <- data.frame(npar, dev, c(NA, df), c(NA, dvdiff), c(NA,pval))
dimnames(table) <- list(models,
c("npar",
"Deviance",
"Df",
"Chisq",
"Pr(>Chisq)"))
rownames(table) <- family
structure(table,
heading = c("Analysis of Deviance Table\n"),
class = c("anova", "data.frame"))
return(table)
# }
}
#' Goodness-of-fit diagnostics
#'
#' Warning: EXPERIMENTAL
#' Compute the Kolmogorov-Smirnov
#' test statistic and compare it with a simulated null
#' distribution obtained via a parametric bootstrap.
#'
#' @note The bootstrap scheme requires simulating new data,
#' fitting a parametric model and estimating the nonparametric
#' maximum likelihood estimate for each new sample.
#' This is computationally intensive in large samples.
#'
#' @inheritParams fit_elife
#' @param B number of bootstrap simulations
#' @return a list with elements
#' \itemize{
#' \item \code{stat} the value of the test statistic
#' \item \code{pval} p-value obtained via simulation
#' }
#' @keywords internal
ks_test <- function(time,
time2 = NULL,
event = NULL,
thresh = 0,
ltrunc = NULL,
rtrunc = NULL,
type = c("right", "left", "interval", "interval2"),
family = c("exp",
"gp",
"gomp",
"gompmake",
"weibull",
"extgp",
"gppiece",
"extweibull",
"perks",
"beard",
"perksmake",
"beardmake"),
B = 999L,
arguments = NULL,
...){
if(!is.null(arguments)){
call <- match.call(expand.dots = FALSE)
arguments <- check_arguments(func = "ks_test", call = call, arguments = arguments)
return(do.call(ks_test, args = arguments))
}
# Exclude doubly interval truncated data
if(isTRUE(all(is.matrix(ltrunc),
is.matrix(rtrunc),
ncol(ltrunc) == ncol(rtrunc),
ncol(rtrunc) == 2L))){
stop("Doubly interval truncated data not supported")
}
family <- match.arg(family)
type <- match.arg(type)
n <- length(time)
survout <- .check_surv(time = time,
time2 = time2,
event = event,
type = type)
time <- survout$time
time2 <- survout$time2
status <- survout$status
if(thresh[1] > 0){
# Keep only exceedances, shift observations
# We discard left truncated observations and interval censored
# if we are unsure whether there is an exceedance
ind <- ifelse(status == 2, FALSE, time > thresh[1])
weights <- weights[ind] # in this order
time <- time[ind] - thresh[1]
time2 <- time2[ind] - thresh[1]
status <- status[ind]
if(!is.null(ltrunc)){ #both ltrc and ltrt
stopifnot("`ltrunc` must be of the same length as `time`." = length(ltrunc) == n)
ltrunc <- pmax(0, ltrunc[ind] - thresh[1])
}
if(!is.null(rtrunc)){
stopifnot("`rtrunc` must be of the same length as `time`." = length(rtrunc) == n)
rtrunc <- rtrunc[ind] - thresh[1]
}
}
thresh <- 0
# Fit parametric model
F0 <- try(fit_elife(time = time,
time2 = time2,
status = status,
thresh = thresh,
ltrunc = ltrunc,
rtrunc = rtrunc,
type = type,
family = family
))
if(is.character(F0) || !F0$convergence){
stop("Could not estimate the parametric model.")
}
# Fit NPMLE of ECDF
Fn <- npsurv(time = time,
type = "interval",
event = status,
ltrunc = ltrunc,
rtrunc = rtrunc)
# Compute test statistic
if(family == "gompmake"){
scale <- c(F0$par[1], F0$par[3])
shape <- F0$par[2]
} else{
scale <- F0$par[1]
shape <- F0$par[-1]
}
ks <- max(abs(Fn$cdf(Fn$x) - pelife(q = Fn$x, scale = scale, shape = shape, family = family)))
stat <- rep(NA, B + 1L)
stat[B + 1] <- ks
if(is.null(ltrunc) && is.null(rtrunc) && isTRUE(all(status == 1L))){
type2 <- "none"
} else if(isTRUE(all(status == 1L)) && (!is.null(rtrunc) || !is.null(ltrunc))){
type2 <- "ltrt"
} else if(is.null(rtrunc) && isTRUE(all(status == c(0L,1L)))){
type2 <- "ltrc"
}
for(b in 1:B){
bootconv <- FALSE
while(!bootconv){
if(type2 == "ltrt"){
boottime <- samp_elife(n = length(time),
scale = scale,
shape = shape,
lower = ltrunc,
upper = rtrunc,
family = family,
type2 = type2)
bootstatus <- rep(1L, length(time))
} else if(type2 == "none"){
boottime <- relife(n = length(time),
scale = scale,
shape = shape,
family = family)
bootstatus <- rep(1L, length(time))
} else if(type2 == "ltrc"){
boot_sim <- samp_elife(n = length(time),
scale = scale,
shape = shape,
lower = ltrunc,
upper = rtrunc,
family = family,
type2 = type2)
boottime <- boot_sim$dat
boottime2 <- ifelse(boot_sim$rcens, NA, boot_sim$dat)
bootstatus <- as.integer(!boot_sim$rcens)
}
# bootstrap loop
F0_b <- try(fit_elife(time = boottime,
time2 = boottime2,
status = bootstatus,
thresh = thresh,
ltrunc = ltrunc,
rtrunc = rtrunc,
type = "right",
family = family))
# Fit NPMLE of ECDF
if(type2 == "ltrc"){
Fn_b <- try(npsurv(time = boottime,
event = bootstatus,
ltrunc = ltrunc,
rtrunc = rtrunc,
type = "interval"))
} else{
Fn_b <- try(npsurv(time = boottime,
ltrunc = ltrunc,
rtrunc = rtrunc))
}
# TODO replace this with function that takes par and returns a list with arguments
# rate, scale and shape
# TODO check that all calls to *elife, etc. also have a rate parameter
if(family == "gompmake"){
scale_boot <- c(F0_b$par[1], F0_b$par[3])
shape_boot <- F0_b$par[2]
} else{
scale_boot <- F0_b$par[1]
shape_boot <- F0_b$par[-1]
}
# Compute test statistic
ks_boot <- try(max(abs(Fn_b$cdf(Fn_b$x) - pelife(q = Fn_b$x, scale = scale_boot, shape = shape_boot, family = family))))
if(is.numeric(ks_boot)){
stat[b] <- ks_boot
bootconv <- TRUE
}
}
}
list(stat = ks,
# null = stat,
pval = mean(stat >= ks, na.rm = TRUE))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.