Nothing
#' Goodness-of-fit (GOF) test statistic.
#'
#' The GOF-test is performed for a fitted delay-model.
#' There are different GOF-tests implemented:
#' * __Moran GOF__ is based on spacings, like the MPSE-criterion itself.
#' * __Pearson GOF__ uses categories and compares observed to expected frequencies.
#'
#' @param delayFit delay_model fit
#' @param method character(1). which method to use for GOF. Default is 'moran'.
#' @return An `htest`-object containing the GOF-test result
#' @export
test_GOF <- function(delayFit, method = c('moran', 'pearson')){
stopifnot( inherits(delayFit, what = 'incubate_fit') )
if (delayFit$method != 'MPSE'){
stop('Goodness-of-fit test only supported for models that are fit with maximum product of spacings estimation (MPSE)!', call. = FALSE)
}
method <- match.arg(method)
twoGroup <- isTRUE(delayFit$twoGroup)
data_name <- if (twoGroup) paste(names(delayFit$data), collapse = ' and ') else 'x'
nObs <- if (twoGroup) lengths(delayFit$data) else length(delayFit$data)
params <- coef.incubate_fit(delayFit, transformed = FALSE)
k <- length(params)
# required variables
meth <- statist <- p_val <- NULL
switch(method,
moran = {
# Moran's GOF test
meth <- "Moran's Goodness-of-fit (GOF) test"
EUL_MAS <- -digamma(1L)
# Moran test statistic is negative sum of logged spacings, see Cheng & Stephens (1989)
# @param mseCrit: the negative avg logged cumulative spacings, length 1 or 2
# @param n nbr of observations, length 1 or 2
# @param k nbr of parameters to be estimated
# @return Moran's test statistic, length 1 or 2
testStat_mo <- function(mseCrit, n, k){
mo_m <- (n+1L) * (log(n+1L) + EUL_MAS) - .5 - (12L*(n + 1L))**-1L
mo_v <- (n+1L) * (pi**2L / 6L - 1L) - .5 - (6L*(n + 1L))**-1L
C1 <- mo_m - sqrt(.5 * n * mo_v)
C2 <- sqrt(mo_v / (2L*n))
# factor (n+1) to go from -avg to -sum
(mseCrit * (n+1L) + .5 * k - C1) / C2
}# fun
statist <- if (twoGroup){ ## && length(delayFit$bind) < length(oNames) # not needed!?
# sum of two independent chi-sq. is chi-sq
c(`X^2` = sum(testStat_mo(mseCrit = delayFit$objFun(pars = params, criterion = TRUE, aggregated = FALSE), #criterion per group
n = nObs, k = k/2)) )
} else {
# single group
c(`X^2` = testStat_mo(mseCrit = delayFit[["criterion"]], n = nObs, k = k) )
}
p_val <- stats::pchisq(q = statist, df = sum(nObs), lower.tail = FALSE)
},
pearson = {
# Pearson GOF-test
meth <- "Pearson's Goodness-of-fit (GOF) test" # (per group) ## this is our standard
# under H0, expect frequency counts according to uniform distribution
#+nbr of classes as recommended by David S. Moore (Tests of Chi-squared Type, 1986)
testStat_pe <- function(datr, nCl) {
tab_transf <- tabulate(findInterval(datr,
vec = seq.int(from=0L, to=1L, length.out = nCl+1L),
rightmost.closed = TRUE, all.inside = TRUE), nbins = nCl)
sum((tab_transf - mean(tab_transf))**2L) / mean(tab_transf)
}
nCl <- NA_real_ #dummy value to start with
statist <- if (twoGroup) {
nCl <- pmax.int(k/2 + 2L, ceiling(2L * nObs**.4))
# sum of two chi-square test statistics
c(`X^2` = sum(purrr::map2_dbl(.x = transform(delayFit), .y = nCl, .f = testStat_pe)))
} else {
nCl <- max(k + 2L, ceiling(2L * nObs**.4))
c(`X^2` = testStat_pe(datr = transform(delayFit), nCl = nCl))
# inference based on Chi-square distribution.
# use adjusted degrees of freedom (loose one df for each parameter estimated)
}
p_val <- stats::pchisq(q = statist, df = sum(nCl) - k - (1L+twoGroup), lower.tail = FALSE)
},
AD =, ad =, anderson = {
# EDF-based GOF-test
# Anderson-Darling (AD) test statistic
# cf Stephens, Tests based on EDF Statistics p.101, (4.2)
meth <- 'Anderson-Darling Goodness-of-fit (GOF) test (per group)'
testStat_ad <- function(datr, n){
i <- seq_along(datr)
# in fact, A2 utilizes rev-order in its 2nd summation term
-n - mean((2L * i - 1L) * log(datr) + (2L*n - (2L*i-1L))*log(1-datr))
}
A2 <- if (twoGroup) {
purrr::map2_dbl(.x = transform(delayFit), .y = nObs, .f = testStat_ad)
} else {
testStat_ad(datr = transform(delayFit), n = nObs)
}
p_val <- if ( delayFit$distribution == 'exponential' ){
# modification for Exponential (cf Stephens, Table 4.14, p.138)
# the correction factor approaches 1 from above.
# We keep the number of N as all observations, independent of the number of parameters estimated in the null-model.
# QQQ Should we increase N by the number of parameters p estimated less 2 ( p -2 because 2 parameters are estimated in standard delayed exponential)
A2_mod <- A2 * pmax.int(1L, 1L + 5.4 / nObs - 11 / nObs**2L)
.ad_pval[['exponential']](A2_mod)
} else {
stopifnot( delayFit$distribution == 'weibull' )
# P-value for Weibull based on Lockhart, 1994 (Table 1)
# interpolation model on logits using critical value and inverse of shape parameter
params_ntr <- coef.incubate_fit(delayFit, transformed = FALSE)
.ad_pval[['weibull']](A2, params_ntr[grepl('shape', names(params_ntr), fixed = TRUE)])
}
if (twoGroup){
A2 <- paste(signif(A2, 4), collapse = ' and ')
# use Liptak to bring both P-values of AD-tests per group together
p_val <- stats::pnorm(sum(sqrt(nObs) * stats::qnorm(p_val)) / sqrt(sum(nObs)))
}
statist <- c(`A^2` = A2)
},
# catch all
stop('This GOF-test method is not supported!')
)
# return test object
# stats:::print.htest recognizes:
#+parameter, alternative, null.value, conf.int, estimate
structure(
list(method = meth, data.name = data_name,
statistic = statist, p.value = as.numeric(p_val)),
class = 'htest')
}
#' Test the difference for delay model parameter(s) between two uncorrelated groups, based on maximum product of spacings estimation (MPSE).
#'
#' It is in fact a model comparison between a null model where the parameters are enforced to be equal and an unconstrained full model.
#' As test statistic we use twice the difference in best (=lowest) objective function value, i.e. 2 * (`val_0` - `val_1`).
#' This is reminiscent of a likelihood ratio test statistic albeit the objective function is not a negative log-likelihood
#' but the negative of the maximum product spacing metric.
#'
#' High values of this difference speak against the null-model (i.e. high `val_0` indicates bad fit under 0-model and low values of `val_1` indicate a good fit under the more general model1.
#' The test is implemented as a parametric bootstrap test, i.e. we
#'
#' 1. take given null-model fit as ground truth
#' 2. regenerate data according to this model.
#' 3. recalculate the test statistic
#' 4. appraise the observed test statistic in light of the generated distribution under H0
#'
#'
#' @param x data from reference/control group.
#' @param y data from the treatment group.
#' @param distribution character(1). Name of the parametric delay distribution to use.
#' @param twoPhase logical(1). Do we model two phases per group? Default is `FALSE`, i.e. a single delay phase per group.
#' @param method character. Which method to fit the models.
#' @param profiled logical. Use the profiled likelihood?
#' @param param character. Names of parameters to test difference for. Default value is `'delay1'`.
#' @param ties character. How to handle ties in data vector of a group?
#' @param type character. Which type of tests to perform?
#' @param doLogrank logical. In any case do log-rank based tests?
#' @param R numeric(1). Number of bootstrap samples to evaluate the distribution of the test statistic.
#' @param chiSqApprox logical flag. In bootstrap, should we calculate the approximate degrees of freedom for the distribution of the test statistic under H0?
#' @param verbose numeric. How many details are requested? Higher value means more details. 0=off, no details.
#' @return list with the results of the test. Element P contains the different P-values, for instance from parametric bootstrap
#' @export
test_diff <- function(x, y=stop('Provide data for group y!'), distribution = c("exponential", "weibull"), twoPhase = FALSE,
method = c('MPSE', 'MLEn', 'MLEw', 'MLEc'), profiled = method == 'MLEw',
ties = c('density', 'equidist', 'random', 'error'),
param = "delay1",
type = c('all', 'bootstrap', 'GOF', 'moran', 'pearson', 'logrank', 'LR'), doLogrank = TRUE,
R = 400,
chiSqApprox = FALSE, verbose = 0) {
# setup ----
distribution <- match.arg(arg = distribution)
type <- match.arg(arg = type)
onames <- getDist(distribution = distribution, type = "param", twoPhase = FALSE, twoGroup = FALSE, transformed = FALSE)
stopifnot( is.numeric(x), length(x) > length(onames), is.numeric(y), length(y) > length(onames) )
stopifnot( is.numeric(R), length(R) == 1L, R >= 1L )
stopifnot( is.character(param) )
# verbose arg
if (is.logical(verbose)) verbose <- as.numeric(verbose)
if (is.null(verbose) || ! is.numeric(verbose) || ! is.finite(verbose)) verbose <- 0
verbose <- verbose[[1L]]
method <- if (length(method) == 1L && toupper(method) == 'MSE') {
message("The method name 'MPSE' is prefered over the previously used name 'MSE'!")
'MPSE'
} else method[1L]
method <- match.arg(method)
ties <- match.arg(arg = ties)
if (method != 'MPSE' && type %in% c("moran", "pearson", "GOF")){
warning("Goodness-of-fit (GOF) tests are only supported with MPSE currently!")
return(invisible(NULL))
}
# parameters to test differences
if (any(grepl(pattern = "_tr", param, fixed = TRUE))){
stop("Parameter names in param= refer to the distribution parameters and not to the transformed parameters of the objective function.", call. = FALSE)
}
# translate convenience names (for single phase) to canonical names
unNmbrdIdx <- !endsWith(param, suffix = "1") & !endsWith(param, suffix = "2")
if (any(unNmbrdIdx)){
param[unNmbrdIdx] <- paste0(param[unNmbrdIdx], "1") #interpret un-numbered parameters as referring to phase 1
if (verbose > 0L) cat("The unnumbered parameter names in param= are taken to refer to initial phase and are translated to canonical parameter names.\n")
}
# retain only valid names in canonical order
param <- intersect(onames, param)
if (!length(param)){
stop("Provide valid parameter names from the distribution to test for differences in two groups.", call. = FALSE)
}
# bitmask for test types
testMask <- rlang::set_names(logical(5L), nm = c('bootstrap', 'pearson', 'moran', 'logrank', 'LR'))
switch(EXPR = type,
all = {
testMask <- testMask | TRUE
testMask[c("pearson", "moran")] <- method == 'MPSE'
},
# bootstrap + log-rank-tests (for stankovic results) #use better flags? like doBootstrap=, doGOF=, doLR=?!
bootstrap = { testMask[c('bootstrap', 'logrank')] <- TRUE },
GOF = {testMask[c('pearson', 'moran')] <- TRUE},
moran = {testMask['moran'] <- TRUE},
pearson = {testMask['pearson'] <- TRUE},
logrank = {testMask['logrank'] <- TRUE},
LR = {testMask['LR'] <- TRUE}, #likelihood ratio test
stop('This type of test is not supported!', call. = FALSE)
)
# separate control for logrank-based tests
testMask['logrank'] <- doLogrank
# test statistic ----
TOL_CRIT <- 1e-7
# Test statistic calculated from the given data, method and the model specification.
#
# The test statistic takes non-negative values.
# High values of the test statistic speak in favour of H1:
# @param strict logical. Accept models only if they converged flawlessly, i.e., if convergence=0?
# @return list containing value of test statistic and null model fit. Or `NULL` in case of trouble.
testStat <- function(x, y, strict = TRUE) {
fit0 <- delay_model(x = x, y = y, distribution = distribution, twoPhase = twoPhase, method = method, profiled = profiled, ties = ties, bind = param)
fit1 <- delay_model(x = x, y = y, distribution = distribution, twoPhase = twoPhase, method = method, profiled = profiled, ties = ties)
if (is.null(fit0) || is.null(fit1)) return(invisible(NULL))
# if the more restricted model (fit0) yields better fit (=lower criterion) than the more general model (fit1)
#+we are in trouble, possibly due to non-convergence, e.g., optim's convergence code 52
#+we refit the general fit1 again using parameter-values from fit0
if ( fit0[["criterion"]] + TOL_CRIT < fit1[["criterion"]] &&
!is.null(fit1oa <- purrr::pluck(fit1, "optimizer", "optim_args")) ) {
if (verbose > 0) warning("Restricted model with better fit (=smaller criterion) than unrestricted model.", call. = FALSE)
# re-run fit1 with start values based on fitted parameters of reduced model fit0
stopifnot( is.list(fit1oa), "par" %in% names(fit1oa) )
coef0 <- coef.incubate_fit(fit0, transformed = TRUE)
pn1 <- names(fit1[["optimizer"]][["parOpt"]])
# take over optimization coefficients for start values of fit1
# QQQ Would match() or pmatch() help avoid the for-loop?
for (na0 in names(fit0[["optimizer"]][["parOpt"]])) fit1oa[["par"]][startsWith(pn1, prefix = na0)] <- coef0[[na0]]
fit1oa[['control']][['parscale']] <- scalePars(parV = fit1oa[["par"]])
fit1 <- update.incubate_fit(fit1, optim_args = fit1oa)
if (is.null(fit1) || fit0[["criterion"]] + TOL_CRIT < fit1[["criterion"]]) {
warning('Restricted model with better fit than unrestricted model even after refit of the unrestricted model!', call. = FALSE)
return(invisible(NULL))
}
}# fi bad fit1
# check convergence of re-fits when in strict mode only:
if ( strict && (purrr::chuck(fit0, 'optimizer', 'convergence') != 0 || purrr::chuck(fit1, 'optimizer', 'convergence') != 0) ) return(invisible(NULL))
# higher values of T speak in favour of H1:
# 1. fit0 has high value (=bad fit)
# 2. fit1 has low value (=good fit)
list(val = 2L * max(0L, fit0[["criterion"]] - fit1[["criterion"]]),
fit0 = fit0, fit1 = fit1)
} #fn testStat
# observed test statistic
ts_obs <- testStat(x, y, strict = TRUE)
if ( is.null(ts_obs) || ! is.list(ts_obs) || ! is.numeric(ts_obs[['val']]) || ts_obs[['val']] < -TOL_CRIT ){
stop("Delay model failed for restricted null-model or free full model", call. = FALSE)
}
fit0 <- ts_obs[["fit0"]]
fit1 <- ts_obs[["fit1"]]
# P-values -----
# GOF-test results
#+ H0: simpler/restricted model 0 is sufficient
#+ the GOF-test solely builds on fit0
#+ take fitted parameters for both groups under null-model
#+ and transform the observed data for both groups via cumulative distribution functions
# spacings-based GOF-test
GOF_mo0 <- GOF_mo1 <- NULL
if (testMask[['moran']]){
GOF_mo0 <- test_GOF(delayFit = fit0, method = 'moran')
GOF_mo1 <- test_GOF(delayFit = fit1, method = 'moran')
#if (verbose > 0L) cat("Moran test stat for fit0: ", GOF_mo0$statistic, "\n")
}
# Pearson GOF-test based on Chi-square distribution.
# under H0, expect counts according to uniform distribution
GOF_pears0 <- GOF_pears1 <- NULL
if (testMask[['pearson']]){
GOF_pears0 <- test_GOF(delayFit = fit0, method = 'pearson')
GOF_pears1 <- test_GOF(delayFit = fit1, method = 'pearson')
}
P_LR <- NULL
if (testMask[['LR']]){
# likelihood ratio test (LR-test), based on the criterion that was requested (MPSE or ML-based)
P_LR <- stats::pchisq(q = ts_obs[["val"]], df = length(param), lower.tail = FALSE)
}
t0_dist <- P_boot <- chisq_df_hat <- NULL
if (testMask[['bootstrap']]){
# parametric bootstrap:
# generate R samples (x, y) by random sampling from the fitted H0-model (e.g. common delay),
#+where all nuisance parameters are at there best fit
# calculate the test statistic on the simulated data
# estimate P as proportion of simulated test statistics that exceed the observed test statistic t_obs
ranFun <- getDist(distribution, type = "r")
# arguments to the random function generation
ranFunArgsX <- c(list(n=length(x)), coef.incubate_fit(fit0, group = "x", transformed = FALSE))
ranFunArgsY <- c(list(n=length(y)), coef.incubate_fit(fit0, group = "y", transformed = FALSE))
retL <- 1L+(verbose>0L)
t0_dist <- future.apply::future_vapply(X = seq_len(R), FUN.VALUE = double(retL),
FUN = function(dummy){
# generate new data according to given fitted null-model
# sort is not needed here, as it goes through the whole pipeline (factory method)
ts_boot <- testStat(x = rlang::exec(ranFun, !!! ranFunArgsX),
y = rlang::exec(ranFun, !!! ranFunArgsY),
strict = FALSE)
if (is.null(ts_boot)) rep.int(NA_real_, times = retL) else
c(ts_boot[['val']],
# verbose-mode: include convergence code
purrr::chuck(ts_boot, 'fit0', 'optimizer', 'convergence'))[seq_len(retL)]
}, future.seed = TRUE)
if (verbose > 0L){
stopifnot( NROW(t0_dist) == 2L )
fit0_conv <- t0_dist[2L,]
cat(glue('Proportion of model failures: {as_percent(length(which(is.na(fit0_conv)))/length(fit0_conv))}',
'Proportion of conv = 0: {as_percent(length(which(fit0_conv == 0))/ length(fit0_conv))}',
'Proportion of conv = 52: {as_percent(length(which(fit0_conv == 52))/length(fit0_conv))}',
.sep = '\n'), '\n')
t0_dist <- t0_dist[1L, , drop=TRUE] #retain only ts_boot[['val']]
}
t0_dist <- t0_dist[is.finite(t0_dist)]
if (chiSqApprox && length(t0_dist) > 7L){
try(expr = {chisq_df_hat <- coef(MASS::fitdistr(x = t0_dist, densfun = "chi-squared",
start = list(df = length(param)),
method = "Brent", lower = .001, upper = 401))},
silent = TRUE)
}
P_boot <- (1L + sum(t0_dist >= ts_obs[["val"]])) / (length(t0_dist)+1L)
} # bootstrap
# Log-rank tests
P_logrank <- P_logrank_pp <- NULL
if (testMask[["logrank"]]){
# data in long format
dat_2gr <- tibble::tibble(evtime = c(x,y),
group = rep.int(c("x", "y"), times = c(length(x), length(y))))
P_logrank <- stats::pchisq(q = survival::survdiff(survival::Surv(evtime) ~ group, rho = 0, data = dat_2gr)$chisq,
df = 1L, lower.tail = FALSE)
# Peto & Peto modified Gehan-Wilcoxon test
P_logrank_pp <- stats::pchisq(q = survival::survdiff(survival::Surv(evtime) ~ group, rho = 1, data = dat_2gr)$chisq,
df = 1L, lower.tail = FALSE)
}
structure(
# compact cleanses NULL entries
purrr::compact(list(
# two initial model fits
#fit0 = fit0, fit1 = fit1, # debug only?!
t_obs = ts_obs[["val"]],
testDist = t0_dist,
R = if (testMask[['bootstrap']]) length(t0_dist),
chisq_df_hat = chisq_df_hat,
param = param,
P = purrr::compact(
list(
bootstrap = P_boot,
LR = P_LR,
moran = as.vector(GOF_mo0$p.value),
moran1 = as.vector(GOF_mo1$p.value),
pearson = as.vector(GOF_pears0$p.value),
pearson1 = as.vector(GOF_pears1$p.value),
logrank = P_logrank,
logrank_pp = P_logrank_pp
)
)
)), class = "incubate_test")
}
#' @export
print.incubate_test <- function(x, ...){
params <- paste(x$param, collapse = ' & ')
P_boot_str <- if (is.numeric(x$P$bootstrap)) format.pval(x$P$bootstrap) else '-'
cat(glue("Test for difference in {if (length(x$param) > 1) 'parameters' else 'parameter'} {params} between two groups.",
"Alternative hypothesis: {params} {c('is', 'are')[[1L + (length(x$param) > 1)]]} different between the two groups.",
"Parametric Bootstrap P-value: {P_boot_str}", .sep = "\n"), '\n')
}
#' @export
plot.incubate_test <- function(x, y, title, subtitle, ...){
stopifnot(inherits(x, "incubate_test"))
rlang::check_installed(pkg = 'ggplot2', reason = 'to get plots', version = '3.3')
testDist <- x[["testDist"]]
if (is.null(testDist)) {
message('No bootstrap test to visualize.')
return(invisible(NULL))
}
# set R = "-" when entry R is NULL or absent
R <- purrr::pluck(x, "R", .default = "-")
if (missing(title)) title <- glue("Distribution of test statistic under H0 for parameter {paste(x$param, collapse = ' & ')}")
if (missing(subtitle) && ! is.null(x$P$bootstrap)){
subtitle <- glue('Sampling distribution, based on {R} parametric bootstrap draws. ',
'Bootstrap P-value = {format.pval(x$P$bootstrap, eps = 1e-3)}')
#"Approximated by a chi-square distribution with df={signif(x[['chisq_df_hat']], 2)}.")
}
p <- ggplot2::ggplot(tibble::tibble(testDist = testDist),
mapping = ggplot2::aes_(x = ~testDist, y = ~ggplot2::after_stat(density))) +
ggplot2::geom_histogram(bins = 11L + ceiling(sqrt(R)))
# extract maximum density value
ymax <- max(ggplot2::layer_data(p)[['y']])
ymax <- ceiling(max(ymax + .1, ymax * 1.01))
if (! is.null(x[['chisq_df_hat']]) && is.numeric(x[['chisq_df_hat']])) p <- p + ggplot2::geom_function(inherit.aes = FALSE,
fun = stats::dchisq, args = list(df = x[['chisq_df_hat']]),
col = "red", linetype = "dotted")
p +
ggplot2::geom_vline(xintercept = x[["t_obs"]], linetype = "dashed", colour = "grey") +
ggplot2::coord_cartesian(ylim = c(0L, ymax)) +
ggplot2::labs(x = "Test statistic",
title = title, subtitle = subtitle)
}
#' Power simulation function for a two-group comparison of the delay parameter.
#'
#' There are two ways of operation:
#' 1. `power=NULL` Given sample size `n` it simulates the power.
#' 2. `n=NULL` Given a power an iterative search is started to find a suitable `n` within a specified range.
#'
#' In any case, the distribution, the parameters that are tested for, the type of test and the effect size (`eff=`) need to be specified.
#' The more power simulation rounds (parameter `nPowerSim=`) the more densely the space of data according to the specified model is sampled.
#'
#' Note that this second modus (when `n` is estimated) is computationally quite heavy.
#' The iterative search for `n` uses some heuristics and the estimated sample size might actually give a different power-level.
#' It is important to check the stated power in the output. The search algorithm comes to results closer to the power aimed at
#' when the admissible range for sample size (`nRange=`) is chosen sensibly.
#' In case the estimated sample size and the achieved power is too high it might pay off to rerun the function with an adapted admissible range.
#'
#' @param distribution character. Which assumed distribution is used for the power calculation.
#' @param twoPhase logical(1). Do we model two phases per group? Default is `FALSE`, i.e. a single delay phase per group.
#' @param eff list. The two list elements contain the model parameters (as understood by the delay-distribution functions provided by this package) for the two groups.
#' @param param character. Parameter name(s) which are to be tested for difference and for which to simulate the power. Default value is `'delay1'`.
#' @param test character. Which test to use for this power estimation?
#' @param n integer. Number of observations per group for the power simulation or `NULL` when n is to be estimated for a given power.
#' @param power numeric. `NULL` when power is to be estimated for a given sample size or a desired power is specified (and `n` is estimated).
#' @param r numeric. Ratio of both groups sizes, ny / nx. Default value is 1, i.e., balanced group sizes. Must be positive.
#' @param sig.level numeric. Significance level. Default is 0.05.
#' @param nPowerSim integer. Number of simulation rounds. Default value 1600 yields a standard error of 0.01 for power if the true power is 80%.
#' @param R integer. Number of bootstrap samples for test of difference in parameter within each power simulation. It affects the resolution of the P-value for each simulation round. A value of around `R=200` gives a resolution of 0.5% which might be enough for power analysis.
#' @param nRange integer. Admissible range for sample size when power is pre-specified and sample size is requested.
#' @param verbose numeric. How many details are requested? Higher value means more details. 0=off, no details.
#' @return List of results of power simulation. Or `NULL` in case of errors.
#' @export
power_diff <- function(distribution = c("exponential", "weibull"), twoPhase = FALSE, param = "delay1",
test = c('bootstrap', 'pearson', 'moran', 'logrank', 'logrank_pp', "LR"),
eff = stop("Provide parameters for both groups that reflect the effect!"),
n = NULL, r = 1, sig.level = 0.05, power = NULL, nPowerSim = 1600, R = 201,
nRange = c(5, 50), verbose=0){
tol_pow <- .001
distribution <- match.arg(distribution)
if (! missing(test)) test <- tolower(test)
test <- match.arg(arg = test)
test_cat <- sub(pattern = "[_].*$", replacement = "", x = test)
ranFun <- getDist(distribution, type = "r")
onames <- getDist(distribution, type = "param", twoPhase = twoPhase, twoGroup = FALSE, transformed = FALSE)
# parameters to test differences and for which power is requested
if (any(grepl(pattern = "_tr", param, fixed = TRUE))){
stop("Parameter names in param= refer to the distribution parameters and not to the transformed parameters of the objective function.", call. = FALSE)
}
# translate convenience names (for single phase) to canonical names
unNmbrdIdx <- !grepl(pattern = "[12]", param, fixed = FALSE)
if (any(unNmbrdIdx)){
param[unNmbrdIdx] <- paste0(param[unNmbrdIdx], "1") #interpret un-numbered parameters as referring to phase 1
if (verbose > 0L) cat("Unnumbered parameter names in param= are taken to refer to initial phase and are translated to canonical parameter names.\n")
}
# only valid names in canonical order
param <- intersect(onames, param)
if (!length(param)){
stop("Provide valid parameter names from the distribution to test for differences in two groups.", call. = FALSE)
}
param <- match.arg(param, choices = onames)
stopifnot( is.null(n) || (is.numeric(n) && length(n) == 1L && is.finite(n) ))
stopifnot( is.null(power) || (is.numeric(power) && length(power) == 1L && power > 0L && power < 1L ))
if (is.null(n) + is.null(power) != 1L) stop('Either set `n=NULL` or `power=NULL`!')
stopifnot( length(sig.level) == 1L, is.numeric(sig.level), is.finite(sig.level), sig.level > 0L, sig.level < 1L )
stopifnot(is.numeric(r), length(r) == 1L, r > 0L)
stopifnot( length(nPowerSim) == 1L, is.numeric(nPowerSim), nPowerSim >= 3L )
stopifnot( length(R) == 1L, is.numeric(R), R >= 3L )
stopifnot( length(nRange) == 2L, is.numeric(nRange), nRange[[1L]] > 1L, nRange[[2L]] > nRange[[1L]] )
nPowerSim <- ceiling(nPowerSim)
R <- ceiling(R)
stopifnot( is.list(eff), length(eff) == 2L )
parx <- eff[[1L]]
pary <- eff[[2L]]
stopifnot( is.numeric(parx), is.numeric(pary) )
stopifnot( length(parx) == length(onames), length(pary) == length(onames))
parx <- rlang::set_names(parx, onames)
pary <- rlang::set_names(pary, onames)
simulatePower <- function(nx, ny, B = nPowerSim, R){
nx <- ceiling(nx); ny <- ceiling(ny)
# repeatedly test for difference in parameter on bootstrapped data
P_dist <- future.apply::future_vapply(X = seq_len(B), FUN.VALUE = double(1L),
FUN = function(dummy) {
# generate data according to chosen model
#+and with the specified effect
datx <- purrr::exec(ranFun, !!! c(n=nx, parx))
daty <- purrr::exec(ranFun, !!! c(n=ny, pary))
P_val <- NA_real_
try(expr = {
P_val <- purrr::pluck(test_diff(x = datx, y = daty,
distribution = distribution, twoPhase = twoPhase,
param = param, type = test_cat, R = R),
"P", test, .default = NA_real_)
}, silent = TRUE)
P_val
}, future.seed = TRUE)
P_dist <- P_dist[is.finite(P_dist)]
if ( !length(P_dist) ){
warning("No valid power simulation results.")
return(invisible(NULL))
}
if ( length(P_dist) < 100L )
warning("Low resultion for power estimate.")
if (length(P_dist))
sum(P_dist < sig.level) / length(P_dist) else
NA_real_
} #fun
nx <- ny <- -1
powerGrid <- NULL
if (is.null(power)){
# easy case: estimate power once!
nx <- ceiling(n)
ny <- ceiling(r * n)
if ( nx < length(onames) || ny < length(onames) ){
warning("Too few observations to fit parameters.")
return(invisible(NULL))
}
power <- simulatePower(nx = nx, ny = ny, B = nPowerSim, R = R)
} else {
# estimate n for specified power
stopifnot( is.null(n) )
Bmax1 <- 200L
Rmax1 <- 100L
B1 <- min(Bmax1, nPowerSim)
R1 <- min(Rmax1, R)
i2 <- -1L
# 1st iteration
nx_cand1 <- unique(ceiling(seq.int(from = nRange[[1L]], to = nRange[[2L]], length.out = 5L)))
NBR_CAND1 <- length(nx_cand1)
pow_cand1 <- rep_len(-1, length.out = NBR_CAND1)
# if single n remains, return the power for it (no search for n necessary)
if (NBR_CAND1 == 1L) return(power_diff(distribution, twoPhase = twoPhase, param, test = test, eff,
n = nx_cand1[[1L]], power = NULL,
r = r, sig.level = sig.level, nPowerSim = nPowerSim, R = R))
for (i1 in seq_along(nx_cand1)) {
nxc <- nx_cand1[[i1]]
pow_cand1[[i1]] <- simulatePower(nx = nxc, ny = nxc * r, B = B1, R = R1)
if (pow_cand1[[i1]] >= power - tol_pow) break
} #rof
# store preliminary power estimates
powerGrid <- tibble::tibble(
nx = nx_cand1[pow_cand1 > 0],
ny = ceiling(nx * r),
power = pow_cand1[pow_cand1 > 0],
iter = 1L,
B = B1,
R = R1
)
if (NROW(powerGrid) <= 1L) {
stop('Failed to find power estimates within specified range!', call. = FALSE)
}
REFINE <- TRUE #NROW(powerGrid) >= 2L
# check first iteration
if (i1 == 1L){
warning('Smallest allowed n already exceeds requested power!', call. = FALSE)
REFINE <- FALSE
}
# check last iteration
if (i1 == NBR_CAND1 && pow_cand1[[NBR_CAND1]] > -1 && pow_cand1[[NBR_CAND1]] < power - tol_pow){
warning(glue('Failed to reach requested power with maximally allowed n: {nx_cand1[[NBR_CAND1]]} ',
'yields a power of {as_percent(pow_cand1[[NBR_CAND1]])}.'))
REFINE <- FALSE
}
if (!REFINE){
nx <- ceiling(nx_cand1[[i1]])
ny <- ceiling(nx_cand1[[i1]] * r)
power <- if (B1 < nPowerSim || R1 < R) stats::weighted.mean(x = c(pow_cand1[[i1]], simulatePower(nx, ny, B = nPowerSim, R = R)),
w = c(B1, nPowerSim)) else
pow_cand1[[i1]]
} else {
powerMod <- if (NROW(powerGrid) == 2L) stats::lm(power ~ nx, data = powerGrid) else stats::lm(power ~ poly(nx, 2), data = powerGrid)
powerPred <- tibble::tibble(nx = seq.int(from = nRange[[1L]], to = nRange[[2L]], by = 1L),
predpower = stats::predict.lm(powerMod, newdata = data.frame(nx = nx)),
diffpower = .data$predpower - power)
# examine close neighbourhood of predicted best n
powerPredInd <- intersect(seq_len(NROW(powerPred)), c(-1L, 0L, 1L) + which.max(powerPred$diffpower >= 0L))
nx_cand2 <- powerPred$nx[powerPredInd]
NBR_CAND2 <- length(nx_cand2)
pow_cand2 <- rep_len(-1, length.out = NBR_CAND2)
for (i2 in seq_along(nx_cand2)){
nxc <- nx_cand2[[i2]]
pow_cand2[[i2]] <- simulatePower(nx = nxc, ny = nxc * r, B = nPowerSim, R = R)
} #rof
powerGrid2 <- tibble::tibble(
nx = nx_cand2[pow_cand2 > 0],
ny = ceiling(nx * r),
power = pow_cand2[pow_cand2 > 0],
iter = 2L,
B = nPowerSim,
R = R)
stopifnot( any(powerGrid2$power >= power - tol_pow) )
nx <- powerGrid2$nx[which.max(powerGrid2$power >= power - tol_pow)]
ny <- ceiling(nx * r)
power <- powerGrid2$power[which(powerGrid2$nx == nx)]
# store 2nd round (refinement) power estimates
powerGrid <- rbind(powerGrid, powerGrid2)
}
stopifnot( nx > 0L, ny > 0L, power > 0L)
}
purrr::compact(
list(name = "Difference in delayed model for time-to-event data in two groups",
distribution = distribution,
twoPhase = twoPhase,
param = param,
test = test,
eff = eff,
sig.level = sig.level,
nx = nx, ny = ny, N = nx + ny,
#P_dist = P_dist, ##debug
powerGrid = powerGrid,
power = power
)
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.