In floatofmath/resamplingMCP: Resampling graph-based multiple comparison procedures

library(resamplingMCP)
library(magrittr)
library(plyr)
library(dplyr)
library(pander)
library(parallel)
library(bt88.03.704)
options(mc.cores=detectCores()-1)

One-sample test procedures for adaptive designs

The package resamplingMCP implements a number of test procedures that for adaptive designs the permit for a increase.

Adaptive permutation test
Adaptive non-parametric combination test
Adaptive combiniation test using inverse normal combination function
Adaptive $t$-test

n1 <- 6
n <- 12
ne <- 15
x <- rnorm(15,1)

c(adaptive_permtest_os(x,n1,n,ne,possum),
  adaptive_ttest_os(x,n1,n,ne),
  adaptive_invnormtest_os(x,n1,n,ne),
  adaptive_npcombtest_os(x,n1,n,ne,possum))

Some quick simulation studies

compare_adaptive_tests <- function(n1,n,rule,rdist,test_statistic,mpfr=F,...){
    x <- rdist(n,...)
    ne <- rule(x[1:n1])
    if(ne>n){
        x <- c(x,rdist(ne-n,...))
    } else {
        ne <- n
    }
    list(permtest = adaptive_permtest_os(x,n1,n,ne,test_statistic),
##         ttest = adaptive_ttest_os(x,n1,n,ne,mpfr=mpfr),
         invnorm = adaptive_invnormtest_os(x,n1,n,ne),
         npcomb = adaptive_npcombtest_os(x,n1,n,ne,test_statistic))
}



adaptive_permtest_2s <- function(x,y,n1,n,ne,m1,m,me,test_statistic,perms=50000,alpha=0.025){
    if(ne>n){
        xs <- split(x,rep(1:3,c(n1,n-n1,ne-n)))
    } else {
        if(me>m) stop('Stages with controls only not supported')
        xs <- split(x,rep(1:2,c(n1,ne-n1)))
    }
    if(me>m){
        ys <- split(y,rep(1:3,c(m1,m-m1,me-m)))
    } else {
        if(ne>n) stop('Stages with treatments only not supported')
        ys <- split(y,rep(1:2,c(m1,me-m1)))
    }
    gs <- lapply(1:length(xs),function(i) rep(0:1,c(length(xs[[i]],ys[[i]]))))
    xs <- lapply(1:length(xs),function(i) c(xs[[i]],ys[[i]]))
    A <- permutation_CER(xs[[1]],gs[[1]],xs[[2]],test_statistic,B=perms,alpha=alpha)
    q <- perm_test(xs[[2]],xs[[3]],gs[[2]],gs[[3]],test_statistic,B=perms)
    A>=q
}

run_simulation <- function(n1,n,mean,sd,B=10,...){
    results <- mclapply2(1:B,function(i) compare_adaptive_tests(n1,n,cond_power_rule_norm,rnorm,diffmean,mean=mean,sd=sd))
    ##:ess-bp-start::conditional@length(unique(sapply(results,length))) != 1:##
browser(expr={length(unique(sapply(results,length))) != 1})##:ess-bp-end:##
    results %>% bind_rows %>% colMeans
}

##' @param rule adaptive sample size rule
##' @param rdist random number generator for the data
##' @param test_statistic function that computes the test statistic
##' @param control_opts list of options past to \code{rdist} for the control group
##' @param treatment_opts list of options past to \code{rdist} for the treatment group
##' @param m1 first stage sample size (control group)
##' @param m preplanned total sample size (control group)
##' @param ... 
##' @return list of test results 
##' @author Florian Klinglmueller
compare_adaptive_tests_2s <- function(n1,n,rule,rdist,
                                      test_statistic,
                                      control_opts=NULL,
                                      treatment_opts=control_opts,
                                      m1=n1,m=n,...){
    x <- do.call(match.fun(rdist),c(n=n,control_opts))
    y <- do.call(match.fun(rdist),c(n=n,treatment_opts))
    nes <- rule(x[1:n1],y[1:m1])
    if(nes[1]>n){
        ne <- nes[1]
        x <- c(x,rdist(ne[1]-n,...))
    } else {
        ne <- n
    }
    if(ne[2]>m){
        me <- nes[2]
        y <- c(x,rdist(ne[2]-m,...))
    } else {
        me <- m
    }
    list(permtest = adaptive_permtest_2s(x,y,n1,n,ne,m1,m,me,test_statistic),
#         ttest = adaptive_ttest_os(x,n1,n,ne),
         invnorm = adaptive_invnormtest_2s(x,y,n1,n,ne,m1,m,me),
         npcomb = adaptive_npcombtest_2s(x,y,n1,n,ne,m1,m,me,test_statistic))
}

list(
## one example fixed rule
compare_adaptive_tests(6,12,function(x) 3,rnorm,diffmean,mean=.5),
## one example conditional power rule
compare_adaptive_tests(6,12,cond_power_rule_norm,rnorm,diffmean,mean=.5),
## one example conditional power rule
compare_adaptive_tests(6,12,cond_power_rule_t,rnorm,diffmean,mean=.5)) %>% bind_rows

## lets replicate
system.time(sim <- bind_rows(mclapply2(1:100,function(i) compare_adaptive_tests(10,20,cond_power_rule_norm,rnorm_scont,diffmean,mean=0,cmean=4,cprop=0.2,csd=.1))))
## slow and doesn't seem to solve the problem
##system.time(sim <- mclapply2(1:100,function(i) compare_adaptive_tests(10,20,cond_power_rule_norm,rnorm_scont,diffmean,mean=0,cmean=4,cprop=0.2,csd=.1,mpfr=T)))

colMeans(sim)

Let's do all of this in a more systematic way

Small sample scenario

We start with a preplanned sample size of twelve. Which provides $6$ observations for each group. Such that we will start all trials with a first stage of size $6$ and second stage of at least $6$.

Normal distribution

First let's have a look at the normal distribution. This is what both the adaptive $t$-test and the inverse normal combination (of $t$-tests) test assume.

Type 1 error

We look at a couple of parameter settings. First we consider scenarios under the null. The remaining parameters that are of most interest are preplanned sample size and the standard deviation of observations. The scenario is chosen such that if data were distributed with a mean of $\mu_0 = 1$ and standard deviation $\sigma_0 = 1$ a sample size of 12 would give a power of roughly 88% for the one-sample $t$-test against a one-sided alternative (computed using R's function power.t.test).

During the interim analysis the sample size for the second stage will be reassessed using a conditional power rule. That is, the variance is estimated using the observed first stage outcomes and the second stage sample size is increased to provide a conditional power of $90\%$ using the standard normal sample size formula

$$\left[\frac{\mu_0}{s(\boldsymbol{x}^F)} \left(\Phi^{-1}(1-\alpha) + \Phi^{-1}(1-\beta)\right)\right],$$

where $s(\boldsymbol{x}^F)$ denotes the standard deviation estimated from the first stage observations, $\alpha$ is set to $0.025$ and $\beta = .1$.

scenarios <- expand.grid(n1=6,
                         n=12,
                         mean=0,
                         sd=c(.5,1,1.5,2))

lapply(1:nrow(scenarios),function(p) c(scenarios[p,],do.call('run_simulation',c(scenarios[p,],B=50000)))) %>%
    bind_rows ->
    sim_norm_null

save(sim_norm_null,file="~/repos/resamplingMCP/data/sim_norm_null.rda")

load("~/repos/resamplingMCP/data/sim_norm_null.rda")
pander(sim_norm_null,split.tables=100)

Under the alternative

Observe that the adaptive $t$-test does not control the Type I error rate in any scenario. This is surprising as (unconditional - finite sample) Type I error control of this method has been proven theoretically and confirmed numerically. Implementation of the approach however proved to be numerically challanging; especially the conditional density of the sum of squares at the preplanned end of the trial. It seems as if this problems are exacerbated in with small samples. We will therefore contact the authors of the original paper if they are aware of a better implementation.

We stick with the preplanned sample size of $n=12$ with a first stage of $n^F = 6$ and then look at the power of the different test procedures for mean values ranging from $.1$ to $1$ in steps of $.1$. The standard deviation of outcomes is fixed at $\sigma = 1$.

scenarios  <- expand.grid(n1=6,
                          n=12,
                          mean = round(seq(.1,1,.1),1),
                          sd = 1)

lapply(1:nrow(scenarios),function(p) c(scenarios[p,],do.call('run_simulation',c(scenarios[p,],B=50000)))) %>%
    bind_rows ->
        sim_norm_alt

save(sim_norm_alt,file="../data/sim_norm_alt.rda")

library(pander)
load("../data/sim_norm_alt.rda")
pander(sim_norm_alt,split.tables=100)

In a larger sample

So far we have only looked at scenarios with a fairly limited sample size. As it turns out the non-parametric combination test is not very usefull in such a situation which is due to the discreteness of the permutation distribution.

We now look at simulated trials with slightly larger sample sizes. We set the preplanned sample size to $n = 100$ with a first stage of $n^F = 50$. We simulate under the null hypothesis for standard deviations of $2, 3, 9, 12$.

scenarios <- expand.grid(n1=50,
                         n=100,
                         mean=0,
                         sd=c(2,3,9,12))

library(parallel)
options(mc.cores=3)
lapply(1:nrow(scenarios),function(p) c(scenarios[p,],do.call('run_simulation',c(scenarios[p,],B=1000)))) %>% bind_rows -> sim_norm_null_ls

save(sim_norm_null_ls,file="../data/sim_norm_null_ls.rda")

library(pander)
load("../data/sim_norm_null_ls.rda")
pander(sim_norm_null_ls,split.tables=100)

Under the alternative

Finally we end our investigation into the operating characteristics of adaptive tests under normality with a look at their performance under the alternative for a larger preplanned trial. Again we set the preplanned total sample size to $n=100$ with a first stage of $n^F = 50$. Again we compare the power of the different test procedures for mean values ranging from $.1$ to $1$ in steps of $.1$. The standard deviation of outcomes is now fixed at $\sigma = 3$. This was chosen so that the preplanned trial (assuming $\mu_0=1$, $\sigma_0=3$) has a power of about $90%$ ($91%$ according to power.t.test).

scenarios <- expand.grid(n1=50,
                         n=100,
                         mean=seq(0.1,1,.1),
                         sd=3)

library(parallel)
options(mc.cores=3)
lapply(1:nrow(scenarios),function(p) c(scenarios[p,],do.call('run_simulation',c(scenarios[p,],B=1000)))) %>% bind_rows -> sim_norm_alt_ls

save(sim_norm_null,file="../data/sim_norm_alt_ls.rda")