In floatofmath/adaperm: Permutation tests for adaptive designs

knitr::opts_chunk$set(eval=FALSE,echo=FALSE,warning=FALSE,results='hide',fig.width=7,fig.height=7,message=FALSE)

During a recent investigation we found that the inverse normal combination test of stage-wise $t$-tests performs surprisingly well if the parent distribution is not normal. It is well known that the $t$-test controls the type I error for a wide range of parent distribution. This robustness, however, comes at the price of reduced power. As we will show the inverse normal combination test of stagewise t-tests does actually outperform many nonparametric tests in terms of power when the parent distribution is non-normal.

library(magrittr)
library(plyr)
library(dplyr)
library(pander)
library(parallel)
library(bt88.03.704)
library(adaperm)
library(flip)
library(ggplot2)
options(mc.cores=detectCores()-1)
##' Adaptive permutation test one-sample problems
##'
##' @title One-sample adaptive permutation test
##' @template onesample_sims
##' @param combination_function Function to combine stage-wise (permutation) p-values
##' @param perms Maximum number of permutations to use when computing permutation p-values and conditional error rates
##' @author Florian Klinglmueller
##' @export
adaptive_permtest2_os <- function(x,n1,n,ne,test_statistic,perms=50000,resam=100,alpha=0.025){
    if(ne>n){
        xs <- split(x,rep(1:3,c(n1,n-n1,ne-n)))
        gs <- split(sign(x)>0,rep(1:3,c(n1,n-n1,ne-n)))
        A <- permutation_CER2(xs[[1]],gs[[1]],xs[[2]],xs[[3]],test_statistic,one_sample=TRUE,restricted=FALSE,
                              permutations=perms,subsamples=resam,alpha=alpha)
        q <- perm_test(xs[[2]],xs[[3]],gs[[2]],gs[[3]],test_statistic,restricted=FALSE,B=perms)
        return(A>=q)
    } else {
        xs <- split(x,rep(1:2,c(n1,ne-n1)))
        gs <- split(sign(x)>0,rep(1:2,c(n1,ne-n1)))
        return(alpha>=perm_test(xs[[1]],xs[[2]],gs[[1]],gs[[2]],test_statistic,restricted=FALSE,B=perms))
    }
}

# mclapply2(1:1000,function(i) adaptive_permtest2_os(rnorm_cont(40,mean=.5,cshift=0,csd=3),10,20,40,diffmean,perms=10000,resam=10)) %>% unlist %>% mean
# mclapply2(1:1000,function(i) adaptive_permtest2_os(rnorm_cont(40,mean=.2,cshift=0,csd=3),20,40,40,signedranks,perms=10000,resam=10)) %>% unlist %>% mean
B=1000
n=40
data <- matrix(rnorm_cont(B*n,mean=0.5,cshift=0,csd=3),nrow=n)
c(permW=mean(flip(data,statTest="Wilcoxon",perms=10000,tail=1)@res$`p-value`<.025),
  permT=mean(flip(data,perms=10000,tail=1)@res$`p-value`<.025),
  apermW=mclapply2(1:B,function(i) adaptive_permtest_os(data[,i],20,40,40,signedranks,perms=10000),mc.progress=F) %>% unlist %>% mean,
  apermT=mclapply2(1:B,function(i) adaptive_permtest_os(data[,i],20,40,40,diffmean,perms=10000),mc.progress=F) %>% unlist %>% mean,
  invnorm=mclapply2(1:B,function(i) adaptive_invnormtest_os(data[,i],20,40,40),mc.progress=F) %>% unlist %>% mean,
  ttest=mclapply2(1:B,function(i) (t.test(data[,i],alternative='greater')$p.value < 0.025),mc.progress=F) %>% unlist %>% mean,
  wilcox=mclapply2(1:B,function(i) (wilcox.test(data[,i],alternative='greater')$p.value < 0.025),mc.progress=F) %>% unlist %>% mean) -> nonadaptive

save(nonadaptive,file="nonadaptive.rda")

Some comments the Wilcoxon test is actually much better that the t-test and also than the invnorm test.

load("~/repos/adaperm/vignettes/nonadaptive.rda")
pander(nonadaptive)

c(invnormM = mclapply2(1:B,function(i) adaptive_invnormtest_os(data[,i],15,30,40),mc.progress=F) %>% unlist %>% mean,
  permTM = mclapply2(1:B,function(i) adaptive_permtest_os(data[,i],15,30,40,diffmean,perms=10000),mc.progress=F) %>% unlist %>% mean,
  permWM = mclapply2(1:B,function(i) adaptive_permtest_os(data[,i],15,30,40,signedranks,perms=10000),mc.progress=F) %>% unlist %>% mean,
  invnormS = mclapply2(1:B,function(i) adaptive_invnormtest_os(data[,i],10,20,40),mc.progress=F) %>% unlist %>% mean,
  permTS = mclapply2(1:B,function(i) adaptive_permtest_os(data[,i],10,20,40,diffmean,perms=10000),mc.progress=F) %>% unlist %>% mean,
  permWs = mclapply2(1:B,function(i) adaptive_permtest_os(data[,i],10,20,40,signedranks,perms=10000),mc.progress=F) %>% unlist %>% mean) -> adaptive
save(adaptive,file="adaptive.rda")

load("~/repos/adaperm/vignettes/adaptive.rda")
pander(adaptive)

In the adaptive setting this changes. Here the adaptive wilcoxon test looses quite a bit of power when the preplanned second stage size is small in comparison to the actual second stage sample size, such that the inverse normal combination test may be favourable at a certain stage.

Possible improvements of the adaptive signedrank test

One may however improve the adaptive signedrank test in two ways.

1. Subsampling the extended second stage

This is already implemented in the package and has been shown to provide improvements in power.

c(subsWM = mclapply2(1:B,function(i) adaptive_permtest2_os(data[,i],15,30,40,signedranks,perms=10000),mc.progress=F) %>% unlist %>% mean,
  subsWs = mclapply2(1:B,function(i) adaptive_permtest2_os(data[,i],10,20,40,signedranks,perms=10000,resam=1000),mc.progress=F) %>% unlist %>% mean) -> subsampling
save(subsampling,file="subsampling.rda")

load("~/repos/adaperm/vignettes/subsampling.rda")
pander(subsampling)

The subsampling approach may be further improved using a bootstrap procedure that also takes into account samples from the first stage. The latter approach may be problematic as this introduces dependeny between first and second stage data.
and second by looking at the conditional distribution of the second stage ranks. Here the idea is to draw ranks for the second stage by keeping the order of the first stage ranks as well as the observed first stage signs.

See below for one stap of the sampling process for the second stage data.

We need to assume that the probability of the second stage data to fall between any of the first stage observations is the same. This obviously does not hold for all parent distributions! A way out may be bootstrap? - Eventhough Werner claims that just that is true.

x1 <- rnorm(10)
r1 <- rank(x1)

## preplanned second stage 10
## sample one possible second stage outcome
r2 <- sample(100,10)
r2[r2 %in% r1*10] <- r2[r2 %in% r1*10]+1
r2 <- rank(c(r2,r1*10))[1:10]

Why is the inverse normal combination test better when the second

stage is large?

I guess it is because with the conditional error rate approach the discreteness of the permutation distributions is fixed, while for the combination test the second stage distribution gets less discrete with increasing second stage sample sizes.

This should go away using subsampling but it doesn't. We need to look into this further (e.g. estimating the CER of a randomized test)

compare_adaptive_tests <- function(i,n1=15,n=30,ne,rdist,permutations=100){
    x <- data[1:ne,i]
    list(ne = ne,
         permtestT = adaptive_permtest_os(x,n1,n,ne,diffmean,perms=permutations),
         invnormT = adaptive_invnorm_ttest_os(x,n1,n,ne),
         permtestW = adaptive_permtest_os(x,n1,n,ne,signedranks,perms=permutations),
         invnormW = adaptive_invnorm_wilcoxtest_os(x,n1,n,ne))
}




## mclapply2(1:nrow(G),function(j) list(n1=G[j,'n2'],permtest=adaptive_permtest_os(data[,G[j,'i']],15,30,G[j,'n2'],signedranks,perms=100)),mc.progress=T) %>% bind_rows%>% group_by(n1) %>% summarize(power=mean(permtest))

## mclapply2(1:nrow(G),function(j) c(G[j,'n1'],permtest=adaptive_invnorm_wilcoxtest_os(data[,G[j,'n1']],15,30,G[j,'n2'],signedranks,perms=1000)),mc.progress=T) %>% bind_rows %>% group_by(n1) %>% summarize(power=mean(permtest))





run_scenario <- function(B,...){
    ## just a wrapper around mclapply2
    results <- mclapply2(1:B,function(i) compare_adaptive_tests(i,...))
    results %>% bind_rows %>% summarize(ASN=mean(ne),maxSN=max(ne),permtestT=mean(permtestT),invnormT=mean(invnormT),permtestW=mean(permtestW),invnormW=mean(invnormW))
}

run_simulation_general <- function(scenarios,B=100,...){
    params <- list(...)
    bind_rows(lapply(1:nrow(scenarios),function(p) c(scenarios[p,],do.call('run_scenario',c(B=B,scenarios[p,],params)))))
}

B=2000
n=200
data <- matrix(rnorm_cont(B*n,mean=0.8,sd=2,cshift=0,csd=3),nrow=n)

## small preplanned trial
G5 <- expand.grid(ne=seq(10,100,10),n1=5,n=10,permutations=10000)
G6 <- expand.grid(ne=seq(12,100,10),n1=6,n=12,permutations=10000)
G10 <- expand.grid(ne=seq(20,100,10),n1=10,n=20,permutations=10000)
## medium preplanned trial
Gm <- expand.grid(ne=seq(30,100,10),n1=15,n=30,permutations=10000)
## large preplanned trial
Gl <- expand.grid(ne=seq(60,100,10),n1=30,n=60,permutations=10000)

G <- rbind(G5,G6,G10,Gm,Gl)
power_second_stage <- run_simulation_general(G,B=B)

##save(power_second_stage,file=vfile('power_second_stage'))

##load('~/srv-home/repos/adaperm/vignettes/power_second_stage_node5_151221.Rd')


ggplot(subset(power_second_stage,n1>10),aes(x=ne,y=permtestT/invnormT))+geom_path(aes(linetype=factor(n1)),col='green') +
    geom_path(aes(y=permtestW/invnormW,linetype=factor(n1)),col='blue')

foo <- data[,1]
x1 <- foo[1:5]
x2 <- foo[6:10]
hist(dist <- adaperm:::perm_dist(x1,x2,sign(x1),sign(x2),stat=signedranks,B=10000,restricted=F))
abline(v={talpha <- min(dist[rank(dist,ties.method='min')>=ceiling((1-.025)*length(dist))])})
hist(cdist <- adaperm:::cond_dist(x1,x2,sign(x1),sign(x2),stat=signedranks,B=1000,restricted=F))

rank(dist,ties.method ="max",na.last="keep")/length(dist)
abline(v=talpha)
permutation_CER(x1=x1,g1=sign(x1),x2=x2,stat=sumdiff,permutations=10000,one_sample=T)
permutation_CER2(x1=x1,g1=sign(x1),x2=x2,stat=sumdiff,permutations=10000,one_sample=T)
mean(cdist>=talpha)
mean(dist>=talpha)