# library(devtools)
# install_github("floatofmath/bt88.03.704")
library(adaperm)
library(magrittr)
library(plyr)
library(dplyr)
library(pander)
library(parallel)
library(bt88.03.704)
library(flip)
library(MASS)
options(mc.cores=detectCores()-1)

#N permutation for the first simulations
Bsim=1000
#source('~/github/resamplingMCP/R/simulation.R', echo=TRUE)

summaryResSim <- function(res,alphas=c(.01,.05,.1,.5,.75),na.rm=FALSE){
  out=sapply(alphas,function(a)colMeans(res<=a,na.rm=na.rm))
  colnames(out)=paste("<=",alphas,sep="")
out  
}

Generating paired counts

r_counts_pair generates paired counts as descried in the description.

The type I error is out of control (realy???)

#' @description generates differents of (paired) counts from a poisson with mean rchisq(df=df)+delta and rchisq(df=df)
r_counts_pair <- function(n,delta=0,df=2){
  means=1/rchisq(n=n,df = df)
  means[means==0]=.5
  means[means>=1000]=1000
  y=rpois(n = n,lambda = means+delta)-
    rpois(n = n,lambda = means)
  y
}

n=10
Y=matrix(r_counts_pair (n*Bsim,df=1),n,Bsim)
ps=apply(Y,2,function(y)t.test(y)$p.value)
plot.ecdf(ps);abline(0,1)
ps.prm=p.value(flip(Y))
plot.ecdf(ps.prm,add=TRUE,col="red")
pander(summaryResSim(cbind(param.t=ps,perm=ps.prm)))

Generating 2 independent samples

r_counts generates paired counts as in the description

#' @description generates counts 
#' from a poisson with lambda = rchisq(df=df*theta)
#' If means is not null, it generates from a poisson with given (fixed?) parameter lambda.
#' 

r_counts <- function(n,theta=1,df=2,lambda=NULL){
  # if(!is.finite(disp))
  #   rpois(n,lambda=exp(mu)) else
  #   rnbinom(n,mu=exp(mu),size=exp(disp))
  if (is.null(lambda)) lambda=rchisq(n=n,df = df)
#   if(is(lambda,"try-error")) {browser()}
  lambda[lambda<.5]=.5
  lambda[lambda>10000]=10000
  y=rpois(n = n,lambda = lambda*theta)
  y
}

set.seed(1)
n=14
theta=1
df=5
x=rep(0:1,each=n/2)
Y=rbind(matrix(r_counts(Bsim*n/2,df=df),n/2),matrix(r_counts(Bsim*n/2,df=df,theta=theta),n/2))

ps=apply(Y,2,function(y){res=summary(glm(y~x,family = poisson))
                         s <- res$coefficients[2,"z value"]>0
                         p <- res$coefficients[2,"Pr(>|z|)"]
                         p <- ifelse(s,p/2,1-p/2)})

ps.prm=p.value(flip(Y~x,tail=1))

ps.nb=apply(Y,2,function(y){res=summary(glm.nb(y~x))
                            s <- res$coefficients[2,"z value"]>0
                             p <- res$coefficients[2,"Pr(>|z|)"]
                             p <- ifelse(s,p/2,1-p/2)})


plot.ecdf(ps);abline(0,1)
plot.ecdf(ps.prm,add=TRUE,col="red");abline(0,1)
plot.ecdf(ps.nb,add=TRUE,col="green");abline(0,1)

pander(summaryResSim(cbind(param.t=ps,param.nb=ps.nb,perm=ps.prm)))

Under H1

set.seed(1)
theta=1.5
Y=rbind(matrix(r_counts(Bsim*n/2,df=df),n/2),matrix(r_counts(Bsim*n/2,df=df,theta=theta),n/2))

ps=apply(Y,2,function(y){res=summary(glm(y~x,family = poisson))
                         s <- res$coefficients[2,"z value"]>0
                         p <- res$coefficients[2,"Pr(>|z|)"]
                         p <- ifelse(s,p/2,1-p/2)})

ps.prm=p.value(flip(Y~x,tail=1))

ps.nb=apply(Y,2,function(y){res=summary(glm.nb(y~x))
                            s <- res$coefficients[2,"z value"]>0
                             p <- res$coefficients[2,"Pr(>|z|)"]
                             p <- ifelse(s,p/2,1-p/2)})


plot.ecdf(ps);abline(0,1)
plot.ecdf(ps.prm,add=TRUE,col="red");abline(0,1)
plot.ecdf(ps.nb,add=TRUE,col="green");abline(0,1)

pander(summaryResSim(cbind(param.t=ps,param.nb=ps.nb,perm=ps.prm)))

Some quick simulation studies

source("counts_functions.R")
# qui si trova la
# adaptive_permtest_2s
#usata in queste simulazioni

Examples

Apply test procedures

in the example and in the simulation we will also try adaptive_permtest_2s(x,y,n1,n,ne,meanratio) which is based on the ratio of the two sample means, pretty much the same as wald statistic does.

n1 <- 10
n <- 20
ne <- 25
x <- r_counts(ne,df=5)
y <- r_counts(ne,df=5,theta=2)
# adaptive_permtest_2s(x,y,n1,n,ne,meandiff,restricted=FALSE,type='non-randomized')
# adaptive_permtest_2s(x,y,n1,n,ne,meanratio,restricted=FALSE,type='non-randomized')

adaperm_DR(c(x,y),c(rep(0,ne),rep(1,ne)),n1,n,
           test=meandiff,cer_type='non-randomized',atest_type='midp',permutations =10)

adaptive_invnormtest_2s(x,y,n1,n,ne)
adaptive_invnorm_wilcoxtest_2s (x,y,n1,n,ne)
adaptive_invnormtest_negbin_2s(x,y,n1,n,ne)
adaptive_waldtest_2s(x,y,n1,n,ne)
adaptive_invnormtest_2s(x,y,n1,n,ne)

samplesize_counts(2,5)
samplesize_counts(1.5,5)

Compare tests

## one example fixed rule
compare_adaptive_tests_2s(14,28,condpowerrule_counts,
                          control_opts=list(df=5),
                         perms=100, treatment_opts=list(df=5,theta=1.5),rdist=r_counts,test_statistic=meandiff)

Simulation

# adaperm_DR(c(x,y),c(rep(0,ne),rep(1,ne)),n1,n,
#            test=meandiff,
#            cer_type='non-randomized',
#            atest_type='midp')
# 
# for cer_type I would try non-randomized and randomized
# for atest_type non-randomized and midp could be compared if all
# permutations are used otherwise you could try davison_hinkley
# 
# sample sizes I would try.
# 
# Assuming lambda=5,k=1,t=1,alpha=0.025,beta=0.2,
# n1=5 n=10:  lambda=5, theta=1.652, 
# n1=10 n=20  lambda=2.5, theta=1.652,
# n1=50 n=100 lambda=.5, theta=1.652,
# 
# Would be interesting if cer_type='randomized'
# makes a difference for n1=5.


run_scenario_counts <- function(B,...){
    ## just a wrapper around mclapply2
#     browser()
    scenario <- list(...)
    results <- mclapply2(1:B,function(i){ 
      compare_adaptive_tests_2s(n1=scenario$n1,
                         n=scenario$n,
                         rule=scenario$rule,rdist=scenario$rdist,
                         test_statistic=scenario$test_statistic,
                         control_opts=list(df=scenario$df,lambda=scenario$lambda),
                         treatment_opts=list(df=scenario$df,theta=scenario$theta,lambda=scenario$lambda),perms=scenario$perms)})

    results %>% bind_rows %>% summarize(ASN=mean(ne),maxSN=max(ne),
                                        perm_CERnonrndmz_Anonrndmz = mean(perm_CERnonrndmz_Anonrndmz),
                                        perm_CERother_Aother =mean(perm_CERother_Aother),
                                        perm_ratio_CERnonrndmz_Anonrndmz =  mean(perm_ratio_CERnonrndmz_Anonrndmz),
                                        perm_ratio_CERother_Aother =  mean(perm_ratio_CERother_Aother),
                                        invnormT=mean(invnorm),
                                        invnormWlcx=mean(invnorm_wlcx),
                                        invnormNB=mean(invnorm_nb),
                                        wald=mean(wald))#,npcomb=mean(npcomb),permtest=mean(permtest))
}

run_simulation_counts <- function(scenarios,B=3,...){
    params <- list(...)
    bind_rows(lapply(1:nrow(scenarios),function(p) 
      c(scenarios[p,],do.call('run_scenario_counts',c(B=B,scenarios[p,],params)))
      ))
}
B=1000
perms=1000

Poisson distribution

# set.seed(1)
# scenarios_test <- expand.grid(n1=c(5),n=10,
#                               theta= c(1.4),
#                               lambda=c(5),
#                               perms= perms)
# sim_counts_poisson=run_simulation_counts(scenarios_test,B=B,rule=condpowerrule_counts,rdist=r_counts,test_statistic=meandiff,resam=1000)
# t(sim_counts_poisson)
set.seed(1)
scenarios_test <- expand.grid(n1=c(5),
                              theta= c(1,1.4),
                              lambda=c(5),
                              perms= perms)
scenarios_test$n=scenarios_test$n1*2

temp=scenarios_test
temp$n1=10
temp$n=20
temp$lambda=2.5
scenarios_test=rbind(scenarios_test,temp)
temp$n1=50
temp$n=100
temp$lambda=.5
scenarios_test=rbind(scenarios_test,temp)

# debug(run_scenario_counts)
sim_counts_poisson=run_simulation_counts(scenarios_test,B=B,rule=condpowerrule_counts,rdist=r_counts,test_statistic=meandiff,resam=1000)
# sim_counts_poisson
save(sim_counts_poisson,file=vfile("../data/sim_counts_poisson",ending="rda"))
library(pander)
load(vfile("../data/sim_counts_poisson",ending="rda"))
pander(sim_counts_poisson,split.tables=100)

over dispersion

set.seed(1)
scenarios_test <- expand.grid(n1=c(5),
                              theta= c(1,1.4),
                              df = 5,
                              perms= perms)
scenarios_test$n=scenarios_test$n1*2

temp=scenarios_test
temp$n1=10
temp$n=20
temp$df=2.5
scenarios_test=rbind(scenarios_test,temp)
temp$n1=50
temp$n=100
temp$df=.5
scenarios_test=rbind(scenarios_test,temp)

sim_countsEQ=run_simulation_counts(scenarios_test,B=B,rule=condpowerrule_counts,rdist=r_counts,test_statistic=meandiff,resam=1000)

save(sim_countsEQ,file=vfile("../data/sim_countsEQ",ending="rda"))
library(pander)
load(vfile("../data/sim_countsEQ",ending="rda"))
pander(sim_countsEQ,split.tables=100,digits = 3)

See the distribution of the samples ASN and maxSN, is fix. furthermore, I can't decrease n1 and n.

Wald test is in control under poisson data generating process (checked by simulation), while here it is very anticonservative.

NB slightly anticonservative.



floatofmath/adaperm documentation built on May 16, 2019, 1:18 p.m.