knitr::opts_chunk$set(eval=FALSE,echo=FALSE,warning=FALSE,results='hide',fig.width=7,fig.height=7,message=FALSE)
library(magrittr)
library(plyr)
library(dplyr)
library(microbenchmark)
library(pander)
library(parallel)
library(bt88.03.704)
library(adaperm)
#devtools::install_github('floatofmath/adaperm')    
library(ldbounds)
options(mc.cores=detectCores()-1)
library(matrixStats)

Robust scale estimates

The unbiased variance estimate derived under the normal distribution is unreliable as a basis for sample size reassessment when observations are not normally distributed. We therefore investigate other estimates of scale for use in sample size reassessment.

We consider four robust scale estimates:

1. the median absolute deviation (MAD),
2. the interquartile range (IQR),
3. $S_n$ as suggested by @rousseeuw1993alternatives,
4. $Q_n$ as suggested by @rousseeuw1993alternatives.

In a first step we look at the bias and standard deviation of the four estimates for a single sample, for sample sizes between 2 and 21 as well as 50, 51, 100, and 101.

Code to compute $S_n$ and $Q_n$ as well as two-sample generalizations.

At the moment we use a computationally suboptimal implementation to compute $S_n$ and $Q_n$, which requires $O(n^2)$ operations instead of the $O(n \log n)$ required by more efficient algorithms.

We have also implemented the $O(n\log n)$ algorithm of @rousseeuw1992computation in native R; however, due to the efficiency of R's built-in matrix operations, the "suboptimal" matrix implementation is faster up to sample sizes of $n \approx 200$. In addition, the latter implementation uses only 2-3 lines of code.

## S_n of Rousseeuw & Croux: low median over i of the high median over j of
## |x_i - x_j|, computed via an n x n matrix of absolute differences (O(n^2)).
sn <- function(x,factor=1.1926){
    n <- length(x)
    factor*sort(matrixStats::rowOrderStats(abs(matrix(x,n,n) - matrix(x,n,n,byrow=T)),which=floor(n/2)+1))[floor((n+1)/2)]
}
## Q_n of Rousseeuw & Croux: the k-th order statistic of all pairwise absolute
## differences, with k = choose(floor(n/2)+1, 2).
qn <- function(x,factor=2.2219){
    n <- length(x)
    xm <- abs(matrix(x,n,n) - matrix(x,n,n,byrow=T))
    factor*sort(xm[upper.tri(xm)])[choose(floor(n/2)+1,2)]
}
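
As a quick sanity check, the matrix implementations can be compared against the Sn() and Qn() functions from the robustbase package, which implement the $O(n\log n)$ algorithms; the calls below assume robustbase is installed and pass the same consistency constants, which switches off robustbase's finite-sample correction, so the values should agree.

## Sanity check against robustbase (assumes the robustbase package is installed);
## passing the constants explicitly disables robustbase's finite-sample correction.
x <- rnorm(25)
c(sn = sn(x), Sn_robustbase = robustbase::Sn(x, constant = 1.1926, finite.corr = FALSE))
c(qn = qn(x), Qn_robustbase = robustbase::Qn(x, constant = 2.2219, finite.corr = FALSE))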
## Low median of the combined sample of two sorted vectors x and y;
## assumes x, y sorted and length(x) <= length(y). lx/rx and ly/ry delimit
## the current search window of the recursion.
overall_median <- function(x,y,lx=1,rx=length(y),ly=1,ry=length(y)){
    d <- length(y) - length(x)
    if(d < 0) stop('x must not be longer than y')
    length <- rx - lx +1
    even  <- 1 - (length %% 2)
    half <- floor((length-1)/2)
    minx <- floor(d/2) 
    maxx <- floor(d/2) + length(x)
    imx <- half + lx 
    imy <- half + ly
    my <- y[imy]
    if(imx <= minx){
        mx <- -Inf
        ry = imy
        lx = imx + even
    } else if(imx > maxx){
        mx <- Inf
        rx = imx 
        ly = imy + even
    } else {
        mx <- x[imx - minx]
        if(mx >= my){
            rx = imx 
            ly = imy + even
        } else {
            ry = imy
            lx = imx + even
        }
    }
    if(lx > maxx){
        return(my)
    }
    if(lx >= rx || ly >= ry){
        return(min(x[lx - minx],y[ly]))
    }
    overall_median(x,y,lx,rx,ly,ry)
}

Sn_fast <- function(x){
    ## 1. This is only faster for n > 200
    ## 2. Still has bugs! Does not provide correct result.
    ai <- function(sx,i,lx,rx,ly,ry,n){
        d <- ifelse(i <= n/2,
                    n - 2*i + 1,
                    2*i - n - 1)
        if(d < 0) stop()
        length <- rx - lx +1
        even  <- 1 - (length %% 2)
        half <- floor((length-1)/2)
        minx <- floor(d/2) 
        maxx <- floor(d/2) + i-1
        imx <- half + lx 
        imy <- half + ly
        my <- ifelse(i <= n/2,
                     sx[imy + i] - sx[i],
                     sx[i] - sx[i - imy])
        if(imx <= minx){
            mx <- -Inf
            ry = imy
            lx = imx + even
        } else if(imx > maxx){
            mx <- Inf
            rx = imx 
            ly = imy + even
        } else {
            mx <- ifelse(n <= n/2,
                         sx[i] - sx[i - imx + minx],
                         sx[i + imx - minx] - sx[i])
            if(mx >= my){
                rx = imx 
                ly = imy + even
            } else {
                ry = imy
                lx = imx + even
            }
        }
        if(lx > maxx){
            ans <- ifelse(i <= n/2,
                          sx[lx+i]-sx[i],
                          sx[i] - sx[i - ly])
            return(ans)
        }
        if(lx >= rx || ly >= ry){
            ans <- ifelse(i <= n/2,
                          min(sx[i] - sx[i - lx + minx],
                              sx[ly+i]-sx[i]),
                          min(sx[i] - sx[i - ly], 
                              sx[i+ly-minx]-sx[i]))
            return(ans)
        }
        ai(sx,i,lx,rx,ly,ry,n)
    }
    sx <- sort(x)
    n <- length(x)
    a <- numeric(n)               ## 'a' was not initialised in the original draft
    im <- floor(n/2)              ## 'im' was undefined; floor(n/2) assumed here
    a[1] <- sx[im+1] - sx[1]
    for(i in 2:(n-1))             ## the original draft looped over 2:10 only
        a[i] <- ai(sx,i,lx=1,rx=n-i-1,ly=1,ry=n-i-1,n=n)
    return(sort(a)[floor((n+1)/2)+1])
}

library(microbenchmark)
microbenchmark(rnorm(300) %>% sn(),
               rnorm(300) %>% Sn_fast())

## Check overall_median against the low median of the combined sample for all
## sample size combinations i <= j
for(i in 1:100){
    mclapply2(i:100,function(j){
        x <- sort(rnorm(i))
        y <- sort(rnorm(j))
        dev <- abs(overall_median(x,y) - sort(c(x,y))[floor((length(c(x,y))+1)/2)])
        if(!isTRUE(all.equal(dev,0)))
            cat('gotcha')
    },mc.progress=F)
}

Given the speed of these implementations it is feasible to compute $S_n$ and $Q_n$ even for large sample sizes. For example, in our simulations we compute estimates for $10^4$ Monte-Carlo samples with sample sizes up to 100 (as in the original publication) in a few seconds. On a decent multicore computer, even $10^6$ runs finish in a few minutes.
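
To give an impression of the run times involved, here is a minimal sketch of such a Monte-Carlo loop using parallel::mclapply (hypothetical sample size and number of replicates; the simulations below use the replicate2 helper instead):

## 10^4 Monte-Carlo samples of size 100, Q_n computed for each,
## parallelised over the cores set via options(mc.cores) above.
system.time(
    qns <- parallel::mclapply(1:1e4, function(b) qn(rnorm(100)))
)
mean(unlist(qns))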

Estimation from pooled samples

In the original article only one-sample versions of $S_n$ and $Q_n$ are proposed; here we extend the estimates to the case where the scale is to be estimated from two samples with equal scale but different locations.

These extensions are rather heuristic and based on informal comparisons with versions that aggregate the per-sample estimates using a weighted sum (see the sketch below). Before publication this part definitely requires more research (literature, comparison of operating characteristics).
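
For reference, a minimal sketch of the weighted-sum alternative mentioned above, which simply averages the per-sample estimates with weights proportional to the sample sizes (the name snp_weighted is introduced here for illustration only and is not part of the package):

## Alternative two-sample extension of S_n: average the per-sample estimates
## (weighted by sample size) instead of pooling the pairwise differences.
snp_weighted <- function(x, y){
    n1 <- length(x); n2 <- length(y)
    (n1 * sn(x) + n2 * sn(y)) / (n1 + n2)
}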

Finite sample correction factors

Even though asymptotic bias-correction factors have been derived for all measures under normally distributed data, these only ensure consistency. In finite samples all measures show considerable bias. We therefore perform a simulation study to estimate finite sample correction factors from simulated data. For larger sample sizes we model the correction factor as a function of sample size of the form $n/(n-c)$, where $c$ may depend on whether $n$ is even or odd.

@rousseeuw1992computation perform a similar simulation study, however using only $2\times 10^4$ Monte Carlo samples and only for the one-sample versions of the scale estimates.

As it turns out, the derived correction factors work reasonably well, even though we have not tested them for many different values of the pooled variance or the mean difference. There is also no guarantee that these estimates work well for other distributions; this could be the subject of further research.

## Pooled two-sample Q_n: pool the within-sample pairwise absolute differences
## of both samples and take the corresponding order statistic
qnp <- function(x,y,factor=2.2219){
    n1 <- length(x)
    n2 <- length(y)
    if(n1 != n2) stop('Unequal sample sizes not yet implemented')
    xm <- abs(matrix(x,n1,n1) - matrix(x,n1,n1,byrow=T))
    ym <- abs(matrix(y,n2,n2) - matrix(y,n2,n2,byrow=T))
    factor*sort(c(xm[upper.tri(xm)],ym[upper.tri(ym)]))[choose(floor(n1/2)+1,2)+choose(floor(n2/2)+1,2)]
}

## Pooled two-sample S_n: pool the row-wise high medians from both samples
snp <- function(x,y,factor=1.1926){
    n1 <- length(x)
    n2 <- length(y)
    if(n1 != n2) stop('Unequal sample sizes not yet implemented')
    xm <- matrixStats::rowOrderStats(abs(matrix(x,n1,n1) - matrix(x,n1,n1,byrow=T)),which=floor(n1/2)+1)
    ym <- matrixStats::rowOrderStats(abs(matrix(y,n2,n2) - matrix(y,n2,n2,byrow=T)),which=floor(n2/2)+1)
    factor*sort(c(xm,ym))[n1]
}

## Pooled MAD and IQR: centre each sample at its median, then apply the
## one-sample estimator to the combined centred data
madp <- function(x,y){
    mad(c(x-median(x),y-median(y)))
}

iqrp <- function(x,y){
    IQR(c(x-median(x),y-median(y)))/(2*qnorm(3/4))
}
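
A quick illustration of the uncorrected pooled estimators on two samples with common scale (sd = 2) but different locations; up to finite-sample bias, all four estimates should be close to 2:

## Example: equal scale, different location
x <- rnorm(20, mean = 0, sd = 2)
y <- rnorm(20, mean = 1, sd = 2)
c(qnp = qnp(x, y), snp = snp(x, y), mad = madp(x, y), iqr = iqrp(x, y))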
set.seed(5381)


est <- list()
sd <- list()
gc()
B <- 10^5

## Mean of the squared pooled estimates (variance scale), for each sample size
squares <- list()

for(i in c(2:21,50,51,100,101)){
    squares[[i]] <- rowMeans(
        replicate2(B,{
            x <- rnorm(i)
            y <- rnorm(i)+1
            c(qnp=qnp(x,y)^2,
              snp=snp(x,y)^2)}))
}
save(squares,file=vfile('square'))



## Mean of the pooled estimates themselves (SD scale), for each sample size
root <- list()
for(i in c(2:21,50,51,100,101)){
    root[[i]] <- rowMeans(replicate2(B,{
            x <- rnorm(i)
            y <- rnorm(i)+1
            c(qnp=qnp(x,y),
              snp=snp(x,y),
              mad = madp(x,y),
              iqr = iqrp(x,y))}))
}


save(root,squares,file=vfile('uncorrected'))

do.call(cbind,root)
do.call(cbind,squares)

## multiplicative corrections: sd.fac makes the estimate unbiased for the SD,
## var.fac makes its square unbiased for the variance (under normal data)
sd.fac <- 1/do.call(cbind,root)
var.fac <- sqrt(1/do.call(cbind,squares))


n <- c(2:21,50,51,100,101)


fac <- function(n,c){
    n/(n-c)
}

rms <- function(c,fs,ns){
    sqrt(sum((fs - fac(ns,c))^2))
}

odd <- function(ns,fs=ns){
    fs[(ns %% 2) == 1]
}
even <- function(ns,fs=ns){
    fs[(ns %% 2) == 0]
}

whole <- function(ns,fs=ns){
    fs
}


## RMS distance between the simulated correction factors and n/(n-c), using only
## sample sizes above 10 (n[-(1:9)]), separately for odd/even/all n and for the
## SD-scale (sd=TRUE) or variance-scale (sd=FALSE) factors
rms_est <- function(c,what,odds=T,sd=T){
    data <- switch(sd+1,var.fac,sd.fac)
    even_odd <- switch(odds+1,even,odd,whole)
    rms(c,even_odd(n[-(1:9)],data[what,-(1:9)]),even_odd(n[-(1:9)]))
}

## Fit the constant c in n/(n-c) for each estimate, separately for odd and even n
## and for the SD- and variance-scale correction factors
optim(-1,rms_est,what='qn',
      method='Brent',lower=-10,upper=10)$par
optim(-1,rms_est,what='qn',odds=F,
      method='Brent',lower=-10,upper=10)$par

optim(-1,rms_est,what='qn',sd=F,
      method='Brent',lower=-10,upper=10)$par
optim(-1,rms_est,what='qn',odds=F,sd=F,
      method='Brent',lower=-10,upper=10)$par

optim(-1,rms_est,what='qnp',
      method='Brent',lower=-10,upper=10)$par
optim(-1,rms_est,what='qnp',odds=F,
      method='Brent',lower=-10,upper=10)$par

optim(-1,rms_est,what='qnp',sd=F,
      method='Brent',lower=-10,upper=10)$par
optim(-1,rms_est,what='qnp',odds=F,sd=F,
      method='Brent',lower=-10,upper=10)$par



optim(-1,rms_est,what='sn',
      method='Brent',lower=-10,upper=10)$par
optim(-1,rms_est,what='sn',odds=F,
      method='Brent',lower=-10,upper=10)$par

optim(-1,rms_est,what='sn',sd=F,
      method='Brent',lower=-10,upper=10)$par
optim(-1,rms_est,what='sn',odds=F,sd=F,
      method='Brent',lower=-10,upper=10)$par

optim(-1,rms_est,what='snp',
      method='Brent',lower=-10,upper=10)$par
optim(-1,rms_est,what='snp',odds=F,
      method='Brent',lower=-10,upper=10)$par

optim(-1,rms_est,what='snp',sd=F,
      method='Brent',lower=-10,upper=10)$par
optim(-1,rms_est,what='snp',odds=F,sd=F,
      method='Brent',lower=-10,upper=10)$par


optim(-1,rms_est,what='iqr',odds=2,sd=F,
      method='Brent',lower=-10,upper=10)$par
optim(-1,rms_est,what='iqr',odds=T,sd=F,
      method='Brent',lower=-10,upper=10)$par
optim(-1,rms_est,what='iqr',odds=F,sd=F,
      method='Brent',lower=-10,upper=10)$par
optim(-1,rms_est,what='mad',odds=2,sd=F,
      method='Brent',lower=-10,upper=10)$par





## (qn_even, qnp_odd and qnp_even are not defined in this document)
## optim(-1,qn_even,method='Brent',lower=-10,upper=10)
## optim(-1,qnp_odd,method='Brent',lower=-10,upper=10)
## optim(-1,qnp_even,method='Brent',lower=-10,upper=10)






plot(i <- seq(-2,2,.001),sapply(i,function(j) rms(j,odd(n,sd.fac[1,]),odd(n))),type='l')

save(facs,effs,file=vfile('fin_sample_const'))
## Estimated finite sample efficiencies for qn and sn

load('fin_sample_const_node6_160609.Rd')

efficiencies <- effs[5,]/t(effs[c(1,3,6,7),])
colnames(efficiencies) <- c(expression(Q[n]),expression(S[n]),'MAD','IQR')
pander(efficiencies)
biases <- t(facs[c(1,3,5,6,7),]) - 1
colnames(biases) <- c(expression(Q[n]),expression(S[n]),'VAR','MAD','IQR')
pander(round(biases,3))
## It seems that sn is more efficient for small sample sizes!

## 



sfac <- facs[4,]
qfac <- facs[2,]

plot(2:15,(1+qfac)/qfac)
plot(2:15,(1+sfac)/sfac)

n1 <- 2:15
cn <- ifelse(n1%%2<1,
             -1      - 0.276 * n1,
             -1.894  - 0.639 * n1)


load('facs_mm_node6_160608.Rd')  #facs <- do.call(cbind,est)
sf <- sqrt(facs[2,])

sfo <- sf[1+2*(1:7)]
o <- 1+2*(1:7)
sfe <- sf[2*(1:7)]
e <- 2*(1:7)



## lm(sfaco ~ 0 +    (incomplete model call, left unfinished)

## model sf/(1-sf) as a linear function of n, separately for odd and even sample sizes
summary(mo <- lm(I(sfo/(1-sfo))~o))
summary(me <- lm(I(sfe/(1-sfe))~e))
plot(o,sfo/(1-sfo))
abline(mo)
plot(e,sfe/(1-sfe))
abline(me)

save(facs,file=vfile('facs_mm'))
## Pooled Q_n with finite-sample correction: tabulated factors cns for n1 = 2..11,
## fitted n/(n + c) form beyond
qnp <- function(x,y,factor=2.2219){
    n1 <- length(x)
    n2 <- length(y)
    cns <- c(0.249,0.699,0.451,0.771,0.577,0.823,0.657,0.856,0.712,0.879)
    cn <- ifelse(n1>11,n1/(n1 + (n1%%2) * 1.5157 + (1-n1%%2) * 3.8839),cns[n1-1])
    if(n1 != n2) stop('Unequal sample sizes not yet implemented')
    xm <- abs(matrix(x,n1,n1) - matrix(x,n1,n1,byrow=T))
    ym <- abs(matrix(y,n2,n2) - matrix(y,n2,n2,byrow=T))
    cn*factor*sort(c(xm[upper.tri(xm)],ym[upper.tri(ym)]))[choose(floor(n1/2)+1,2)+choose(floor(n2/2)+1,2)]
}


## Pooled S_n with finite-sample correction (tabulated for n1 <= 11, fitted beyond)
snp <- function(x,y,factor=1.1926){
    n1 <- length(x)
    n2 <- length(y)
    cns <- c(0.982,1.302,0.846,1.137,0.877,1.078,0.899,1.048,0.914,1.032)
    cn <- ifelse(n1>11,n1/(n1 + (1-n1%%2) * 1.0327 - (n1%%2) * 0.2468),cns[n1-1])
    if(n1 != n2) stop('Unequal sample sizes not yet implemented')
    xm <- matrixStats::rowOrderStats(abs(matrix(x,n1,n1) - matrix(x,n1,n1,byrow=T)),which=floor(n1/2)+1)
    ym <- matrixStats::rowOrderStats(abs(matrix(y,n2,n2) - matrix(y,n2,n2,byrow=T)),which=floor(n2/2)+1)
    cn*factor*sort(c(xm,ym))[n1]
}

madp <- function(x,y){
    n1 <- length(x)
    cns <- c(1.054,1.429,1.239,1.178,1.167,1.111,1.111,1.080,1.081,1.063)
    cn <- ifelse(n1>11,n1/(n1 - 0.659),cns[n1-1])
    cn*mad(c(x-median(x),y-median(y)))
}

iqrp <- function(x,y){
    n1 <- length(x)
    cns <- c(1.186,1.241,1.197,1.173,1.159,1.136,1.129,1.112,1.108,1.097)
    cn <- ifelse(n1>11,n1/(n1 - 1.046),cns[n1-1])
    cn*IQR(c(x-median(x),y-median(y)))/(2*qnorm(3/4))
}

Finite-sample bias correction does not work very well for the interquartile range. One could investigate this further, but we leave it at that.

B <- 1e6
confirm <- list()
ns <- c(2:21,50,51,100,101)
## Confirm the corrected estimators: bias and sampling SD of the variance-scale
## estimates for sd = 2 (true variance 4) and shift delta = 1
for(i in ns){
    confirm[[i]] <- list()
    out <- replicate2(B,{
         x <- rnorm(i,sd=2)
         y <- rnorm(i,sd=2)+1
         c(qnp=qnp(x,y)^2,
           snp=snp(x,y)^2,
           sd = adaperm::pooled_variance(c(x,y),rep(0:1,each=i)),
           mad = madp(x,y)^2,
           iqr = iqrp(x,y)^2)})
     confirm[[i]]$mean <- rowMeans(out)
     confirm[[i]]$rms <- rowSds(out)
}

rowMeans(bias <- do.call('cbind',lapply(confirm,`[[`,'mean')) - 4)
round(bias,2)
bias <- t(bias)
round(rms <- do.call('cbind',lapply(confirm,`[[`,'rms')),2)
## sampling SD relative to that of the pooled variance estimate
rrms <- (t(rms)/rms[3,])
colnames(rrms) <- colnames(bias)
library(ggplot2)
reshape2::melt(rrms,varnames=c('SN','Est')) %>% ggplot(aes(ns[SN],value,colour=Est))+geom_line()+scale_x_log10()

reshape2::melt(bias,varnames=c('SN','Est')) %>%
    subset(Est!='iqr') %>%
    ggplot(aes(ns[SN],value,colour=Est))+geom_line()+scale_x_log10()



rowMeans(replicate2(1000,{
    x <- rnorm(5)
    y <- rnorm(5)
    c(robust_pooled_variance(x,y,type='sn'),
      adaperm::pooled_variance(c(x,y),rep(0:1,each=5)))}))

Sample size reassessment using robust scale measures

## Simulate one trial under the given scenario, apply each sample size
## reassessment rule in `ssrs`, and return the reassessed sample sizes and test decisions
run_scenario <- function(scenario,ssrs){
    for(i in names(scenario)) assign(i,scenario[[i]])
    xdist = function(N) {
        switch(distribution,
               'normal'=rnorm(N,m=0,sd=parameter),
               'log-gamma'=log(rgamma(N,1.2))*parameter,
               't4'=rt(N,4)*parameter,
               'cont'=rnorm_cont(N,m=0,sd=parameter,cshift=0,csd=3*parameter),
               'count'=NULL)
    }
    ydist = function(N) {
        switch(distribution,
               'normal'=rnorm(N,m=delta,sd=parameter),
               'log-gamma'=log(rgamma(N,1.2))*parameter+delta,
               't4'=rt(N,4)*parameter+delta,
               'cont'=rnorm_cont(N,m=delta,sd=parameter,cshift=0,csd=3*parameter),
               'count'=NULL)
    }
    nf <- power.w.test(p1=delta2res(delta0,sigma0,T),propn=1/2,power=0.8,silent=T)
    x1 <- xdist(n1)
    y1 <- ydist(n1)
    x2 <- xdist(n1)
    y2 <- ydist(n1)
    ## delta0s <- c('normal'=delta0,
    ##            'log-gamma'=delta0,
    ##            't4'=delta0,
    ##            'cont'=delta0)[distribution]
    ## nsd <- try(max(n,cond_power_rule_t_ts(x1,y1,delta=1,target=.84,rob_var=F,maxN=6*n1)))
    ns <- c(sapply(ssrs,do.call,args=list(x=x1,y=y1,z=c(x2,y2),delta0=scenario$delta0,sigma0=scenario$sigma0)),fixed=nf)
    ns <- pmax(2*n1,ns)
    if(any(sapply(ns,class)=='try-error')) browser()
    nE <- max(ns)
    n_combs <- choose(2*n1,n1)*choose(2*(n-n1),(n-n1))
    exact  <- n_combs <= 10^5
    x <- c(x1,x2,xdist(nE-2*n1))
    y <- c(y1,y2,ydist(nE-2*n1))
    ## perform fixed sample test
    pf <- wilcox.test(y[1:nf],x[1:nf],alternative='greater',correct=F,exact=T)$p.value < 0.025
    ## perform adaptive tests
    Rs <- c(lapply(head(ns,-1),function(ne) adaptive_invnorm_wilcoxtest_2s(x[1:ne],y[1:ne],n1=n1,n=n,ne=ne)),pf)
    names(ns) <- paste0('n2_',c(names(ssrs),'fixed'))
    names(Rs) <- paste0('power_',c(names(ssrs),'fixed'))
    ans <- c(exact,nE,ns,Rs)
    return(ans)
}


## Run B replications of a scenario and summarise average/maximum sample sizes and power
simulate_scenario <- function(ssrs,scenario,B){
    ans <- mclapply2(1:B,function(i) run_scenario(scenario,ssrs))
    ans <- data.table::rbindlist(ans)
    dots <- c(B=B,
              paste0('mean(',names(ans)[grepl('n2',names(ans))],')'),
              paste0('max(',names(ans)[grepl('n2',names(ans))],')'),
              ##paste0(c('mean(','stats::quantile(','stats::quantile('),rep(names(ans)[grepl('n2',names(ans))],each=3),c(')',',.175)',',.825)')),
              paste0('mean(',names(ans)[grepl('power_',names(ans))],')'))
    names(dots)[grepl('mean\\(n2',dots)] <- gsub('n2_','',paste0('ASN_',names(ans)[grepl('n2',names(ans))]))
    names(dots)[grepl('.175',dots)] <- gsub('n2_','',paste0('LCL_',names(ans)[grepl('n2',names(ans))]))
    names(dots)[grepl('.825',dots)] <- gsub('n2_','',paste0('UCL_',names(ans)[grepl('n2',names(ans))]))
    names(dots)[grepl('max\\(n2',dots)] <- gsub('n2_','',paste0('MAX_',names(ans)[grepl('n2',names(ans))]))
    names(dots)[grepl('power',dots)] <- gsub('power_','',paste0('Power_',names(ans)[grepl('power',names(ans))]))
    summarise_(ans,.dots=dots)

}

In a first step we evaluate how well sample size reassessment based on the different scale measures controls the Type II error rate for normally distributed data.
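
For orientation, here is a minimal sketch of what such a reassessment rule does: choose the per-group sample size for which the approximate power of a z-test at the assumed effect delta0 reaches the target, with the scale parameter replaced by an interim estimate. The helper ssr_z below is introduced here for illustration only; the simulations use the cond_power_rule, predictive_power_rule, approximate_power_rule and combination_power_rule functions wrapped in the rss* rules defined next.

## Illustrative z-formula reassessment rule (not one of the rules used below):
## smallest per-group n such that the approximate power at delta0 reaches `target`,
## with sigma replaced by an interim scale estimate (e.g. the pooled Q_n).
ssr_z <- function(x1, y1, delta0, target = 0.8, alpha = 0.025,
                  scale_est = qnp, maxN = 6 * length(x1)){
    sigma_hat <- scale_est(x1, y1)
    n <- ceiling(2 * ((qnorm(1 - alpha) + qnorm(target)) * sigma_hat / delta0)^2)
    min(max(n, length(x1)), maxN)
}
## e.g.: ssr_z(rnorm(10), rnorm(10, 1), delta0 = 1)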

## Candidate sample size reassessment rules (wrappers around the conditional,
## predictive, approximate and combination power rules)
rsssd = function(x,y,z,delta0,sigma0) cond_power_rule_t_ts(x,y,delta=delta0,target=.84,rob_var=F,maxN=6*length(x))
rsssn = function(x,y,z,delta0,sigma0) cond_power_rule_t_ts(x,y,delta=delta0,target=.84,rob_var=T,type='sn',maxN=6*length(x))
rsscond = function(x,y,z,delta0,sigma0) cond_power_rule_w_ts(x,y,z,delta2res(delta=delta0,sigma0,T),target=.85,maxN=6*length(x))
rsspred = function(x,y,z,delta0,sigma0) predictive_power_rule_w_ts(x,y,target=.80,alpha=0.025,maxN=6*length(x))
rssapprox = function(x,y,z,delta0,sigma0) approximate_power_rule_w_ts(x,y,pp=delta2res(delta=delta0,sigma0,T),lambda=(0.15-0.1/length(x))/(4*sigma0^2),maxN=6*length(x))


### what we use
rsscomb1 = function(x,y,z,delta0,sigma0) combination_power_rule_w_ts(x,y,pp=delta2res(delta=delta0,sigma0,T),lambda=450e-4,maxN=6*length(x))
rsscomb2 = function(x,y,z,delta0,sigma0) combination_power_rule_w_ts(x,y,pp=delta2res(delta=delta0,sigma0,T),lambda=150e-4,maxN=6*length(x))
rsscomb3 = function(x,y,z,delta0,sigma0) combination_power_rule_w_ts(x,y,pp=delta2res(delta=delta0,sigma0,T),lambda=570e-5,maxN=6*length(x))
rsscomb4 = function(x,y,z,delta0,sigma0) combination_power_rule_w_ts(x,y,pp=delta2res(delta=delta0,sigma0,T),lambda=560e-5,maxN=6*length(x))
rsscomb5 = function(x,y,z,delta0,sigma0) combination_power_rule_w_ts(x,y,pp=delta2res(delta=delta0,sigma0,T),lambda=540e-5,maxN=6*length(x))



rssopt = function(x,y,z,delta0,sigma0) optimal_power_rule_w_ts(x,y,z,pp=delta2res(delta=delta0,sigma0,T),lambda=(0.1-0.1/length(x))/(4*sigma0^2),maxN=6*length(x))
drule <- function(n,A,lambda=540e-5,delta=1,sigma=2.8) {
    sqrt(2/n)*dnorm(qnorm(A,lower=F) - delta*sqrt(n/2)/sigma)*delta/sigma - 2*lambda
}

lapply(seq(0.001,0.05,.001),function(a) try(uniroot(drule,A=a,lambda=550e-5,lower=100,upper=200)$root))
scenarios <- data.frame(n1 = rep(c(rep(5,3),rep(50,3),rep(15,3),rep(10,3)),2),
                n =  rep(c(rep(10,3),rep(100,3),rep(30,3),rep(20,3)),2),
                distribution = rep(rep(c('normal','t4','cont'),4),2),
                parameter =rep(c(
                    0.80,0.65,0.70,
                    2.6,2.20,2.3,
                    1.5,1.25,1.35,
                    3,2.5,2.7),2),
                delta = c(rep(0,12),rep(1,9),rep(2,3)),
                delta0 = rep(c(rep(1,9),rep(2,3)),2),
                sigma0 = rep(rep(c(.80,2.6,1.5,3),each=3),2),
                lambda = rep(c(rep(450e-4,3),rep(570e-5,3),rep(150e-4,3),rep(180e-4,3)),2),
                propn = rep(c(rep(1/2,9),rep(1/3,3)),2))

## examples
pp <- delta2res(.7,higher_order=T)
cond_power_rule_w_ts(rnorm(5),rnorm(5,.7),c(rnorm(5),rnorm(5,.7)),pp=pp)
approximate_power_rule_w_ts(rnorm(5),rnorm(5,.7),pp=pp)
predictive_power_rule_w_ts(rnorm(5),rnorm(5,.7))
power.z.test(.9,1,power=0.9)
combination_power_rule_w_ts(rnorm(50),rnorm(50,.1),pp=pp,lambda=0,maxN=1000)
run_scenario(scenarios[4,],'rsscomb3')
run_scenario(scenarios[4,],'rsscomb4')



deriv(y~(sqrt((m/r - m) * m * ((m/r - m) + m + 1)/12) * za + ((m/r - m) * m - 1)/2 - (m/r - m) * m * p1)/sqrt((m/r - m) * m * (p1 * (1 - p1) + ((m/r - m) - 1) * (p2 - p1^2) + (m - 1) * (p3 - p1^2))),'m')

## plot(570e-5*    (incomplete plotting call, left unfinished)
## 10 3e-3
## 100 1e-4

x <- rnorm(50)
y <- rnorm(50,1,2.8)
z <- c(rnorm(50),rnorm(50,1,2.8))
delta0=1
sigma0=2.8
##rsspred=rsspred,rssop=rssopt,rssapprox=rssapprox,rsscomb=rsscomb


options(mc.cores=39)
out <- list()
sapply(condpw <- list(rsscomb1=rsscomb3,rsscomb2=rsscomb4,rsscomb3=rsscomb5),do.call,args=list(x=x,y=y,z=z,delta0=1,sigma0=2.8))
for(i in c(4:6,16:18)) out[[i]] <- bind_cols(scenarios[i,],simulate_scenario(scenarios[i,],ssrs=condpw,B=1e3))
sims <- bind_rows(out)
bt88.03.704:::print.tbl_df(sims,width=Inf,round=3)
Average sample numbers (ASN, top) and power (bottom) by scenario and reassessment rule (delta0 = 1, sigma0 equal to the parameter of the corresponding normal scenario, B = 10000 replications per scenario):

| n1 | n   | distribution | parameter | rsssd   | rsssn   | rsscond | rsscomb1 | rsscomb2 | rsscomb3 | fixed |
|---:|----:|:-------------|----------:|--------:|--------:|--------:|---------:|---------:|---------:|------:|
|  5 |  10 | normal       |      0.85 |  14.716 |  14.931 |  13.648 |   13.805 |   13.259 |   12.691 |    14 |
|  5 |  10 | cont         |      0.75 |  17.042 |  15.021 |  13.728 |   13.929 |   13.325 |   12.783 |    14 |
| 50 | 100 | normal       |      2.80 | 138.078 | 139.246 | 143.884 |  131.054 |  126.438 |  122.117 |   131 |
| 50 | 100 | t4           |      2.40 | 197.081 | 136.630 | 146.445 |  132.602 |  127.536 |  122.706 |   131 |
| 15 |  30 | normal       |      1.55 |  43.835 |  44.619 |  44.330 |   40.804 |   39.325 |   37.846 |    41 |
| 15 |  30 | cont         |      1.40 |  58.825 |  45.446 |  44.869 |   41.096 |   39.578 |   37.974 |    41 |

| n1 | n   | distribution | parameter | rsssd | rsssn | rsscond | rsscomb1 | rsscomb2 | rsscomb3 | fixed |
|---:|----:|:-------------|----------:|------:|------:|--------:|---------:|---------:|---------:|------:|
|  5 |  10 | normal       |      0.85 | 0.764 | 0.748 |   0.774 |    0.794 |    0.770 |    0.744 | 0.825 |
|  5 |  10 | cont         |      0.75 | 0.790 | 0.734 |   0.768 |    0.786 |    0.764 |    0.737 | 0.817 |
| 50 | 100 | normal       |      2.80 | 0.808 | 0.809 |   0.870 |    0.834 |    0.816 |    0.799 | 0.800 |
| 50 | 100 | t4           |      2.40 | 0.895 | 0.790 |   0.860 |    0.822 |    0.807 |    0.787 | 0.789 |
| 15 |  30 | normal       |      1.55 | 0.804 | 0.801 |   0.863 |    0.832 |    0.814 |    0.794 | 0.802 |
| 15 |  30 | cont         |      1.40 | 0.866 | 0.789 |   0.845 |    0.813 |    0.791 |    0.768 | 0.785 |
match_ps(10,1,.7,dist=function(n) rt(n,4),10000)-delta2res(1,0.85,T)
match_ps(30,1,1.3,dist=function(n) rt(n,4),10000)-delta2res(1,1.55,T)
match_ps(100,1,2.4,dist=function(n) rt(n,4),10000)-delta2res(1,2.8,T)

match_ps(10,1,.75,dist=function(N) rnorm_cont(N,m=0,sd=1,cshift=0,csd=3),10000)-delta2res(1,0.85,T)
match_ps(30,1,1.4,dist=function(n) rnorm_cont(n,m=0,sd=1,cshift=0,csd=3*1),10000)-delta2res(1,1.55,T)
match_ps(10,1,2.5,dist=function(n) rnorm_cont(n,m=0,sd=1,cshift=0,csd=3*1),10000)-delta2res(1,2.8,T)






save(sims,file=vfile('power_tabs'))
library(stringr)
reshape2::melt(sims,id.vars=c(1:8))%>%transform(ASN=grepl('ASN',variable),
                                       LOW=grepl('LCL',variable),
                                       HIG=grepl('UCL',variable),
                                       Power=grepl('Power',variable),
                                       method=str_extract(variable,'rsscomb[0-9]|rsscond|rsssd|rsssn|fixed|sd|qn|sn|iqr|mad')) -> df

left_join(subset(df,ASN),subset(df,LOW)[,c('n','method','distribution','value')],by=c('n','method','distribution'))%>%
    plyr::rename(replace=c('value.x'='asn','value.y'='lown')) %>%
    left_join(subset(df,HIG)[,c('n','method','distribution','value')],by=c('n','method','distribution')) %>%
    plyr::rename(replace=c('value'='hign')) %>%
    left_join(subset(df,Power)[,c('n','method','distribution','value')],by=c('n','method','distribution')) %>%
    plyr::rename(replace=c('value'='power')) %>%
    transform(LOW=NULL,HIG=NULL,ASN=NULL,Power=NULL,variable=NULL) -> df



ggplot(df,aes(method,asn-n))+geom_point()+facet_grid(distribution~n)+geom_errorbar(aes(ymax=hign-n,ymin=lown-n))

ggplot(df,aes(method,power))+geom_point()+facet_wrap(distribution~n)+geom_hline(yintercept=.8)

