Nothing
# Calculation to show that for all n=1,2,...,100
# the coverage of the 95% confidence interval for the
# Delta = unconditional difference =
# proportion positive minus proportion negative
# where prop pos + prop neg + prop zero = 1
# Also Delta = E(S), where S=sign(x)
#
# We can write Delta = theta (2*beta-1)
# where theta = Pr[S=1 or S=-1]
# beta = Pr[S=1 | S !=0 ]
# We calculated over the grid
# betas <- seq(0, 1, by = .01)
# thetas <- seq(0, 1, by = .01)
# For each n, and each grid point we calculate
# lowerError = Pr[ L(X) > Delta]
# upperError = Pr[ U(X) < Delta]
# where [L(X),U(X)] is the 95% confidence for X
# Then we find the maximum of the 2 errors over the grid for each n
# We want the maximum of each set of errors to be less than 2.5% for the 95% central CI
#
# Tests show that for all n=1,2,..100 the errors are less than 2.5% (within rounding error)
# The only n values with greater than 2.5% error are n=71 and n=72,
# but the errors are likely due to rounding error in the
# numeric integration. Those errors are 0.02503121 for n=72 (beta=0, theta=0.96) or (beta=1, theta=0.96)
# and 0.025000563 for n=71 (beta=0, theta=0.21) or (beta=1, theta=0.21)
#
# use exacgt2x2 version 1.6.4 or greater
library(exact2x2)
library(ggplot2)
library(grid)
library(gridExtra)
mcnemarSim <- function(n, nmc = 0, plots = TRUE) {
enum <- expand.grid(0:n, 0:n)
enum <- enum[enum$Var1 >= enum$Var2, ]
names(enum) <- c("m", "x")
enum <- enum[order(enum$m), ]
enum$Lower <- numeric(nrow(enum))
enum$Upper <- numeric(nrow(enum))
for (i in 1:nrow(enum)) {
if(nmc == 0){
CI <- mcnemarExactDP(n = n, m = enum$m[i], x = enum$x[i])$conf.int
} else {
CI <- mcnemarExactDP(n = n, m = enum$m[i], x = enum$x[i], nmc = nmc)$conf.int
}
enum$Lower[i] <- CI[1]
enum$Upper[i] <- CI[2]
}
betas <- seq(0, 1, by = .01)
thetas <- seq(0, 1, by = .01)
results <- expand.grid(betas, thetas)
names(results) <- c("Beta", "Theta")
results$LowErrorProb <- numeric(nrow(results))
results$HighErrorProb <- numeric(nrow(results))
count <- 1
for (t in thetas) {
for (b in betas) {
delta <- t * (2 * b - 1)
loopframe <- enum
loopframe$LowError <- ifelse(delta < loopframe$Lower, 1, 0)
loopframe$HighError <- ifelse(delta > loopframe$Upper, 1, 0)
loopframe$Prob <-
dbinom(loopframe$m, n, t) * dbinom(loopframe$x, loopframe$m, b)
results$LowErrorProb[count] <-
sum(loopframe$LowError * loopframe$Prob)
results$HighErrorProb[count] <-
sum(loopframe$HighError * loopframe$Prob)
count <- count + 1
}
}
if(plots){
lowheatmap <-
ggplot(results, aes(x = Beta, y = Theta, color = LowErrorProb, fill = LowErrorProb)) +
geom_tile() +
scale_fill_gradient2(low = "white", high = "black") +
scale_color_gradient2(low = "white", high = "black") +
ggtitle("Lower Error Probabilities")
highheatmap <-
ggplot(results, aes(x = Beta, y = Theta, color = HighErrorProb, fill = HighErrorProb)) +
geom_tile() +
scale_fill_gradient2(low = "white", high = "black") +
scale_color_gradient2(low = "white", high = "black") +
ggtitle("Upper Error Probabilities")
grid.arrange(lowheatmap, highheatmap, ncol = 2)
}
return(results)
}
# Example: predict how much time the simulation will take
# run several times....
#t0<-proc.time()
#mc <- mcnemarSim(11, plots = TRUE)
#mc <- mcnemarSim(21, plots = TRUE)
# n=26 gives graph in paper
#mc <- mcnemarSim(26, plots = TRUE)
#dev.print(pdf,file="ConfIntErrors26.pdf")
#mc <- mcnemarSim(40, plots = TRUE)
#mc <- mcnemarSim(60, plots = TRUE)
#mc <- mcnemarSim(100, plots = TRUE)
#t1<-proc.time()
#sec<-(t1-t0)[1]
# Results of the times
#n<-c(11,21,26,40,60,100)
#sec<-c(9.02,32.03,49.36,120.86,278.6,789.12)
#n2<- n^2
#lout<-lm(sec~n+n2)
#coef(lout)
# (Intercept) n n2
# 2.25685660 -0.30005320 0.08169258
#N<-1:100
#sec100<-predict(lout,newdata=data.frame(n=N,n2=N^2))
#plot(1:100,sec100,type="l",lwd=2,log="")
#points(n,sec,cex=2,pch=16)
# Predict all n=1:100, in hours
# sum(sec100)/(60*60)
## 7.31975 hours
# Run below to test values of n from 1 to 100
# for all valid values of m and x.
mcList <- vector("list", length = 100)
for(i in 1:100){
print(i)
mcList[[i]] <- mcnemarSim(i, plots = FALSE)
}
loerrorVec <- hierrorVec<-numeric(100)
for(i in 1:100){
loerrorVec[i] <- max(mcList[[i]]$LowErrorProb)
hierrorVec[i] <- max(mcList[[i]]$HighErrorProb)
}
max(loerrorVec)
max(hierrorVec)
names(loerrorVec)<-names(hierrorVec)<-1:100
loerrorVec
hierrorVec
plot(1:100,loerrorVec,type="l",ylim=c(0.02,0.0255))
m72<-mcList[[72]]
m72[m72$LowErrorProb>0.025 | m72$HighErrorProb>0.025,]
m71<-mcList[[71]]
m71[m71$LowErrorProb>0.025 | m71$HighErrorProb>0.025,]
#all(errorVec < 0.025) # Hopefully evaluates to TRUE
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.