Nothing
library("multcomp")
library("sandwich")
#######################################################
### Source code for simulations presented in
### A Robust Procedure for Comparing Multiple Means
### under Heterscedasticity in Unbalanced Designs
### by E. Herberich, J. Sikorski, and T. Hothorn
### PLoS ONE 2010
#######################################################
###############################################
# Simulation setup for normally distributed data
###############################################
###################################################################################
### data generating process of group membership for n observations and four groups
### f: parameter controling the relation of group sizes
###################################################################################
dgpX <- function(n, f){
n1 <- n + f
n2 <- n + f * 2 * n
n3 <- n + f * 3 * n
n4 <- n + f * 4 * n
x1 <- as.factor(sample(rep(1:4, c(n1, n2, n3, n4)), replace = FALSE))
X <- data.frame(X1 = x1)
return(X)
}
#################################################################################
### data generating process of normally distributed data in four groups
### f: parameter controling the relation of group sizes
### sigma: standard deviation of the errors
#################################################################################
dgp_aov <- function(n, f, sigma, beta){
X <- dgpX(n, f)
Xm <- model.matrix(~ - 1 + X1, data=X)
stopifnot(ncol(Xm) == length(beta))
lp <- Xm %*% beta
epsilon <- rnorm(length(X$X1), 0, 1)
sigma_i <- sigma[as.numeric(X$X1)]
Y <- lp + epsilon * sigma_i
data.frame(Y=Y, X)
}
#################################################################################
### Fit of an ANOVA model
#################################################################################
fit_aov <- function(data)
aov(Y ~ X1, data=data)
##########################################################################################################################
### Simulations in the unbalanced ANOVA model
### Comparison of OLS and HC3 covariance estimation for the global test
### Comparison of Tukey-Kramer test and max-t test with HC3 covariance estimation for all pairwise comparisons of groups
### r: variing group effect for calculation of power
##########################################################################################################################
sim <- function(nsim, dgp, fit, beta0, r, n, f, sigma, K){
P <- matrix(0, ncol=2, nrow=nsim, byrow=TRUE)
P_HC <- matrix(0, ncol=2, nrow=nsim, byrow=TRUE)
Pow_Global <- numeric(nsim)
Pow_Sim <- matrix(0, ncol=(length(r)), nrow=nsim, byrow=TRUE)
Pow_Global_HC <- numeric(nsim)
Pow_Sim_HC <- matrix(0, ncol=(length(r)), nrow=nsim, byrow=TRUE)
for (i in 1:nsim){
print(i)
x <- dgp(n, f, sigma, beta0)
mod <- fit(x)
mod_F <- aov(Y ~ -1 + X1, data = x)
glht0_F <- glht(mod_F, linfct = K, rhs = rep(2,4))
glht1_F <- glht(mod_F, linfct = K, rhs = c((r[1] + 2),2,2,2))
glht0 <- glht(mod, linfct = mcp(X1="Tukey"))
glht1 <- glht(mod, linfct = mcp(X1="Tukey"), rhs = r)
P[i,1] <- summary(glht0_F, test=Ftest())$test$pvalue
P[i,2] <- min(TukeyHSD(mod)$X1[,4])
Pow_Global[i] <- summary(glht1_F, test=Ftest())$test$pvalue
Pow_Sim[i,] <- as.numeric(r > TukeyHSD(mod)$X1[,3] | r < TukeyHSD(mod)$X1[,2])
glht0_HC <- glht(mod, linfct = mcp(X1="Tukey"), vcov = vcovHC)
glht1_HC <- glht(mod, linfct = mcp(X1="Tukey"), rhs = r, vcov = vcovHC)
glht0_F_HC <- glht(mod_F, linfct = K, rhs = rep(2,4), vcov=vcovHC)
glht1_F_HC <- glht(mod_F, linfct = K, rhs = c((r[1] + 2),2,2,2), vcov=vcovHC)
P_HC[i,1] <- summary(glht0_F_HC, test=Ftest())$test$pvalue
P_HC[i,2] <- min(summary(glht0_HC)$test$pvalues)
Pow_Global_HC[i] <- summary(glht1_F_HC, test=Ftest())$test$pvalue
Pow_Sim_HC[i,] <- summary(glht1_HC)$test$pvalue
}
Size_Global <- mean(P[,1] <= 0.05)
FWER <- mean(P[,2] <= 0.05)
Power_Global <- mean(Pow_Global <= 0.05)
Power_Sim <- colMeans(Pow_Sim)
Size_Global_HC <- mean(P_HC[,1] <= 0.05)
FWER_HC <- mean(P_HC[,2] <= 0.05)
Power_Global_HC <- mean(Pow_Global_HC <= 0.05)
Power_Sim_HC <- colMeans(Pow_Sim_HC <= 0.05)
ret <- c(Size_Global, FWER, Power_Global, Power_Sim,
Size_Global_HC, FWER_HC, Power_Global_HC, Power_Sim_HC)
return(ret)
}
beta0 <- c(2,2,2,2)
b <- c(seq(-2,2,by=0.1))
n <- c(10, 20, 30, 40)
design <- expand.grid(b, n)
names(design) <- c("b", "N")
nsim <- 1000
#############################################################
### A: Normally distributed data, homogeneneous variances ###
#############################################################
results <- matrix(0, nrow=nrow(design), ncol=20, byrow=T)
results[,1] <- design[,1]
results[,2] <- design[,2]
set.seed(11803)
for (j in 1:nrow(design)) {
print(j)
r <- c(design$b[j],design$b[j],design$b[j],0,0,0)
n <- design$N[j]
f <- 0.2
K <- diag(1,4)
sigma <- c(2,2,2,2)
results[j,3:20] <- sim(nsim, dgp_aov, fit_aov, beta0, r, n, f, sigma, K)
save(results, file = "AOV_hom.Rda")
}
colnames(results) <- c("r", "n", "Size_Global", "FWER_Tukey", "Power_Global",
"H1","H1","H1","H0","H0","H0","Size_Global_HC", "FWER_HC",
"Power_Global_HC", "H1_HC","H1_HC","H1_HC","H0_HC","H0_HC","H0_HC")
results
save(results, file = "AOV_hom.Rda")
####################################################################################################################
### B: Normally distributed data, heterogeneous variances, smaller variances in groups with smaller sample sizes ###
####################################################################################################################
results <- matrix(0, nrow=nrow(design), ncol=20, byrow=T)
results[,1] <- design[,1]
results[,2] <- design[,2]
set.seed(21803)
for (j in 1:nrow(design)) {
print(j)
r <- c(design$b[j],design$b[j],design$b[j],0,0,0)
n <- design$N[j]
f <- 0.2
K <- diag(1,4)
sigma <- c(3,5,7,9)
results[j,3:20] <- sim(nsim, dgp_aov, fit_aov, beta0, r, n, f, sigma, K)
save(results, file = "AOV_het1.Rda")
}
colnames(results) <- c("r", "n", "Size_Global", "FWER_Tukey", "Power_Global",
"H1","H1","H1","H0","H0","H0","Size_Global_HC", "FWER_HC",
"Power_Global_HC", "H1_HC","H1_HC","H1_HC","H0_HC","H0_HC","H0_HC")
save(results, file = "AOV_het1.Rda")
###################################################################################################################
### C: Normally distributed data, heterogeneous variances, smaller variances in groups with larger sample sizes ###
###################################################################################################################
results <- matrix(0, nrow=nrow(design), ncol=20, byrow=T)
results[,1] <- design[,1]
results[,2] <- design[,2]
set.seed(31803)
for (j in 1:nrow(design)) {
print(j)
r <- c(design$b[j],design$b[j],design$b[j],0,0,0)
n <- design$N[j]
f <- 0.2
K <- diag(1,4)
sigma <- c(9,7,5,3)
results[j,3:20] <- sim(nsim, dgp_aov, fit_aov, beta0, r, n, f, sigma, K)
save(results, file = "AOV_het2.Rda")
}
colnames(results) <- c("r", "n", "Size_Global", "FWER_Tukey", "Power_Global",
"H1","H1","H1","H0","H0","H0","Size_Global_HC", "FWER_HC",
"Power_Global_HC", "H1_HC","H1_HC","H1_HC","H0_HC","H0_HC","H0_HC")
save(results, file = "AOV_het2.Rda")
###############################################
### Simulation setup for beta distributed data
###############################################
###################################################################################
### data generating process of group membership for n observations and four groups
### f: parameter controling the relation of group sizes
###################################################################################
dgpX <- function(n, f){
n1 <- n + f * 1 * n
n2 <- n + f * 2 * n
n3 <- n + f * 3 * n
n4 <- n + f * 4 * n
x1 <- as.factor(rep(1:4, c(n1, n2, n3, n4), replace = FALSE))
X <- data.frame(X1 = x1)
return(X)
}
##########################################################################################################################
### Simulations in the unbalanced ANOVA model
### Comparison of OLS and HC3 covariance estimation for the global test
### Comparison of Tukey-Kramer test and max-t test with HC3 covariance estimation for all pairwise comparisons of groups
##########################################################################################################################
sim <- function(nsim, dgp, fit, n, f, K){
P <- matrix(0, ncol=2, nrow=nsim, byrow=TRUE)
P_HC <- matrix(0, ncol=2, nrow=nsim, byrow=TRUE)
for (i in 1:nsim){
print(i)
x <- dgp(n, f)
mod <- fit(x)
mod_F <- aov(Y ~ -1 + X1, data = x)
glht0_F <- glht(mod_F, linfct = K, rhs = rep(1,4))
glht0 <- glht(mod, linfct = mcp(X1="Tukey"))
P[i,1] <- summary(glht0_F, test=Ftest())$test$pvalue
P[i,2] <- min(TukeyHSD(mod)$X1[,4]) # Tukey HSD minimaler adjustierter p-Wert
glht0_HC <- glht(mod, linfct = mcp(X1="Tukey"), vcov = vcovHC)
glht0_F_HC <- glht(mod_F, linfct = K, rhs = rep(1,4), vcov=vcovHC)
P_HC[i,1] <- summary(glht0_F_HC, test=Ftest())$test$pvalue
P_HC[i,2] <- min(summary(glht0_HC)$test$pvalues)
}
Size_Global <- mean(P[,1] <= 0.05)
FWER <- mean(P[,2] <= 0.05)
Size_Global_HC <- mean(P_HC[,1] <= 0.05)
FWER_HC <- mean(P_HC[,2] <= 0.05)
ret <- c(Size_Global, FWER, Size_Global_HC, FWER_HC)
return(ret)
}
################################################################################################################
### D: Beta distributed data, heterogeneous variances, smaller variances in groups with smaller sample sizes ###
################################################################################################################
#################################################################################
### data generating process of beta distributed data in four groups,
### smaller variances in groups with smaller sample sizes
### f: parameter controling the relation of group sizes
### sigma: standard deviation of the errors
#################################################################################
dgp_aov1 <- function(n, f){
X <- dgpX(n, f)
Xm <- model.matrix(~ - 1 + X1, data=X)
n1 <- n + f * 1 * n
n2 <- n + f * 2 * n
n3 <- n + f * 3 * n
n4 <- n + f * 4 * n
lp1 <- rep(1.25,n1) + rbeta(n1,6,2)
lp2 <- rep(5/3,n2) + rbeta(n2,2,4)
lp3 <- rep(1.5,n3) + rbeta(n3,1,1)
lp4 <- rep(1.5,n4) + rbeta(n4,0.5,0.5)
Y <- c(lp1, lp2, lp3, lp4)
data.frame(Y=Y, X)
}
N <- rep(n,rep(41,4))
results <- matrix(0, nrow=length(N), ncol=5, byrow=T)
results[,1] <- N
set.seed(11802)
for (j in 1:nrow(results)) {
print(j)
n <- N[j]
f <- 0.2
K <- diag(1,4)
results[j,2:5] <- sim(nsim, dgp_aov1, fit_aov, n, f, K)
save(results, file = "AOV_het1_beta.Rda")
}
colnames(results) <- c("n", "Size_Global", "FWER_Tukey","Size_Global_HC", "FWER_HC")
save(results, file = "AOV_het1_beta.Rda")
###############################################################################################################
### E: Beta distributed data, heterogeneous variances, smaller variances in groups with larger sample sizes ###
###############################################################################################################
#################################################################################
### data generating process of beta distributed data in four groups,
### smaller variances in groups with larger sample sizes
### f: parameter controling the relation of group sizes
### sigma: standard deviation of the errors
#################################################################################
dgp_aov2 <- function(n, f){
X <- dgpX(n, f)
Xm <- model.matrix(~ - 1 + X1, data=X)
n1 <- n + f
n2 <- n + f * 2 * n
n3 <- n + f * 3 * n
n4 <- n + f * 4 * n
lp1 <- rep(1.5,n1) + rbeta(n1,0.5,0.5)
lp2 <- rep(1.5,n2) + rbeta(n2,1,1)
lp3 <- rep(5/3,n3) + rbeta(n3,2,4)
lp4 <- rep(1.25,n4) + rbeta(n4,6,2)
Y <- c(lp1, lp2, lp3, lp4)
data.frame(Y=Y, X)
}
results <- matrix(0, nrow=length(N), ncol=5, byrow=T)
results[,1] <- N
set.seed(11803)
for (j in 1:length(N)) {
print(j)
n <- N[j]
f <- 0.2
K <- diag(1,4)
results[j,2:5] <- sim(nsim, dgp_aov2, fit_aov, n, f, K)
save(results, file = "AOV_het2_beta.Rda")
}
colnames(results) <- c("n", "Size_Global", "FWER_Tukey","Size_Global_HC", "FWER_HC")
save(results, file = "AOV_het2_beta.Rda")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.