knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
library(tibble) levels <- 3 n_per_level <- 10 F_crit <- qf(0.95, 2, 27) F_crit for(i in 1:1000){ random_data <- tibble(subjects = 1:(levels*n_per_level), IV = as.factor(rep(1:levels, each = n_per_level)), DV = rnorm(levels*n_per_level, 0, 1) ) F_crit <- qf(0.95, 2, 27) F_crit aov.out <- aov(DV ~ IV, data = random_data) simulated_F <- summary(aov.out)[[1]]$`F value`[1] if(simulated_F < F_crit) break } summary(aov.out)
library(ggplot2) ggplot(random_data, aes(x = IV, y = DV))+ geom_bar(stat = "summary", fun = "mean")+ geom_point()
i
library(tibble) levels <- 3 n_per_level <- 10 F_crit <- qf(0.95, 2, 27) F_crit for(i in 1:1000){ random_data <- tibble(subjects = 1:(levels*n_per_level), IV = as.factor(rep(1:levels, each = n_per_level)), DV = rnorm(levels*n_per_level, 0, 1) ) F_crit <- qf(0.95, 2, 27) F_crit aov.out <- aov(DV ~ IV, data = random_data) simulated_F <- summary(aov.out)[[1]]$`F value`[1] if(simulated_F > F_crit) break } summary(aov.out)
i
library(ggplot2) ggplot(random_data, aes(x = IV, y = DV))+ geom_bar(stat = "summary", fun = "mean")+ geom_point()
save_F_values <- length(1000) for(i in 1:1000){ random_data <- tibble(subjects = 1:(levels*n_per_level), IV = as.factor(rep(1:levels, each = n_per_level)), DV = rnorm(levels*n_per_level) / rt(levels*n_per_level, 1, 1) ) aov.out <- aov(DV ~ IV, data = random_data) simulated_F <- summary(aov.out)[[1]]$`F value`[1] save_F_values[i] <- simulated_F } F_comparison <- tibble(type = rep(c("analytic","simulated"), each = 1000), F_value = c(rf(1000, levels - 1, (levels*n_per_level)),save_F_values)) ggplot(F_comparison, aes(x=F_value, color = type))+ geom_freqpoly(bins = 50)
someFs <- rf(1000,3,16) hist(someFs)
pf(4, 3, 16, lower.tail = FALSE)
library(tibble) levels <- 4 n_per_level <- 5 random_data <- tibble(subjects = 1:(levels*n_per_level), IV = as.factor(rep(1:levels, each = n_per_level)), DV = rnorm(levels*n_per_level, 0, 1) ) aov.out <- aov(DV ~ IV, data = random_data) simulated_F <- summary(aov.out)[[1]]$`F value`[1] save_F_values <- length(1000) for(i in 1:1000){ random_data <- tibble(subjects = 1:(levels*n_per_level), IV = as.factor(rep(1:levels, each = n_per_level)), DV = rnorm(levels*n_per_level, 0, 1) ) aov.out <- aov(DV ~ IV, data = random_data) simulated_F <- summary(aov.out)[[1]]$`F value`[1] save_F_values[i] <- simulated_F }
library(ggplot2) F_comparison <- tibble(type = rep(c("analytic","simulated"), each = 1000), F_value = c(rf(1000,3,16),save_F_values)) ggplot(F_comparison, aes(x=F_value))+ geom_histogram()+ facet_wrap(~type)
save_new_F_values <- length(1000) for(i in 1:1000){ random_data <- tibble(subjects = 1:(levels*n_per_level), IV = as.factor(rep(1:levels, each = n_per_level)), DV = rnorm(levels*n_per_level, 50, 25) ) aov.out <- aov(DV ~ IV, data = random_data) simulated_F <- summary(aov.out)[[1]]$`F value`[1] save_new_F_values[i] <- simulated_F } F_comparison <- tibble(type = rep(c("analytic","sim_0_1","sim_50_25"), each = 1000), F_value = c(rf(1000,3,16),save_F_values, save_new_F_values)) ggplot(F_comparison, aes(x=F_value))+ geom_histogram()+ facet_wrap(~type)
hist(sample(c(rnorm(levels*n_per_level/2,0,1),rnorm(levels*n_per_level/2,5,1))))
save_bimodal_F_values <- length(1000) for(i in 1:1000){ random_data <- tibble(subjects = 1:(levels*n_per_level), IV = as.factor(rep(1:levels, each = n_per_level)), DV = sample(c(rnorm(levels*n_per_level/2,0,1), rnorm(levels*n_per_level/2,5,1))) ) aov.out <- aov(DV ~ IV, data = random_data) simulated_F <- summary(aov.out)[[1]]$`F value`[1] save_bimodal_F_values[i] <- simulated_F } F_comparison <- tibble(type = rep(c("analytic","sim_0_1","sim_50_25", "sim_bimodal"), each = 1000), F_value = c(rf(1000,3,16),save_F_values, save_new_F_values,save_bimodal_F_values)) ggplot(F_comparison, aes(x=F_value))+ geom_histogram(bins=100)+ facet_wrap(~type)
qf(.95,3,16)
library(dplyr) Fs <- F_comparison[F_comparison$type=="analytic",]$F_value sorted_Fs <- sort(Fs) sorted_Fs[950] Fs <- F_comparison[F_comparison$type=="sim_0_1",]$F_value sorted_Fs <- sort(Fs) sorted_Fs[950] Fs <- F_comparison[F_comparison$type=="sim_50_25",]$F_value sorted_Fs <- sort(Fs) sorted_Fs[950] Fs <- F_comparison[F_comparison$type=="sim_bimodal",]$F_value sorted_Fs <- sort(Fs) sorted_Fs[950]
levels <- 4 n_per_level <- 5 some_data <- tibble(subjects = 1:(levels*n_per_level), IV = as.factor(rep(1:levels, each = n_per_level)), DV = c(11,12,11,11,12, 10,8,10,9,10, 8,9,10,10,10, 10,8,10,9,10 ) ) aov.out <- aov(DV ~ IV, data = some_data) summary(aov.out)
save_F_values <- length(1000) for(i in 1:1000){ some_data <- tibble(subjects = 1:(levels*n_per_level), IV = as.factor(rep(1:levels, each = n_per_level)), DV = sample(c(11,12,11,11,12, 10,8,10,9,10, 8,9,10,10,10, 10,8,10,9,10 )) ) aov.out <- aov(DV ~ IV, data =some_data) simulated_F <- summary(aov.out)[[1]]$`F value`[1] save_F_values[i] <- simulated_F } F_comparison <- tibble(type = rep(c("analytic","randomization"), each = 1000), F_value = c(rf(1000,3,16),save_F_values)) ggplot(F_comparison, aes(x=F_value))+ geom_histogram()+ facet_wrap(~type)
length(save_F_values[save_F_values>7.407])/1000
levels <- 4 n_per_level <- 5 alternative_data <- tibble(subjects = 1:(levels*n_per_level), IV = as.factor(rep(1:levels, each = n_per_level)), DV = c(rnorm(n_per_level, 125, 25), rnorm(n_per_level, 100, 25), rnorm(n_per_level, 100, 25), rnorm(n_per_level, 100, 25) ) ) save_altF_values <- length(1000) for(i in 1:1000){ alternative_data <- tibble(subjects = 1:(levels*n_per_level), IV = as.factor(rep(1:levels, each = n_per_level)), DV = c(rnorm(n_per_level, 125, 25), rnorm(n_per_level, 100, 25), rnorm(n_per_level, 100, 25), rnorm(n_per_level, 100, 25) ) ) aov.out <- aov(DV ~ IV, data = alternative_data) simulated_F <- summary(aov.out)[[1]]$`F value`[1] save_altF_values[i] <- simulated_F } F_comparison <- tibble(type = rep(c("null","alternative"), each = 1000), F_value = c(rf(1000,3,16),save_altF_values)) ggplot(F_comparison, aes(x=F_value))+ geom_histogram(bins=100)+ facet_wrap(~type, nrow=2)
qf(.95,3,16)
length(save_altF_values[save_altF_values > 3.23])
n_per_level <- 50 # repeat the above many times to compute the F-distribution save_altF_values <- length(1000) for(i in 1:1000){ alternative_data <- tibble(subjects = 1:(levels*n_per_level), IV = as.factor(rep(1:levels, each = n_per_level)), DV = c(rnorm(n_per_level, 125, 25), rnorm(n_per_level, 100, 25), rnorm(n_per_level, 100, 25), rnorm(n_per_level, 100, 25) ) ) aov.out <- aov(DV ~ IV, data = alternative_data) simulated_F <- summary(aov.out)[[1]]$`F value`[1] save_altF_values[i] <- simulated_F } F_comparison <- tibble(type = rep(c("null","alternative"), each = 1000), F_value = c(rf(1000,3,96),save_altF_values)) ggplot(F_comparison, aes(x=F_value))+ geom_histogram(bins=100)+ facet_wrap(~type, nrow=2)
qf(.95,3,200-4)
length(save_altF_values[save_altF_values > 2.65])
n_per_level <- 25 # repeat the above many times to compute the F-distribution save_altF_values <- length(1000) for(i in 1:1000){ alternative_data <- tibble(subjects = 1:(levels*n_per_level), IV = as.factor(rep(1:levels, each = n_per_level)), DV = c(rnorm(n_per_level, 125, 25), rnorm(n_per_level, 100, 25), rnorm(n_per_level, 100, 25), rnorm(n_per_level, 100, 25) ) ) aov.out <- aov(DV ~ IV, data = alternative_data) simulated_F <- summary(aov.out)[[1]]$`F value`[1] save_altF_values[i] <- simulated_F } length(save_altF_values[save_altF_values > qf(.95,3,(n_per_level*4)-4)])/1000
num_subjects <- c(5,10,15,20,25,30) sim_power <- length(length(num_subjects)) for(n in 1:length(num_subjects)){ n_per_level <- num_subjects[n] save_altF_values <- length(1000) for(i in 1:1000){ alternative_data <- tibble(subjects = 1:(levels*n_per_level), IV = as.factor(rep(1:levels, each = n_per_level)), DV = c(rnorm(n_per_level, 125, 25), rnorm(n_per_level, 100, 25), rnorm(n_per_level, 100, 25), rnorm(n_per_level, 100, 25) ) ) aov.out <- aov(DV ~ IV, data = alternative_data) simulated_F <- summary(aov.out)[[1]]$`F value`[1] save_altF_values[i] <- simulated_F } power <- length(save_altF_values[save_altF_values > qf(.95,3,(n_per_level*4)-4)])/1000 sim_power[n] <- power } power_curve <- tibble(num_subjects, sim_power) knitr::kable(power_curve)
ggplot(power_curve, aes(x=num_subjects, y=sim_power))+ geom_point()+ geom_line()
num_subjects <- c(10, 50, 75, 100, 125, 150, 175, 200) sim_power <- length(length(num_subjects)) for(n in 1:length(num_subjects)){ n_per_level <- num_subjects[n] # repeat the above many times to compute the F-distribution save_altF_values <- length(1000) for(i in 1:1000){ alternative_data <- tibble(subjects = 1:(levels*n_per_level), IV = as.factor(rep(1:levels, each = n_per_level)), DV = c(rnorm(n_per_level, rnorm(1,0,.2), 1), rnorm(n_per_level, rnorm(1,0,.2), 1), rnorm(n_per_level, rnorm(1,0,.2), 1), rnorm(n_per_level, rnorm(1,0,.2), 1) ) ) aov.out <- aov(DV ~ IV, data = alternative_data) simulated_F <- summary(aov.out)[[1]]$`F value`[1] save_altF_values[i] <- simulated_F } power <- length(save_altF_values[save_altF_values > qf(.95,3,(n_per_level*4)-4)])/1000 sim_power[n] <- power } power_curve <- tibble(num_subjects, sim_power) knitr::kable(power_curve)
ggplot(power_curve, aes(x=num_subjects, y=sim_power))+ geom_point()+ geom_line()
library(DBSStats2SemesterProject)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.