knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
library(tidyverse) library(tibble) binom_data <- replicate(10000, sum(rbinom(20, 1, 0.05))) length(binom_data[binom_data > 0])/10000 binom_data2 <- replicate(10000, sum(rbinom(50, 1, 0.05))) length(binom_data2[binom_data2 > 0])/10000 ### I understood how to construct the first term, defining the experiments in this case "binom_data," but the actual "check" part using the "length" function was a grammar I was not too familiar with and needed to consult the solutions video for. 70%
dataB <- replicate(10000, sum(rbinom(20, 3, 0.05))) length(dataB[dataB > 0])/10000 ### I ended up trying to go waaay too deep here with the aov and tibble functions- when the row/column mismatches seemed to obfuscate any interprable results, I realized I should just check the solutions video, realizing it was way simpler than I thought. 0%
p_values <- c() for(i in 1:10000){ IV <- rep(1:2, each = 20) data2 <- c(rnorm(20, 0, 1), rnorm(20, 0, 1)) q2_data <- tibble(IV, data2) p_values[i] <- t.test(data2 ~ IV, var.equal = TRUE, data = q2_data)$p.value } hist(p_values) ### I knew that the rbinom function must've played a role here somewhere (*I also just realized it's supposed to be the rnorm function, not rbinom. So I may have been more clueless than I thought). But I tripped and figuratively faceplanted on the situation when I realized that I'm not making a frequency distribution itself, I'm making a distribution of p-values. So I ultimately needed to watch the solutions video. 50% ### The distribution overall should be relatively flat- similar frequency of occurence of every possible p-value. Makes sense, since this is a randomized/rnorm null situation...
q3_p_values <- c() for(i in 1:10000){ IV <- rep(1:2, each = 20) data3 <- c(rnorm(20, 0, 1), rnorm(20, 0.5, 1)) q3_data <- tibble(IV, data3) q3_p_values[i] <- t.test(data3 ~ IV, var.equal = TRUE, data = q3_data)$p.value } hist(q3_p_values) ### This was a fairly natural and quick extension of the previous question. If seeing the solutions video for q2 carries over to q3, I scored maybe 25% on help-requirement. IF this question were to stand on its own terms, I scored a 100% (teeechnically didn't need to check help for this question in particular). ### The distribution is more compressed to the lower end, towards 0; there are still p-values spanning the whole range, but the majority seem to be below 0.2. Approximately 3500 out of 10,000 p-values are below the likely alpha level of 0.05.
q4_p_values <- c() for(i in 1:10000){ IV <- as.factor(rep(1:4, each = 10)) data4 <- as.numeric(c(rnorm(10, 0.5, 1), rnorm(10, 0, 1), rnorm(10, 0, 1), rnorm(10, 0, 1))) q4_data <- tibble(IV, data4) contrast <- c(3, -1, -1, -1) contrasts(q4_data$IV)<- contrast aov4 <- aov(data4 ~ IV, data = q4_data) p_results <- summary.aov(aov4, split = list(IV = list("contrast" = 1))) q4_p_values[i] <- p_results[[1]]$'Pr(>F)'[2] } hist(q4_p_values) ### hist(q4_p_values) It's at this point that I got frustrated and ran to the solutions video. Maybe 65% for effort but I'm probably missing the mark at the last moment. Damn, in hindsight I was closer than I thought...rows 106 and 108 needed your guidance but that was about it. ### The distribution here is similar to that for question 3. Most of the p-values are below 0.2, approx 2700/10,000 are below 0.05.
data1_p_values <- c() data2_p_values <- c() for(i in 1:10000){ IV1 <- as.factor(rep(1:4, each = 10)) data1 <- as.numeric(c(rnorm(10, 0, 1), rnorm(10, 0, 1), rnorm(10, 0, 1), rnorm(10, 0, 1))) q5_data1 <- tibble(IV1, data1) contrast <- c(3, -1, -1, -1) contrasts(q5_data1$IV1) <- contrast aov5a <- aov(data1 ~ IV, data = q5_data1) p_resultsa <- summary.aov(aov5a, split = list(IV = list("contrast" = 1))) data1_p_values[i] <- p_resultsa[[1]]$'Pr(>F)'[1] data2_p_values[i] <- p_resultsa[[1]]$'Pr(>F)'[2] } sigPa <- length(which(data1_p_values < 0.05)) sigPb <- length(which(data2_p_values < 0.05)) sigPa == sigPb sigPa > sigPb sigPa < sigPb ### I'd initially had the whole thing put there twice. The thing I was missing was that the code in line 138 could be duplicated just so that I extract the omnibus separately from the same analysis. Maybe 70%. ### It appears that more (albeit only slightly more, in the grand scheme of things) type-1 errors are made for the contrast, rather than the omnibus.
library(tibble) library(tidyr) library(dplyr) options(dplyr.summarise.inform = FALSE) smith_example <- tribble( ~Same, ~Different, ~Imagery, ~Photo, ~Placebo, #--|--|--|--|---- 25,11,14,25,8, 26,21,15,15,20, 17,9,29,23,10, 15,6,10,21,7, 14,7,12,18,15, 17,14,22,24,7, 14,12,14,14,1, 20,4,20,27,17, 11,7,22,12,11, 21,19,12,11,4 ) %>% pivot_longer(cols = everything(), names_to = "IV", values_to = "DV") %>% mutate(IV = factor(IV,levels = c("Same", "Different", "Imagery", "Photo", "Placebo"))) aov.out <- aov(DV~IV, smith_example) summary(aov.out) contrasts(smith_example$IV) c1 <- c(2,-3,2,2,-3) c2 <- c(2,0,-1,-1,0) c3 <- c(0,0,+1,-1,0) c4 <- c(0,+1,0,0,-1) my_contrasts <- cbind(c1, c2, c3, c4) contrasts(smith_example$IV) <- my_contrasts aov.out <- aov(DV~IV, smith_example) summary(aov.out) (full_summary <- summary.aov(aov.out, split=list(IV=list("(1+3+4) vs (2+5)"=1, "(1) vs (3+4)" = 2, "(3) vs (4)"= 3, "(2) vs (5)"= 4) ) ) )
full_summary[[1]]$`F value`[1] mean(full_summary[[1]]$`F value`[2:5])
group_means <- c(4,3,10,11) (grand_mean <- mean(group_means)) (differences <- group_means-grand_mean) (squared_differences <- differences^2) (sum_squares <- sum(squared_differences)) ### Unfortunately, papaja still isn't working for me. I should really consult your help on this...
fake_data <- tibble(IV = factor(c("A","B","C","D")), DV = c(4,3,10,11)) contrasts(fake_data$IV)
contrasts(fake_data$IV)[,'D'] contrasts(fake_data$IV)[,'D'] * differences grand_mean + (1 * differences[4])
contrasts(fake_data$IV) contrasts(fake_data$IV) * differences
grand_mean*contrasts(fake_data$IV) + contrasts(fake_data$IV) * differences
grand_mean mean(c(4, 3, 10, 11))
c1 <- c(-1,-1,1,1) c2 <- c(1,-1,0,0) c3 <- c(0,0,-1,1) my_contrasts <- cbind(c1,c2,c3) contrasts(fake_data$IV) <- my_contrasts contrasts(fake_data$IV) cor(contrasts(fake_data$IV))
contrasts(fake_data$IV) * group_means colSums(contrasts(fake_data$IV) * group_means) colSums(contrasts(fake_data$IV) * group_means)^2 (colSums(contrasts(fake_data$IV) * group_means)^2)/ colSums(contrasts(fake_data$IV)^2)
fake_data$DV grand_means <- c(7, 7, 7, 7) grand_means grand_means + contrasts(fake_data$IV)[,1] grand_means + contrasts(fake_data$IV)[,1] * 2 grand_means + contrasts(fake_data$IV)[,1] * 3 grand_means+ (contrasts(fake_data$IV)[,1]*3.5)+ (contrasts(fake_data$IV)[,2]*.5)+ (contrasts(fake_data$IV)[,3]*.5)
fake_data_2 <- fake_data fake_data_2 <- cbind(fake_data,contrasts(fake_data$IV)) lm(DV ~ c1 + c2 + c3, data = fake_data_2 ) summary(lm(DV ~ c1 + c2 + c3, data = fake_data_2 ))
grand_means+ (contrasts(fake_data$IV)[,1]*3.5)+ (contrasts(fake_data$IV)[,2]*.5)+ (contrasts(fake_data$IV)[,3]*.5)
fake_data <- tibble(IV = factor(c("A","B","C","D")), DV = c(43,22,53,104)) c1 <- c(-1,-1,1,1) c2 <- c(1,-1,0,0) c3 <- c(0,0,-1,1) my_contrasts <- cbind(c1,c2,c3) contrasts(fake_data$IV) <- my_contrasts fake_data_2 <- cbind(fake_data,contrasts(fake_data$IV)) lm(DV ~ c1 + c2 + c3, data = fake_data_2 ) summary(lm(DV ~ c1 + c2 + c3, data = fake_data_2 ))
sim_data <- tibble(DV = rnorm(6*100,0,1), IV = factor(rep(1:6, each = 100))) c1 <- c(1,-1,0,0,0,0) c2 <- c(0,0,1,-1,0,0) c3 <- c(0,0,0,0,1,-1) c4 <- c(-1,-1,2,2,-1,-1) c5 <- c(1,1,0,0,-1,-1) orth_contrasts <- cbind(c1,c2,c3,c4,c5) cor(orth_contrasts) contrasts(sim_data$IV) <- orth_contrasts summary.aov(aov(DV~IV, sim_data), split=list(IV=list("c1"=1, "c2" = 2, "c3"= 3, "c4"= 4, "c5" = 5) ))
all_sim_data <- tibble() for(i in 1:10000){ sim_data <- tibble(DV = rnorm(6*100,0,1), IV = factor(rep(1:6, each = 100))) contrasts(sim_data$IV) <- orth_contrasts sim_output <- summary.aov(aov(DV~IV, sim_data), split=list(IV=list("c1"=1, "c2" = 2, "c3"= 3, "c4"= 4, "c5" = 5) )) sim_results <- tibble(type = c("omnibus",rep("contrast",5)), p_values = sim_output[[1]]$`Pr(>F)`[1:6], sim_num = rep(i,6) ) all_sim_data <- rbind(all_sim_data,sim_results) }
type_I_errors <- all_sim_data %>% mutate(type_I = p_values < .05) %>% group_by(type, sim_num) %>% summarize(counts = sum(type_I)) %>% group_by(type,counts) %>% summarize(type_I_frequency = sum(counts)) knitr::kable(type_I_errors)
type_I_errors %>% filter(type == 'omnibus', counts == 1) %>% pull(type_I_frequency)/10000
type_I_errors %>% filter(type == 'contrast', counts > 0) %>% pull(type_I_frequency) %>% sum()/50000
type_I_errors %>% filter(type == 'contrast', counts > 0) %>% pull(type_I_frequency) %>% sum()/10000
romeo_juliet <- tibble(subjects = 1:20, Group = rep(c("Context Before", "Partial Context", "Context After", "Without context"), each = 5), Comprehension = c(5,9,8,4,9, 5,4,3,5,4, 2,4,5,4,1, 3,3,2,4,3 ) ) romeo_juliet$Group <- factor(romeo_juliet$Group, levels = c("Context Before", "Partial Context", "Context After", "Without context") ) c1 <- c(1,1,1,-3) c2 <- c(0,0,1,-1) c3 <- c(3,-1,-1,-1) c4 <- c(1,-1,0,0) new_contrasts <- cbind(c1,c2,c3,c4) cor(new_contrasts) contrasts(romeo_juliet$Group) <- new_contrasts summary.aov(aov(Comprehension~Group, romeo_juliet), split=list(Group=list("c1"=1, "c2" = 2, "c3"= 3, "c4" = 4))) contrasts(romeo_juliet$Group) <- c1 summary.aov(aov(Comprehension~Group, romeo_juliet), split=list(Group=list("c1"=1))) contrasts(romeo_juliet$Group) <- c2 summary.aov(aov(Comprehension~Group, romeo_juliet), split=list(Group=list("c2"=1))) contrasts(romeo_juliet$Group) <- c3 summary.aov(aov(Comprehension~Group, romeo_juliet), split=list(Group=list("c3"=1))) contrasts(romeo_juliet$Group) <- c4 summary.aov(aov(Comprehension~Group, romeo_juliet), split=list(Group=list("c4"=1)))
romeo_juliet <- tibble(subjects = 1:20, Group = rep(c("Context Before", "Partial Context", "Context After", "Without context"), each = 5), Comprehension = c(5,9,8,4,9, 5,4,3,5,4, 2,4,5,4,1, 3,3,2,4,3 ) ) romeo_juliet$Group <- factor(romeo_juliet$Group, levels = c("Context Before", "Partial Context", "Context After", "Without context") ) c1 <- c(3,-1,-1,-1) c2 <- c(1,1,-1,-1) c3 <- c(1,-1,1,-1) contrasts(romeo_juliet$Group) <- c1 summary.aov(aov(Comprehension~Group, romeo_juliet), split=list(Group=list("contrast"=1))) contrasts(romeo_juliet$Group) <- c2 summary.aov(aov(Comprehension~Group, romeo_juliet), split=list(Group=list("contrast"=1))) contrasts(romeo_juliet$Group) <- c3 summary.aov(aov(Comprehension~Group, romeo_juliet), split=list(Group=list("contrast"=1))) romeo_juliet <- romeo_juliet %>% mutate(c1 = rep(c(3,-1,-1,-1),each=5), c2 = rep(c(1,1,-1,-1),each=5), c3 = rep(c(1,-1,1,-1),each=5) ) summary(lm(Comprehension ~ c1 + c2 + c3 , romeo_juliet)) library(ppcor) spcor(romeo_juliet[,3:6])$estimate^2
library(DBSStats2Labs)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.