# Packages: tidyverse for data wrangling and plotting, emmeans for the
# pairwise comparison of model means at the end of the script.
library(tidyverse)
library(emmeans)

# Restore the saved workspace objects; the rest of the script expects this
# file to provide `mat125data`.
load(file = "~/Sta486Finalmtf83/data/mat125data.rda")
# Restrict the data to actual tests with a non-zero score.
# Zero scores are dropped because the score distribution is closer to normal
# without them; practice tests, learning aids, honors-code entries, pre-tests
# and the "F1" module are excluded as well.
mat125data <- mat125data %>%
  filter(
    score != 0,
    practice_test == 0,
    learning_aid == 0,
    honors_code == 0,
    pre_test == 0,
    module_final != "F1"
  )
# Second attempts only: rows of the (already filtered) data whose
# test_attempt is "T2".
t2_attempts <- mat125data %>%
  filter(test_attempt == "T2")

# check for duplicates
# t2_attempts[duplicated(t2_attempts[, 1:4]), ]

# Pair every second attempt with the other rows for the same student, module
# and section. section_Id is part of the key so that students who took the
# class twice are matched within a single section.
# Because mat125data is the full data set, the join also matches each T2 row
# with itself; those same-attempt pairs are dropped right after the join.
# score_diff is the second-attempt score (.x) minus the paired score (.y).
pairwise_comp <- t2_attempts %>%
  inner_join(mat125data, by = c("Id", "module_final", "section_Id")) %>%
  filter(test_attempt.x != test_attempt.y) %>%
  mutate(score_diff = score.x - score.y)

# duplicate check
# pairwise_comp[duplicated(pairwise_comp[, 1:4]), ]

# # plot
# ggplot(pairwise_comp, aes(x = module_final, y = score_diff)) +
#   geom_boxplot() +
#   labs(title = "Pairwise Differences Between First and Second Attempt Test Scores") +
#   ylab("Difference in Test Scores") +
#   xlab("Module")
# NOTE(review): every select() below is positional. Columns 1-3 are presumably
# the join keys, 4/5 the .x attempt and score, and 13/14 the .y attempt and
# score -- confirm against the column layout of mat125data / pairwise_comp.

# Second-attempt scores, labelled "2x".
t2_attempts <- pairwise_comp %>%
  select(1:5) %>%
  rename("attempt" = 4, "score" = 5) %>%
  mutate(attempt = "2x")

# Scores paired with those second attempts, labelled "1x".
t1_attempts <- pairwise_comp %>%
  select(1:3, 13, 14) %>%
  rename("attempt" = 4, "score" = 5) %>%
  mutate(attempt = "1x")

# Stack the paired first and second attempts.
attempts <- rbind(t1_attempts, t2_attempts)

# All tests up to and including spring of year 18. The new `attempt` column
# is appended last and then pulled into position 4 by the select().
two_attempt_timeframe <- mat125data %>%
  filter(year <= 17 | (year == 18 & season == "spring")) %>%
  mutate(attempt = "all") %>%
  select(1, 2, 3, 13, 5)

# Tests in that timeframe that do not appear among the paired attempts, i.e.
# tests an individual student took only once. The attempt label (column 4)
# is removed from both sides so rows are compared on the remaining columns.
single_attempt_tests <- setdiff(
  select(two_attempt_timeframe, -4),
  select(attempts, -4)
) %>%
  mutate(attempt = "1")

# Tests from fall of year 18 onward, labelled "only one".
no_retake <- mat125data %>%
  filter(year == 19 | (year == 18 & season == "fall")) %>%
  mutate(attempt = "only one") %>%
  select(1, 2, 3, 5, 13)

# Combine all four groups and fix the order of the attempt levels for the
# plot legend and the model fitted later.
df <- rbind(attempts, no_retake, single_attempt_tests) %>%
  mutate(attempt = fct_relevel(attempt, "1x", "2x", "1", "only one"))

# Score densities by attempt group, one panel per module.
ggplot(df, aes(x = score, fill = attempt, color = attempt)) +
  geom_density(alpha = 0.2) +
  labs(title = "Fall 16 to Fall 19 Test Score Distribution By Attempt") +
  facet_grid(. ~ module_final) +
  theme(legend.position = "bottom")

# ggsave("multiple and single attempt scores fall 16 to fall 19.png", dpi = "retina")
# ggplot(df, aes(x = score, y = attempt, fill = attempt)) + geom_density_ridges()
# Same density plot, restricted to the paired attempts and the tests taken
# only once within the two-attempt timeframe (the "only one" group is left
# out).
df1 <- rbind(attempts, single_attempt_tests)

# df1 <- df1 %>%
#   mutate(attempt = fct_relevel(
#     attempt,
#     "Attempt 1 scores, Fall 16 - Spring 18",
#     "Attempt 2 scores, Fall 16 - Spring 18",
#     "1",
#     "Only one single attempt allowed, Fall 18 - Fall 19"
#   ))

ggplot(df1, aes(x = score, fill = attempt, color = attempt)) +
  geom_density(alpha = 0.2) +
  facet_grid(. ~ module_final) +
  labs(title = "Fall 16 to Spring 18 Test Score Distribution By Attempt") +
  theme(legend.position = "bottom")

# ggsave("multiple and single attempt scores fall 16 to spring 18.png", dpi = "retina")
# Linear model of test score on attempt group, fitted to the combined data.
model <- lm(score ~ attempt, data = df)

# ANOVA table and coefficient summary for the fitted model.
anova(model)
summary(model)

# Diagnostic plots 1 and 2 of plot.lm (residuals vs fitted, normal Q-Q).
plot(model, which = 1:2)
# Estimated marginal means for each attempt group, together with all
# pairwise contrasts between the groups.
library(emmeans)
emmeans(model, specs = pairwise ~ attempt)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.