library(tidyverse)
library( emmeans)
# Restore the saved course data; the .rda file is expected to define
# `mat125data` (path is specific to the author's machine).
load("~/Sta486Finalmtf83/data/mat125data.rda")

# Restrict to real graded tests:
#   * zero scores are dropped because the score distribution is closer to
#     normal without them
#   * practice tests, learning aids, honors-code and pre-test entries are
#     excluded, as is the module labelled "F1"
mat125data <- mat125data %>%
  filter(
    score != 0,
    practice_test == 0,
    learning_aid == 0,
    honors_code == 0,
    pre_test == 0,
    module_final != "F1"
  )

# Second attempts ("T2") only, taken from the already-filtered data
t2_attempts <- mat125data %>%
  filter(test_attempt == "T2")


# check for duplicates
# t2_attempts[ duplicated( t2_attempts[, 1:4]), ]

# Pair each second attempt with the same student's other attempts on the same
# module. section_Id is part of the join key so that a student who took the
# class twice is only matched within a single section.
pairwise_comp <- t2_attempts %>%
  inner_join(mat125data, by = c("Id", "module_final", "section_Id")) %>%
  # mat125data still contains the T2 rows, so the join also pairs every second
  # attempt with itself; drop those self-matches
  filter(test_attempt.x != test_attempt.y) %>%
  # .x columns come from t2_attempts, .y columns from mat125data
  mutate(score_diff = score.x - score.y)

# duplicate check
# pairwise_comp[ duplicated( pairwise_comp[, 1:4]), ]

# # plot
# ggplot( pairwise_comp, aes( x = module_final, y = score_diff)) +
#   geom_boxplot() +
#   labs( title = "Pairwise Differences Between First and Second Attempt Test Scores")+
#   ylab("Difference in Test Scores")+
#   xlab("Module")
# Second-attempt side of each pair: the first five columns of pairwise_comp.
# NOTE(review): positions 4 and 5 are presumably test_attempt.x and score.x --
# confirm against the column order of mat125data.
t2_attempts <- pairwise_comp %>%
  select(1:5) %>%
  rename(attempt = 4, score = 5) %>%
  mutate(attempt = "2x")

# Other-attempt side of each pair: the three leading columns plus columns 13
# and 14.
# NOTE(review): positions 13 and 14 are presumably test_attempt.y and score.y --
# confirm against the column order of mat125data.
t1_attempts <- pairwise_comp %>%
  select(c(1:3, 13, 14)) %>%
  rename(attempt = 4, score = 5) %>%
  mutate(attempt = "1x")

# Stack both sides into one long table, first attempts on top
attempts <- rbind(t1_attempts, t2_attempts)

# Tests from the period covered by the first filter below: year 17 or earlier,
# plus spring of year 18. Every row is tagged "all" for now.
# NOTE(review): position 13 is where the new `attempt` column lands only if
# mat125data has 12 columns, and position 5 is presumably `score` -- confirm.
two_attempt_timeframe <- mat125data %>%
  filter(year <= 17 | (year == 18 & season == "spring")) %>%
  mutate(attempt = "all") %>%
  select(c(1, 2, 3, 13, 5))

# Tests in that period that are not part of any first/second attempt pair.
# The attempt label (column 4 in both tables) is dropped before comparing so
# rows are matched on the remaining columns only. dplyr::setdiff is called
# explicitly so the row-wise data-frame method is used regardless of which
# packages mask `setdiff`; note that it returns distinct rows.
single_attempt_tests <- dplyr::setdiff(
  select(two_attempt_timeframe, -4),
  select(attempts, -4)
) %>%
  mutate(attempt = "1")

# Tests from fall of year 18 and all of year 19, labelled "only one".
# NOTE(review): same positional assumption as above (5 = score, 13 = attempt).
no_retake <- mat125data %>%
  filter(year == 19 | (year == 18 & season == "fall")) %>%
  mutate(attempt = "only one") %>%
  select(c(1, 2, 3, 5, 13))

# rbind() on data frames matches columns by name, so the differing column
# order between `attempts` and the other two tables is harmless.
df <- rbind(attempts, no_retake, single_attempt_tests)

# Fix the order of the attempt groups so the legend and the model's reference
# level follow "1x", "2x", "1", "only one"
df$attempt <- fct_relevel(df$attempt, "1x", "2x", "1", "only one")

# Overlaid score densities per attempt group, one panel per module
ggplot(data = df, mapping = aes(x = score, fill = attempt, color = attempt)) +
  geom_density(alpha = .2) +
  facet_grid(. ~ module_final) +
  theme(legend.position = "bottom") +
  labs(title = "Fall 16 to Fall 19 Test Score Distribution By Attempt")


# ggsave("multiple and single attempt scores fall 16 to fall 19.png",dpi="retina")
# ggplot(df, aes(x = score, y = attempt, fill = attempt)) + geom_density_ridges()
# Same comparison without the "only one" group: paired attempts plus the
# unpaired tests
df1 <- rbind(attempts, single_attempt_tests)

# df1<- df1 %>% mutate( attempt = fct_relevel( attempt,
#                                            "Attempt 1 scores, Fall 16 - Spring 18", 
#                                            "Attempt 2 scores, Fall 16 - Spring 18",
#                                            "1",
#                                            "Only one single attempt allowed, Fall 18 - Fall 19"))

# Overlaid score densities per attempt group, one panel per module
ggplot(data = df1, mapping = aes(x = score, fill = attempt, color = attempt)) +
  geom_density(alpha = .2) +
  facet_grid(. ~ module_final) +
  theme(legend.position = "bottom") +
  labs(title = "Fall 16 to Spring 18 Test Score Distribution By Attempt")

# ggsave("multiple and single attempt scores fall 16 to spring 18.png",dpi="retina")
# One-way linear model of test score on attempt group, fitted to the combined
# table `df` (all four attempt groups)
model <- lm(score ~ attempt, data = df)

# Overall F-test for the attempt effect, then coefficient-level summary
anova(model)
summary(model)

# Diagnostic plots 1 and 2 of plot.lm: residuals vs fitted and normal Q-Q
plot(model, which = 1:2)

# Estimated marginal means per attempt group and all pairwise contrasts
# (emmeans is already attached at the top of the script)
emmeans(model, pairwise ~ attempt)


# matt92253/Sta486Finalmtf83 documentation built on Aug. 19, 2022, 11:13 p.m.