# Loading libraries ----

# tidyverse supplies the data-manipulation, factor and plotting functions used
# below (filter, select, inner_join, fct_relevel, ggplot, ...); emmeans supplies
# the estimated-marginal-means comparisons.
library(tidyverse)
library(emmeans)
# NOTE(review): hard-coded path under the home directory; presumably this .rda
# file defines `mat125data`, which the rest of the script relies on -- confirm.
load("~/Sta486Finalmtf83/data/mat125data.rda")

# Additional cleaning ----

# Restrict the data to scored, "real" tests:
#   * zero scores are dropped (the scores look more normal without them)
#   * practice tests, learning aids, honors-code items and pre-tests are dropped
#   * rows whose module_final is "F1" are dropped
# Comma-separated conditions in filter() are combined with a logical AND.
mat125data <- mat125data %>%
  filter(
    score != 0,
    practice_test == 0,
    learning_aid == 0,
    honors_code == 0,
    pre_test == 0,
    module_final != "F1"
  )

# Pairwise comparison data frame ----

# Second attempts only: rows of the cleaned data whose test_attempt is "T2".
t2_attempts <- mat125data %>%
  filter(test_attempt == "T2")

# Pair each second attempt with the rows of the full data set that share the
# same student, module and section. section_Id is part of the key so that a
# student who took the class twice is not matched across sections.
pairwise_comp <- t2_attempts %>%
  inner_join(mat125data, by = c("Id", "module_final", "section_Id")) %>%
  # the full data set also contains the T2 rows themselves, so the join pairs
  # each second attempt with itself; drop those self-matches
  filter(test_attempt.x != test_attempt.y) %>%
  # .x columns come from the second attempt, .y columns from the other attempt
  mutate(score_diff = score.x - score.y)

# Single and multiple attempts by semester ----

# Second-attempt half of every pair: the three key columns plus the .x copies
# found at positions 4, 5, 11 and 12 of pairwise_comp, renamed by position.
t2_attempts <- pairwise_comp %>%
  select(c(1:5, 11, 12)) %>%
  rename(attempt = 4, score = 5, season = 6, year = 7) %>%
  mutate(attempt = "Second attempt scores, Fall 16 - Spring 18")

# Other half of every pair: the same key columns plus the .y copies found at
# positions 13, 14, 20 and 21, labelled as first attempts.
t1_attempts <- pairwise_comp %>%
  select(c(1:3, 13, 14, 20, 21)) %>%
  rename(attempt = 4, score = 5, season = 6, year = 7) %>%
  mutate(attempt = "First attempt scores, Fall 16 - Spring 18")

# Stack first attempts on top of second attempts.
attempts <- rbind(t1_attempts, t2_attempts)


# Rows from the period in which a second attempt was possible: every year up
# to 17, plus spring of 18.
two_attempt_timeframe <- mat125data %>%
  filter(year <= 17 | (year == 18 & season == "spring")) %>%
  # placeholder label, appended as the last column; the positional select below
  # expects it at position 13 (assumes mat125data has 12 columns -- TODO confirm)
  mutate(attempt = "all") %>%
  select(c(1, 2, 3, 13, 5, 11, 12))

# Tests that a student took only once: rows of the time frame that do not
# appear among the paired first/second attempts. The attempt label (column 4)
# is left out of the comparison on both sides.
single_attempt_tests <- setdiff(
  select(two_attempt_timeframe, -4),
  select(attempts, -4)
)

single_attempt_tests$attempt <- "Single attempt taken, Fall 16 - Spring 18"



# Rows from the period labelled as allowing only one attempt: fall of 18 and
# every later year.
no_retake <- filter(mat125data, year >= 19 | (year == 18 & season == "fall"))

no_retake$attempt <- "Only one attempt allowed, Fall 18 - Fall 19"

# Keep the key columns, score, the new attempt label, season and year. The
# column order differs from `attempts`, which is fine because rbind() on data
# frames matches columns by name. (A former argument-less rename() call here
# did nothing and has been removed.)
no_retake <- select(no_retake, c(1, 2, 3, 5, 13, 11, 12))

# Combine all four attempt groups into one data frame.
df <- rbind(attempts, no_retake, single_attempt_tests)

# Fix the display / model order of the attempt groups.
df <- df %>%
  mutate(
    attempt = fct_relevel(
      attempt,
      "First attempt scores, Fall 16 - Spring 18",
      "Second attempt scores, Fall 16 - Spring 18",
      "Single attempt taken, Fall 16 - Spring 18",
      "Only one attempt allowed, Fall 18 - Fall 19"
    )
  )

# These sections' M2 tests had multiple attempts after spring 2018, so they
# are removed. Both sections are handled in a single test.
df <- df[!(df$section_Id %in% c(15545713, 15545721) & df$module_final == "M2"), ]

# Drop any row with a missing value in any remaining column.
df <- na.omit(df)


# Score densities per attempt group, one panel per year (rows) and season
# (columns).
# NOTE(review): the title says "Fall 21", while the attempt labels and the
# other plots of `df` in this file say "Fall 19" -- confirm which is intended.
ggplot(
  data = df,
  mapping = aes(x = score, fill = attempt, colour = attempt)
) +
  geom_density(alpha = 0.3) +
  labs(title = "Fall 16 to Fall 21 Test Score Distribution By Attempt") +
  theme(legend.position = "bottom") +
  guides(fill = guide_legend(nrow = 2, byrow = TRUE)) +
  facet_grid(rows = vars(year), cols = vars(season))

# Single vs multiple attempts during the second-attempt time period ----

# Only the groups from the two-attempt period: paired first/second attempts
# plus the tests taken once.
df1 <- rbind(attempts, single_attempt_tests)

# Disabled relevel step, kept for reference. The labels in it do not match the
# labels currently assigned to `attempt`, so it would need updating before use.
# df1<- df1 %>% mutate( attempt = fct_relevel( attempt,
#                                            "Attempt 1 scores, Fall 16 - Spring 18", 
#                                            "Attempt 2 scores, Fall 16 - Spring 18",
#                                            "Single attempt taken, Fall 16 - Spring 18",
#                                            "Only one single attempt allowed, Fall 18 - Fall 19"))

# Overlaid score densities, one curve per attempt group.
ggplot(
  data = df1,
  mapping = aes(x = score, fill = attempt, colour = attempt)
) +
  geom_density(alpha = 0.3) +
  labs(title = "Fall 16 to Spring 18 Test Score Distribution By Attempt") +
  theme(legend.position = "bottom") +
  guides(fill = guide_legend(nrow = 4, byrow = TRUE))

# Linear model, attempts 2016 to 2019 ----

# One-way linear model: score explained by attempt group.
model <- lm(score ~ attempt, data = df)

# ANOVA table and coefficient summary.
anova(model)
summary(model)

# Diagnostic plots 1 and 2 (residuals vs fitted, normal Q-Q).
plot(model, which = 1:2)

# Estimated marginal means per attempt group and all pairwise contrasts.
emmeans(model, specs = pairwise ~ attempt)

# Density plots and box plots of attempts 2016 to 2019 ----

# Score densities per attempt group, one panel per module.
ggplot(
  data = df,
  mapping = aes(x = score, fill = attempt, colour = attempt)
) +
  geom_density(alpha = 0.3) +
  labs(
    title = "Fall 16 to Fall 19 Test Score Distribution By Attempt and Modules"
  ) +
  theme(legend.position = "bottom") +
  guides(fill = guide_legend(nrow = 2, byrow = TRUE)) +
  facet_grid(cols = vars(module_final))

# Same layout as box plots (only the fill is mapped to attempt here).
ggplot(data = df, mapping = aes(x = score, fill = attempt)) +
  geom_boxplot(alpha = 0.3) +
  labs(
    title = "Fall 16 to Fall 19 Test Score Distribution By Attempt and Modules"
  ) +
  theme(legend.position = "bottom") +
  guides(fill = guide_legend(nrow = 2, byrow = TRUE)) +
  facet_grid(cols = vars(module_final))

# Multiple-attempt time period by attempts and module: density plots ----

# Score densities for the two-attempt period, one panel per module.
ggplot(
  data = df1,
  mapping = aes(x = score, fill = attempt, colour = attempt)
) +
  geom_density(alpha = 0.3) +
  labs(
    title = "Fall 16 to Spring 18 Test Score Distribution By Attempt and Modules"
  ) +
  theme(legend.position = "bottom") +
  guides(fill = guide_legend(nrow = 4, byrow = TRUE)) +
  facet_grid(cols = vars(module_final))
# Replace the long attempt labels with short codes via a named lookup vector.
# The result is a plain character vector; a value that matches none of the
# four labels becomes NA.
df$attempt <- unname(
  c(
    "First attempt scores, Fall 16 - Spring 18" = "1_of_2",
    "Second attempt scores, Fall 16 - Spring 18" = "2_of_2",
    "Single attempt taken, Fall 16 - Spring 18" = "single",
    "Only one attempt allowed, Fall 18 - Fall 19" = "only one"
  )[as.character(df$attempt)]
)



# Two-way linear model: score explained by attempt group, module and their
# interaction.
model2 <- lm(score ~ attempt * module_final, data = df)

# ANOVA table and coefficient summary.
anova(model2)
summary(model2)

# Diagnostic plots 1 and 2 (residuals vs fitted, normal Q-Q).
plot(model2, which = 1:2)

# Pairwise contrasts between attempt groups; keep and print only the
# contrasts table.
comp <- emmeans(model2, pairwise ~ attempt)
comp <- comp$contrasts
comp

# Pairwise contrasts between attempt groups within each single module.
# The original ran "M4" twice and never "M3"; the third call now targets "M3".
# NOTE(review): assumes module_final has a level "M3" -- confirm.
emmeans(model2, pairwise ~ attempt * module_final,
        at = list(module_final = "M1"))
emmeans(model2, pairwise ~ attempt * module_final,
        at = list(module_final = "M2"))
emmeans(model2, pairwise ~ attempt * module_final,
        at = list(module_final = "M3"))
emmeans(model2, pairwise ~ attempt * module_final,
        at = list(module_final = "M4"))


matt92253/Sta486Finalmtf83 documentation built on Aug. 19, 2022, 11:13 p.m.