In matt92253/Sta486Finalmtf83: Math 125 data analysis

Loading libraries

library(tidyverse)
library(emmeans)
load("~/Sta486Finalmtf83/data/mat125data.rda")

additional cleaning

# Keep only real, graded module tests.
# Zero scores are dropped because the score distribution is closer to normal
# without them. Pre-tests, practice tests, learning aids and honors-code
# entries are excluded, as is the "F1" module.
mat125data <- mat125data %>%
  filter(
    score != 0,
    practice_test == 0,
    learning_aid == 0,
    honors_code == 0,
    pre_test == 0,
    module_final != "F1"
  )

pairwise comparison data frame

# Rows of the cleaned data that are second ("T2") attempts.
t2_attempts <- mat125data %>%
  filter(test_attempt == "T2")

# Pair every T2 row with the rows of the full data set that share the same
# student, module and section. section_Id is part of the key so that a student
# who took the class twice is only paired within one section.
# Because mat125data also contains the T2 rows themselves, each T2 row matches
# itself in the join; those self-pairs are removed by requiring the two
# attempt labels to differ. score_diff is the T2 score minus the paired score.
pairwise_comp <- t2_attempts %>%
  inner_join(mat125data, by = c("Id", "module_final", "section_Id")) %>%
  filter(test_attempt.x != test_attempt.y) %>%
  mutate(score_diff = score.x - score.y)

single and multiple attempts by semester

# Second-attempt side of each pair (the ".x" columns of pairwise_comp):
# the three join keys plus attempt, score, season and year, picked by position.
# The attempt column is then overwritten with a descriptive label.
t2_attempts <- pairwise_comp %>%
  select(1:3, attempt = 4, score = 5, season = 11, year = 12) %>%
  mutate(attempt = "Second attempt scores, Fall 16 - Spring 18")

# Other side of each pair (the ".y" columns), same layout and treatment.
t1_attempts <- pairwise_comp %>%
  select(1:3, attempt = 13, score = 14, season = 20, year = 21) %>%
  mutate(attempt = "First attempt scores, Fall 16 - Spring 18")

# Stack first attempts on top of second attempts.
attempts <- rbind(t1_attempts, t2_attempts)


# All tests from the period up to and including spring 2018, reduced to the
# same seven columns as `attempts`. The placeholder label "all" only exists so
# the column layout lines up; it is dropped again before the comparison below.
# NOTE(review): position 13 assumes `attempt` is appended as the 13th column,
# i.e. mat125data has 12 columns -- confirm.
two_attempt_timeframe <- mat125data %>%
  filter(year <= 17 | (year == 18 & season == "spring")) %>%
  mutate(attempt = "all") %>%
  select(1, 2, 3, 13, 5, 11, 12)

# Tests that individual students took only once: rows of the period that do
# not appear among the paired first/second attempts. The attempt column
# (position 4) is ignored for the comparison.
single_attempt_tests <- setdiff(
  select(two_attempt_timeframe, -4),
  select(attempts, -4)
) %>%
  mutate(attempt = "Single attempt taken, Fall 16 - Spring 18")



# Tests from fall 2018 onward, labelled as the single-attempt policy period.
# Columns are selected in the same order as `attempts` and
# `two_attempt_timeframe` (keys, attempt, score, season, year) so the frames
# line up before they are stacked. The former empty `rename()` call was a
# no-op and has been removed.
# NOTE(review): position 13 assumes `attempt` is appended as the 13th column,
# i.e. mat125data has 12 columns -- confirm.
no_retake <- mat125data %>%
  filter(year >= 19 | (year == 18 & season == "fall")) %>%
  mutate(attempt = "Only one attempt allowed, Fall 18 - Fall 19") %>%
  select(1, 2, 3, 13, 5, 11, 12)

# Stack all four attempt categories into one analysis frame.
df <- rbind(attempts, no_retake, single_attempt_tests)

# Fix the display/model order of the attempt categories.
attempt_order <- c(
  "First attempt scores, Fall 16 - Spring 18",
  "Second attempt scores, Fall 16 - Spring 18",
  "Single attempt taken, Fall 16 - Spring 18",
  "Only one attempt allowed, Fall 18 - Fall 19"
)
df$attempt <- fct_relevel(df$attempt, attempt_order)

# These two sections still offered multiple attempts on module M2 after
# spring 2018, so their M2 rows are removed. Rows with missing values are
# dropped afterwards.
multi_attempt_m2 <-
  (df$section_Id == 15545713 | df$section_Id == 15545721) &
  df$module_final == "M2"
df <- na.omit(df[!multi_attempt_m2, ])


# Density of test scores per attempt category, faceted with one row per year
# and one column per season.
# The title previously read "Fall 21"; it now says "Fall 19" to agree with the
# legend labels ("... Fall 18 - Fall 19") and with the other plots of `df`.
ggplot(df, aes(x = score, fill = attempt, color = attempt)) +
  geom_density(alpha = .3) +
  labs(title = "Fall 16 to Fall 19 Test Score Distribution By Attempt") +
  theme(legend.position = "bottom") +
  guides(fill = guide_legend(nrow = 2, byrow = TRUE)) +
  facet_grid(year ~ season)

Single vs. multiple attempts during the two-attempt time period

# Only the period in which a second attempt was allowed: paired first/second
# attempts plus the tests that were taken once.
df1 <- rbind(attempts, single_attempt_tests)

# Note: `attempt` is left as a character column here. An earlier, disabled
# fct_relevel() step used labels ("Attempt 1 scores, ...") that do not match
# the labels currently stored in `attempt`, so it is not applied.

# Overall score density per attempt category for that period.
ggplot(df1, aes(x = score, colour = attempt, fill = attempt)) +
  geom_density(alpha = .3) +
  guides(fill = guide_legend(nrow = 4, byrow = TRUE)) +
  theme(legend.position = "bottom") +
  labs(title = "Fall 16 to Spring 18 Test Score Distribution By Attempt")

linear model, attempts 2016 to 2019

# One-way linear model of score on attempt category.
model <- lm(score ~ attempt, data = df)

# ANOVA table, coefficient summary, and the first two diagnostic plots.
anova(model)
summary(model)
plot(model, which = c(1, 2))

# Estimated marginal means per attempt category with all pairwise contrasts.
emmeans(model, specs = pairwise ~ attempt)

density plots and box plots of attempts 2016 to 2019

# Shared legend layout for both plots below: legend underneath the panel,
# fill keys arranged row-wise in two rows.
legend_two_rows <- list(
  theme(legend.position = "bottom"),
  guides(fill = guide_legend(nrow = 2, byrow = TRUE))
)

# Score density per attempt category, one panel per module.
ggplot(df, aes(x = score, colour = attempt, fill = attempt)) +
  geom_density(alpha = .3) +
  facet_grid(. ~ module_final) +
  labs(title = "Fall 16 to Fall 19 Test Score Distribution By Attempt and Modules") +
  legend_two_rows


# Same comparison as horizontal box plots.
ggplot(df, aes(x = score, fill = attempt)) +
  geom_boxplot(alpha = .3) +
  facet_grid(. ~ module_final) +
  labs(title = "Fall 16 to Fall 19 Test Score Distribution By Attempt and Modules") +
  legend_two_rows

Density plots by attempt and module for the multiple-attempt time period

# Score density per attempt category for the two-attempt period (df1),
# one panel per module.
ggplot(df1, aes(x = score, colour = attempt, fill = attempt)) +
  geom_density(alpha = .3) +
  facet_grid(. ~ module_final) +
  guides(fill = guide_legend(nrow = 4, byrow = TRUE)) +
  theme(legend.position = "bottom") +
  labs(title = "Fall 16 to Spring 18 Test Score Distribution By Attempt and Modules")

# Replace the long attempt labels with short codes so model output and
# contrast tables stay readable. After this step `attempt` is a plain
# character column; any value not listed below becomes NA.
df <- df %>%
  mutate(
    attempt = case_when(
      attempt == "First attempt scores, Fall 16 - Spring 18" ~ "1_of_2",
      attempt == "Second attempt scores, Fall 16 - Spring 18" ~ "2_of_2",
      attempt == "Single attempt taken, Fall 16 - Spring 18" ~ "single",
      attempt == "Only one attempt allowed, Fall 18 - Fall 19" ~ "only one"
    )
  )



# Linear model of score on attempt category, module, and their interaction.
model2 <- lm(score ~ attempt * module_final, data = df)

# ANOVA table, coefficient summary, and the first two diagnostic plots.
anova(model2)
summary(model2)
plot(model2, which = c(1, 2))

# Pairwise contrasts between attempt categories; only the contrast table is
# kept and printed.
comp <- emmeans(model2, specs = pairwise ~ attempt)$contrasts
comp

# Pairwise attempt comparisons within each module, one module at a time.
# The third call previously repeated "M4", so module M3 was never reported;
# it now targets "M3".
# NOTE(review): assumes module_final contains a level "M3" -- confirm against
# the data.
emmeans(model2, pairwise ~ attempt * module_final, at = list(module_final = "M1"))

emmeans(model2, pairwise ~ attempt * module_final, at = list(module_final = "M2"))

emmeans(model2, pairwise ~ attempt * module_final, at = list(module_final = "M3"))

emmeans(model2, pairwise ~ attempt * module_final, at = list(module_final = "M4"))

matt92253/Sta486Finalmtf83 documentation built on Aug. 19, 2022, 11:13 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

matt92253/Sta486Finalmtf83
Math 125 data analysis

In matt92253/Sta486Finalmtf83: Math 125 data analysis

Loading libraries

additional cleaning

pairwise comparison data frame

single and multiple attempts by semester

single vs multiple attempt during second attempt time period

linear model, attempts 2016 to 2019

density plots and box plots of attempts 2016 to 2019

multiple attempt time period by attempts and module density plots

R Package Documentation

Browse R Packages

We want your feedback!

matt92253/Sta486Finalmtf83 Math 125 data analysis

In matt92253/Sta486Finalmtf83: Math 125 data analysis

Loading libraries

additional cleaning

pairwise comparison data frame

single and multiple attempts by semester

single vs multiple attempt during second attempt time period

linear model, attempts 2016 to 2019

density plots and box plots of attempts 2016 to 2019

multiple attempt time period by attempts and module density plots

R Package Documentation

Browse R Packages

We want your feedback!

matt92253/Sta486Finalmtf83
Math 125 data analysis