In matt92253/Sta486Finalmtf83: Math 125 data analysis

library(tidyverse)
library(emmeans)
library(cowplot)
library(quantreg)
load("~/Sta486Finalmtf83/data/mat125data.rda")

Pre and Post in person test scores

box plots

# data becomes more normal when we drop test scores that are zero
# also working with actual tests not pre, practice, honors code or learning aids 
mat125data <- filter( mat125data, score != 0 &
                                  practice_test == 0 & 
                                  learning_aid == 0 & 
                                  honors_code == 0 & 
                                  pre_test == 0 &
                                  module_final != "F1")

# create precovid data frame, fall of 2019
f18 <- mat125data %>% filter( year == 18 & season == "fall")
f18$covid <- "fall 2018, before covid"
# create precovid data frame, fall of 2019
s19 <- mat125data %>% filter( year == 19 & season == "spring")
s19$covid <- "spring 2019, before covid"
# create precovid data frame, fall of 2019
f19 <- mat125data %>% filter( year == 19 & season == "fall")
f19$covid <- "fall 2019, before covid"

# create post  covid data frame, fall of 2021
f21 <- mat125data %>% filter( year == 21 & season == "fall")
f21$covid <- "fall 2021, after covid"

# bind the two data frames together for visualization
pre_post <- rbind( f18,s19, f19, f21)

# order the factors
pre_post <- pre_post %>% 
  mutate( covid = fct_relevel( covid,
                               "fall 2018, before covid",
                               "spring 2019, before covid",
                               "fall 2019, before covid",
                               "fall 2021, after covid"))

# plot
ggplot( pre_post, aes( x = module_final, y = score, fill = covid)) +
  geom_boxplot( alpha = 0.8) + 
  labs( title = "Pre and Post Covid In Person Module Test Scores")+
  theme(legend.position="bottom")

 ggsave("Pre and Post Covid In Person Module Test Scores.png",dpi="retina")



 # additional semesters
 ggplot( pre_post, aes( x = covid, y = score, fill = covid)) +
  geom_boxplot( alpha = 0.8) + 
  labs( title = "Pre and Post Covid In Person Module Test Scores")+
  theme(legend.position="bottom")
anova( lm( score ~  covid, pre_post ))
pre_post_model <- lm( score ~ covid, pre_post )
pre_post_model %>% summary()
plot( pre_post_model, which = 1:2)

qr_pre_post <- rq( formula = score ~ covid , data = pre_post, tau = 0.5)
summary(qr_pre_post)

linear regression

anova( lm( score ~ module_final * covid, pre_post ))

pre_post_model <- lm( score ~ module_final * covid, pre_post )

pre_post_model %>% summary()

plot( pre_post_model, which = 1:2)

contrasts

library(emmeans)

emmeans(pre_post_model, pairwise ~ covid )

emmeans(pre_post_model, pairwise ~ covid * module_final, at = list( module_final = "M1"))

emmeans(pre_post_model, pairwise ~ covid * module_final, at = list( module_final = "M2"))

emmeans(pre_post_model, pairwise ~ covid * module_final, at = list( module_final = "M3"))

emmeans(pre_post_model, pairwise ~ covid * module_final, at = list( module_final = "M4"))

quantile regression

qr_pre_post <- rq( formula = score ~ covid * module_final, data = pre_post, tau = 0.5)
summary(qr_pre_post)

pairwise differences

boxplot plot

# build t2_attempt from mat125data using only T2 attempts and actual tests
t2_attempts <- filter( mat125data, test_attempt == "T2")




# join second attempts and large data set
# added section id to join to for students who took class twice
pairwise_comp <- inner_join( t2_attempts, mat125data, by = c("Id",
                                                             "module_final",
                                                             "section_Id"))

# remove duplicate entries for tests
# mat125data has the full data set, this resulted in the previous inner join 
# matching modules with the same attempts from t2_attempts
pairwise_comp <- filter( pairwise_comp, test_attempt.x != test_attempt.y)


# create difference column
pairwise_comp$score_diff = pairwise_comp$score.x - pairwise_comp$score.y

# plot
ggplot( pairwise_comp, aes( x = module_final, y = score_diff)) +
  geom_boxplot() +
  labs( title = "Pairwise Differences Between First and Second Attempt Test Scores")+
  ylab("Difference in Test Scores")+
  xlab("Module")

ggsave("pairwise_differences.png", dpi="retina")

linear regression

pw_model <- lm( score_diff ~ module_final, pairwise_comp )
summary(pw_model)
anova( pw_model)
plot( pw_model, which = 1:2)

contrasts

emmeans(pw_model, pairwise ~ module_final )

Changes in remote test scores

plot

tests <- mat125data

# pre_remote
pre <- filter( tests, year == 19 & season == "fall" )
pre$remote_time_period = "Fall 2019 ( in person before covid)"

# split semester
split <- filter( tests, year == 20 & season == "spring" )
split$remote_time_period = "Spring 2020 (split)"

# remote
f20 <- filter( tests, year == 20 & season == "fall" )
s21 <- filter( tests, year == 21 & season == "spring")
f20$remote_time_period = "Fall 2020 (nau flex)"
s21$remote_time_period = "Spring 2021 (nau flex)"

# post_remote
f21 <- filter( tests, year == 21 & season == "fall" )
f21$remote_time_period = "Fall 2021 (in person after covid)"


remote_learning <- rbind(pre,split, f20, s21, f21)

remote_learning <- remote_learning %>% 
  mutate( remote_time_period = fct_relevel( remote_time_period,
                                            "Fall 2019 ( in person before covid)",
                                            "Spring 2020 (split)",
                                            "Fall 2020 (nau flex)",
                                            "Spring 2021 (nau flex)",
                                            "Fall 2021 (in person after covid)" ))

p1 <- ggplot( remote_learning, aes( x = remote_time_period, y = score, fill = remote_time_period)) +
  geom_boxplot(alpha = 0.65)+
  labs( title = "Effects of Covid-19 and NAU Flex By Semester")+
  theme(legend.position= "bottom",
        axis.text.x=element_blank(),
        legend.title = element_blank())+
  guides(fill=guide_legend(nrow=3,byrow=TRUE))

p1
 ggsave("Effects of Covid-19 and Remote Learning.png",dpi="retina")

# p2 <- ggplot( remote_learning, aes( x = remote_time_period, y = score, fill = remote_time_period)) +
#   geom_boxplot(alpha = 0.65)+
#   labs( title = "Effects of Covid-19 and NAU Flex By Semester")+theme(legend.position= "none")+
#   theme(axis.text.x=element_blank(),
#         axis.title.x=element_blank())
# 
# p2

linear regression

rl_model <- lm( score ~  remote_time_period, remote_learning)
summary(rl_model)
anova( rl_model)
plot( rl_model, which = 1:2)

contrasts

emmeans(rl_model, pairwise ~  remote_time_period )

quantile regression

qr_remote<- rq( formula = score ~  remote_time_period, data = remote_learning)
summary(qr_remote)

# select t2 test scores and label them
t2_attempts <- pairwise_comp %>% select( c(1, 2, 4, 5))
t2_attempts <- t2_attempts %>% rename( "attempt" = 3, "score" = 4)
t2_attempts$attempt <- "Attempt 2 scores, Fall 16 - Spring 18"


# select t1 test scores and label them
t1_attempts <- pairwise_comp %>% select( c( 1, 2, 13, 14))
t1_attempts <- t1_attempts %>% rename( "attempt" = 3, "score" = 4)
t1_attempts$attempt <- "Attempt 1 scores, Fall 16 - Spring 18"

# bind first and second attempts
attempts <- rbind( t1_attempts, t2_attempts)


# length(unique( attempts$Id))
# 2250 students took tests twice

two_attempt_timeframe <- filter( mat125data, year <= 17 | ( year == 18 & season == "spring"))


# length(unique(two_attempt_timeframe$Id))
# 2852 total students during the two attempt time period


# select students who took test once by not selecting those who took the test twice
single_attempt_tests <- subset( two_attempt_timeframe, !(Id %in%  unique( attempts$Id)))
single_attempt_tests$attempt <- "Single attempt taken, Fall 16 - Spring 18"

single_attempt_tests <- select( single_attempt_tests, c(1, 2, 5, 13))

# length(unique(single_attempt_tests$Id))

no_retake <- filter( mat125data, year == 19 | ( year == 18 & season == "fall"))

no_retake$attempt = "Single attempt allowed, Fall 18 - Fall 19"

no_retake <- select( no_retake, c(1, 2, 5, 13))

df <- rbind( attempts, no_retake, single_attempt_tests)

df<- df %>% mutate( attempt = fct_relevel( attempt,
                                           "Attempt 1 scores, Fall 16 - Spring 18", 
                                           "Attempt 2 scores, Fall 16 - Spring 18",
                                           "Single attempt taken, Fall 16 - Spring 18",
                                           "Single attempt allowed, Fall 18 - Fall 19"))

ggplot( df, aes( x = score, fill = attempt))+ 
  geom_density( alpha = .4)

# ggplot( df, aes( x = score, fill = attempt))+ 
#   geom_density( alpha = .3)+
#   facet_grid( year ~ season)
# ggsave("multiple and single attempt scores fall 16 to fall 19.png",dpi="retina")
# ggplot(df, aes(x = score, y = attempt, fill = attempt)) + geom_density_ridges()

single_attempt <- filter( mat125data, year == 19 | ( year == 18 & season == "fall"))

single_attempt$attempt = "single"

single_attempt <- select( single_attempt, c(1, 2, 5, 13))

df <- rbind( attempts, single_attempt)

ggplot( df, aes( x = score, fill = attempt))+ 
  geom_density( alpha = .3)

t2_attempts <- pairwise_comp %>% select( c(1, 2, 4, 5, 11, 12))
t2_attempts <- t2_attempts %>% rename( "attempt" = 3, "score" = 4,
                                       "season" = 5, "year" = 6)

t1_attempts <- pairwise_comp %>% select( c( 1, 2, 13, 14, 11, 12))
t1_attempts <- t1_attempts %>% rename( "attempt" = 3, "score" = 4,
                                       "season" = 5, "year" = 6)

attempts <- rbind( t1_attempts, t2_attempts)

attempts <- filter( attempts, year < 19)

single_attempt <- filter( mat125data, year >= 19 | ( year == 18 & season == "fall"))



single_attempt$attempt = "single"

single_attempt <- select( single_attempt, c(1, 2, 5, 13, 11, 12))

df <- rbind( attempts, single_attempt)

ggplot( df, aes( x = score, fill = attempt))+ 
  geom_density( alpha = .3)+
  facet_grid( year ~ season)

# ggsave("attempts by semester.png",dpi="retina")

# ggplot(df, aes(x = score, y = year ~ season, fill = year)) + geom_density_ridges()

ggplot(df, aes(x = score, y = year, fill = season)) +
  geom_density_ridges( alpha = .5)

matt92253/Sta486Finalmtf83 documentation built on Aug. 19, 2022, 11:13 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

matt92253/Sta486Finalmtf83
Math 125 data analysis

In matt92253/Sta486Finalmtf83: Math 125 data analysis

Pre and Post in person test scores

box plots

linear regression

contrasts

quantile regression

pairwise differences

boxplot plot

linear regression

contrasts

Changes in remote test scores

plot

linear regression

contrasts

quantile regression

R Package Documentation

Browse R Packages

We want your feedback!

matt92253/Sta486Finalmtf83 Math 125 data analysis

In matt92253/Sta486Finalmtf83: Math 125 data analysis

Pre and Post in person test scores

box plots

linear regression

contrasts

quantile regression

pairwise differences

boxplot plot

linear regression

contrasts

Changes in remote test scores

plot

linear regression

contrasts

quantile regression

R Package Documentation

Browse R Packages

We want your feedback!

matt92253/Sta486Finalmtf83
Math 125 data analysis