knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
We'll be using the automatic ARIMA model fitting machinery from the forecast package to simplify this a bit.
library(lubridate) library(forecast) library(rstatix)
First let's generate the three data frames of interest; mets, prescheduled appointments (volume), and noshows. Split each into only appointments in 2019 and only appointments in 2020.
##Change the year to select the year splitbyyear <- function(df, year = "2019"){ df <- df[lubridate::year(df$Date) == year,] df <- df[order(df$Date),] df } mets2019 <- splitbyyear(RMHF::read_individual_data(fn = "mets.csv"), "2019") mets2020 <- splitbyyear(RMHF::read_individual_data(fn = "mets.csv"), "2020") ##Order by date. mets2019 <- mets2019[order(mets2019$Date),] mets2020 <- mets2020[order(mets2020$Date),] volume2019 <- splitbyyear(RMHF::read_individual_data(fn = "volume.csv"), "2019") volume2020 <- splitbyyear(RMHF::read_individual_data(fn = "volume.csv"), "2020") ##Order by date. volume2019 <- volume2019[order(volume2019$Date),] volume2020 <- volume2020[order(volume2020$Date),]
Then aggregate data on the daily level.
##Should do the heavy lifting for us. aggregate_date <- function(date, df, colselect = "Mets"){ dfdate <- df[df$Date == date, colselect] return(mean(dfdate)) } ##Vectorized, gives data at the daily level. meanmets2020 <- sapply(unique(mets2020$Date), aggregate_date, df = mets2020) meanmets2019 <- sapply(unique(mets2019$Date), aggregate_date, df = mets2019) meanage2020 <- sapply(unique(volume2020$Date), aggregate_date, df = volume2020, colselect = "Age") meanage2019 <- sapply(unique(volume2019$Date), aggregate_date, df = volume2019, colselect = "Age")
We'd expect some autoregressive (ARIMA) structure in the data, so let's check for that.
library(forecast) auto.arima(meanmets2019) auto.arima(meanmets2020) auto.arima(meanage2019) auto.arima(meanage2020)
ARIMA models mostly come up up as white noise processes, which by definition we can't predict with.
##Normal stuff, t - test between data in different years. library(rstatix) t.test(meanmets2020, meanmets2019[1:length(meanmets2020)], paired = TRUE) testdf <- data.frame("covidyear" = meanmets2020, "meanage" = meanage2020[1:length(meanmets2020)], "covid" = c(rep(0,24), rep(1,17))) ##Anova. anova_test(data = testdf, formula = covidyear ~ covid) ##Ancova with age as a covariate. anova_test(data = testdf, formula = covidyear~covid * meanage)
##Normal stuff, t - test between data in different years. t.test(meanage2020, meanage2019[1:length(meanage2020)], paired = TRUE) t.test(df2020[1:23], df2020[7:11]) testdf <- data.frame("covidyear" = meanage2020, "meanage" = meanage2020, ##Hardcoded groups, don't set. "covid" = c(rep(0,23), rep(1,31))) ##Anova. anova_test(data = testdf, formula = covidyear ~ covid) ##Ancova with age as a covariate. anova_test(data = testdf, formula = covidyear~covid * meanage2020)
df <- RMHF::read_group_data() df2019 <- as.numeric(df[df$Year == "2019",]$"Prescheduled appointments") df2020 <- as.numeric(df[df$Year == "2020",]$"Prescheduled appointments") t.test(df2019, df2020, paired = TRUE) testdf <- data.frame("covidyear" = df2020, "covid" = c(rep(0,6), rep(1,5)), "meanage2020" = meanage2020) #Anova. anova_test(data = testdf, formula = covidyear ~ covid) ##Ancova anova_test(data = testdf, formula = covidyear ~ covid * age)
df <- RMHF::read_group_data() df2019 <- as.numeric(df[df$Year == "2019",]$"% of patients who were no-shows") df2020 <- as.numeric(df[df$Year == "2020",]$"% of patients who were no-shows") t.test(df2019, df2020, paired = TRUE) testdf <- data.frame("covidyear" = df2020, "covid" = c(rep(0,6), rep(1,5)) ) #Anova anova_test(data = testdf, formula = covidyear ~ covid) ##Ancova with age.
df <- RMHF::read_group_data() df2019 <- as.numeric(df[df$Year == "2019",]$"% Females") df2020 <- as.numeric(df[df$Year == "2020",]$"% Females") t.test(df2019, df2020, paired = TRUE) testdf <- data.frame("covidyear" = df2020, "covid" = c(rep(0,6), rep(1,5))) anova_test(data = testdf, formula = covidyear ~ covid)
mets2020 <- RMHF::read_group_data()[RMHF::read_group_data()$Year == "2020",]$"Met-minutes" mets2019 <- RMHF::read_group_data()[RMHF::read_group_data()$Year == "2019",]$"Met-minutes" att2020 <- RMHF::read_group_data()[RMHF::read_group_data()$Year == "2020",]$"% of patients who were no-shows" att2019 <- RMHF::read_group_data()[RMHF::read_group_data()$Year == "2019",]$"% of patients who were no-shows" #The ith element in meanmets is the mean of the mets collected on ith day in that year. cor(as.numeric(mets2019), as.numeric(att2019)) cor(as.numeric(mets2020), as.numeric(att2020))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.