knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
We'll be using the automatic ARIMA model fitting machinery from the forecast package to simplify this a bit.
library(lubridate) library(forecast) library(rstatix)
First let's generate the three data frames of interest; mets, prescheduled appointments (volume), and noshows. Split each into only appointments in 2019 and only appointments in 2020.
##Change the year to select the year splitbyyear <- function(df, year = "2019"){ df[lubridate::year(df$Date) == year,] } mets2019 <- splitbyyear(RMHF::read_individual_data(fn = "mets.csv"), "2019") mets2020 <- splitbyyear(RMHF::read_individual_data(fn = "mets.csv"), "2020") ##Order by date. mets2019 <- mets2019[order(mets2019$Date),] mets2020 <- mets2020[order(mets2020$Date),] volume2019 <- splitbyyear(RMHF::read_individual_data(fn = "volume.csv"), "2019") volume2020 <- splitbyyear(RMHF::read_individual_data(fn = "volume.csv"), "2020") ##Order by date. volume2019 <- volume2019[order(volume2019$Date),] volume2020 <- volume2020[order(volume2020$Date),]
Now, let's split off into seperate analyses for each variable. For each, we ask two questions: 1. Does 2019 differ from 2019? 1. If that difference exists, is it before and after the implementation of COVID-19 social distancing in Ontario?
First, aggregate data on the daily level.
##Should do the heavy lifting for us. aggregate_date <- function(date, df, colselect = "Mets"){ dfdate <- df[df$Date == date, colselect] return(mean(dfdate)) } ##Vectorized, gives data at the daily level. meanmets2020 <- sapply(unique(mets2020$Date), aggregate_date, df = mets2020) meanmets2019 <- sapply(unique(mets2019$Date), aggregate_date, df = mets2019) meanage2020 <- sapply(unique(volume2020$Date), aggregate_date, df = volume2020, colselect = "Age") meanage2019 <- sapply(unique(volume2019$Date), aggregate_date, df = volume2019, colselect = "Age")
We'd expect some autoregressive (ARIMA) structure in the data, so let's check for that.
library(forecast) auto.arima(meanmets2019) auto.arima(meanmets2020) auto.arima(meanage2019) auto.arima(meanage2020)
ARIMA models mostly come up up as white noise processes. So we can't predict anything. Defaulting to anovas and t - tests.
##Normal stuff, t - test between data in different years. library(rstatix) t.test(meanmets2020, meanmets2019[1:length(meanmets2020)], paired = TRUE) covidIndex <- as.numeric(min(which(unique(mets2020)$Date == lubridate::ymd("2020/03/16")))) df2020before <- mets2020[1:covidIndex] df2020after <- mets2020[covidIndex + 1:length(df2020)]
##Normal stuff, t - test between data in different years. t.test(meanage2020, meanage2019[1:length(meanage2020)], paired = TRUE) covidIndex <- as.numeric(min(which(unique(mets2020)$Date == lubridate::ymd("2020/03/16")))) df2020before <- meanage2020[1:covidIndex] df2020after <- meanage2020[covidIndex + 1:length(df2020)]
So there were no individual level differences between years. Or due to COVID. On to group - level stuff.
df <- RMHF::read_group_data() df2019 <- as.numeric(df[df$Year == "2019",]$"Prescheduled appointments") df2020 <- as.numeric(df[df$Year == "2020",]$"Prescheduled appointments") ##Between 2019 and 2020. t.test(df2019, df2020, paired = TRUE) ##Withint 2020 before and after COVID-19 closure. t.test(df2020before, df2020after, paired = TRUE)
df <- RMHF::read_group_data() df2019 <- as.numeric(df[df$Year == "2019",]$"% of patients who were no-shows") df2020 <- as.numeric(df[df$Year == "2020",]$"% of patients who were no-shows") ##Between 2019 and 2020. t.test(df2019, df2020, paired = TRUE) ##Withint 2020 before and after COVID-19 closure. t.test(df2020[1:5], df2020[6:10], paired = TRUE)
df <- RMHF::read_group_data() df2019 <- as.numeric(df[df$Year == "2019",]$"% Females") df2020 <- as.numeric(df[df$Year == "2020",]$"% Females") ##Between 2019 and 2020. t.test(df2019, df2020, paired = TRUE) ##Withint 2020 before and after COVID-19 closure. t.test(df2020[1:5], df2020[6:10], paired = TRUE)
mets2020 <- RMHF::read_group_data()[RMHF::read_group_data()$Year == "2020",]$"Met-minutes" mets2019 <- RMHF::read_group_data()[RMHF::read_group_data()$Year == "2019",]$"Met-minutes" att2020 <- RMHF::read_group_data()[RMHF::read_group_data()$Year == "2020",]$"% of patients who were no-shows" att2019 <- RMHF::read_group_data()[RMHF::read_group_data()$Year == "2019",]$"% of patients who were no-shows" #The ith element in meanmets is the mean of the mets collected on ith day in that year. cor(as.numeric(mets2019), as.numeric(att2019)) cor(as.numeric(mets2020), as.numeric(att2020))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.