knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

Individual level variables of interest

  1. Met minutes (daily, weekly)
  2. Age (daily, weekly)
  3. Attendance (weekly)
  4. %Females (weekly)

Creating data and time series analysis.

We'll be using the automatic ARIMA model fitting machinery from the forecast package to simplify this a bit.

library(lubridate)
library(forecast)
library(rstatix)

First let's generate the three data frames of interest; mets, prescheduled appointments (volume), and noshows. Split each into only appointments in 2019 and only appointments in 2020.

##Change the year to select the year
splitbyyear <- function(df, year = "2019"){
  df <- df[lubridate::year(df$Date) == year,]
  df <- df[order(df$Date),]
  df
}
mets2019 <- splitbyyear(RMHF::read_individual_data(fn = "mets.csv"), "2019")
mets2020 <- splitbyyear(RMHF::read_individual_data(fn = "mets.csv"), "2020")
##Order by date.
mets2019 <- mets2019[order(mets2019$Date),]
mets2020 <- mets2020[order(mets2020$Date),]

volume2019 <- splitbyyear(RMHF::read_individual_data(fn = "volume.csv"), "2019")
volume2020 <- splitbyyear(RMHF::read_individual_data(fn = "volume.csv"), "2020")
##Order by date.
volume2019 <- volume2019[order(volume2019$Date),]
volume2020 <- volume2020[order(volume2020$Date),]

Then aggregate data on the daily level.

##Should do the heavy lifting for us.
aggregate_date <- function(date, df, colselect = "Mets"){
  dfdate <- df[df$Date == date, colselect]
  return(mean(dfdate))
}
##Vectorized, gives data at the daily level.
meanmets2020 <- sapply(unique(mets2020$Date), aggregate_date, df = mets2020)
meanmets2019 <- sapply(unique(mets2019$Date), aggregate_date, df = mets2019)
meanage2020 <- sapply(unique(volume2020$Date), aggregate_date, df = volume2020, colselect = "Age")
meanage2019 <- sapply(unique(volume2019$Date), aggregate_date, df = volume2019, colselect = "Age")

We'd expect some autoregressive (ARIMA) structure in the data, so let's check for that.

library(forecast)
auto.arima(meanmets2019)
auto.arima(meanmets2020)
auto.arima(meanage2019)
auto.arima(meanage2020)

ARIMA models mostly come up up as white noise processes, which by definition we can't predict with.

Daily Mets
##Normal stuff, t - test between data in different years.
library(rstatix)
t.test(meanmets2020, meanmets2019[1:length(meanmets2020)], paired = TRUE)
testdf <- data.frame("covidyear" = meanmets2020,
                     "meanage" = meanage2020[1:length(meanmets2020)],
                     "covid" = c(rep(0,24), rep(1,17)))
##Anova.
anova_test(data = testdf, 
           formula = covidyear ~ covid)
##Ancova with age as a covariate.
anova_test(data = testdf, formula = covidyear~covid * meanage)

Daily Age

##Normal stuff, t - test between data in different years.
t.test(meanage2020, meanage2019[1:length(meanage2020)], paired = TRUE)
t.test(df2020[1:23], df2020[7:11])
testdf <- data.frame("covidyear" = meanage2020,
                    "meanage" = meanage2020,
                     ##Hardcoded groups, don't set.
                     "covid" = c(rep(0,23), rep(1,31)))
##Anova.
anova_test(data = testdf, 
           formula = covidyear ~ covid)
##Ancova with age as a covariate.
anova_test(data = testdf, formula = covidyear~covid * meanage2020)

Weekly Volume of visits.

df <- RMHF::read_group_data()
df2019 <- as.numeric(df[df$Year == "2019",]$"Prescheduled appointments")
df2020 <- as.numeric(df[df$Year == "2020",]$"Prescheduled appointments")

t.test(df2019, df2020, paired = TRUE)
testdf <- data.frame("covidyear" = df2020,
  "covid" = c(rep(0,6), rep(1,5)),
  "meanage2020" = meanage2020)
#Anova.
anova_test(data = testdf,
  formula = covidyear ~ covid)
##Ancova
anova_test(data = testdf, formula = covidyear ~ covid * age)

Weekly attendance

df <- RMHF::read_group_data()
df2019 <- as.numeric(df[df$Year == "2019",]$"% of patients who were no-shows")
df2020 <- as.numeric(df[df$Year == "2020",]$"% of patients who were no-shows")

t.test(df2019, df2020, paired = TRUE)
testdf <- data.frame("covidyear" = df2020,
  "covid" = c(rep(0,6), rep(1,5))
  )
#Anova
anova_test(data = testdf,
  formula = covidyear ~ covid)
##Ancova with age.

Weekly %Females

df <- RMHF::read_group_data()
df2019 <- as.numeric(df[df$Year == "2019",]$"% Females")
df2020 <- as.numeric(df[df$Year == "2020",]$"% Females")

t.test(df2019, df2020, paired = TRUE)
testdf <- data.frame("covidyear" = df2020,
                     "covid" = c(rep(0,6), rep(1,5)))
anova_test(data = testdf, formula = covidyear ~ covid)

Correlations between attendance and met-minutes.

mets2020 <- RMHF::read_group_data()[RMHF::read_group_data()$Year == "2020",]$"Met-minutes"
mets2019 <- RMHF::read_group_data()[RMHF::read_group_data()$Year == "2019",]$"Met-minutes"
att2020 <- RMHF::read_group_data()[RMHF::read_group_data()$Year == "2020",]$"% of patients who were no-shows"
att2019 <- RMHF::read_group_data()[RMHF::read_group_data()$Year == "2019",]$"% of patients who were no-shows"

#The ith element in meanmets is the mean of the mets collected on ith day in that year.
cor(as.numeric(mets2019), as.numeric(att2019))
cor(as.numeric(mets2020), as.numeric(att2020))


ZachLevine-11/rMHF documentation built on Oct. 26, 2020, 11:16 a.m.