#' @title Create Weekly Data Snapshot Data for Power Bi
#' @description Given the base datasets, this function will do all the
#' transformations and summarizations that are used to feed the data being
#' displayed on the Weekly Data Snapshot
#' @param baseDataList A named list of data.frames containing the base data. See
#' details for more information. Defaults to the return from
#' \code{getBaseCovidData()}
#' @param cutoffDay A character day of the week. One of "Sunday", "Monday", "Tuesday", "Wednesday",
#' "Thursday", "Friday", "Saturday"
#' @param lagDaysCDT Number of days to lag the Case, Death, Test data. Defaults
#' to the value used by the Hub (10).
#' @param lagDaysHosp Number of days to lag the Hospital data. Defaults to the
#' value used by the Hub (2).
#' @details \code{baseDataList} should contain a named list of the base data.frames. These
#' are available through the MARC data API through the helpful functions
#' \code{downloadMARCCovidData()} and \code{downloadAllCovidAPIData()}, but
#' also must include the base summary datasets calculated from these. In total,
#' this should include the 3 base data.frames and the 3 summary data.frames
#' with the following names:
#' \describe{
#' \item{cdtData}{Case, Death, and Test Data}
#' \item{cdtNRData}{Newly Reported Case, Death, and Test Data}
#' \item{hospData}{Hospital Data: modified by \code{getBaseCovidData}}
#' \item{cdtHospData}{A joined version of \code{cdtData} and \code{hospData}}
#' \item{cdtHosp7DayRollingData}{The 7 day rolling average of summary of cdtHospData}
#' \item{cdtHosp14DayRollingData}{The 14 day rolling average of summary of cdtHospData}
#' }
#' @export
createBiDatasets_WDS <- function(baseDataList = getBaseCovidData(),
cutoffDay = c("Sunday", "Monday", "Tuesday", "Wednesday",
"Thursday", "Friday", "Saturday"),
lagDaysCDT = 10, lagDaysHosp = 2) {
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Load in the base data to the environment from a list ####
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
list2env(baseDataList, env = rlang::current_env())
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
message(crayon::blue("Exporting Weekly Data Snapshot Data."))
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Get the cutoff days of the week as integers ####
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
cutoffDay <- match.arg(cutoffDay)
cutoffDayInt <- as.integer(
factor(cutoffDay,
levels = c("Monday", "Tuesday", "Wednesday",
"Thursday", "Friday", "Saturday", "Sunday"),
ordered = TRUE)
)
cutoffDayInt_CDTlag <- ifelse(((cutoffDayInt - lagDaysCDT) %% 7) == 0, 7, ((cutoffDayInt - lagDaysCDT) %% 7))
cutoffDayInt_hosplag <- ifelse(((cutoffDayInt - lagDaysHosp) %% 7) == 0, 7, ((cutoffDayInt - lagDaysHosp) %% 7))
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Get full time series data ####
# Data is filtered so only one point per week based on the most recent Day of Week
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#Create CDT time series with the cutoff date and lagDaysCDT
bi_WDS_7DayRollingLag <- cdtHosp7DayRollingData %>% dplyr::filter(Date <= max(Date) - lagDaysCDT) %>%
dplyr::mutate(dayWeek = as.numeric(format(Date, format = "%u"))) %>%
dplyr::filter(dayWeek == cutoffDayInt_CDTlag) %>% dplyr::select(-dayWeek) %>%
dplyr::mutate(TestsPositive7DayRate = CasesNew7DayTotal/dplyr::if_else(TestsNew7DayTotal == 0, NA_integer_, TestsNew7DayTotal),
DeathsToCases7DayRate = DeathsNew7DayTotal/dplyr::if_else(CasesNew7DayTotal == 0, NA_integer_, CasesNew7DayTotal),
HospsToCases7DayRate = CovidNew7DayTotal/dplyr::if_else(CasesNew7DayTotal == 0, NA_integer_, CasesNew7DayTotal)) %>%
dplyr::filter(GeoID %in% c('MARC', '20MARCReg', '29MARCReg', 'HCC')) %>%
dplyr::select(Jurisdiction, State, Region, GeoID, Date,
CasesNew7DayTotal, CasesNew7DayAvg, DeathsNew7DayTotal, DeathsNew7DayAvg, TestsNew7DayTotal, TestsNew7DayAvg,
TestsPositive7DayRate, DeathsToCases7DayRate, HospsToCases7DayRate)
#Most recent version of the last dataset
bi_WDS_Last7Days <- bi_WDS_7DayRollingLag %>%
marcR::groupby_rank(GeoID, rankby = Date, filterIDs = 1) %>%
dplyr::filter(GeoID %in% c('MARC', 'HCC'))
#Create Hospital time series with the cutoff date and lagDaysHosp
bi_WDS_7DayRollingLagHosp <- cdtHosp7DayRollingData %>% dplyr::filter(Date <= max(Date) - lagDaysHosp) %>%
dplyr::mutate(dayWeek = as.numeric(format(Date, format = "%u"))) %>%
dplyr::filter(dayWeek == cutoffDayInt_hosplag) %>% dplyr::select(-dayWeek) %>%
dplyr::filter(GeoID %in% c('MARC', '20MARCReg', '29MARCReg', 'HCC')) %>%
dplyr::mutate(HospitalsTotal7DayTotal = dplyr::if_else(GeoID == 'MARC', as.integer(27*7), as.integer(HospitalsTotal7DayTotal))) %>%
dplyr::mutate(Hospital7DayReportRate = HospitalsReporting7DayTotal/dplyr::if_else(HospitalsTotal7DayTotal == 0, NA_integer_, HospitalsTotal7DayTotal)) %>%
dplyr::select(Jurisdiction, State, Region, GeoID, Date,
CovidNew7DayTotal, CovidNew7DayAvg, CovidTotal7DayAvg, Hospital7DayReportRate)
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# #Weekly comparison for the last 6 weeks (last 6*7 days) ####
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
measureTableCDT <- tibble::tribble(
~measureName, ~upGood,
"CasesNew7DayAvg", FALSE,
"CasesNew7DayTotal", FALSE,
"DeathsNew7DayAvg", FALSE,
"DeathsNew7DayTotal", FALSE,
"TestsNew7DayAvg", TRUE,
"TestsNew7DayTotal", TRUE,
"TestsPositive7DayRate", FALSE,
"DeathsToCases7DayRate", FALSE,
"HospsToCases7DayRate", FALSE
)
measureTableHosp <- tibble::tribble(
~measureName, ~upGood,
"CovidTotal7DayAvg", FALSE,
"CovidNew7DayAvg", FALSE,
"CovidNew7DayTotal", FALSE,
"Hospital7DayReportRate", TRUE
)
bi_WDS_WeeklyComparison <- dplyr::bind_rows(dplyr::filter(baseDaysComparison(bi_WDS_7DayRollingLag, measureTableCDT, days = 7, lag = 1), Date >= (max(Date) - 6*7)),
dplyr::filter(baseDaysComparison(bi_WDS_7DayRollingLagHosp, measureTableHosp, days = 7, lag = 1), Date >= (max(Date) - 6*7))
) %>% dplyr::filter(GeoID %in% c('MARC', 'HCC'))
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# #Weekly comparison table for all measures for easy display with slicers in the power bi helper ####
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
measureTable <- tibble::tribble(
~measureName, ~upGood,
"CasesNew##DayAvg", FALSE,
"CasesNew##DayTotal", FALSE,
"DeathsNew##DayAvg", FALSE,
"DeathsNew##DayTotal", FALSE,
"TestsNew##DayAvg", TRUE,
"TestsNew##DayTotal", TRUE,
"CovidTotal##DayAvg", FALSE,
"CovidNew##DayAvg", FALSE,
"CovidNew##DayTotal", FALSE
)
WDScomparison <- function(rollSumData, measureTable, days, lagDays) {
rollSumData %>%
dplyr::mutate(dayWeek = as.numeric(format(Date, format = "%u"))) %>%
dplyr::filter(Date <= max(Date[dayWeek == cutoffDayInt])) %>%
dplyr::select(-dayWeek) %>%
baseDaysComparison(., measureTable, days = days) %>%
dplyr::filter(Date <= (max(Date) - lagDays)) %>%
marcR::groupby_rank(GeoID, Measure, rankby = Date, filterIDs = 1) %>%
# dplyr::group_by(GeoID, Measure) %>% dplyr::mutate(rankID = rank(dplyr::desc(Date))) %>% dplyr::filter(rankID == 1) %>% dplyr::select(-rankID) %>%
dplyr::filter(GeoID == 'MARC') %>% #dplyr::ungroup() %>%
dplyr::mutate(Days = days) %>% dplyr::rename_with(~stringr::str_remove(.x, "Week"), tidyr::contains("Week")) %>%
dplyr::mutate(Lag = lagDays)
}
bi_WDS_ComparisonTable <- list(
WDScomparison(cdtHosp7DayRollingData, measureTable[stringr::str_detect(measureTable$measureName, "^Covid", negate = TRUE),], days = 7, lagDays = lagDaysCDT),
WDScomparison(cdtHosp7DayRollingData, measureTable[stringr::str_detect(measureTable$measureName, "^Covid", negate = FALSE),], days = 7, lagDays = lagDaysHosp),
WDScomparison(cdtHosp14DayRollingData, measureTable[stringr::str_detect(measureTable$measureName, "^Covid", negate = TRUE),], days = 14, lagDays = lagDaysCDT),
WDScomparison(cdtHosp14DayRollingData, measureTable[stringr::str_detect(measureTable$measureName, "^Covid", negate = FALSE),], days = 14, lagDays = lagDaysHosp)
) %>% dplyr::bind_rows() %>%
dplyr::mutate(ChangeProp = ChangeRatio - 1)
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# #WDS Time Helper table ####
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
minDate_CDT <- min(dplyr::filter(bi_WDS_ComparisonTable, stringr::str_detect(Measure, "^Covid", negate = TRUE))[["Date"]])
minDate_hosp <- min(dplyr::filter(bi_WDS_ComparisonTable, stringr::str_detect(Measure, "^Covid", negate = FALSE))[["Date"]])
bi_WDS_HelperTable <- tibble::tribble(
~ID_Name, ~Date1, ~Date2,
"CurrentDate", Sys.Date(), NA,
"ReportCutoffDate", minDate_CDT + lagDaysCDT, NA,
"CDTRange", minDate_CDT - 6, minDate_CDT,
"HospRange", minDate_hosp - 6, minDate_hosp
)
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# #WDS Time series data ####
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
bi_WDS_HospitalDailyData <- hospData %>%
dplyr::mutate(dayWeek = as.numeric(format(Date, format = "%u"))) %>%
dplyr::filter(Date <= max(Date[dayWeek==cutoffDayInt])-lagDaysHosp) %>%
dplyr::select(-dayWeek)
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# #Vaccination Data ####
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
bi_WDS_RegionVacc <- vaccData %>%
dplyr::filter(GeoID %in% c('MARC', '20MARCReg', '29MARCReg', 'HCC'),
(Date == bi_WDS_HelperTable[['Date1']][bi_WDS_HelperTable[['ID_Name']] == "ReportCutoffDate"] |
Date == max(Date)))
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Create return output ####
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
out <- mget(stringr::str_subset(ls(), "^bi_"))
names(out) <- stringr::str_remove(names(out), "^bi_")
return(out)
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.