This is a template which can be used to create an automated outbreak situation report for meningitis.
<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /// setup \\
Several packages are required for different aspects of analysis with R. You will need to install these before starting.
These packages can be quite large and may take a while to download in the field. If you have access to a USB key with these packages, it makes sense to copy and paste the packages into your computer's R package library (run the command .libPaths() to see the folder path).
For help installing packages, please visit https://r4epis.netlify.com/welcome ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
## hide all code chunks in the output, but show errors knitr::opts_chunk$set(echo = FALSE, # hide all code chunks in output error = TRUE, # show errors if they appear, but don't stop fig.width = 6*1.25, # Figure width fig.height = 6 # Figure height ) ## set default NA to - in output, define figure width/height options(knitr.kable.NA = "-") # Ensures the package "pacman" is installed if (!require("pacman")) { install.packages("pacman") } ## Installing required packages for this template pacman::p_load( knitr, # create output docs here, # find your files rio, # read in data forcats, # clean/shape data lubridate, # handle dates dplyr, # clean/shape data tidyr, # clean/shape data stringr, # clean text ggplot2, # create plots and charts slider, # moving averages parsedate, # guessing dates patchwork, # combine plots in one sitrep, # MSF field epi functions janitor, # clean data matchmaker, # dictionary-based standardization of variables gtsummary, # tables flextable, tsibble, # epiweeks sf, # encode spatial vector data ggspatial, # plot maps purrr # iterate over data )
## replace the date below with the latest date ## for which you want data to appear in the report reporting_date <- ymd("2018-04-23") ## This command sets the reporting week (e.g 2018 W17) based on the reporting_date ## Mondays are the week start by default. Adjust the "1" to another number if desired. ## E.g. 7 for Sundays, 5 for Fridays reporting_week <- tsibble::yearweek(reporting_date, 1)
## Set default options for plots and charts ## set default text size to 16 for plots ## give classic black/white axes for plots ggplot2::theme_set(theme_classic(base_size = 18)) ## sets the theme in ggplot for epicurves epicurve_theme <- theme( axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1), legend.title = element_blank(), panel.grid.major.x = element_line(color = "grey60", linetype = 3), panel.grid.major.y = element_line(color = "grey60", linetype = 3)) ## sets the labels in ggplot for the epicurves epicurve_labels <- labs(x = "Calendar week", y = "Cases (n)", title = "Cases by week of onset", subtitle = str_glue("Source: MSF data from {reporting_week}") )
linelist_raw <- gen_data("Meningitis") ## fake lab data # lab_results <- linelist_cleaned %>% # select(case_number) %>% # mutate(test_result = sample(c("Positive", "Negative"), # nrow(linelist_cleaned), # replace = TRUE) # )
### Read in data --------------------------------------------------------------- ## Excel file ------------------------------------------------------------------ ## For a specific sheet, use "which" # linelist_raw <- rio::import(here::here("Data", "linelist.xlsx"), which = "Sheet1") ## Excel file with password ---------------------------------------------------- ## Use this section if your Excel has a password. # install.packages(c("excel.link", "askpass")) # library(excel.link) # linelist_raw <- xl.read.file(here::here("Data", "linelist.xlsx"), # xl.sheet = "Sheet1", # password = askpass::askpass(prompt = "please enter file password"))
## MSF meningitis Dictionary ---------------------------------------------------------- ## get MSF standard dictionary for meningitis linelist_dict <- msf_dict("Meningitis", compact = FALSE) ## look at the standard dictionary by uncommenting the line below # View(linelist_dict) ## Clean column names ---------------------------------------------------------- ## make a copy of your orginal dataset and name it linelist_cleaned linelist_cleaned <- linelist_raw ## define clean variable names using clean_names from the janitor package. linelist_cleaned <- janitor::clean_names(linelist_cleaned) ## Standardising values -------------------------------------------------------- ## This step fixes the values so that you can read them. ## values like 1/0 will be recoded as "Yes" / "No" based on the dictionary linelist_cleaned <- matchmaker::match_df(linelist_cleaned, dict = linelist_dict, from = "option_code", to = "option_name", by = "data_element_shortname", order = "option_order_in_set" )
## Excel file ------------------------------------------------------------------ ## to read in a specific sheet use "which" # linelist_raw <- rio::import(here::here("Data", "linelist.xlsx"), which = "Sheet1") ## Excel file -- Specific range ------------------------------------------------ ## you can specify a range in an excel sheet. # linelist_raw <- rio::import(here::here("Data", "linelist.xlsx"), range = "B2:J102") ## Excel file with password ---------------------------------------------------- ## use this section if your Excel has a password. # install.packages(c("excel.link", "askpass")) # library(excel.link) # linelist_raw <- xl.read.file(here::here("Data", "linelist.xlsx"), # xl.sheet = "Sheet1", # password = askpass::askpass(prompt = "please enter file password")) ## CSV file -------------------------------------------------------------------- # linelist_raw <- rio::import(here::here("Data", "linelist.csv")) ## Stata data file ------------------------------------------------------------- # linelist_raw <- rio::import(here::here("Data", "linelist.dat"))
## MSF meningitis Dictionary ---------------------------------------------------------- ## get MSF standard dictionary for meningitis # linelist_dict <- msf_dict("Meningitis", compact = FALSE) %>% # select(option_code, option_name, everything()) ## look at the standard dictionary by uncommenting the line below # View(linelist_dict) ## You will need to recode your variables to match the data dictionary. This is ## addressed below. ## Clean column names ---------------------------------------------------------- ## This step fixes the column names so they are easy to use in R. ## make a copy of your orginal dataset and name it linelist_cleaned # linelist_cleaned <- linelist_raw ## define clean variable names using clean_names from the janitor package. # linelist_cleaned <- janitor::clean_names(linelist_cleaned) ## Match column names --------------------------------------------------------- ## This step helps you match your variables to the standard variables. ## This step will require some patience. Courage! ## Use the function msf_dict_rename_helper() to create a template based on the ## meningitis dictionary. This will copy a rename command like the one above to your ## clipboard. # msf_dict_rename_helper("Meningitis") ## Paste the result below and your column names to the matching variable. ## Be careful! You still need to be aware of what each variable means and what ## values it takes. ## If there are any variables that are in the MSF dictionary that are not in ## your data set, then you should comment them out, but be aware that some ## analyses may not run because of this. ## PASTE HERE ## Here is an EXAMPLE for changing a few specific names. function. In this ## example, we have the columns "gender" and "age" that we want to rename as ## "sex" and "age_years". ## The formula for this is rename(data, NEW_NAME = OLD_NAME). # linelist_cleaned <- rename(linelist_cleaned, # sex = gender, # TEXT # age_years = age # INTEGER_POSITIVE # )
## Read data ------------------------------------------------------------------- ## This step reads in your population data from Excel. ## You may need to rename your columns. # population_data <- rio::import(here::here("Data", "population.xlsx"), which = "Sheet1") ## repeat preparation steps as appropriate ## Enter counts directly ------------------------------------------------------- # population_data_age <- gen_population( # groups = c("0-2", "3-14", "15-29", "30-44", "45+"), # counts = c(3600, 18110, 13600, 8080, 6600), # strata = NULL) %>% # do not stratify by gender # rename(age_group = groups, # rename columns (syntax is NEW NAME = OLD NAME) # population = n) # population_data_region <- gen_population( # groups = c("Village A", "Village B", "Village C", "Village D"), # counts = c(10000, 20000, 15000, 5000), # strata = NULL) %>% # do not stratify # rename(patient_origin = groups, # rename columns (syntax is NEW NAME = OLD NAME) # population = n) ## Create counts from proportions ---------------------------------------------- ## This step helps you estimate sub-group size with proportions. ## You need to replace the total_pop and proportions. You can change the groups ## to fit your needs. # estimate population size by age group in years population_data_age <- gen_population(total_pop = 5000, groups = c("0-1", "2-14", "15-29", "30-44", "45+"), proportions = c(0.068, 0.3622, 0.276, 0.1616, 0.1321), strata = NULL) %>% rename(age_group = groups, population = n) # estimate population size by age group in months (under 2 years) population_data_age_months <- gen_population(total_pop = 5000, groups = c("0-5", "6-8", "9-11","12-23"), proportions = c(0.0164, 0.088, 0.088, 0.034), strata = NULL) %>% rename(age_group_mon = groups, population = n) ## estimate population size by age categories in months and years # population_data_age_categories <- gen_population(total_pop = 5000, # groups = c("0-5 months", "6-8 months", "9-11 months","12-23 months", # "2-14 years", "15-29 years", "30-44 years", "45+ years"), # proportions = c(0.0164, 0.088, 0.088, 0.034, # 0.3622, 0.276, 0.1616, 0.1321), # strata = NULL) %>% # rename(age_category = groups, # population = n) ## estimate population size by region proportion population_data_region <- gen_population(total_pop = 5000, # set the total population groups = c("Village A", "Village B", "Village C", "Village D"), # set the groups proportions = c(0.221, 0.174, 0.355, 0.245), # set the proportions for each group strata = NULL) %>% # do not stratify rename(patient_origin = groups, # rename columns (syntax is NEW NAME = OLD NAME) population = n)
## Read data ------------------------------------------------------------------- ## This step reads in your lab data from Excel. ## You may need to rename your columns. ## to read in a specific sheet use "which" # lab_data <- rio::import(here::here("Data", "labresults.xlsx"), which = "Sheet1") ## repeat preparation steps as appropriate ## make sure your unique ID variable name matches! ## Merge lab to linelist ------------------------------------------------------- # linelist_cleaned <- left_join(linelist_cleaned, lab_results, # by = "case_number")
## view the first ten rows of data head(linelist_cleaned, n = 10) ## view your whole dataset interactively (in an excel style format) View(linelist_cleaned) ## overview of variable types and contents str(linelist_cleaned) ## get summary: ## mean, median and max values of numeric variables ## counts for categorical variables ## also gives number of NAs summary(linelist_cleaned) ## view unique values contained in variables ## you can run this for any column -- just replace the column name unique(linelist_cleaned$patient_origin) ## use the dfSummary function in combination with view ## note that view is not capitalised with this package # summarytools::dfSummary(linelist_cleaned) %>% # summarytools::view()
## Drop unused rows ----------------------------------------------------------- ## This step removes blank rows that don't have both a date of admission and ID ## number. linelist_cleaned <- linelist_cleaned %>% filter(!is.na(case_number) & !is.na(date_of_consultation_admission)) ## Drop columns ---------------------------------------------------------------- ## OPTIONAL: This step shows you how you can remove certain variables. # linelist_cleaned <- linelist_cleaned %>% # select(-c(msf_involvement, treatment_facility_site)) ## OPTIONAL: if you want to inspect certain variables, you can select these by ## name or column number. This example creates a reduced dataset for the first ## three columns, age_years, and sex. # linelist_reduced <- select(linelist_cleaned, c(1:3, "age_years", "sex"))
## DHIS2 standard data --------------------------------------------------------- ## If you got your data from DHIS2, use this portion of the code. ## If not, comment this section out and use the below. ## make sure all date variables are formatted as dates DATEVARS <- linelist_dict %>% filter(data_element_valuetype == "DATE") %>% filter(data_element_shortname %in% names(linelist_cleaned)) %>% ## filter to match the column names of your data pull(data_element_shortname) ## find if there are date variables which are completely empty ## (otherwise the linelist package does not work) EMPTY_DATEVARS <- purrr::map(DATEVARS, ~all( is.na(linelist_cleaned[[.x]]) )) %>% unlist() %>% which() ## remove exclude the names of variables which are completely emptys DATEVARS <- DATEVARS[-EMPTY_DATEVARS] ## change to dates linelist_cleaned <- linelist_cleaned %>% mutate( across(.cols = all_of(DATEVARS), .fns = ~ymd(parsedate::parse_date(.x)))) ## Non-DHIS2 data -------------------------------------------------------------- ## Use this section if you did not have DHIS2 data. ## use the parse_date() function to make a first pass at date variables. # linelist_cleaned <- linelist_cleaned %>% # mutate( # across(.cols = matches("date|Date"), # .fns = ~ymd(parsedate::parse_date(.x)))) ## once you have run parse_date(), take a look at your date variables. ## here is an example: # table(linelist_cleaned$date_of_consultation_admission) ## Some dates will be unrealistic or wrong. ## Here is an example of how to manually fix dates. ## Look at your data and edit as needed. # linelist_cleaned <- mutate(linelist_cleaned, # date_of_onset = case_when( # date_of_onset < as.Date("2017-11-01") ~ as.Date(NA), # date_of_onset == as.Date("2081-01-01") ~ as.Date("2018-01-01"), # TRUE ~ date_of_onset # )) ## Create epiweek variable ----------------------------------------------------- ## This step creates an epiweek variable from the date of onset. ## You can use date_of_consultation_admission if you are missing many date_of_onset. linelist_cleaned <- linelist_cleaned %>% mutate( ## create an epiweek variable epiweek = tsibble::yearweek( date_of_onset, week_start = 1), # 1 is Monday start; use 7 for Sundays or 5 for Fridays ## create a date version of epiweek epiweek_date = as.Date(epiweek))
## Age group variables ---------------------------------------------------------- ## This step shows you how to create categorical variables from numeric variables. ## We have some intermediate steps on the way. ## OPTIONAL: add under 2 years to the age_years variable ## data dictionary defines that under 2s dont have year filled in (but months/days instead) linelist_cleaned <- linelist_cleaned %>% mutate(age_months = case_when( is.na(age_years) & is.na(age_months) ~ as.numeric(age_days / 30), TRUE ~ as.numeric(age_months) ), age_years = case_when( is.na(age_years) & is.na(age_months) ~ as.numeric(age_days / 365.25), is.na(age_years) ~ as.numeric(age_months / 12), TRUE ~ as.numeric(age_years) )) ## OPTIONAL: change those who are above or below a certain age to NA # linelist_cleaned <- linelist_cleaned %>% # mutate(age_years = case_when( # is.na(age_years) ~ NA_integer_, # age_years < 0 ~ NA_integer_, # age_years > 120 ~ NA_integer_, # TRUE ~ as.integer(age_years) #Preserves other values # )) ## OPTIONAL: create an age_months variable from decimal years variable # linelist_cleaned <- linelist_cleaned %>% # mutate(age_months = case_when( # age_years < 5 ~ age_years * 12)) ## create age group variable for under 2 years based on months linelist_cleaned <- linelist_cleaned %>% mutate(age_group_mon = age_categories( age_months, breakers = c(0, 6, 9, 12, 23), ceiling = TRUE)) ## create an age group variable by specifying categorical breaks linelist_cleaned <- linelist_cleaned %>% mutate(age_group = age_categories( age_years, breakers = c(0, 2, 15, 30, 45))) ## alternatively, create an age group variable by specifying a sequence ## this creates age groups for every ten years between 0 and 100. # linelist_cleaned <- linelist_cleaned %>% # mutate(age_group = age_categories( # age, # lower = 0, # upper = 100, # by = 10))) ## If you already have an age group variable defined, you should manually ## arrange the categories in order. # linelist_cleaned <- linelist_cleaned %>% # mutate(age_group = fct_relevel( # age_group, # "0-4y", "5-14y", "15-29y", "30-44y", "45+y") ## to combine different age categories use the following function ## this prioritises the smaller unit, i.e. if given months and years, will return months first ## generally, these delineations are NOT used for measles. ## # linelist_cleaned <- group_age_categories(linelist_cleaned, # years = age_group, # months = age_group_mon) ## drop the 0-2 years age level! (need to fix group_age_categories) # linelist_cleaned <- linelist_cleaned %>% # mutate(age_category = factor(age_category, # levels = c("0-5 months", # "6-8 months", # "9-11 months", # "12-23 months", # "2-14 years", # "15-29 years", # "30-44 years", # "45+ years")))
## Numeric variables ----------------------------------------------------------- ## This step creates a variable for number of days under observation. ## You can adapt this step to create other calculated variables ## create number of days under observation linelist_cleaned <- linelist_cleaned %>% mutate(obs_days = as.numeric(date_of_exit - date_of_consultation_admission)) ## Factor (categorical) variables ---------------------------------------------- ## This step creates a variable from another character/factor variable ## You can adapt this step to create other calculated variables ## The variable DIED will be binary (TRUE/FALSE) -- if patient died or not ## if your variable is named ("Dead on arrival", "Dead in facility"), use this linelist_cleaned <- linelist_cleaned %>% mutate(DIED = str_detect(exit_status, "Dead")) ## use this version for coded values (e.g. "DOA", "DD") # linelist_cleaned <- linelist_cleaned %>% # mutate(DIED = exit_status %in% c("DD", "DOA")) ## Create a new grouping for exit status variable and previously vaccinated linelist_cleaned <- linelist_cleaned %>% mutate(exit_status2 = case_when( exit_status %in% c("Dead on arrival", "Dead in facility") ~ "Died", exit_status %in% c("Transferred (to an MSF facility)", "Transferred (to External Facility)") ~ "Transferred", exit_status == "Discharged home" ~ "Discharged", exit_status == "Left against medical advice" ~ "Left" ), # this variable will put meningitis vaccination into a yes/no variable previously_vaccinated = case_when( is.na(vaccinated_meningitis_mvc) & is.na(vaccinated_meningitis_routine) ~ NA_character_, str_detect(vaccinated_meningitis_mvc, "Yes") ~ "Yes", str_detect(vaccinated_meningitis_routine, "Yes") ~ "Yes", vaccinated_meningitis_routine == "No" & vaccinated_meningitis_mvc == "No" ~ "No", TRUE ~ "Unknown" )) ## Recode character variables ------------------------------------------------- ## This step shows how to fix misspellings in the geographic region variable. ## Ideally, you want these values to match your shapefile and population data! # linelist_cleaned <- linelist_cleaned %>% # mutate(patient_origin = case_when( # patient_origin == "Valliages D" ~ "Village D", # patient_origin == "VillageD" ~ "Village D", # patient_origin == "Town C" ~ "Village C", # TRUE ~ as.character(patient_origin)) # )) ## sometimes, coding is inconsistent across variables -- for example, "Yes" / "No" ## may be coded as Y, y, yes, 1 / N, n, no, 0. You can change them all at once! ## Create a list of the variables you want to change, and run the following. ## You may need to edit this code if options are different in your data. ## create list of variables # change_yn <- c("headache", "fever", "vomiting") ## standardize options # linelist_cleaned <- linelist_cleaned %>% # mutate( # across( # .cols = all_of(change_yn), # .fns = ~forcats::fct_recode(.x, # Yes = "y", # Yes = "Y", # Yes = "yes", # Yes = "1", # No = "n", # No = "N", # No = "no", # No = "0"))) ## Create a variable based on rules from other simple character variables ## If you have access to lab results, you can create a case definition variable ## the tilde (~) is used to assign the new values (Conf, prob, susp, unknown) ## starting from the specific to the general ## TRUE assigns all remaining rows ## You MUST modify this section to match your case definition. The below ## uses positive pastorex for Confirmed and fever with stiff neck or rash for ## Probable. linelist_cleaned <- linelist_cleaned %>% mutate(case_def = case_when( is.na(pastorex_result) & is.na(fever) & is.na(stiff_neck) & is.na(petechiae_purpura_at_admission) ~ NA_character_, str_detect(pastorex_result, "Mening") ~ "Confirmed", fever == "Yes" & stiff_neck == "Yes" ~ "Probable", fever == "Yes" & petechiae_purpura_at_admission == "Yes" ~ "Probable", TRUE ~ "Suspected" )) ## Fix logically inconsistent variables ## example: if no vaccine given previously or if unsure, data of last vax ## should be NA linelist_cleaned <- linelist_cleaned %>% mutate(date_of_last_vaccination = ifelse( previously_vaccinated != "Yes", date_of_last_vaccination, as.Date(NA) ))
## Force missing values to NA ## important for sex to generate age pyramids linelist_cleaned <- linelist_cleaned %>% mutate(sex = fct_recode(sex, NULL = "Unknown/unspecified")) ## Change the order of levels in a single categorical variable ## This orders the levels -- since in all figures / tables, 0-4 should come ## before 4-24, etc linelist_cleaned <- linelist_cleaned %>% mutate(time_to_death = factor(time_to_death, levels = c("0-4 hours", ">4-24 hours", ">24-48 hours", ">48 hours"))) ## Change the order of levels of multiple categorical variables at the same time linelist_cleaned <- linelist_cleaned %>% mutate( across( # Looks for variables beginning with "seizure" .cols = starts_with("seizure"), # Sets order of levels .fns = ~fct_relevel(.x, levels = c("Yes", "No")))) %>% ## stop a meaningless tidyverse warning from showing suppressWarnings()
# create a grouping of all symptoms SYMPTOMS <- c("headache", "chills", "fever", "stiff_neck", "neck_pain", "upper_limb_pain", "lower_limb_pain", "vomiting", "petechiae_purpura_at_admission", "purpura_fulminans", "kernigs_brudzinski_signs", "altered_consciousness", "seizures_at_admission", "seizure_episodes", "febrile_coma", "coma") # create a grouping of all lab tests LABS <- c("ti_result_pcr", "ti_result_culture", "malaria_rdt_at_admission")
# return the last day of the last reporting week obs_end <- as.Date((reporting_week + 1)) - 1 # filter out cases after end of reporting week linelist_cleaned <- linelist_cleaned %>% filter(epiweek <= reporting_week) # define the first week of outbreak (date of first case) first_week <- min(linelist_cleaned$epiweek) # outbreak start # return the first day in the week of first case obs_start <- as.Date(first_week) ## pull the unique weeks which occur all_weeks <- seq(first_week, reporting_week, by = 1) ## change weeks to dates all_weeks_date <- as.Date(all_weeks)
## option 1: only keep the first occurence of duplicate case linelist_cleaned <- linelist_cleaned %>% ## find duplicates based on case number, sex and age group ## only keep the first occurence distinct(case_number, sex, age_group, .keep_all = TRUE) # ## option 2: create flagging variables for duplicates (then use to browse) # # linelist_cleaned <- linelist_cleaned %>% # ## choose which variables to use for finding unique rows # group_by(case_number, sex, age_group) %>% # mutate( # ## get the number of times duplicate occurs # num_dupes = n(), # duped = if_else(num_dupes > 1 , TRUE, FALSE) # ) # # ## browse duplicates based on flagging variables # linelist_cleaned %>% # ## only keep rows that are duplicated # filter(duped) %>% # ## arrange by variables of interest # arrange(case_number, sex, age_group) %>% # View() # # ## filter duplicates to only keep the row with the earlier entry # linelist_cleaned %>% # ## choose which variables to use for finding unique rows # group_by(case_number, sex, age_group) %>% # ## sort to have the earliest date by person first # arrange(date_of_consultation_admission) %>% # ## only keep the earliest row # slice(1)
# rio::export(linelist_cleaned, here::here("Data", str_glue("linelist_cleaned_{Sys.Date()}.xlsx")))
From the start of the outbreak up until r reporting_week
there were a
total of r nrow(linelist_cleaned)
cases. There were
r fmt_count(linelist_cleaned, sex == "Female")
females and
r fmt_count(linelist_cleaned, sex == "Male")
males.
The most affected age group was r tabyl(linelist_cleaned, age_group) %>% slice(which.max(n)) %>% pull(age_group)
years.
Cases by age group and definition <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /// describe_by_age_group_and_def \\
This chunk will create a table of cases by age group and case definition. You can only use this chunk if you have lab results and have defined a case definition variable. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
linelist_cleaned %>% ## only keep variables of interest select(age_group, case_def) %>% ## make NAs show up as "Missing" (so they are included in counts) mutate(case_def = forcats::fct_explicit_na(case_def, "Missing")) %>% ## create a table with counts/percentages by column tbl_summary(by = case_def, percent = "column", label = list(age_group = "Age group")) %>% ## add totals add_overall() %>% ## make variable names bold bold_labels() %>% # change to flextable format as_flex_table() %>% # make header text bold (using {flextable}) bold(part = "header") %>% # make your table fit to the maximum width of the word document set_table_properties(layout = "autofit")
There were r fmt_count(linelist_cleaned, is.na(sex)| sex == "Unknown/unspecified")
cases missing information on sex and r fmt_count(linelist_cleaned, is.na(age_group))
missing age group.
Cases by age group and sex <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /// describe_by_age_group_and_sex \\
This chunk will create a table of cases by age group and sex. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
linelist_cleaned %>% select(age_group, sex) %>% mutate(sex = forcats::fct_explicit_na(sex, "Missing")) %>% tbl_summary(by = sex, percent = "column", label = list(age_group = "Age group")) %>% add_overall() %>% ## make variable names bold bold_labels() %>% # change to flextable format as_flex_table() %>% # make header text bold (using {flextable}) bold(part = "header") %>% # make your table fit to the maximum width of the word document set_table_properties(layout = "autofit")
Cases by population proportion (age group and sex) <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /// total_props_agegroup_sex \\
You can also show cases by proportion of the total population by sex and age. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
linelist_cleaned %>% select(age_group, sex) %>% mutate(sex = forcats::fct_explicit_na(sex, "Missing")) %>% tbl_summary(by = sex, percent = "cell", label = list(age_group = "Age group")) %>% add_overall() %>% ## make variable names bold bold_labels() %>% # change to flextable format as_flex_table() %>% # make header text bold (using {flextable}) bold(part = "header") %>% # make your table fit to the maximum width of the word document set_table_properties(layout = "autofit")
Age pyramid <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /// age_pyramid \\
This chunk creates an age/sex pyramid of your cases. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
age_pyramid(linelist_cleaned, age_group = "age_group", split_by = "sex") + labs(y = "Cases (n)", x = "Age group") + # change axis labels theme(legend.position = "bottom", # move legend to bottom legend.title = element_blank(), # remove title text = element_text(size = 18) # change text size )
# ## plot age pyramid by month groups, for observations under 2 years # linelist_cleaned %>% # filter(age_years < 2) %>% # age_pyramid(age_group = "age_group_mon", # split_by = "sex") + # # stack_by = "case_def") + # labs(y = "Cases (n)", x = "Age group (months)") + # change axis labels (nb. x/y flip) # theme(legend.position = "bottom", # move legend to bottom # legend.title = element_blank(), # remove title # text = element_text(size = 18) # change text size # )
Of the patients,r fmt_count(linelist_cleaned, previously_vaccinated == "Yes")
reported previous vaccination. r fmt_count(linelist_cleaned, previously_vaccinated == "No")
were unvaccinated and r fmt_count(linelist_cleaned, previously_vaccinated == "Unknown")
were unsure.
Cases by vaccination status <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /// describe_by_vaccination_status \\
This chunk creates a table of the vaccination status of your cases. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
linelist_cleaned %>% select(previously_vaccinated) %>% mutate(previously_vaccinated = forcats::fct_explicit_na(previously_vaccinated, "Missing")) %>% tbl_summary(percent = "cell", label = list(previously_vaccinated = "Previously vaccinated")) %>% ## make variable names bold bold_labels() %>% # change to flextable format as_flex_table() %>% # make header text bold (using {flextable}) bold(part = "header") %>% # make your table fit to the maximum width of the word document set_table_properties(layout = "autofit")
Of the patients, r fmt_count(linelist_cleaned, patient_facility_type == "Outpatient")
were seen as outpatients and r fmt_count(linelist_cleaned, patient_facility_type == "Inpatient")
were inpatients. Among inpatients, the median number of days admitted was r median(linelist_cleaned$obs_days, na.rm = T)
,
with a range between r min(linelist_cleaned$obs_days, na.rm = T)
and r max(linelist_cleaned$obs_days, na.rm = T)
days.
Cases by symptoms <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /// describe_by_symptoms \\
This chunk gives the counts and proportions for all of the variables in SYMPTOMS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
linelist_cleaned %>% select(all_of(SYMPTOMS)) %>% tbl_summary() %>% # fix the way symptom names are displayed # str_replace_all switches underscore for space in the variable column # str_to_sentence makes the first letter capital, and all others lowercase modify_table_body( ~.x %>% mutate(label = str_to_sentence(str_replace_all(label, "_", " "))) ) %>% # change to flextable format as_flex_table() %>% # make header text bold (using {flextable}) bold(part = "header") %>% # make your table fit to the maximum width of the word document set_table_properties(layout = "autofit")
Cases by lab results <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /// describe_by_labs \\
This chunk gives the counts and proportions for all of the variables in LABS. If you do not have lab results, comment out this chunk. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
linelist_cleaned %>% select(all_of(LABS)) %>% ## create long dataset for counting pivot_longer(cols = everything()) %>% # fix the way lab test names are displayed mutate(name = case_when( str_detect(name, "culture") ~ "Culture", str_detect(name, "pcr") ~ "PCR", str_detect(name, "rdt") ~ "RDT")) %>% ## show percentages by column (i.e. for each test) tbl_summary(percent = "column", by = name, label = list(value = "Test result")) %>% add_overall() %>% ## make variable names bold bold_labels() %>% # change to flextable format as_flex_table() %>% # make header text bold (using {flextable}) bold(part = "header") %>% # make your table fit to the maximum width of the word document set_table_properties(layout = "autofit")
Cases by Pastorex results <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /// describe_by_pastorex \\
This chunk creates a table for Pastorex results. If you do not have Pastorex results, comment out this chunk. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
linelist_cleaned %>% select(pastorex_result) %>% mutate(pastorex_result = forcats::fct_explicit_na(pastorex_result, "Missing")) %>% tbl_summary(percent = "cell", label = list(pastorex_result = "Pastorex result")) %>% ## make variable names bold bold_labels() %>% # change to flextable format as_flex_table() %>% # make header text bold (using {flextable}) bold(part = "header") %>% # make your table fit to the maximum width of the word document set_table_properties(layout = "autofit")
Of r fmt_count(linelist_cleaned, patient_facility_type == "Inpatient")
inpatients, there have been r fmt_count(linelist_cleaned, str_detect(exit_status, "Dead"), patient_facility_type == "Inpatient")
deaths, of which
r fmt_count(linelist_cleaned, exit_status == "Dead on arrival")
were dead on arrival.
Among inpatients who died, the time to death is shown below. <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /// describe_time_to_death \\
If you have the time_to_death available, you can use this chunk to describe when inpatients died. If not, comment it out. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
linelist_cleaned %>% filter(DIED, patient_facility_type == "Inpatient") %>% select(time_to_death) %>% tbl_summary(label = list(time_to_death = "Time to Death")) %>% ## make variable names bold bold_labels() %>% # change to flextable format as_flex_table() %>% # make header text bold (using {flextable}) bold(part = "header") %>% # make your table fit to the maximum width of the word document set_table_properties(layout = "autofit")
The case fatality ratio among inpatients with known outcomes is below. <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /// overall_cfr \\
This chunk gives the case fatality ratio among inpatients with outcomes. If you have no deaths reported, the table will not be useful. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
# use arguments from above to produce overal CFR linelist_cleaned %>% filter(patient_facility_type == "Inpatient") %>% select(DIED) %>% gtsummary::tbl_summary( statistic = everything() ~ "{N}", label = DIED ~ "Inpatients" ) %>% modify_header(stat_0 = "Cases (N)") %>% # Use wrapper function to calculate cfr epitabulate::add_cfr(deaths_var = "DIED") %>% ## make variable names bold bold_labels() %>% # change to flextable format as_flex_table() %>% # make header text bold (using {flextable}) bold(part = "header") %>% # make your table fit to the maximum width of the word document set_table_properties(layout = "autofit")
The case fatality ratio by sex among inpatients with known outcomes is below. <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /// cfr_by_sex \\
This chunk gives the case fatality ratio among inpatients with outcomes, divided by sex. If you have no deaths reported, the table will not be useful. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
linelist_cleaned %>% filter(patient_facility_type == "Inpatient") %>% select(DIED, sex) %>% mutate(sex = forcats::fct_explicit_na(sex, "Missing")) %>% gtsummary::tbl_summary( include = sex, statistic = sex ~ "{n}", label = sex ~ "Gender") %>% modify_header(stat_0 = "Cases (N)") %>% # Use wrapper function to calculate cfr add_cfr(deaths_var = "DIED") %>% ## make variable names bold bold_labels() %>% # change to flextable format as_flex_table() %>% # make header text bold (using {flextable}) bold(part = "header") %>% # make your table fit to the maximum width of the word document set_table_properties(layout = "autofit")
CFR by age group among inpatients with known outcomes <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /// cfr_by_age_group \\
This chunk gives the case fatality ratio among inpatients with outcomes, divided by age group. If you have no deaths reported, the table will not be useful. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
linelist_cleaned %>% filter(patient_facility_type == "Inpatient") %>% select(DIED, age_group) %>% gtsummary::tbl_summary( include = age_group, statistic = age_group ~ "{n}", missing = "no", label = age_group ~ "Age group" ) %>% modify_header(stat_0 = "Cases (N)") %>% # Use wrapper function to calculate cfr add_cfr(deaths_var = "DIED") %>% ## make variable names bold bold_labels() %>% # change to flextable format as_flex_table() %>% # make header text bold (using {flextable}) bold(part = "header") %>% # make your table fit to the maximum width of the word document set_table_properties(layout = "autofit")
linelist_cleaned %>% filter(patient_facility_type == "Inpatient") %>% select(DIED, case_def) %>% gtsummary::tbl_summary( include = case_def, statistic = case_def ~ "{n}", missing = "no", label = case_def ~ "Case definition" ) %>% modify_header(stat_0 = "Cases (N)") %>% # Use wrapper function to calculate cfr add_cfr(deaths_var = "DIED") %>% ## make variable names bold bold_labels() %>% # change to flextable format as_flex_table() %>% # make header text bold (using {flextable}) bold(part = "header") %>% # make your table fit to the maximum width of the word document set_table_properties(layout = "autofit")
The attack rate per 10,000 population is below (based on available population data available for the catchment area/region of interest).
# define population population <- sum(population_data_age$population)
Below gives the attack rate per 10,000 population (N = r format(population, big.mark = ",")
)
ar <- attack_rate(nrow(linelist_cleaned), population, multiplier = 10000) ar %>% merge_ci_df(e = 3, digits = 1) %>% # merge the lower and upper CI into one column rename("Cases (n)" = cases, "Population" = population, "AR (per 10,000)" = ar, "95%CI" = ci) %>% # produce styled output table with auto-adjusted column widths with {flextable} qflextable() %>% # make header text bold (using {flextable}) bold(part = "header") %>% # make your table fit to the maximum width of the word document set_table_properties(layout = "autofit") %>% ## set to only show 1 decimal place colformat_double(digits = 1)
Here, we can see that the attack rate for a population of r format(population, big.mark = ",")
was r fmt_ci_df(ar, percent = FALSE)
.
To give attack rate by age group, with appropriate population denominators, use the following code.
linelist_cleaned %>% ## create a dummy variable so all rows are counted mutate(case_total = TRUE) %>% select(case_total, age_group) %>% gtsummary::tbl_summary( ## only include age group counts include = age_group, label = list(age_group ~ "Age Group") ) %>% epitabulate::add_ar( ## specify which variable counts cases (dummy) case_var = "case_total", ## get the population counts as a vector population = pull(population_data_age, population), drop_tblsummary_stat = TRUE) %>% ## make variable names bold bold_labels() %>% # change to flextable format as_flex_table() %>% # make header text bold (using {flextable}) bold(part = "header") %>% # make your table fit to the maximum width of the word document set_table_properties(layout = "autofit")
<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This section can only be used if you are in a closed population (eg refugee camp). The assumptions don't hold in an open/community setting.
This chunk calculates the attack rate by age group and then gives a table of attack rate by group.
This section gives mortality rates attributable to meningitis in a closed population. It does not calculate all-cause mortality. It assumes that all meningitis deaths are among inpatients.
This demonstrates three ways of calculating mortality rate based on catchment population (twice) and based on hospital population. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
# count number of deaths deaths <- sum(linelist_cleaned$DIED) # outbreak duration in days obs_time <- as.numeric(obs_end - obs_start) # patient observation time pat_obs_time <- linelist_cleaned %>% filter(!is.na(exit_status)) %>% summarise(days = sum(obs_days)) %>% pull(days)
Mortality rate attributable to meningitis per 10,000 population
mortality_rate(deaths, population, multiplier = 10000, mergeCI = TRUE) %>% rename("Deaths" = deaths, "Population" = population, "Mortality (per 10,000)" = `mortality per 10 000`, "95%CI" = ci) %>% # produce styled output table with auto-adjusted column widths with {flextable} qflextable() %>% # make header text bold (using {flextable}) bold(part = "header") %>% # make your table fit to the maximum width of the word document set_table_properties(layout = "autofit") %>% ## set to only show 1 decimal place colformat_double(digits = 1)
Crude mortality rate attributable to meningitis per 10,000 population per day
mortality_rate(deaths, population*obs_time, multiplier = 10000, mergeCI = TRUE) %>% rename("Deaths" = deaths, "Person-days" = population, "Mortality (per 10,000/day)" = `mortality per 10 000`, "95%CI" = ci) %>% # produce styled output table with auto-adjusted column widths with {flextable} qflextable() %>% # make header text bold (using {flextable}) bold(part = "header") %>% # make your table fit to the maximum width of the word document set_table_properties(layout = "autofit") %>% ## set to only show 1 decimal place colformat_double(digits = 1)
Mortality rate attributable to meningitis per 10,000 patients per day
mortality_rate(deaths, pat_obs_time, multiplier = 10000, mergeCI = TRUE) %>% rename("Deaths" = deaths, "Population" = population, "Mortality (per 10,000/day)" = `mortality per 10 000`, "95%CI" = ci) %>% # produce styled output table with auto-adjusted column widths with {flextable} qflextable() %>% # make header text bold (using {flextable}) bold(part = "header") %>% # make your table fit to the maximum width of the word document set_table_properties(layout = "autofit") %>% ## set to only show 1 decimal place colformat_double(digits = 1)
<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This section focuses on the when of the outbreak: - When did cases fall ill? - Are numbers increasing or decreasing?
There is code to include an epi curve. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
There were r fmt_count(linelist_cleaned, is.na(date_of_onset))
cases missing dates of onset.
## make plot basic_curve <- linelist_cleaned %>% filter(!is.na(epiweek_date)) %>% ## plot those not missing using the date variable ggplot(aes(x = epiweek_date)) + ## plot as a bar geom_bar(width = 7, fill = "red", colour = "black") + ## align plot with bottom of plot scale_y_continuous(expand = c(0, NA)) + ## control x-axis breaks and display scale_x_date( breaks = all_weeks_date, labels = all_weeks) + ## use pre-defined labels epicurve_labels + ## use pre-defined theme (design) settings epicurve_theme # show your plot (stored for later use) basic_curve ## if the outbreak has been going on for a while, your x-axis might look messy. ## to reduce the number of labels, uncomment the below. ## you can customize the number of breaks by changing n_breaks ## you can also use months for your break (here in three-month intervals) # basic_curve + scale_x_date(breaks = "3 months", date_labels = "%b %Y")
The peak of the outbreak was in r tabyl(linelist_cleaned, epiweek) %>% slice(which.max(n)) %>% pull(epiweek)
## define the width of the moving average window here moving_window_days <- 14 ## calculates the moving average using counts by epiweek moving_avg <- linelist_cleaned %>% ## count cases by epiweek count(epiweek_date) %>% ## create column with sliding window for day and window prior mutate(indexed_window = slider::slide_index_dbl( n, # calculate on new_cases .i = epiweek_date, # indexed with epiweek_date .f = ~mean(.x, na.rm = TRUE), # function is mean() with missing values removed .before = days(moving_window_days - 1))) ## create plot moving_plot <- basic_curve + geom_line(data = moving_avg, mapping = aes(x = epiweek_date, y = indexed_window)) + # adjust title and subtitle for this particular plot labs(title = str_glue("Cases by date of onset"), subtitle = str_glue("{moving_window_days}-day moving average Source: MSF data from {reporting_week}")) # show your plot (stored for later use) moving_plot
## calculates the moving sum using counts by epiweek moving_sum <- linelist_cleaned %>% ## count cases by date of onset count(epiweek_date) %>% mutate(biweekly = slide_sum(x = n, after = 1, step = 2)) %>% filter(!is.na(biweekly)) ggplot(moving_sum, aes(x = epiweek_date, y = biweekly)) + ## plot as a bar geom_bar( ## use the count we calculated above stat = "identity", ## make each bar two weeks wide width = 14, fill = "red", colour = "black") + ## align plot with bottom of plot scale_y_continuous(expand = c(0, NA)) + ## control x-axis breaks and display scale_x_date( breaks = all_weeks_date, labels = all_weeks) + ## use pre-defined labels epicurve_labels + # change visuals of dates and remove legend title epicurve_theme
linelist_cleaned %>% filter(!is.na(epiweek_date)) %>% ## plot those not missing using the date variable ggplot(aes(x = epiweek_date, fill = fct_rev(fct_explicit_na(sex)))) + ## plot as a bar geom_bar(width = 7, colour = "black") + # align plot with bottom of plot scale_y_continuous(expand = c(0, NA)) + # control x-axis breaks and display scale_x_date( breaks = all_weeks_date, labels = all_weeks) + # use pre-defined labels epicurve_labels + # use pre-defined theme (design) settings epicurve_theme
Cases by week of onset among inpatients by sex
linelist_cleaned %>% filter(!is.na(epiweek_date)) %>% filter(patient_facility_type == "Inpatient") %>% ## plot those not missing using the date variable ggplot(aes(x = epiweek_date, fill = fct_rev(fct_explicit_na(sex)))) + ## plot as a bar geom_bar(width = 7, colour = "black") + ## align plot with bottom of plot scale_y_continuous(expand = c(0, NA)) + ## control x-axis breaks and display scale_x_date( breaks = all_weeks_date, labels = all_weeks) + ## use pre-defined labels epicurve_labels + ## use pre-defined theme (design) settings epicurve_theme
linelist_cleaned %>% filter(!is.na(epiweek_date)) %>% ## plot those not missing using the date variable ggplot(aes(x = epiweek_date, fill = fct_rev(fct_explicit_na(case_def)))) + ## plot as a bar geom_bar(width = 7, colour = "black") + # align plot with bottom of plot scale_y_continuous(expand = c(0, NA)) + # control x-axis breaks and display scale_x_date( breaks = all_weeks_date, labels = all_weeks) + # use pre-defined labels epicurve_labels + # use pre-defined theme (design) settings epicurve_theme
Cases by week and vaccination status
linelist_cleaned %>% filter(!is.na(epiweek_date)) %>% ## plot those not missing using the date variable ggplot(aes(x = epiweek_date, fill = fct_explicit_na(previously_vaccinated))) + ## plot as a bar geom_bar(width = 7, colour = "black") + # align plot with bottom of plot scale_y_continuous(expand = c(0, NA)) + # control x-axis breaks and display scale_x_date( breaks = all_weeks_date, labels = all_weeks) + # use pre-defined labels epicurve_labels + # use pre-defined theme (design) settings epicurve_theme
Attack rate per 10,000 population by week
# counts and cumulative counts by week cases <- linelist_cleaned %>% arrange(date_of_onset) %>% # arrange by date of onset count(epiweek, .drop = FALSE) %>% # count all epiweeks and include zero counts mutate(cumulative = cumsum(n)) # add a cumulative sum # attack rate for each week ar <- attack_rate(cases$n, population, multiplier = 10000) %>% bind_cols(select(cases, epiweek), .) # add the epiweek column to table ar %>% merge_ci_df(e = 4) %>% # merge the lower and upper CI into one column rename("Epiweek" = epiweek, "Cases (n)" = cases, "Population" = population, "AR (per 10,000)" = ar, "95%CI" = ci) %>% # produce styled output table with auto-adjusted column widths with {flextable} qflextable() %>% # make header text bold (using {flextable}) bold(part = "header") %>% # make your table fit to the maximum width of the word document set_table_properties(layout = "autofit") %>% ## set to only show 1 decimal place colformat_double(j = -1, digits = 1)
Cumulative attack rate per 10,000 population per week
attack_rate(cases$cumulative, population, multiplier = 10000) %>% bind_cols(select(cases, epiweek), .) %>% # add the epiweek column to table merge_ci_df(e = 4) %>% # merge the lower and upper CI into one column rename("Epiweek" = epiweek, "Cases (n)" = cases, "Population" = population, "AR (per 10,000)" = ar, "95%CI" = ci) %>% # produce styled output table with auto-adjusted column widths with {flextable} qflextable() %>% # make header text bold (using {flextable}) bold(part = "header") %>% # make your table fit to the maximum width of the word document set_table_properties(layout = "autofit") %>% ## set to only show 1 decimal place colformat_double(j = -1, digits = 1)
Case fatality ratio as a proportion among inpatients by week
# calculate CFR among inpatients by week cfr <- linelist_cleaned %>% filter(patient_facility_type == "Inpatient") %>% mutate(epiweek = as.character(epiweek)) %>% case_fatality_rate_df(str_detect(exit_status, "Dead"), group = epiweek) cfr %>% merge_ci_df(e = 4) %>% # merge the lower and upper CI into one column rename("Epiweek" = epiweek, "Deaths" = deaths, "Cases" = population, "CFR (%)" = cfr, "95%CI" = ci) %>% # produce styled output table with auto-adjusted column widths with {flextable} qflextable() %>% # make header text bold (using {flextable}) bold(part = "header") %>% # make your table fit to the maximum width of the word document set_table_properties(layout = "autofit") %>% ## set to only show 1 decimal place colformat_double(j = -1, digits = 1)
ar_plot <- ggplot(ar, aes(x = as.Date(epiweek), group = 1)) + # add confidence intervals as a ribbon geom_ribbon(aes(ymin = lower, ymax = upper), color = "blue", fill = "blue", linetype = 2, alpha = 0.2, show.legend = FALSE) + # add AR as a line geom_line(aes(y = ar), color = "blue", show.legend = FALSE) + # set origin for axes scale_y_continuous(expand = c(0, NA)) + # control x-axis breaks and display scale_x_date( breaks = all_weeks_date, labels = all_weeks) + # add labels to axes and below chart labs(x = "Calendar week", y = "AR [95% CI]", subtitle = "Attack Rate (per 10,000)") + # change visuals of dates and remove legend title epicurve_theme
cfr_plot <- ggplot(cfr, aes(x = as.Date(yearweek(as.character(epiweek)))), group = 1) + # add confidence intervals as a ribbon geom_ribbon(aes(ymin = lower, ymax = upper), color = "red", fill = "red", linetype = 2, alpha = 0.2, show.legend = FALSE) + # add CFR as a line geom_line(aes(y = cfr), color = "red", show.legend = FALSE) + # set origin for axes scale_y_continuous(expand = c(0, NA)) + # control x-axis breaks and display scale_x_date( breaks = all_weeks_date, labels = all_weeks) + # add labels to axes and below chart labs(x = "Calendar week", y = "CFR [95% CI]", subtitle = "Case Fatality Ratio [95% CI] Among Inpatients") + # change visuals of dates and remove legend title epicurve_theme
no_x <- theme(axis.text.x = element_blank(), axis.title.x = element_blank()) ## use patchwork to stack the three plots on top of one another (basic_curve + no_x) / (ar_plot + no_x) / cfr_plot
Inpatient admissions by case definition and week
linelist_cleaned %>% filter(patient_facility_type == "Inpatient") %>% tbl_cross( row = epiweek, col = case_def, percent = "column", label = list(epiweek = "Calendar Week", case_def = "Case definition") ) %>% ## make variable names bold bold_labels() %>% # change to flextable format as_flex_table() %>% # make header text bold (using {flextable}) bold(part = "header") %>% # make your table fit to the maximum width of the word document set_table_properties(layout = "autofit")
Inpatient discharges by reason for exit and week
linelist_cleaned %>% filter(patient_facility_type == "Inpatient") %>% tbl_cross( row = epiweek, col = exit_status2, percent = "column", label = list(epiweek = "Calendar Week", exit_status2 = "Outcome") ) %>% ## make variable names bold bold_labels() %>% # change to flextable format as_flex_table() %>% # make header text bold (using {flextable}) bold(part = "header") %>% # make your table fit to the maximum width of the word document set_table_properties(layout = "autofit")
<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This section focuses on the where of the outbreak: what area is affected, how many villages, and so on.
There is code to include maps based on distribution of cases. You must have a shapefile to create this map. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
Cases by region and facility type
linelist_cleaned %>% tbl_cross( row = patient_origin, col = patient_facility_type, percent = "column", label = list(patient_origin = "Region", patient_facility_type = "Facility type") ) %>% ## make variable names bold bold_labels() %>% # change to flextable format as_flex_table() %>% # make header text bold (using {flextable}) bold(part = "header") %>% # make your table fit to the maximum width of the word document set_table_properties(layout = "autofit")
Inpatient discharges by reason for exit and region
linelist_cleaned %>% filter(patient_facility_type == "Inpatient") %>% tbl_cross( row = patient_origin, col = exit_status2, percent = "column", label = list(patient_origin = "Region", exit_status2 = "Outcome") ) %>% ## make variable names bold bold_labels() %>% # change to flextable format as_flex_table() %>% # make header text bold (using {flextable}) bold(part = "header") %>% # make your table fit to the maximum width of the word document set_table_properties(layout = "autofit")
Attack rate per 10,000 population by region
# count cases by region cases <- linelist_cleaned %>% count(patient_origin) %>% # add in population data left_join(population_data_region, by = "patient_origin") # calculate attack rate for region ar_region <- attack_rate(cases$n, cases$population, multiplier = 10000) %>% # add the region column to table bind_cols(select(cases, patient_origin), .) %>% rename("Region" = patient_origin, "Cases (n)" = cases, "Population" = population, "AR (per 10,000)" = ar, "Lower 95%CI" = lower, "Upper 95%CI" = upper) ar_region %>% merge_ci_df(e = 4) %>% # merge lower and upper CI in to one column rename("95%CI" = ci) %>% # rename single 95%CI column # produce styled output table with auto-adjusted column widths with {flextable} qflextable() %>% # make header text bold (using {flextable}) bold(part = "header") %>% # make your table fit to the maximum width of the word document set_table_properties(layout = "autofit") %>% ## set to only show 1 decimal place colformat_double(digits = 1)
ggplot(ar_region, aes(x = reorder(Region, `AR (per 10,000)`), y = `AR (per 10,000)`)) + # plot as bars (identity = as is) geom_bar(stat = "identity", col = "black", fill = "red") + # add CIs geom_errorbar(aes(ymin = `Lower 95%CI`, ymax = `Upper 95%CI`), width = 0.2) + # set origin for axes scale_y_continuous(expand = c(0,0)) + # add labels to axes and below chart labs(x = "Region", y = "AR (per 10,000)", captions = str_glue("Source: MSF data from {reporting_week}")) + epicurve_theme
Mortality rate per 10,000 population by region
deaths <- group_by(linelist_cleaned, patient_origin) %>% filter(str_detect(exit_status, "Dead")) %>% summarise(deaths = n()) %>% # count deaths by region left_join(population_data_region, by = "patient_origin") # merge population data mortality_rate(deaths$deaths, deaths$population, multiplier = 10000) %>% # add the region column to table bind_cols(select(deaths, patient_origin), .) %>% merge_ci_df(e = 4) %>% # merge the lower and upper CI into one column rename("Region" = patient_origin, "Deaths" = deaths, "Population" = population, "Mortality (per 10,000)" = `mortality per 10 000`, "95%CI" = ci) %>% # produce styled output table with auto-adjusted column widths with {flextable} qflextable() %>% # make header text bold (using {flextable}) bold(part = "header") %>% # make your table fit to the maximum width of the word document set_table_properties(layout = "autofit") %>% ## set to only show 1 decimal place colformat_double(digits = 1)
<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /// read_shapefiles \\
To create maps, you need to have a shapefile of the area. Often, the MSF GIS unit can provide shapefiles.
Your shapefile can be a polygon or points. Polygons do not need to be contiguous.
The names of the polygons or points MUST match the names in your linelist.
Your coordinate reference system needs to be WGS84. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
## fake map data - DELETE if you are using real data map <- gen_polygon(regions = unique(linelist_cleaned$patient_origin)) ## read in shapefile # map <- read_sf(here::here("mapfolder", "region.shp")) ## check the coordinate reference system (CRS) # st_crs(map) ## if CRS not WGS84, reset it # map <- st_set_crs(map, value = 4326) # Sets to WGS84
# ggplot() + # # load shapefile # geom_sf(data = map, col = "grey50") + # # add in case coordinates # geom_point(mapping = aes(x = lat, y = lon), # data = linelist_cleaned)
## Create a categories of ARs by region ---------------------------------------- # define maximum max_ar <- max(ar_region$`Upper 95%CI`, na.rm = TRUE) # define your highest AR # create groups - one group will be 0 only, then up to 4 more groups evenly # divided to maximum breakers <- as.integer(c( # include zero as a standalone group 0, # 1 to 4 divisions, snapping the boundaries to the nearest 100 find_breaks(n = max_ar, breaks = 4, snap = 100) )) ## create a categorical variable using the age_categories function ## (we aren't using ages - but it functions the same way!) ar_map <- mutate(ar_region, categories = age_categories(`AR (per 10,000)`, breakers = breakers)) ## Join table to shapefile ----------------------------------------------------- mapsub <- left_join(map, ar_map, by = c("name" = "Region")) ## Plot cases or AR by geography ----------------------------------------------- ## you could also fill by cases using `Cases (n)` in the fill option instead of `AR (per 10,000)` ggplot() + # shapefile as polygon geom_sf(data = mapsub, aes(fill = categories), col = "grey50") + # needed to avoid gridlines being drawn coord_sf(datum = NA) + # add a scalebar annotation_scale() + # color the scale to be perceptually uniform # drop FALSE keeps all levels # name allows you to change the legend title scale_fill_brewer(drop = FALSE, palette = "OrRd", name = "AR (per 10,000)") + # label polygons geom_sf_text(data = mapsub, aes(label = name), colour = "grey50") + # remove coordinates and axes theme_void()
## Prepare data --------------------------------------------------------------- # change region variable to a factor so that zero counts can be included linelist_cleaned$patient_origin <- as.factor(linelist_cleaned$patient_origin) # case counts cases <- linelist_cleaned %>% group_by(epiweek) %>% count(patient_origin, .drop = FALSE) %>% # cases for each week by region left_join(population_data_region, by = "patient_origin") # merge population data # attack rate for region ar <- attack_rate(cases$n, cases$population, multiplier = 10000) %>% # add the region column to table bind_cols(select(cases, epiweek, patient_origin), .) %>% rename("Region" = patient_origin, "Cases (n)" = cases, "Population" = population, "AR (per 10,000)" = ar, "Lower 95%CI" = lower, "Upper 95%CI" = upper) ## define the last four weeks available in your dataset which_weeks <- tail(unique(cases$epiweek), n = 4) ## only keep the last four weeks of data ar <- ar %>% filter(epiweek %in% which_weeks) # define the maximum number of cases / max AR for the color palette max_cases <- max(cases$n, na.rm = TRUE) max_ar <- max(ar$`Upper 95%CI`, na.rm = TRUE) # define breaks for standardising color palette breakers <- as.integer(c(0, # include zero as a standalone group find_breaks(max_ar, breaks = 4, snap = 100) # four breaks rounded to nearest 100 )) # add a categorical variable for AR # (replace with `Cases (n)` if you want to map cases) ar <- mutate(ar, categories = age_categories(`AR (per 10,000)`, breakers = breakers)) ## define the last four weeks available in your dataset which_weeks <- tail(unique(cases$epiweek), n = 4) # go through each epiweek, fiter and plot the data purrr::walk(.x = which_weeks, .f = ~{ this_ar <- filter(ar, epiweek == yearweek(.x)) ## map mapsub <- left_join(map, this_ar, by = c("name" = "Region")) ## choropleth map_plot <- ggplot() + ## shapefile as polygon geom_sf(data = mapsub, aes(fill = categories), col = "grey50") + ## needed to avoid gridlines being drawn coord_sf(datum = NA) + ## add a scalebar annotation_scale() + ## color the scale to be perceptually uniform (keep levels) scale_fill_brewer(drop = FALSE, palette = "OrRd", name = "AR (per 10,000)") + ## remove coordinates and axes theme_void() ## plot with the region on the x axis sorted by increasing ar ## ar value on the y axis barplot <- ggplot(this_ar, aes(x = reorder(Region, `AR (per 10,000)`), y = `AR (per 10,000)`)) + ## plot as bars (identity = as is) geom_bar(stat = "identity", col = "black", fill = "red") + ## add CIs geom_errorbar(aes(ymin = `Lower 95%CI`, ymax = `Upper 95%CI`), width = 0.2) + ## set origin for axes scale_y_continuous(expand = c(0, 0), limits = c(0, max_ar)) + ## add labels to axes and below chart labs(x = "Region", y = "AR (per 10,000)") + epicurve_theme ## combine the barplot and map plot into one with patchwork and add a title ## in the top-left corner and a caption in the bottom-right print( barplot + map_plot + plot_annotation( title = str_glue("Epiweek: {.x}"), caption = str_glue("Source: MSF data from {reporting_week}") ) ) })
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.