Introduction to this template

This template can be used to create an automated EBS situation report and for routine monitoring. It can report on community and/or health facility event-based surveillance, and equally on community and/or health facility indicator-based surveillance, according to how surveillance is set up in the project.

For a more detailed explanation of this template, please visit https://r4epis.netlify.app/. Feedback and suggestions are welcome at the GitHub issues page. Text within <!-- --> will not show in your final document; these comments are used to explain the template, and you can delete them if you want.

## hide all code chunks in the output, but show errors
knitr::opts_chunk$set(echo = FALSE,       # hide all code chunks in output
                      error = TRUE,       # show errors if they appear, but don't stop
                      fig.width = 6*1.25, # Figure width
                      fig.height = 6      # Figure height
                     )

## set default NA to display as "-" in table output
options(knitr.kable.NA = "-")


# Ensures the package "pacman" is installed
if (!require("pacman")) {
     install.packages("pacman") }

## Install and load required packages for this template
pacman::p_load(
  knitr,         # create output docs
  here,          # find your files
  rio,           # read in data
  forcats,       # clean/shape data
  lubridate,     # handle dates
  dplyr,         # clean/shape data
  tidyr,         # clean/shape data
  stringr,       # clean text
  ggplot2,       # create plots and charts
  parsedate,     # guessing dates
  sitrep,        # MSF field epi functions
  janitor,       # clean data
  gtsummary,     # tables
  flextable,     # tables
  tsibble,       # epiweeks
  OpenStreetMap, # to add OSM basemap in ggplot map
  sf,            # encode spatial vector data
  ggspatial,     # plot maps
  purrr         # iteration
)
## set current week 
reporting_week <- tsibble::yearweek("2018-W10")


## set 4 weeks prior to reporting week
past4weeks <- reporting_week - 3
## generates MSF standard dictionary for KoBo
ebs_data_dict <- msf_dict_survey("ebs")


## generates a fake dataset for use as an example in this template
ebs_raw <- gen_data(dictionary = "ebs",
                    varnames   = "name",
                    numcases   = 400)
### Read in data ---------------------------------------------------------------

# Uncomment the below when you want to import your data

## Signal collection form
## Excel file ------------------------------------------------------------------
## For a specific sheet, use "which"
# signal_collect_raw <- rio::import(here::here("data", "signal_collection.xlsx"), which = "Sheet1") %>%
#               ## convert all column names to lower case
#               janitor::clean_names()


## Signal verification form
## Excel file ------------------------------------------------------------------
## For a specific sheet, use "which"
# signal_verif_raw <- rio::import(here::here("data", "signal_verification.xlsx"), which = "Sheet1") %>%
#               ## convert all column names to lower case
#               janitor::clean_names()



## Risk assessment form
## Excel file ------------------------------------------------------------------
## For a specific sheet, use "which"
# risk_assess_raw <- rio::import(here::here("data", "risk_assessment.xlsx"), which = "Sheet1") %>%
#               ## convert all column names to lower case
#               janitor::clean_names()


## Response form
## Excel file ------------------------------------------------------------------
## For a specific sheet, use "which"
# response_raw <- rio::import(here::here("data", "response.xlsx"), which = "Sheet1") %>%
#               ## convert all column names to lower case
#               janitor::clean_names()
## join the signal collection and verification forms
# ebs_raw <- left_join(signal_collect_raw, signal_verif_raw, by = c("signal_id" = "c_signal_id")) %>% 
#               ## join the combined forms with the risk assessment form
#               left_join(risk_assess_raw, by = c("signal_id" = "c_signal_id")) %>% 
#               ## join with the response form
#               left_join(response_raw, by = c("signal_id" = "c_signal_id"))
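
## OPTIONAL sanity check before joining (a sketch, assuming the same key
## columns as the joins above): count signals with no matching verification form
# signal_collect_raw %>%
#   anti_join(signal_verif_raw, by = c("signal_id" = "c_signal_id")) %>%
#   nrow()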
## MSF ebs Dictionary ----------------------------------------------------------
## get MSF standard dictionary for ebs
ebs_dict <- msf_dict_survey("ebs", compact = FALSE)

## look at the standard dictionary by uncommenting the line below
# View(ebs_dict) 



## Clean column names ----------------------------------------------------------
## make a copy of your original dataset and name it ebs_cleaned
ebs_cleaned <- ebs_raw
## Excel file ------------------------------------------------------------------
## to read in a specific sheet use "which"
# ebs_raw <- rio::import(here::here("data", "ebs.xlsx"), which = "Sheet1")
## MSF ebs Dictionary ----------------------------------------------------------
## get MSF standard dictionary for ebs (same call as above)
# ebs_dict <- msf_dict_survey("ebs", compact = FALSE)

## look at the standard dictionary by uncommenting the line below
# View(ebs_dict) 

## You will need to recode your variables to match the data dictionary. This is
## addressed below.


## make a copy of your original dataset and name it ebs_cleaned
# ebs_cleaned <- ebs_raw



## Match column names ---------------------------------------------------------
## This step helps you match your variables to the standard variables.
## This step will require some patience. Courage!

## Use the function msf_dict_rename_helper() to create a template based on the
## ebs dictionary. This will copy a rename command like the one below to your
## clipboard.

# msf_dict_rename_helper("ebs")

## Paste the result below and add your column names to the matching variables.
## Be careful! You still need to be aware of what each variable means and what
## values it takes.
## If there are any variables that are in the MSF dictionary that are not in
## your data set, then you should comment them out, but be aware that some
## analyses may not run because of this. 



## Here is an EXAMPLE of changing a few specific names with the rename()
## function. In this example, we have the columns "gender" and "age" that we
## want to rename as "sex" and "age_years".
## The formula for this is rename(data, NEW_NAME = OLD_NAME).

# ebs_cleaned <- rename(ebs_cleaned, 
#                        sex           = gender, # TEXT
#                        age_years     = age     # INTEGER_POSITIVE
# )
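
## OPTIONAL check after renaming (a sketch using the "name" column of the
## dictionary generated above): list dictionary variables still missing from
## your data, and data columns that are not in the dictionary
# setdiff(ebs_dict$name, names(ebs_cleaned))
# setdiff(names(ebs_cleaned), ebs_dict$name)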
## view the first ten rows of data
head(ebs_cleaned, n = 10)

## view your whole dataset interactively (in an excel style format)
View(ebs_cleaned)

## overview of variable types and contents
str(ebs_cleaned)

## get summary: 
## mean, median and max values of numeric variables
## counts for categorical variables
## also gives number of NAs
summary(ebs_cleaned)

## view unique values contained in variables 
## you can run this for any column -- just replace the column name
unique(ebs_cleaned$signal_type) 

## use the dfSummary function in combination with view
## note that view is not capitalised with this package
# summarytools::dfSummary(ebs_cleaned) %>%
#   summarytools::view()
## Drop unused rows  -----------------------------------------------------------
## This step removes blank rows that don't have a date of signal
ebs_cleaned <- ebs_cleaned %>% 
  filter(!is.na(date_signal)) 


## Drop columns ----------------------------------------------------------------
## OPTIONAL: This step shows you how you can remove certain variables.

# ebs_cleaned <- ebs_cleaned %>%
#   select(-c(var1, var2))
## KoBo standard data ---------------------------------------------------------
## If you got your data from KoBo, use this portion of the code.
## If not, comment this section out and use the below.

## make sure all date variables are formatted as dates 
DATEVARS <- ebs_dict %>% 
  filter(type == "date") %>%
  filter(name %in% names(ebs_cleaned)) %>% 
  ## filter to match the column names of your data
  pull(name) 

## find if there are date variables which are completely empty
## (otherwise the parsedate package does not work)
EMPTY_DATEVARS <- purrr::map(DATEVARS, ~all(
  is.na(ebs_cleaned[[.x]])
  )) %>% 
  unlist() %>% 
  which()


## remove the names of variables which are completely empty
## (guard needed: subsetting with an empty index would drop all variables)
if (length(EMPTY_DATEVARS) > 0) {
  DATEVARS <- DATEVARS[-EMPTY_DATEVARS]
}

## change to dates 
ebs_cleaned <- ebs_cleaned %>%
    mutate(
      across(.cols = all_of(DATEVARS),
           .fns = ~ymd(parsedate::parse_date(.x))))



## Non-KoBo data --------------------------------------------------------------
## Use this section if you did not have KoBo data. 

## use the parse_dates() function to make a first pass at date variables.
# ebs_cleaned <- ebs_cleaned %>%
#   mutate(
#     across(.cols = matches("date|Date"),
#            .fns  = ~ymd(parsedate::parse_date(.x))))

## once you have run parse_date(), take a look at your date variables.
## here is an example:
# table(ebs_cleaned$date_signal)


## Some dates will be unrealistic or wrong.
## Here is an example of how to manually fix dates. 
## Look at your data and edit as needed.
# ebs_cleaned <- mutate(ebs_cleaned,
#                            date_signal = case_when(
#                              date_signal < as.Date("2017-11-01")  ~ as.Date(NA),
#                              date_signal == as.Date("2081-01-01") ~ as.Date("2018-01-01"),
#                              TRUE                                   ~ date_signal
#                            ))
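
## OPTIONAL: after parsing, check the range of each date variable for
## outliers (a minimal check; swap in your own date columns)
# range(ebs_cleaned$date_signal, na.rm = TRUE)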



## Create epiweek variable -----------------------------------------------------
## This step creates epiweek and epimonth variables from the date of the signal.

ebs_cleaned <- ebs_cleaned %>% 
  mutate(
    ## create an epiweek variable 
    epiweek = tsibble::yearweek(
      date_signal,
      week_start = 1),  # 1 is Monday start; use 7 for Sundays or 5 for Fridays
    ## create a date version of epiweek 
    epiweek_date = as.Date(epiweek)) %>% 
  mutate(
    #create an epimonth variable
    epimonth = tsibble::yearmonth(
      date_signal)
  )
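
## For reference, yearweek() assigns dates to weeks starting on the chosen day;
## e.g. a signal dated 2018-03-05 (a Monday) falls in week 2018 W10:
# tsibble::yearweek(as.Date("2018-03-05"), week_start = 1)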
## Numeric variables -----------------------------------------------------------
## This step creates numeric variables related to timeliness of ebs activities
## You can adapt this step to create other calculated variables

## create timeliness-related variables
ebs_cleaned <- ebs_cleaned %>%
  ## Time in days to detect the signal
  mutate(detection_time = as.numeric(date_signal - date_event_start)) %>% 
  ## Time in days to verify a signal
  mutate(verification_time = as.numeric(date_verification - date_signal)) %>% 
  ## Time in days to risk assess a signal after verification
  mutate(assessment_time = as.numeric(date_assessment - date_verification)) %>% 
  ## Time in days to response from detection
  mutate(response_detect_time = as.numeric(date_response_started - date_signal)) %>% 
  ## Time in days to response after risk assessment
  mutate(response_assess_time = as.numeric(date_response_started - date_assessment)) %>% 
  ## Identify when verification done within 24 hours of signal being detected
  mutate(verif_24 = case_when(
    as.numeric(date_verification - date_signal) <= 1 ~ "Verified <= 24 hours",
    as.numeric(date_verification - date_signal) > 1 ~ "Verified > 24 hours",
    TRUE ~ NA_character_
  )) %>% 
  ## Identify when risk assessment done within 48 hours of signal being detected
  mutate(assess_48 = case_when(
    as.numeric(date_assessment - date_signal) <= 2 ~ "Risk assessed <= 48 hours",
    as.numeric(date_assessment - date_signal) > 2 ~  "Risk assessed > 48 hours",
    TRUE ~ NA_character_
  ))
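
## OPTIONAL sanity check (a sketch): negative delays usually point to
## date-entry errors worth reviewing in the cleaning steps above
# ebs_cleaned %>%
#   summarise(neg_detection    = sum(detection_time < 0, na.rm = TRUE),
#             neg_verification = sum(verification_time < 0, na.rm = TRUE),
#             neg_assessment   = sum(assessment_time < 0, na.rm = TRUE))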


## Factor (categorical) variables ----------------------------------------------

## This step creates a variable from another character/factor variable
## You can adapt this step to create other calculated variables

## The variable ALERT will be binary (TRUE/FALSE) -- if an alert was declared 
## or not
ebs_cleaned$ALERT <- str_detect(ebs_cleaned$alert_status, "1")
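
## OPTIONAL: cross-tabulate the derived flag against the raw variable to
## confirm the recode behaved as expected
# table(ebs_cleaned$alert_status, ebs_cleaned$ALERT, useNA = "ifany")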


## Recode character variables -------------------------------------------------
## This step shows how to fix misspellings in the geographic region variable.
## Ideally, you want these values to match your shapefile and population data!

# ebs_cleaned <- ebs_cleaned %>%
#   mutate(location_signal = case_when(
#     location_signal == "Valliages D"       ~ "Village D",
#     location_signal == "VillageD"          ~ "Village D",
#     location_signal == "Town C"            ~ "Village C",
#     TRUE ~ as.character(location_signal)
#   ))
# return the last day of the reporting week
# obs_end   <- as.Date(reporting_week) + 6
# 
# # filter out cases after end of reporting week
# ebs_cleaned <- ebs_cleaned %>% 
#   filter(date_signal <= obs_end)
## option 1: only keep the first occurrence of duplicate case 

ebs_cleaned <- ebs_cleaned %>% 
  ## find duplicates based on signal_type, date_signal, initials 
  ## only keep the first occurrence 
  distinct(signal_type, date_signal, initials, .keep_all = TRUE)


# ## option 2: create flagging variables for duplicates (then use to browse)
# 
# ebs_cleaned <- ebs_cleaned %>% 
#   ## choose which variables to use for finding unique rows 
#   group_by(signal_type, date_signal, initials) %>% 
#   mutate(
#     ## get the number of times duplicate occurs 
#     num_dupes = n(), 
#     duped = if_else(num_dupes > 1 , TRUE, FALSE)
#   )
# 
# ## browse duplicates based on flagging variables 
# ebs_cleaned %>% 
#   ## only keep rows that are duplicated
#   filter(duped) %>% 
#   ## arrange by variables of interest 
#   arrange(signal_type, date_signal, initials) %>% 
#   View()
# 
# ## filter duplicates to only keep the row with the earlier entry 
# ebs_cleaned %>% 
#   ## choose which variables to use for finding unique rows 
#   group_by(signal_type, date_signal, initials) %>% 
#   ## sort to have the earliest date by person first
#   arrange(date_signal) %>% 
#   ## only keep the earliest row 
#   slice(1)
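# 
# ## option 3: browse duplicates with janitor::get_dupes() (a sketch; run it
# ## before option 1's distinct() so the duplicate rows are still present)
# ebs_cleaned %>% 
#   janitor::get_dupes(signal_type, date_signal, initials)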
# rio::export(ebs_cleaned, here::here("data", str_glue("ebs_cleaned_{Sys.Date()}.xlsx")))

Routine EBS Sitrep

Recommendations

Overview for r reporting_week

In r reporting_week, there were r nrow(filter(ebs_cleaned, epiweek == reporting_week)) signals received, of which r fmt_count(filter(ebs_cleaned, epiweek == reporting_week), event_status == "1") were verified. There were r fmt_count(filter(ebs_cleaned, epiweek == reporting_week), is.na(date_assessment) == FALSE) risk assessments completed resulting in r fmt_count(filter(ebs_cleaned, epiweek == reporting_week), ALERT) alerts and r fmt_count(filter(ebs_cleaned, epiweek == reporting_week), response_undertaken == "y") responses.
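
To keep the inline code above readable, one option (a sketch; this_week is an illustrative name, not part of the template) is to subset the reporting week once in a chunk and reuse it:

## subset the reporting week once and reuse it in inline code
# this_week <- filter(ebs_cleaned, epiweek == reporting_week)
## e.g. nrow(this_week) and fmt_count(this_week, ALERT)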

Overview of performance in past 4 weeks

ebs_cleaned %>% 
  ## filter to only include the last 4 weeks data
  filter(epiweek >= past4weeks & epiweek <= reporting_week) %>% 
  ## Group by epiweek
  group_by(epiweek) %>% 
  ## Count the number of total signals, verified signals, risk assessments,
  ## alerts and responses
  summarise(total_signals = n(),
            verified_signals = sum(event_status == "1", na.rm = T),
            risk_assessments = sum(is.na(date_assessment) == F,  na.rm = T),
            alerts = sum(ALERT, na.rm  = T),
            responses = sum(response_undertaken == "y", na.rm = T)) %>% 
  ## convert epiweek to a factor
  mutate(epiweek = factor(epiweek)) %>% 
  # mutate(epiweek = factor(paste0(year(epiweek), " W", sprintf("%02d", week(epiweek))))) %>%
  ## Add total value
  janitor::adorn_totals("row") %>% 
  ## Rename variables to make a cleaner table
  dplyr::rename("Epiweek" = epiweek, 
                "Total signals" = total_signals,
                "Verified signals" = verified_signals,
                "Risk assessments" = risk_assessments,
                "Alerts" = alerts,
                "Responses" = responses) %>% 
  ## make a flex table
  flextable() %>% 
  ## fit the table automatically
  autofit()

Time

Overview of signal processes by type in r reporting_week
ebs_cleaned %>% 
  ## Filter to only include the reporting week
  filter(epiweek == reporting_week) %>% 
  ## Group by signal type
  group_by(signal_type) %>% 
  ## Count the number of total signals, verified signals, risk assessments,
  ## alerts and responses
  summarise(total_signals = n(),
            verified_signals = sum(event_status == "1", na.rm = T),
            risk_assessments = sum(is.na(date_assessment) == F,  na.rm = T),
            alerts = sum(ALERT, na.rm  = T),
            responses = sum(response_undertaken == "y", na.rm = T)) %>% 
  dplyr::rename("Signal type" = signal_type, "
                Total signals" = total_signals,
                "Verified signals" = verified_signals,
                "Risk assessments" = risk_assessments,
                "Alerts" = alerts,
                "Responses" = responses) %>% 
  janitor::adorn_totals("row") %>% 
  ## convert to flextable
  flextable::flextable() %>% 
  ## autofit table
  flextable::autofit()

Place

## fake map data - DELETE if you are using real data
map <- gen_polygon(regions = unique(ebs_cleaned$location_signal))


## Clean up map
map <- map %>% 
  ## remove the NAs
  filter(is.na(name) == F ) %>% 
  ## rename name variables to match ebs_cleaned
  mutate(name = case_when(
         name == "1" ~ "location_1",
         name == "2" ~ "location_2",
         name == "3" ~ "location_3",
         name == "4" ~ "location_4",
         name == "5" ~ "location_5",
         name == "6" ~ "location_6",
         TRUE ~ NA_character_
         )
)

## read in shapefile
# map <- read_sf(here::here("mapfolder", "region.shp"))

## check the coordinate reference system (CRS)
# st_crs(map)

## if CRS not WGS84, reset it
# map <- st_set_crs(map, value = 4326) # Sets to WGS84
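
## note: st_set_crs() only relabels the CRS; if the coordinates are actually
## stored in a different CRS, reproject them instead
# map <- st_transform(map, crs = 4326)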
Verified signals by type by location in r reporting_week
## Count number of verified signals in last week  ----------------------------------------
verif_sig <- ebs_cleaned %>% 
    ## specify the time period of interest
    filter(epiweek == reporting_week ) %>% 
    ## Group by location and signal type
    group_by(location_signal, signal_type) %>% 
    ## Count verified signals by location and type
    summarise(verified_signals = sum(event_status == "1", na.rm = T)) %>% 
    ## Only keep rows where there were verified signals
    filter(verified_signals > 0)

## left join with polygon
map_verif_sig <- left_join(map, verif_sig, by = c("name" = "location_signal")) %>% 
  ## filter any locations with no verified signals
  filter(is.na(verified_signals) == F)




## Plot verified signals by location  -----------------------------------------------

ggplot() +
  geom_sf(data = map, fill = "white") +
  # shapefile as polygon
  geom_sf(data = map_verif_sig, aes(fill = as.factor(verified_signals)))+
  # needed to avoid gridlines being drawn
  coord_sf(datum = NA) + 
  # add a scalebar
  annotation_scale() +
  # color the scale to be perceptually uniform 
  # drop FALSE keeps all levels 
  # name allows you to change the legend title 
  scale_fill_brewer(drop = FALSE, palette = "OrRd", 
                    name = "No. verified signals") + 
  # label polygons
  geom_sf_text(data = map_verif_sig, aes(label = name), size = 2.5,
               colour = "black") + 
  # remove coordinates and axes
  theme_void() +
  facet_wrap("signal_type") + 
  labs(caption = str_glue("Source: MSF data from {reporting_week}"),
       title = str_glue("Number of verified signals in MSF-OCA catchment areas by signal type, {reporting_week}"))
Alerts by type by location in r reporting_week
## Count number of alerts in reporting week  ----------------------------------------
alerts <- ebs_cleaned %>% 
    ## specify the time period of interest
    filter(epiweek == reporting_week ) %>% 
    ## Group by location and signal type
    group_by(location_signal, signal_type) %>% 
    ## Count alerts by location and type
    summarise(alerts = sum(ALERT, na.rm = T)) %>% 
   ## Remove rows where there were 0 alerts
    filter(alerts > 0)

## left join with polygon
map_alerts <- left_join(map, alerts, by = c("name" = "location_signal")) %>% 
  ## filter locations where there were no alerts
  filter(is.na(alerts) == F)




## Plot alerts by location -----------------------------------------------

ggplot() +
  ## Add the map layer as an empty white shape
  geom_sf(data = map, fill = "white") +
  # shapefile as polygon
  geom_sf(data = map_alerts, aes(fill = as.factor(alerts)))+
  # needed to avoid gridlines being drawn
  coord_sf(datum = NA) + 
  # add a scalebar
  annotation_scale() +
  # color the scale to be perceptually uniform 
  # drop FALSE keeps all levels 
  # name allows you to change the legend title 
  scale_fill_brewer(drop = FALSE, palette = "OrRd", 
                    name = "No. alerts") + 
  # label polygons
  geom_sf_text(data = map_alerts, aes(label = name), size = 2.5,
               colour = "black") + 
  # remove coordinates and axes
  theme_void() +
  facet_wrap("signal_type") + 
  labs(caption = str_glue("Source: MSF data from {reporting_week}"),
       title = str_glue("Number of alerts in MSF-OCA catchment areas by type, {reporting_week}"))
Response by location in r reporting_week
## Count number of alerts in reporting week  ----------------------------------------
responses <- ebs_cleaned %>% 
    ## specify the time period of interest
    filter(epiweek == reporting_week ) %>% 
    ## Group by location
    group_by(location_signal) %>% 
    ## Count responses by location
    summarise(response = sum(response_undertaken == "y", na.rm = T)) %>% 
   ## Remove rows where there were 0 alerts
    filter(response > 0)

## left join with polygon
map_response <- left_join(map, responses, by = c("name" = "location_signal")) %>% 
  ## filter locations where there were no responses
  filter(is.na(response) == F)




## Plot responses by location --------------------------------------------

ggplot() +
  ## Add the map layer as an empty white shape
  geom_sf(data = map, fill = "white") +
  # shapefile as polygon
  geom_sf(data = map_response, aes(fill = as.factor(response)))+
  # needed to avoid gridlines being drawn
  coord_sf(datum = NA) + 
  # add a scalebar
  annotation_scale() +
  # color the scale to be perceptually uniform 
  # drop FALSE keeps all levels 
  # name allows you to change the legend title 
  scale_fill_brewer(drop = FALSE, palette = "OrRd", 
                    name = "No. responses") + 
  # label polygons
  geom_sf_text(data = map_response, aes(label = name), size = 3,
               colour = "black") + 
  # remove coordinates and axes
  theme_void() +
  # facet_wrap("signal_type") + 
  labs(caption = str_glue("Source: MSF data from {reporting_week}"),
       title = str_glue("Number of responses in MSF-OCA catchment areas, {reporting_week}"))

\newpage \pagebreak

Monitoring and Evaluation of surveillance attributes

## Identify the start week for monitoring/evaluation period
# start_week <- tsibble::yearweek("2020-W04")

Characteristics of surveillance data collected

Overview of EBS performance indicators - full dataset
ebs_cleaned %>% 
  ## Group by epiweek
  group_by(epiweek) %>% 
  ## Count the number of total signals, verified signals, risk assessments,
  ## alerts and responses
  summarise(total_signals = n(),
            verified_signals = sum(event_status == "1", na.rm = T),
            risk_assessments = sum(is.na(date_assessment) == F,  na.rm = T),
            alerts = sum(ALERT, na.rm  = T),
            responses = sum(response_undertaken == "y", na.rm = T)) %>% 
  ## convert epiweek to a factor
  mutate(epiweek = factor(epiweek)) %>%
  ## Add total value
  janitor::adorn_totals("row") %>% 
  ## Rename variables to make a cleaner table
  dplyr::rename("Epiweek" = epiweek, 
                "Total signals" = total_signals,
                "Verified signals" = verified_signals,
                "Risk assessments" = risk_assessments,
                "Alerts" = alerts,
                "Responses" = responses) %>% 
  ## convert to flextable
  flextable::flextable() %>% 
  ## autofit table
  flextable::autofit()

Usefulness

Number of EBS alerts that resulted in public health action

Across the period of evaluation, there were r nrow(filter(ebs_cleaned, ALERT)) alerts of which r nrow(filter(ebs_cleaned, ALERT, response_undertaken == "y")) resulted in public health action.

Responses by type by location - full dataset

ebs_cleaned %>% 
  ## Filter to only include signals where a response was undertaken
  filter(response_undertaken == "y") %>% 
  ## Select the variables of interest - signal type and location of signal
  select(signal_type, location_signal) %>% 
  ## summarise data by signal type
  tbl_summary(by = "signal_type") %>% 
  ## convert to flextable
  gtsummary::as_flex_table()

Timeliness of signal detection, verification, risk assessment and response - full dataset

The table below shows the median delay in days between: the start of an event and detection of a signal; detection of a signal and verification; verification and risk assessment; detection and response; and risk assessment and response.

ebs_cleaned %>% 
  ## select timeliness-related variables
  select(detection_time, verification_time, assessment_time, 
         response_detect_time, response_assess_time) %>% 
  ## summarise each variable (median and IQR by default)
  tbl_summary(missing = "no",
              type = (all_categorical() ~ "continuous"),
              label = list(detection_time ~ "Time to detect signal" ,
                verification_time ~ "Time to verify signal",
                assessment_time ~ "Time to risk assess verified signal",
                response_detect_time ~ "Time from detection to response",
                response_assess_time ~ "Time from risk assessment to response")) %>% 
  ## convert to flextable
  gtsummary::as_flex_table()

Timeliness of signal verification and risk assessment compared to expected - full dataset

The table below shows the proportion of signals verified within 24 hours of detection and the proportion risk assessed within 48 hours of detection.

ebs_cleaned %>%
  ## select timeliness-related variables
  select(verif_24, assess_48) %>%
  ## summarise each variable (counts and percentages by default)
  tbl_summary(label = list(
    verif_24 ~ "Time to verify signal",
    assess_48 ~ "Time to conduct risk assessment"
  )) %>%
  ## convert to flextable
  gtsummary::as_flex_table()

Timeliness of signal detection, verification, risk assessment and response - by month

ebs_cleaned %>% 
  ## select timeliness-related variables
  select(detection_time, verification_time, assessment_time, 
         response_detect_time, response_assess_time, epimonth) %>% 
  ## Group by epimonth
  group_by(epimonth) %>% 
  ## summarise each variable (median and IQR by default)
  tbl_summary(missing = "no",
              type = (all_categorical() ~ "continuous"),
              label = list(detection_time ~ "Time to detect signal" ,
                verification_time ~ "Time to verify signal",
                assessment_time ~ "Time to risk assess verified signal",
                response_detect_time ~ "Time from detection to response",
                response_assess_time ~ "Time from risk assessment to response"), by = epimonth) %>% 
  ## convert to flextable
  gtsummary::as_flex_table()

Timeliness of verification and risk assessment - proportions by month

ebs_cleaned %>% 
  ## select timeliness-related variables
  select(verif_24, assess_48, epimonth) %>% 
  ## Group by epimonth
  group_by(epimonth) %>% 
  ## summarise each variable (counts and percentages by default)
   tbl_summary(label = list(
    verif_24 ~ "Time to verify signal",
    assess_48 ~ "Time to conduct risk assessment"),
    by = epimonth) %>% 
  ## convert to flextable
  gtsummary::as_flex_table()

Completeness of selected variables - full dataset

ebs_cleaned %>% 
     ## select variables of interest
     select(source_signal, date_signal, location_signal, signal_type,
         total_affected, date_verification, date_assessment) %>% 
   ## create a summary table of number and proportion of missing values
   ## iterate over each variable selected
    map_dfr(
       ## create counts and percentages (always include counts of NA)
       ## (convert to character first so non-numeric variables are not
       ## coerced to NA and wrongly counted as missing)
       ~ tabyl(as.character(.), useNA = "always")  %>% 
           ## only keep the NA counts and percentages
           filter(is.na(`as.character(.)`)), 
           ## put the variable as row name
            .id = "variable") %>% 
    ## rename columns
    select("Variable" = variable,
                "Number missing" = n,
                "% missing" = percent) %>% 
  ## convert to flextable
  flextable::flextable() %>% 
  ## autofit table
  flextable::autofit()
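
A more compact alternative (a sketch using dplyr's across(); same variables as above) returns the proportion missing per variable in a single row:

## proportion of missing values per selected variable
# ebs_cleaned %>%
#   summarise(across(c(source_signal, date_signal, location_signal, signal_type,
#                      total_affected, date_verification, date_assessment),
#                    ~ mean(is.na(.x))))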

Positive predictive value - overview full dataset

ebs_cleaned %>% 
  ## select variables to make calculations
  select(event_status, ALERT, response_undertaken) %>% 
  ## calculate proportion of signals that become events
  summarise(ppv_sig_eve = round(sum(event_status == 1, na.rm = T)/n(), digits = 2),
            ppv_sig_alerts = round(sum(ALERT, na.rm = T)/n(), 
                                   digits = 2),
            ppv_sig_response = round(sum(response_undertaken == "y",
                                         na.rm = T)/n(), 
                                     digits = 2),
            ppv_eve_alerts = round(sum(ALERT, na.rm = T)/sum(event_status == 1,
                                                       na.rm = T),
                                   digits = 2)) %>% 
  ## rename variables
  dplyr::rename("PPV signals as events" = ppv_sig_eve,
                "PPV signals as alerts" = ppv_sig_alerts,
                "PPV signals as response"  = ppv_sig_response,
                "PPV events as alerts" = ppv_eve_alerts) %>% 
  ## Make a long dataset out of the PPV calculations
  pivot_longer(everything(), names_to = "PPV", values_to = "Proportion") %>% 
  ## convert to flextable
  flextable::flextable() %>% 
  ## autofit table
  flextable::autofit()
# ebs_cleaned %>% 
#   ## select variables of interest
#   select(epiweek, event_status) %>% 
#   ## Group by epiweek
#   group_by(epiweek) %>% 
#   ## calculate ratio of events to signals
#   summarise(event_signal = round(sum(event_status == "1", na.rm = T)/n(),
#                                  digits = 2)) %>% 
#   ## Make a dot plot
#   ggplot()+
#   geom_point(aes(x = as.factor(epiweek), y = event_signal)) +
#   theme(axis.text.x = element_text(angle = 90)) +
#   labs(x = "Epi week",
#        y = "Event:signal ratio",
#        title = "Event to signal ratio by epiweek")

Positive predictive value - by signal type full dataset

ebs_cleaned %>% 
  ## select variables to make calculations
  select(event_status, ALERT, response_undertaken, signal_type) %>% 
  ## Group by signal type
  group_by(signal_type) %>% 
  ## calculate proportion of signals that become events
  summarise(ppv_sig_eve = round(sum(event_status == 1, na.rm = T)/n(), digits = 2),
            ppv_sig_alerts = round(sum(ALERT, na.rm = T)/n(), 
                                   digits = 2),
            ppv_sig_response = round(sum(response_undertaken == "y",
                                         na.rm = T)/n(), 
                                     digits = 2),
            ppv_eve_alerts = round(sum(ALERT, na.rm = T)/sum(event_status == 1,
                                                       na.rm = T),
                                   digits = 2)) %>% 
  ## rename variables
  dplyr::rename("PPV signals as events" = ppv_sig_eve,
                "PPV signals as alerts" = ppv_sig_alerts,
                "PPV signals as response"  = ppv_sig_response,
                "PPV events as alerts" = ppv_eve_alerts,
                "Signal type" = signal_type) %>% 
  ## convert to flextable
  flextable::flextable() %>% 
  ## autofit table
  flextable::autofit()

Annex

Verified signals by type past 4 weeks
ebs_cleaned %>% 
  ## Filter to only include data for the past 4 weeks and verified signals
  filter(epiweek >= past4weeks & epiweek <= reporting_week & event_status == "1") %>% 
  ## Group by epiweek
  group_by(epiweek) %>% 
  ## Count the number of verified signals by signal type
  ## You will need to replace the new variable names below to reflect the 
  ## signals included in your surveillance system
  summarise(verified_signal1 = sum(signal_type == "signal_1", na.rm = T),
            verified_signal2 = sum(signal_type == "signal_2", na.rm = T),
            verified_signal3 = sum(signal_type == "signal_3", na.rm = T),
            verified_signal4 = sum(signal_type == "signal_4", na.rm = T),
            verified_signal5 = sum(signal_type == "signal_5", na.rm = T),
            verified_signal6 = sum(signal_type == "signal_6", na.rm = T)) %>% 
  ## convert epiweek to a factor
  mutate(epiweek = as.factor(epiweek)) %>% 
  ## rename to make a cleaner table and labels can be adjusted further
  dplyr::rename("Epiweek" = epiweek) %>% 
  ## convert to flextable
  flextable::flextable() %>% 
  ## autofit table
  flextable::autofit()
Alerts by type in the past 4 weeks
ebs_cleaned %>% 
  ## Filter to only include data for the past 4 weeks and alerts
  filter(epiweek >= past4weeks & epiweek <= reporting_week & ALERT == TRUE) %>% 
  ## Group by epiweek
  group_by(epiweek) %>% 
  ## Count the number of verified signals by signal type
  ## You will need to replace the new variable names below to reflect the 
  ## signals included in your surveillance system
  summarise(alert_signal1 = sum(signal_type == "signal_1", na.rm = T),
            alert_signal2 = sum(signal_type == "signal_2", na.rm = T),
            alert_signal3 = sum(signal_type == "signal_3", na.rm = T),
            alert_signal4 = sum(signal_type == "signal_4", na.rm = T),
            alert_signal5 = sum(signal_type == "signal_5", na.rm = T),
            alert_signal6 = sum(signal_type == "signal_6", na.rm = T)) %>% 
  ## convert epiweek to a factor
  mutate(epiweek = as.factor(epiweek)) %>% 
  ## rename to make a cleaner table and labels can be adjusted further
  dplyr::rename("Epiweek" = epiweek) %>% 
  ## convert to flextable
  flextable::flextable() %>% 
  ## autofit table
  flextable::autofit()
Responses by type in the past 4 weeks
ebs_cleaned %>% 
  ## Filter to only include data for the past 4 weeks and responses
  filter(epiweek >= past4weeks & epiweek <= reporting_week & response_undertaken == "y") %>% 
  ## Group by epiweek
  group_by(epiweek) %>% 
  ## Count the number of verified signals by signal type
  ## You will need to replace the new variable names below to reflect the 
  ## signals included in your surveillance system
  summarise(response_signal1 = sum(signal_type == "signal_1", na.rm = T),
            response_signal2 = sum(signal_type == "signal_2", na.rm = T),
            response_signal3 = sum(signal_type == "signal_3", na.rm = T),
            response_signal4 = sum(signal_type == "signal_4", na.rm = T),
            response_signal5 = sum(signal_type == "signal_5", na.rm = T),
            response_signal6 = sum(signal_type == "signal_6", na.rm = T)) %>% 
  ## convert epiweek to a factor
  mutate(epiweek = as.factor(epiweek)) %>% 
  ## rename to make a cleaner table and labels can be adjusted further
  dplyr::rename("Epiweek" = epiweek) %>% 
  ## convert to flextable
  flextable::flextable() %>% 
  ## autofit table
  flextable::autofit()
Verified signals by type by location - past 4 weeks

ebs_cleaned %>% 
  ## Filter to only include data for the past 4 weeks and verified signals
  filter(epiweek >= past4weeks & epiweek <= reporting_week & event_status == "1") %>% 
  select(signal_type, location_signal) %>% 
  ## summarise data by signal type
  tbl_summary(by = "signal_type") %>% 
  ## convert to flextable
  gtsummary::as_flex_table()
Alerts by type by location in the past 4 weeks

ebs_cleaned %>% 
  ## Filter to only include data for the past 4 weeks and alerts
  filter(epiweek >= past4weeks & epiweek <= reporting_week & ALERT == TRUE) %>% 
  ## Select the variables of interest - signal type and location of signal
  select(signal_type, location_signal) %>% 
  ## summarise data by signal type
  tbl_summary(by = "signal_type") %>% 
  ## convert to flextable
  gtsummary::as_flex_table()
Responses by type by location in the past 4 weeks

ebs_cleaned %>% 
  ## Filter to only include data for the past 4 weeks and responses
  filter(epiweek >= past4weeks & epiweek <= reporting_week & response_undertaken == "y") %>% 
  ## Select the variables of interest - signal type and location of signal
  select(signal_type, location_signal) %>% 
  ## summarise data by signal type
  tbl_summary(by = "signal_type") %>% 
  ## convert to flextable
  gtsummary::as_flex_table()
Reporting source of signals - past 4 weeks
ebs_cleaned %>% 
  ## examine only the last 4 weeks
  filter(epiweek >= past4weeks & epiweek <= reporting_week) %>% 
  ## select variables of interest
  select(epiweek, reporter_signal) %>% 
  ## Summarise value by epiweek
  tbl_summary(by = epiweek, 
              label = list(reporter_signal ~ "Reporter")) %>% 
  ## convert to flextable
  gtsummary::as_flex_table()

