DERTtools: Collection of utilities to support DERT work

Documented in create_eml create_npds_notification plot_summary summarize_npds_db

#' Create NPDS notification email sent to state and poison center
#'
#' \code{create_npds_notification()} is used to create the notification email
#' that CDC send to the state health department epidemiologist and poison
#' control center contact persons when an incident of pubic health significance
#' is confirmed in the NPDS.
#'
#' @param db list; object generated by \code{read_npds_notification_db}.
#' @param id numeric; integer identifying the row number for which you want to
#'   create the notification email.
#' @param final logical; TRUE if the email to be created should be final and
#'   FALSE if the email to be created should be draft (not final).
#' @param save character; filename of the output file. Default is NULL. If NULL
#'   a file is not created.
#'
#' @return If \code{save = NULL} then the function will return a character
#'   vector with the content to generate an eml (email) file. If \code{save =
#'   "file.eml"} then it will create a file named \code{file.eml} with the
#'   notification email.
#'
#' @examples
#' #
#' create_npds_notification("tests/manual_tests/NPDS_notification_database.xlsx",
#' 1, final = FALSE)
#'
#' @keywords npds_notifications
#'
#' @export
create_npds_notification <- function(db, id, final, save = NULL) {

  # Check db format
  #check_npds_notification_db_fmt(con)

  # Get input parameters
  input <- get_input(db, id)

  # Select state epi and PCC contact dataframes
  epi_pcc <- select_contacts(db, input)

  # Append cdc_team sheet
  contacts <- c(epi_pcc, db["cdc_team"])

  # Generate to, cc, and bcc based on the type of email (i.e., final vs draft)
  email <- set_contact(contacts, final)

  # Generate anomaly links
  if(all(is.na(input$anomaly_id)) | all(is.na(input$definition_id))) {
    anomaly_link <- "No anomaly_id or defintion_id exist therefore no link can be created."
  } else
  if(length(input$anomaly_id)!=length(input$definition_id)) {
    anomaly_link <- "Link could not be generated because the number of anomaly_id is not the same as the definition_id"
  } else {
    anomaly_link <- paste0(
      'https://www.npds.us/logon.aspx?AnomalyID=',
      input$anomaly_id,
      '&DefinitionID=',
      input$definition_id
    )

    anomaly_link <- paste0(sapply(anomaly_link, function(x) as.character(shiny::tags$a(x, href=x)), USE.NAMES = FALSE), collapse = as.character(shiny::tags$br()))
  }


  # Read template and substitute variables (e.g., @@variable@@) with
  # corresponding values
  body <- readLines(system.file("templates", "notifications.html", package = "DERTtools"))

  if(final==TRUE) {
    body <- body
  } else
    if(final==FALSE) {
      header <- readLines(system.file("templates", "notifications_draft_header.html", package = "DERTtools"))
      body <- c(header, body)
    }

  vars_pattern <- unlist(stringr::str_extract_all(body, "@@[a-z_]+@@"))

  emails <- emails(contacts)

  vars_replacement <- c(
    emails = emails,
    anomaly_link = anomaly_link,
    salutation_names = wordlist(c(epi_pcc[["state_epi"]][["name"]], epi_pcc[["pcc"]][["name"]])),
    pcc_name = paste(input[["pcc_names"]], collapse = ", the "),
    description = paste0("On ", format(input[["anomaly_date"]], "%d %b %Y"), ", NPDS detected ", input[["notification_text"]]),
    pcc_contact_info = create_label(epi_pcc[["pcc"]]),
    case_id = input[["case_id"]]
  )

  vars_replacement <- vars_replacement[gsub("@", "", vars_pattern)]

  if(length(vars_pattern)!=length(vars_replacement)) stop("The number of variables and the number of values to be substituted in the template file are not the same.")
  body <- stringi::stri_replace_all_regex(body, vars_pattern, vars_replacement, vectorize_all = FALSE)


  # if(final==TRUE) body <- body[-1]
  body <- paste(body, collapse = "")

  # Create email (using eml format) file
  if(final==TRUE) {
    subject <- "Incident of Potential Public Health Significance in your Jurisdiction"
  } else {
    subject <- "DRAFT: Incident of Potential Public Health Significance in your Jurisdiction"
  }

  eml_content <- c(
    email,
    list(
      subject = subject,
      `x-unsent` = 1,
      `content-type` = 'text/html; charset="UTF-8"',
      body = body
    )
  )

  eml <- create_eml(eml_content)

  if(!is.null(save)) write_utf8(eml, save)

  eml

}


# Select State Epidemiologist, PCC and other contacts
select_contacts <- function(x, input) {

  id <- input[["id"]]
  sent_to_state_epi <- input[["sent_to_state_epi"]]
  sent_to_pcc <- input[["sent_to_pcc"]]

  # Filter notification database to choose a specific notification (id)
  notification <- x[["notifications"]][id, ]

  # Standardize state variable to use two letter state abbreviation
  x[["state_epis"]] <- x[["state_epis"]] %>%
    mutate(state_original = state) %>%
    mutate(
      state = case_when(
        grepl("^[A-Z]{2}$", state) ~ state,
        grepl("^American Samoa$", state) ~ "AS",
        grepl("^Federated States of Micronesia$", state) ~ "FM",
        grepl("^Guam$", state) ~ "GU",
        grepl("^Marshall Islands$", state) ~ "MH",
        grepl("^Northern Mariana Islands$", state) ~ "MP",
        grepl("^Puerto Rico$", state) ~ "PR",
        grepl("^U.S. Virgin Islands$", state) ~ "VI",
        grepl("^Palau$", state) ~ "PW"
      ))

  # Get the state epidemiologist and poison control centers contact information
  # for the selected notification
  state_epi <- subset(x[["state_epis"]], state %in% sent_to_state_epi)
  pcc_contact <- subset(x[["pcc_contact"]], id %in% sent_to_pcc)

  # Add other contacts
  other_contacts <- subset(x[["other_contacts"]], state %in% sent_to_state_epi)

  # Create a list of contacts for state epidemiologist and pcc
  list(
    state_epi = state_epi,
    pcc = pcc_contact,
    other_contacts = other_contacts
  )
}


# Set names and emails for draft message
emails <- function(x) {
  contacts <- x[c("state_epi", "pcc")]
  emails <- lapply(contacts, function(x) paste0(x[["name"]], " (", x[["email"]], ")"))
  paste(unlist(emails), collapse = "; ")
}


#' Set email addresses (to, cc, and bcc) for email based on if is draft or final.
#'
#' @param x list that contain state_epis, pcc, cdc_team, and
#'   other_contacts.
#' @param final logical; TRUE or FALSE depending if the final email notification
#'   is final or draft.
#'
#' @noRd
set_contact <- function(x, final) {

  cdctox <- "CDC Tox Team (CDC) <cdctoxteam@cdc.gov>"

  # Get section chief contact information and remove from list
  section_chief <- subset(x[["cdc_team"]], role=="section chief")
  section_chief <- glue_name_and_email(section_chief)
  x[["cdc_team"]] <- subset(x[["cdc_team"]], role!="section chief")

  # Filter out surveillance team members that should not be included in a draft
  # email
  if(final==FALSE) {
    x[["cdc_team"]] <- subset(
      x[["cdc_team"]], draft==TRUE & role=="surveillance"
    )
  }

  # Select name and email variable for each dataframe
  x <- lapply(x, "[", c("name", "email"))

  # Create email address including display name
  name_and_email <- lapply(x, glue_name_and_email)

  if(final==TRUE) {
    email <- list(
      from = cdctox,
      to = c(name_and_email[["state_epi"]], name_and_email[["pcc"]]),
      cc = c(cdctox, section_chief, name_and_email[["other_contacts"]]),
      bcc = c(name_and_email[["cdc_team"]])
    )
  } else
  if(final==FALSE) {
    email <- list(
      from = "", # Setting this blank should make Outlook use the current user account
      to = c(name_and_email[["cdc_team"]])
    )
  }

  # Concatenate/collapse all vectors
  email <- lapply(email, paste0, collapse = ", ")

  email

}


#' Create label for a dataframe that contain name, email, and phone as
#' variables. The label is organized with one element (e.g., name, email, phone)
#' per line.
#'
#' @param dfx
#'
#' @noRd
create_label <- function(dfx) {

  # Suppress NAs when pasting (https://stackoverflow.com/questions/13673894/suppress-nas-in-paste
  paste2 <- function(..., sep = " ") {
    L <- list(...)
    L <- lapply(L,function(x) {x[is.na(x)] <- ""; x})
    gsub(paste0("(^",sep,"|",sep,"$)"),"",
         gsub(paste0(sep,sep),sep,
              do.call(paste,c(L,list(sep=sep)))))
  }

  # Check the name and emails are not NAs. This should be the minimum required.
  if(any(is.na(dfx[["name"]]))) stop("The name variable contain blanks.")
  if(any(is.na(dfx[["email"]]))) stop("The email variable contain blanks.")

  # Build name, email and phone vectors
  name <- paste2(dfx$name, dfx$title)
  email <- unlist(lapply(dfx$email, function(x) as.character(shiny::a(x, href = x))))
  phone <- dfx$phone
  phone[!is.na(dfx$phone)] <- paste0("W: ", dfx$phone[!is.na(dfx$phone)])

  # Concatenate adding HTML line breaks
  contact <- paste2(name, email, phone, sep = "<br>")

  # Add HTML <p> tags
  contact <- sapply(contact, function(x) as.character(shiny::p(shiny::HTML((x)))), USE.NAMES = FALSE)

  # Combine everything
  paste(contact, collapse = "")

}


#' Generate content for an eml file
#'
#' Given a list with header fields and a body it generates the content for an
#' eml file in the \href{https://tools.ietf.org/html/rfc5322}{RFC 5322 format}.
#'
#' @param x list
#'
#' @return
#' character value with the content for an EML file using RFC 5322.
#' @export
#'
#' @examples
#' eml <- list(
#' from = "from@cdc.gov",
#' to = "to@cdc.gov",
#' cc = "cc@cdc.gov",
#' bcc = "bcc@cdc.gov",
#' subject = "My subject",
#' `x-unsent` = 1,
#' `Content-Type` = "text/html",
#' body = "This is my body"
#' )
#' create_eml(eml)
#' writeLines(create_eml(eml), "email.eml")
create_eml <- function(x) {

  # Set header
  elements <- x[!names(x) %in% c("body")]
  header <- paste0(names(elements), lapply(elements, function(x) paste(":", x)))

  # Enclose the body of content in the appropriate html tags
  x[["body"]] <- as.character(
    shiny::tags$html(
      shiny::tags$body(
        shiny::HTML(x[["body"]])
      )
    )
  )

  # Combine header and body adding CRLF between header and body
  c(header, "\n", x[["body"]])

}


#' Create email address that contain a display name
#'
#' @param x dataframe with a name and email variable.
#' @return a vector of emails with display names
#' @noRd
glue_name_and_email <- function(x) {
  if(nrow(x)==0) {
    NULL
  } else {
    paste0(x[["name"]], " <", x[["email"]], ">")
  }
}

#' Summarizes NPDS notifications database
#'
#' @param x object from `read_npds_notification_db`
#'
#' @return list of the following summary values:
#' \itemize{
#'   \item \code{n} numeric; is the total number of notifications in the database.
#'   \item \code{n_na} tibble; summarizes data completeness. Includes the number
#'   and percent of NAs for each variable.
#'   \item \code{frequency} tibble; summarizes the variables and values. Include
#'   the number and percent for each variable and respective values for all the
#'   available data.
#'   \item \code{frequency_by_year} tibble; summarizes the variables and values
#'   by year. Include the number and percent for each variable and respective
#'   values for all the available data and grouped by year.
#' }
#'
#' @keywords npds_notifications
#'
#' @import dplyr
#'
#' @export

# summary.npds_notification_db <- function(x) {
summarize_npds_db <- function(x) {

  # Get notifications table
  dfx <- x[["notifications"]]

  # Add year to use as grouping variable
  dfx[["year"]] <- lubridate::year(dfx[["anomaly_date"]])

  # Total number of records
  total <- nrow(dfx)

  # Number of notifications by year
  n_by_year <- dfx %>%
    group_by(year = lubridate::year(anomaly_date)) %>%
    summarise(n = n())

  # Median start to send in days
  median_start_to_sent <- dfx %>%
    mutate(diff = sent_date - anomaly_date) %>%
    pull(diff) %>%
    median(., na.rm = TRUE) %>%
    as.numeric(., units = "days")

  # Frequency
  frequency <- apply(
    dfx,
    2,
    function(x) dfx %>%
      group_by(value = !!x) %>%
      summarise(n = n(), percent = n / total * 100) %>%
      mutate(value = as.character(value)) %>%
      mutate(value = stringr::str_trim(value)) # For some reason values for
      # variable sent_to_pcc are padded with white space. This fix it but is not too elegant.
  )

  # Frequency by year
  frequency_by_year <- apply(
    dfx,
    2,
    function(x) dfx %>%
      group_by(year, value = !!x) %>%
      summarise(n = n(), percent = n / total * 100) %>%
      mutate(value = as.character(value)) %>%
      mutate(value = stringr::str_trim(value)) # See comment above
  )

  # Data completeness / Get summary for NAs (i.e., n and percent of NAs for each variable)
  n_na <- bind_rows(lapply(frequency, function(x) x[is.na(x[["value"]]),]), .id = "name")

  # Add poison center names to frequency table for sent_to_pcc
  # pcc_name <- x[["pcc_name"]]
  # pcc_name <- setNames(pcc_name, c("value", "pcc_name"))
  # sent_to_pcc_frequency <- frequency[["sent_to_pcc"]]
  # sent_to_pcc_frequency[["value"]] <- as.numeric(sent_to_pcc_frequency[["value"]])
  # frequency[["sent_to_pcc"]] <- dplyr::left_join(sent_to_pcc_frequency, pcc_name)

  # Convert to dataframe to make it easier to plot using ggplot
  frequency_dfx <- bind_rows(frequency, .id = "variable")
  frequency_by_year_dfx <- bind_rows(frequency_by_year, .id = "variable")

  # Put results in a list
  list(
    n = total,
    median_start_to_sent = median_start_to_sent,
    n_by_year = n_by_year,
    n_na = n_na,
    frequency = frequency_dfx,
    frequency_by_year = frequency_by_year_dfx
  )
}

#' Generate figures for the summary of the NPDS notification database
#'
#' @param x object from \code{summary.npds_notification_db()}
#' @param type character which can be either \code{"ggplot"} or \code{"plotly"}
#'   and will determine the output value of this function.
#' @param n_years integer that determines the number of years that will be
#'   displayed in the facet plots.
#'
#' @return list with ggplot or plotly objects depending on the value of the type
#'   argument.
#'
#' @keywords npds_notifications
#'
#' @export
#'
#' @import ggplot2

plot_summary <- function(x, type, n_years) {

  fill <- "#008265" # From NCEH color palette

  # Number of notifications sent per year
  n_by_year <- x[["n_by_year"]] %>%
    ggplot(., aes(x = year, y = n)) +
    geom_col(fill = fill) +
    coord_flip() +
    theme_minimal() +
    labs(
      # title = "Number of notifications sent per year",
      x = "Year",
      y = "Number of notifications sent"
    ) +
    theme(
      plot.title.position = "plot"
    )

  # Number of notifications sent by state
  sent_to_state_epi <- x[["frequency"]] %>%
    dplyr::filter(variable=="sent_to_state_epi") %>%
    ggplot(., aes(x = reorder(value, n), y = n)) +
    geom_col(fill = fill) +
    theme_minimal() +
    coord_flip() +
    labs(
      title = "Number of notifications sent per state",
      x = "State",
      y = "Number of notifications sent"
    )

  # Number of notifications sent by state per year (last 3 years)
  sent_to_state_epi_by_year <- x[["frequency_by_year"]] %>%
    dplyr::filter(variable=="sent_to_state_epi") %>%
    dplyr::filter(year %in% c(lubridate::year(Sys.Date())-0:n_years)) %>%
    ggplot(., aes(x = reorder(value, n), y = n)) +
    geom_col(fill = fill) +
    theme_minimal() +
    coord_flip() +
    facet_wrap(~ year, nrow = 1) +
    labs(
      title = "Number of notifications sent by state per year",
      x = "State",
      y = "Number of notifications sent"
    )

  # Number of notifications by defintion ID
  definition_id <- x[["frequency"]] %>%
    dplyr::filter(variable=="definition_id") %>%
    ggplot(., aes(x = reorder(value, n), y = n)) +
    geom_col(fill = fill) +
    theme_minimal() +
    coord_flip() +
    labs(
      # title = "Number of notifications by defintion ID",
      x = "Definition ID",
      y = "Number of notifications sent"
    )

  # Data completeness
  data_completness <- ggplot(x[["n_na"]], aes(x = reorder(name, percent), y = percent)) +
    geom_col(fill = fill) +
    coord_flip() +
    theme_minimal() +
    labs(
      # title = "Percent of missing values per variable",
      x = "Variable name",
      y = "Percent missing"
    )

  x <- list(
    n_by_year = n_by_year,
    sent_to_state_epi = sent_to_state_epi,
    sent_to_state_epi_by_year = sent_to_state_epi_by_year,
    defintion_id = definition_id,
    data_completness = data_completness
  )

  # Create interactive plots for dashboard
  if(type=="plotly") {
    x <- lapply(x, function(x) plotly::ggplotly(x) %>% plotly::layout(title = list(x = 0.1)))
  } else
  if(type=="ggplot") {
    x <- x
  }

  x

}
renejuan/DERTtools documentation built on March 19, 2022, 7:20 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
renejuan/DERTtools
Collection of utilities to support DERT work

R/npds_notifications.R
In renejuan/DERTtools: Collection of utilities to support DERT work

Defines functions plot_summary summarize_npds_db glue_name_and_email create_eml create_label set_contact emails select_contacts create_npds_notification

Documented in create_eml create_npds_notification plot_summary summarize_npds_db

R Package Documentation

Browse R Packages

We want your feedback!

renejuan/DERTtools Collection of utilities to support DERT work

R/npds_notifications.R In renejuan/DERTtools: Collection of utilities to support DERT work

Defines functions plot_summary summarize_npds_db glue_name_and_email create_eml create_label set_contact emails select_contacts create_npds_notification

Documented in create_eml create_npds_notification plot_summary summarize_npds_db

R Package Documentation

Browse R Packages

We want your feedback!

renejuan/DERTtools
Collection of utilities to support DERT work

R/npds_notifications.R
In renejuan/DERTtools: Collection of utilities to support DERT work