R/pi-mortality.r

Defines functions pi_mortality

Documented in pi_mortality

#' Pneumonia and Influenza Mortality Surveillance
#'
#' The National Center for Health Statistics (NCHS) collects and disseminates the Nation's
#' official vital statistics. NCHS collects death certificate data from state vital
#' statistics offices for virtually all deaths occurring in the United States. Pneumonia
#' and influenza (P&I) deaths are identified based on ICD-10
#' multiple cause of death codes.\cr
#' \cr
#' NCHS Mortality Surveillance System data are presented by the week the death occurred
#' at the national, state, and HHS Region levels. Data on the percentage of deaths due
#' to P&I on a national level are released two weeks after the week of death to allow
#' for collection of enough data to produce a stable percentage.  States and HHS regions
#' with less than 20% of the expected total deaths (average number of total deaths
#' reported by week during 2008-2012) will be marked as insufficient data. Collection
#' of complete data is not expected at the time of initial report, and a reliable
#' percentage of deaths due to P&I is not anticipated at the U.S. Department of Health
#' and Human Services region or state level within this two week period.  The data for
#' earlier weeks are continually revised and the proportion of deaths due to P&I may
#' increase or decrease as new and updated death certificate data are received by NCHS.\cr
#' \cr
#' The seasonal baseline of P&I deaths is calculated using a periodic regression model
#' that incorporates a robust regression procedure applied to data from the previous
#' five years. An increase of 1.645 standard deviations above the seasonal baseline
#' of P&I deaths is considered the "epidemic threshold," i.e., the point at which
#' the observed proportion of deaths attributed to pneumonia or influenza was
#' significantly higher than would be expected at that time of the year in the
#' absence of substantial influenza-related mortality. Baselines and thresholds are
#' calculated at the national and regional level and by age group.
#'
#' @md
#' @param coverage_area coverage area for data (national, state or region)
#' @param years a vector of years to retrieve data for (i.e. `2014` for CDC
#'        flu season 2014-2015). CDC has data for this API going back to 2009
#'        and up until the current, active flu season.
#'        Default value (`NULL`) means retrieve **all** years. NOTE: if you
#'        happen to specify a 2-digit season value (i.e. `57` == 2017-2018)
#'        the function is smart enough to retrieve by season ID vs convert that
#'        to a year.
#' @note Queries for "state" and "region" are not necessarily as "instantaneous" as other API endpoints and can near or over 30s retrieval delays.
#'
#' For consistency with \code{\link{ilinet}} columns indicating the week now contain 'week' instead of the previously abbreviation 'wk'.
#' @references
#' - [Pneumonia and Influenza Mortality Surveillance Portal](https://gis.cdc.gov/grasp/fluview/mortality.html)
#' @export
#' @examples \dontrun{
#' ndf <- pi_mortality()
#' sdf <- pi_mortality("state")
#' rdf <- pi_mortality("region")
#' }
pi_mortality <- function(coverage_area=c("national", "state", "region"), years=NULL) {

  coverage_area <- match.arg(tolower(coverage_area), choices = c("national", "state", "region"))

  us_states <- read.csv("https://gis.cdc.gov/grasp/fluview/Flu7References/Data/USStates.csv",
                        stringsAsFactors=FALSE)
  us_states <- setNames(us_states, c("region_name", "subgeoid", "state_abbr"))
  us_states <- us_states[,c("region_name", "subgeoid")]
  us_states$subgeoid <- as.character(us_states$subgeoid)

  meta <- jsonlite::fromJSON("https://gis.cdc.gov/grasp/flu7/GetPhase07InitApp?appVersion=Public")

  mapcode_df <- setNames(meta$nchs_mapcode[,c("mapcode", "description")], c("map_code", "callout"))
  mapcode_df$map_code <- as.character(mapcode_df$map_code)

  geo_df <- meta$nchs_geo_dim
  geo_df$geoid <- as.character(geo_df$geoid)

  age_df <- setNames(meta$nchs_ages, c("ageid", "age_label"))
  age_df$ageid <- as.character(age_df$ageid)

  sum_df <- meta$nchs_summary
  sum_df$seasonid <- as.character(sum_df$seasonid)
  sum_df$ageid <- as.character(sum_df$ageid)
  sum_df$geoid <- as.character(sum_df$geoid)

  available_seasons <- sort(meta$seasons$seasonid)

  if (is.null(years)) { # ALL YEARS
    years <- available_seasons
  } else { # specified years or seasons or a mix

    years <- as.numeric(years)
    years <- ifelse(years > 1996, years - 1960, years)
    years <- sort(unique(years))
    years <- years[years %in% available_seasons]

    if (length(years) == 0) {
      years <- rev(sort(meta$seasons$seasonid))[1]
      curr_season_descr <- meta$seasons[meta$seasons$seasonid == years, "description"]
      message(sprintf("No valid years specified, defaulting to this flu season => ID: %s [%s]",
                      years, curr_season_descr))
    }

  }

  httr::POST(
    url = "https://gis.cdc.gov/grasp/flu7/PostPhase07DownloadData",
    httr::user_agent(.cdcfluview_ua),
    httr::add_headers(
      Origin = "https://gis.cdc.gov",
      Accept = "application/json, text/plain, */*",
      Referer = "https://gis.cdc.gov/grasp/fluview/mortality.html"
    ),
    encode = "json",
    body = list(
      AppVersion = "Public",
      AreaParameters = list(list(ID=.geoid_map[coverage_area])),
      SeasonsParameters = lapply(years, function(.x) { list(ID=as.integer(.x)) }),
      AgegroupsParameters = list(list(ID="1"))
    ),
    # httr::verbose(),
    httr::timeout(.httr_timeout)
  ) -> res

  httr::stop_for_status(res)

  res <- httr::content(res, as="parsed", flatten=TRUE)

  suppressWarnings(suppressMessages(dplyr::bind_rows(res$seasons) %>%
    dplyr::left_join(mapcode_df, "map_code") %>%
    dplyr::left_join(geo_df, "geoid") %>%
    dplyr::left_join(age_df, "ageid") %>%
    dplyr::left_join(dplyr::mutate(mmwrid_map, mmwrid=as.character(mmwrid)), "mmwrid") -> xdf))

  xdf <- dplyr::mutate(xdf, coverage_area = coverage_area)

  if (coverage_area == "state") {
    xdf <- dplyr::left_join(xdf, us_states, "subgeoid")
  } else if (coverage_area == "region") {
    xdf$region_name <- sprintf("Region %s", xdf$subgeoid)
  } else {
    xdf$region_name <- "national"
  }

  xdf[,c("seasonid", "baseline", "threshold", "percent_pni",
         "percent_complete", "number_influenza", "number_pneumonia",
         "all_deaths", "Total_PnI", "weeknumber", "geo_description",
         "age_label", "wk_start", "wk_end", "year_wk_num", "mmwrid",
         "coverage_area", "region_name", "callout")] -> xdf

  xdf$baseline <- to_num(xdf$baseline) / 100
  xdf$threshold <- to_num(xdf$threshold) / 100
  xdf$percent_pni <- to_num(xdf$percent_pni) / 100
  xdf$percent_complete <- to_num(xdf$percent_complete) / 100
  xdf$number_influenza <- to_num(xdf$number_influenza)
  xdf$number_pneumonia <- to_num(xdf$number_pneumonia)
  xdf$all_deaths <- to_num(xdf$all_deaths)
  xdf$Total_PnI <- to_num(xdf$Total_PnI)
  xdf <- xdf %>%
    dplyr::rename(
      week_start = wk_start,
      week_end = wk_end,
      year_week_num = year_wk_num
    )
  xdf <- .mcga(xdf)

  xdf

}

Try the cdcfluview package in your browser

Any scripts or data that you put into this service are public.

cdcfluview documentation built on May 22, 2021, 5:07 p.m.