data-raw/completeness_report_fy21.R

# PROJECT:  Wavelength
# AUTHOR:   A.Chafetz | USAID
# PURPOSE:  site completeness
# LICENSE:  MIT
# DATE:     2021-12-11
# UPDATED:

# DEPENDENCIES ------------------------------------------------------------

  library(tidyverse)
  library(glitr)
  library(glamr)
  library(gophr)
  library(extrafont)
  library(scales)
  library(tidytext)
  library(patchwork)
  library(ggtext)
  library(glue)
  library(Wavelength)


# GLOBAL VARIABLES --------------------------------------------------------

    #global file path
    path <- "out/joint/HFR_Tableau_SQLview.csv"

    #date
    file_date <- file.info(path)$mtime %>% lubridate::as_date()

    #indicator order
    ind_sel <- c("PrEP_NEW", "VMMC_CIRC",
                 "HTS_TST", "HTS_TST_POS",
                 "TX_NEW", "TX_CURR", "TX_MMD")
    #red
    flag_red <- brewer.pal(5, "OrRd")[5]

# IMPORT ------------------------------------------------------------------

    df_hfr <- hfr_read(path)

    df_msd <- si_path() %>%
      return_latest("OU_IM") %>%
      read_rds()


# MUNGE -------------------------------------------------------------------

    #aggregate MSD to period x country x mech x indicator
    df_msd <- df_msd %>%
      filter(fundingagency == "USAID",
             fiscal_year == 2021,
             indicator %in% ind_sel,
             standardizeddisaggregate == "Total Numerator") %>%
      group_by(fiscal_year, countryname, indicator, mech_code) %>%
      summarise(across(starts_with("qtr"), sum, na.rm = TRUE), .groups = "drop") %>%
      mutate(across(starts_with("qtr"), ~ na_if(., 0))) %>%
      reshape_msd() %>%
      select(-period_type) %>%
      rename(mer_results = value)

    df_msd <- df_msd %>%
      bind_rows(df_msd %>%
                  filter(indicator == "TX_CURR") %>%
                  mutate(indicator = "TX_MMD"))
    #combine OU and countryname for regional missions
    df_hfr <- df_hfr %>%
      filter(fy == 2021) %>%
      mutate(countryname = ifelse(operatingunit == countryname,
                                  operatingunit, glue("{operatingunit}/{countryname}"))) %>%
      rename(hfr_results = val)

    #aggregate to the date x orgunit x mech x indicator level
    df_hfr_agg <- df_hfr %>%
      filter(expect_reporting == TRUE) %>%
      mutate(has_hfr_reporting = !is.na(hfr_results)) %>%
      group_by(countryname, orgunituid, date,
               mech_code, indicator, expect_reporting) %>%
      summarise(has_hfr_reporting = max(has_hfr_reporting, na.rm = TRUE),
                hfr_results = sum(hfr_results, na.rm = TRUE)) %>%
      ungroup()

    #aggregate across all sites in countryname across indicator and period
    df_hfr_comp <- df_hfr_agg %>%
      bind_rows(df_hfr_comp %>%
                  mutate(countryname = "ALL USAID")) %>%
      group_by(countryname, indicator, date) %>%
      summarise(across(c(has_hfr_reporting, expect_reporting), sum, na.rm = TRUE)) %>%
      ungroup()

    #calculate completeness
    df_hfr_comp <- df_hfr_comp %>%
      mutate(completeness = case_when(expect_reporting >  0 ~ round(has_hfr_reporting / expect_reporting, 2)))

    #clean up
    df_viz <- df_hfr_comp %>%
      mutate(countryname = countryname %>%
               recode("Democratic Republic of the Congo" = "DRC",
                      "Dominican Republic" = "DR",
                      "Papua New Guinea" = "PNG") %>%
               str_replace("West Africa Region", "WAR") %>%
               str_replace("Western Hemisphere Region", "WHR") %>%
               str_replace("Asia Region", "AR"),
             indicator = factor(indicator, ind_sel))

# VIZ ---------------------------------------------------------------------

    bounds <- df_viz %>%
      filter(countryname == "ALL USAID") %>%
      group_by(indicator) %>%
      summarise(completeness_avg = mean(completeness),
                sites = max(expect_reporting),
                .groups = "drop") %>%
      arrange(desc(completeness_avg)) %>%
      mutate(indicator = recode(indicator, "HTS_TST_POS" = "HTS_POS")) %>%
      filter(completeness_avg == max(completeness_avg) | completeness_avg == min(completeness_avg)) %>%
      mutate(x = glue("{percent(completeness_avg, 1)} for {indicator} ({label_number_si()(sites)} sites)")) %>%
      pull()

    df_viz %>%
      filter(indicator != "HTS_TST",
             countryname != "ALL USAID"
             # str_detect(countryname, "/", negate = TRUE)
             ) %>%
      ggplot(aes(date, fct_reorder(countryname, expect_reporting,  max), fill = completeness)) +
      geom_tile(color = "white") +
      facet_grid(~ indicator) +
      scale_x_date(breaks = as.Date(c("2020-10-01", "2021-01-01", "2021-04-01", "2021-07-01")),
                   #date_breaks = "3 months",
                   date_labels = "%b",
                   position = "top") +
      scale_fill_distiller(palette = "RdYlBu", direction = 1, labels = percent_format(1)) +
      # scale_fill_viridis_c(direction = -1,  labels = percent_format(1)) +
      labs(x = NULL, y = NULL, fill = "Site x Mech Completeness",
           title = glue("USAID's mean completeness ranged from {bounds[1]} down to {str_remove(bounds[2], ' sites')}") %>% toupper(),# %>% str_wrap(90),
           subtitle = "FY21 HFR Site x Mech Completeness of Reporting Rate",
           caption = glue("Source: HFR Tableau Extract {file_date}")) +
      si_style_nolines(facet_space = .1) +
      theme(strip.placement = "outside",
            strip.text.x = element_text(family = "Source Sans Pro SemiBold"),
            legend.position = "none",
            axis.text.y = element_text(size = 8))

    si_save("out/Completeness_FY21.png")



# CORRECTNESS -------------------------------------------------------------

    df_hfr_corr <- df_hfr_agg %>%
      mutate(period = convert_date_to_qtr(date))

    df_hfr_corr_tx <- df_hfr_corr %>%
      filter(indicator %in% c("TX_CURR", "TX_MMD"),
             has_hfr_reporting == 1) %>%
      group_by(countryname, orgunituid, indicator, mech_code, period) %>%
      filter(date == max(date)) %>%
      ungroup()

    df_hfr_corr <- df_hfr_corr %>%
      filter(indicator %ni% c("TX_CURR", "TX_MMD")) %>%
      bind_rows(df_hfr_corr_tx) %>%
      group_by(countryname, indicator, mech_code, period) %>%
      summarise(across(c(has_hfr_reporting, expect_reporting, hfr_results), sum, na.rm = TRUE)) %>%
      ungroup()

    df_hfr_corr <- df_hfr_corr %>%
      mutate(completeness = case_when(expect_reporting >  0 ~ round(has_hfr_reporting / expect_reporting, 2)))

    df_hfr_corr <- df_hfr_corr %>%
      tidylog::full_join(df_msd) %>%
      filter(!is.na(mer_results))

    df_viz_corr <- df_hfr_corr %>%
      filter(indicator!= "HTS_TST") %>%
      mutate(across(ends_with("results"), ~ ifelse(is.na(.), 0, .))) %>%
      group_by(indicator) %>%
      mutate(max_value = max(hfr_results, mer_results, na.rm = TRUE)) %>%
      ungroup()


    df_viz_corr %>%
      ggplot(aes(hfr_results, mer_results, color = completeness)) +
      geom_abline(slope = 1, color = grey30k, linetype = "dashed") +
      geom_blank(aes(x = max_value, y = max_value)) +
      geom_point(alpha = .8) +
      scale_x_continuous(labels = label_number_si()) +
      scale_y_continuous(labels = label_number_si()) +
      # scale_x_log10(labels = label_number_si()) +
      # scale_y_log10(labels = label_number_si()) +
      scale_color_distiller(palette = "RdYlBu", direction = 1, labels = percent_format(1)) +
      # facet_wrap(~indicator + period, scales = "free", ncol = 4,
      #            labeller = label_wrap_gen(multi_line = FALSE)) +
      facet_wrap(~indicator, scales = "free") +
      labs(x = "HFR Results (Quarterly Agg)", y = "MER Results",
           title = "Where HFR is reported, values tend to be close to MER reported values" %>% toupper(),
           subtitle = "FY21 HFR Quarterly Mech Correctness of Reporting Rate",
           caption = glue("Source: HFR Tableau Extract {file_date}")) +
      coord_cartesian(clip = "off") +
      si_style() +
      theme(legend.position = "none",
            panel.spacing.x = unit(.1, "lines"),
            panel.spacing.y = unit(.1, "lines"))


    si_save("out/Correctness_FY21.png")
USAID-OHA-SI/Wavelength documentation built on March 24, 2023, 10:07 a.m.