# PROJECT: Wavelength
# AUTHOR: A.Chafetz | USAID
# PURPOSE: site completeness
# LICENSE: MIT
# DATE: 2021-12-11
# UPDATED:
# DEPENDENCIES ------------------------------------------------------------
library(tidyverse)
library(glitr)
library(glamr)
library(gophr)
library(extrafont)
library(scales)
library(tidytext)
library(patchwork)
library(ggtext)
library(glue)
library(Wavelength)
# GLOBAL VARIABLES --------------------------------------------------------
#global file path
path <- "out/joint/HFR_Tableau_SQLview.csv"
#date
file_date <- file.info(path)$mtime %>% lubridate::as_date()
#indicator order
ind_sel <- c("PrEP_NEW", "VMMC_CIRC",
"HTS_TST", "HTS_TST_POS",
"TX_NEW", "TX_CURR", "TX_MMD")
#red
flag_red <- brewer.pal(5, "OrRd")[5]
# IMPORT ------------------------------------------------------------------
df_hfr <- hfr_read(path)
df_msd <- si_path() %>%
return_latest("OU_IM") %>%
read_rds()
# MUNGE -------------------------------------------------------------------
#aggregate MSD to period x country x mech x indicator
df_msd <- df_msd %>%
filter(fundingagency == "USAID",
fiscal_year == 2021,
indicator %in% ind_sel,
standardizeddisaggregate == "Total Numerator") %>%
group_by(fiscal_year, countryname, indicator, mech_code) %>%
summarise(across(starts_with("qtr"), sum, na.rm = TRUE), .groups = "drop") %>%
mutate(across(starts_with("qtr"), ~ na_if(., 0))) %>%
reshape_msd() %>%
select(-period_type) %>%
rename(mer_results = value)
df_msd <- df_msd %>%
bind_rows(df_msd %>%
filter(indicator == "TX_CURR") %>%
mutate(indicator = "TX_MMD"))
#combine OU and countryname for regional missions
df_hfr <- df_hfr %>%
filter(fy == 2021) %>%
mutate(countryname = ifelse(operatingunit == countryname,
operatingunit, glue("{operatingunit}/{countryname}"))) %>%
rename(hfr_results = val)
#aggregate to the date x orgunit x mech x indicator level
df_hfr_agg <- df_hfr %>%
filter(expect_reporting == TRUE) %>%
mutate(has_hfr_reporting = !is.na(hfr_results)) %>%
group_by(countryname, orgunituid, date,
mech_code, indicator, expect_reporting) %>%
summarise(has_hfr_reporting = max(has_hfr_reporting, na.rm = TRUE),
hfr_results = sum(hfr_results, na.rm = TRUE)) %>%
ungroup()
#aggregate across all sites in countryname across indicator and period
df_hfr_comp <- df_hfr_agg %>%
bind_rows(df_hfr_comp %>%
mutate(countryname = "ALL USAID")) %>%
group_by(countryname, indicator, date) %>%
summarise(across(c(has_hfr_reporting, expect_reporting), sum, na.rm = TRUE)) %>%
ungroup()
#calculate completeness
df_hfr_comp <- df_hfr_comp %>%
mutate(completeness = case_when(expect_reporting > 0 ~ round(has_hfr_reporting / expect_reporting, 2)))
#clean up
df_viz <- df_hfr_comp %>%
mutate(countryname = countryname %>%
recode("Democratic Republic of the Congo" = "DRC",
"Dominican Republic" = "DR",
"Papua New Guinea" = "PNG") %>%
str_replace("West Africa Region", "WAR") %>%
str_replace("Western Hemisphere Region", "WHR") %>%
str_replace("Asia Region", "AR"),
indicator = factor(indicator, ind_sel))
# VIZ ---------------------------------------------------------------------
bounds <- df_viz %>%
filter(countryname == "ALL USAID") %>%
group_by(indicator) %>%
summarise(completeness_avg = mean(completeness),
sites = max(expect_reporting),
.groups = "drop") %>%
arrange(desc(completeness_avg)) %>%
mutate(indicator = recode(indicator, "HTS_TST_POS" = "HTS_POS")) %>%
filter(completeness_avg == max(completeness_avg) | completeness_avg == min(completeness_avg)) %>%
mutate(x = glue("{percent(completeness_avg, 1)} for {indicator} ({label_number_si()(sites)} sites)")) %>%
pull()
df_viz %>%
filter(indicator != "HTS_TST",
countryname != "ALL USAID"
# str_detect(countryname, "/", negate = TRUE)
) %>%
ggplot(aes(date, fct_reorder(countryname, expect_reporting, max), fill = completeness)) +
geom_tile(color = "white") +
facet_grid(~ indicator) +
scale_x_date(breaks = as.Date(c("2020-10-01", "2021-01-01", "2021-04-01", "2021-07-01")),
#date_breaks = "3 months",
date_labels = "%b",
position = "top") +
scale_fill_distiller(palette = "RdYlBu", direction = 1, labels = percent_format(1)) +
# scale_fill_viridis_c(direction = -1, labels = percent_format(1)) +
labs(x = NULL, y = NULL, fill = "Site x Mech Completeness",
title = glue("USAID's mean completeness ranged from {bounds[1]} down to {str_remove(bounds[2], ' sites')}") %>% toupper(),# %>% str_wrap(90),
subtitle = "FY21 HFR Site x Mech Completeness of Reporting Rate",
caption = glue("Source: HFR Tableau Extract {file_date}")) +
si_style_nolines(facet_space = .1) +
theme(strip.placement = "outside",
strip.text.x = element_text(family = "Source Sans Pro SemiBold"),
legend.position = "none",
axis.text.y = element_text(size = 8))
si_save("out/Completeness_FY21.png")
# CORRECTNESS -------------------------------------------------------------
df_hfr_corr <- df_hfr_agg %>%
mutate(period = convert_date_to_qtr(date))
df_hfr_corr_tx <- df_hfr_corr %>%
filter(indicator %in% c("TX_CURR", "TX_MMD"),
has_hfr_reporting == 1) %>%
group_by(countryname, orgunituid, indicator, mech_code, period) %>%
filter(date == max(date)) %>%
ungroup()
df_hfr_corr <- df_hfr_corr %>%
filter(indicator %ni% c("TX_CURR", "TX_MMD")) %>%
bind_rows(df_hfr_corr_tx) %>%
group_by(countryname, indicator, mech_code, period) %>%
summarise(across(c(has_hfr_reporting, expect_reporting, hfr_results), sum, na.rm = TRUE)) %>%
ungroup()
df_hfr_corr <- df_hfr_corr %>%
mutate(completeness = case_when(expect_reporting > 0 ~ round(has_hfr_reporting / expect_reporting, 2)))
df_hfr_corr <- df_hfr_corr %>%
tidylog::full_join(df_msd) %>%
filter(!is.na(mer_results))
df_viz_corr <- df_hfr_corr %>%
filter(indicator!= "HTS_TST") %>%
mutate(across(ends_with("results"), ~ ifelse(is.na(.), 0, .))) %>%
group_by(indicator) %>%
mutate(max_value = max(hfr_results, mer_results, na.rm = TRUE)) %>%
ungroup()
df_viz_corr %>%
ggplot(aes(hfr_results, mer_results, color = completeness)) +
geom_abline(slope = 1, color = grey30k, linetype = "dashed") +
geom_blank(aes(x = max_value, y = max_value)) +
geom_point(alpha = .8) +
scale_x_continuous(labels = label_number_si()) +
scale_y_continuous(labels = label_number_si()) +
# scale_x_log10(labels = label_number_si()) +
# scale_y_log10(labels = label_number_si()) +
scale_color_distiller(palette = "RdYlBu", direction = 1, labels = percent_format(1)) +
# facet_wrap(~indicator + period, scales = "free", ncol = 4,
# labeller = label_wrap_gen(multi_line = FALSE)) +
facet_wrap(~indicator, scales = "free") +
labs(x = "HFR Results (Quarterly Agg)", y = "MER Results",
title = "Where HFR is reported, values tend to be close to MER reported values" %>% toupper(),
subtitle = "FY21 HFR Quarterly Mech Correctness of Reporting Rate",
caption = glue("Source: HFR Tableau Extract {file_date}")) +
coord_cartesian(clip = "off") +
si_style() +
theme(legend.position = "none",
panel.spacing.x = unit(.1, "lines"),
panel.spacing.y = unit(.1, "lines"))
si_save("out/Correctness_FY21.png")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.