# PROJECT: Wavelength
# AUTHOR: A.Chafetz | USAID
# PURPOSE: create a FY22 official site list
# LICENSE: MIT
# DATE: 2021-11-02
# UPDATED: 2022-01-18
# DEPENDENCIES ------------------------------------------------------------
library(tidyverse)
library(glamr)
library(googledrive)
library(googlesheets4)
library(janitor)
library(glue)
library(Wavelength)
# GLOBAL ------------------------------------------------------------------
#add credentials to access drive
load_secrets()
#site submission form (sheet)
gs_id <- as_sheets_id("1aW37FmWOc3TWQqUjoWagcZ53bj9cbaW2W8NFVIUlua4")
#create temp folder for downloading FY22 site files
temp_folder()
#return latest FY21 site list
fy21_sitelist <- return_latest("out", "HFR_FY21_GLOBAL_sitelist")
# DOWNLOAD FY22 SITE VALIDATION FILES -------------------------------------
#read in googlesheet with all submissions
df_sub <- read_sheet(gs_id, .name_repair = janitor::make_clean_names)
#pull last cntry submission & apply name
df_sub_name <- df_sub %>%
group_by(operating_unit_country) %>%
filter(timestamp == max(timestamp)) %>%
ungroup() %>%
mutate(id = str_extract(upload_your_hfr_site_validation_file_here, "(?<=id=).*")) %>%
select(timestamp, operating_unit_country, id, prefill_template) %>%
mutate(name = map(id, ~ drive_get(as_id(.))$name)) %>%
unnest(name)%>%
mutate(name = str_remove_all(name, "'"))
#download all
walk2(.x = df_sub_name$id,
.y = df_sub_name$name,
~ drive_download(as_id(.x),
path = file.path(folderpath_tmp, .y), overwrite = TRUE))
# COMBINE INTO ONE GLOBAL SITE LIST ---------------------------------------
#file names of submission to read in
files_submissions <- list.files(folderpath_tmp, full.names = TRUE, recursive = TRUE)
#test import files
# purrr::map(files_submissions,
# purrr::possibly(~tidy_sitelist(.x), otherwise = "Error"))
#import files
df_sitelist <- purrr::map_dfr(files_submissions, tidy_sitelist)
#flag if original or not
df_flag <- df_sub_name %>%
select(operatingunit = operating_unit_country, is_original = prefill_template) %>%
mutate(is_original = ifelse(is.na(is_original), FALSE, is_original))
df_sitelist <- left_join(df_sitelist, df_flag)
#parse operatingunit for ou and country
df_sitelist <- df_sitelist %>%
tidyr::separate(operatingunit, c("operatingunit", "countryname"), sep = "/", fill = "right") %>%
dplyr::mutate(countryname = ifelse(is.na(countryname), operatingunit, countryname))
#remove any rows where the UID is missing
df_sitelist <- df_sitelist %>%
tidylog::filter(!is.na(orgunituid)) %>%
tidylog::filter(!is.na(mech_code))
#adjust for incusion of some ending mechanisms"
df_sitelist <- df_sitelist %>%
mutate(mech_code = str_remove(mech_code, "\\(!\\) "))
#add source
df_sitelist <- df_sitelist %>%
mutate(source = case_when(is_original == FALSE ~ "Country Team Submitted",
TRUE ~ "Original DATIM - FY21Q4"))
# VALIDITY CHECK ----------------------------------------------------------
# df_msd <- si_path() %>%
# return_latest("OU_IM") %>%
# read_rds()
df_datim_org <- return_latest("out/DATIM", "GLOBAL_org") %>%
read_csv()
# df_msd_mech <- df_msd %>%
# filter(fundingagency == "USAID",
# fiscal_year == curr_fy,
# indicator %in% c("HTS_TST", "TX_NEW", "TX_CURR",
# "PrEP_NEW", "VMMC_CIRC")) %>%
# count(operatingunit, countryname, mech_code, indicator, wt = targets)
#
#
# df_sitelist %>%
# anti_join(df_msd_mech %>%
# select(operatingunit, mech_code)) %>%
# distinct(operatingunit, countryname,
# mech_code, mech_name, primepartner)
#
# df_sitelist %>%
# anti_join(df_msd_mech %>%
# select(operatingunit, mech_code, indicator)) %>%
# distinct(operatingunit, countryname,
# mech_code, mech_name, primepartner, indicator)
(df_err_orgunit <- df_sitelist %>%
anti_join(df_datim_org %>%
select(orgunituid, operatingunit, countryname)) %>%
distinct(operatingunit, countryname,
orgunit, orgunituid))
df_sitelist <- df_sitelist %>%
tidylog::filter(orgunituid %ni% df_err_orgunit$orgunituid)
# COMBINE CURRENT AND PRIOR SITE LIST -------------------------------------
#import
df_sitelist_prior <- read_csv(fy21_sitelist,
col_types = c(expect_reporting = "l",
is_original = "l",
.default = "c"))
#add start/end
df_sitelist_prior <- df_sitelist_prior %>%
mutate(start = "FY21 Oct",
end = "FY21 Sep")
#combine
df_sitelist <- df_sitelist %>%
bind_rows(df_sitelist_prior)
#arrange
df_sitelist <- df_sitelist %>%
arrange(start, operatingunit, countryname, orgunit)
#limit output
df_sitelist <- df_sitelist %>%
select(orgunit, orgunituid, type, operatingunit, countryname,
snu1, psnu, mech_code, mech_name, primepartner,
start, end, indicator, expect_reporting, is_original, source)
# EXPORT ------------------------------------------------------------------
date <- format(Sys.Date(), "%Y%m%d")
fy <- str_sub(curr_fy, -2)
file <- glue("HFR_FY{fy}_GLOBAL_sitelist_{date}.csv")
readr::write_csv(df_sitelist,
file.path("out", file), na = "")
grabr::s3_upload(
filepath = file.path("out", file),
bucket = "gov-usaid",
prefix = "ddc/uat/raw/hfr/receiving"
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.