data-raw/fy21_full_sitelist_with_mer.R

# PROJECT:  Wavelength
# AUTHOR:   A.Chafetz | USAID
# PURPOSE:  compare site list
# LICENSE:  MIT
# DATE:     2021-03-24
# UPDATED:  2021-09-23

# DEPENDENCIES ------------------------------------------------------------

library(tidyverse)
library(glamr)
library(Wavelength)
library(vroom)
library(googledrive)
library(googlesheets4)
library(gophr)
library(glue)

# GLOBAL VARIABLES --------------------------------------------------------

  load_secrets()

  gdrive_fldr_sitelist <- as_id("12bah06bx71-EPa0mdPiwOGCROOliJX85")

  api_pd <- "FY21Q3"

# IMPORT ------------------------------------------------------------------

  #import reporting level (community/facility)
  df_lvls <- get_outable(datim_user(), datim_pwd())

  #import org hierarchy
  df_orgs <-  return_latest("out/DATIM", "GLOBAL_org") %>%
    vroom()

  #import mech list
  df_mech <- return_latest("out/DATIM", "GLOBAL_mechanisms") %>%
    vroom() %>%
    select(mech_code, primepartner)

  #import current site list
  df_sitelist <- return_latest("out", "HFR_FY22_GLOBAL_sitelist") %>%
      vroom()

  #import current MER reporting from DATIM
  df_api <- return_latest("out/DATIM", "HFR_FY21Q.*GLOBAL_DATIM") %>%
    vroom()


# MUNGE -------------------------------------------------------------------

  #table of reporting level for inner join (to drop higher hierarchy)
    df_lvls <- df_lvls %>%
      select(countryname, community_lvl, facility_lvl) %>%
      gather(type, level, -countryname) %>%
      mutate(type = str_remove(type, "_lvl"))

  #limit org hierarchy to just comm/fac
    df_orgs_cf <- df_orgs %>%
      inner_join(df_lvls) %>%
      relocate(type, .after = level) %>%
      select(orgunituid, type)

  #add type into api for merging purposes with sitelist
  df_api <- left_join(df_api, df_orgs_cf, by = "orgunituid") %>%
    mutate(type = ifelse(is.na(type), "above site", type))

  #identify names to create same dataset as sitelist
  names_joint <- intersect(names(df_sitelist), names(df_api))

  #keep distinct list of reporting site by mech and indicator joining on partner name
  df_api <- df_api %>%
    filter(mer_results > 0) %>%
    select(all_of(names_joint)) %>%
    distinct() %>%
    left_join(df_mech) %>%
    mutate(expect_reporting = TRUE,
           is_original = FALSE,
           source = glue("DATIM - {api_pd}") %>% as.character)

  #identify distinct list of MER reporting sites x mech x ind for adding new ones to site list
  df_api_lim <- df_api %>%
    select(orgunituid, mech_code, indicator)

  #identify distinct list of site list sites x mech x ind to id new ones to add
  df_sitelist_lim <- df_sitelist %>%
    select(orgunituid, mech_code, indicator)

  #anti join to id the missing sites to add
  df_newsites <- anti_join(df_api_lim, df_sitelist_lim)

  #filter MER df to just the missing sites
  df_api_add <- semi_join(df_api, df_newsites)

  #append new sites to sitelist and add source to sitelist
  df_sitelist_new <- df_sitelist %>%
    bind_rows(df_api_add) %>%
    mutate(source = case_when(is.na(source) & is_original == "TRUE" ~ "Original DATIM - FY20Q4",
                              is.na(source) ~ "Country Team Submitted",
                              TRUE ~ source))

  #clean up mech name and partner to be conistent across sources
  df_sitelist_new <- df_sitelist_new %>%
    mutate(mech_code = as.character(mech_code)) %>%
    rename_official()

  #clean up org hierarchy to be conistent across sources
  lst_orgunits <- df_sitelist_new %>%
    distinct(orgunituid) %>%
    pull()

  df_orgs_new <- df_orgs %>%
    filter(orgunituid %in% lst_orgunits) %>%
    select(orgunit, orgunituid, operatingunit, countryname, snu1, psnu) %>%
    left_join(df_orgs_cf)

  df_sitelist_new <- df_sitelist_new %>%
    select(-c(orgunit, operatingunit, countryname, snu1, psnu, type)) %>%
    left_join(df_orgs_new,  by = "orgunituid") %>%
    tidylog::filter(!is.na(operatingunit))

  #arrange to match original site list ordering
  df_sitelist_new <- df_sitelist_new %>%
    select(all_of(c(names(df_sitelist), "source")))

  df_sitelist_new <- df_sitelist_new %>%
    mutate(start = ifelse(is.na(start), "FY22 Oct", start),
           end = ifelse(is.na(end), "FY22 Sep", end))

  df_sitelist_new <- df_sitelist_new %>%
    distinct()

# EXPORT ------------------------------------------------------------------

  date <- format(Sys.Date(), "%Y%m%d")

  file <- paste0("HFR_FY2_GLOBAL_sitelist_", date, ".csv")

  readr::write_csv(df_sitelist_new,
                   file.path("out", file), na = "")



# UPLOAD REFERENCE TABLES TO S3 AND GDRIVE --------------------------------

  #gdrive folder
  drive_browse(gdrive_fldr_sitelist)

  #move old version from gdrive to trash
  drive_ls(gdrive_fldr_sitelist, "csv") %>%
    drive_trash()

  #upload new version to google drive
  drive_upload(file.path("out", file),
               gdrive_fldr_sitelist,
               name = file)

# upload to s3 bucket
  return_latest("out", ".*") %>%
    grabr::s3_upload(
      bucket = "gov-usaid",
      prefix = "ddc/uat/raw/hfr/receiving"
    )
USAID-OHA-SI/Wavelength documentation built on March 24, 2023, 10:07 a.m.