R/tidy_sitelist.R

Defines functions tidy_sitelist

Documented in tidy_sitelist

#' Import and munge submitted site list
#'
#' Reads the "Site List" sheet of a site validation workbook (every column
#' as text), reshapes the indicator columns to long format, keeps only the
#' rows flagged "KEEP" or "ADD", splits `mech_partner` into mechanism code,
#' mechanism name and prime partner, and adds HTS_TST_POS and TX_MMD rows
#' derived from the HTS_TST and TX_CURR rows.
#'
#' @param filepath path to sitelist file
#' @param folderpath_output if output is desired, full folder path
#'
#' @return A data frame with the columns `orgunit`, `orgunituid`, `type`,
#'   `operatingunit`, `snu1`, `psnu`, `mech_code`, `mech_name`,
#'   `primepartner`, `start`, `end`, `indicator` and `expect_reporting`,
#'   sorted by `orgunit`, `mech_code` and `indicator`; `NULL` when no rows
#'   remain after filtering (in which case nothing is exported).
#'
#' @export
#'
#' @examples
#' \dontrun{
#'   path <- "~/Data/HFR_FY21_SiteValidation_Kenya.xlsx"
#'   df_sites <- tidy_sitelist(path) }

tidy_sitelist <- function(filepath, folderpath_output = NULL) {

  #read in Site template; readxl recycles a single col_type to every column
  df <- readxl::read_excel(filepath, sheet = "Site List", col_types = "text")

  #remove extra columns and give the period columns unambiguous names
  df <- df %>%
    dplyr::select(-dplyr::starts_with('...'),
                  -dplyr::starts_with("Column")) %>%
    dplyr::rename(pd_start = start,
                  pd_end = end)

  #remove any rows that are missing an orgunit value
  #NOTE(review): the filter is on orgunit, not orgunituid - confirm intent
  df_keep <- dplyr::filter(df, !is.na(orgunit))

  #reshape, and only keep reporting indicators
  df_keep <- df_keep %>%
    tidyr::pivot_longer(HTS_TST:VMMC_CIRC,
                        names_to = "indicator",
                        values_to = "expect_reporting",
                        values_drop_na = TRUE) %>%
    dplyr::filter(expect_reporting %in% c("KEEP", "ADD")) %>%
    dplyr::mutate(expect_reporting = TRUE)

  #breakout mech info: split on ": " following a digit and on " ["
  df_keep <- df_keep %>%
    tidyr::separate(mech_partner,
                    c("mech_code", "mech_name", "primepartner"),
                    sep = "((?<=[[:digit:]]): | \\[)") %>%
    dplyr::mutate(primepartner = stringr::str_remove(primepartner, "]$"))

  #create HTS_TST_POS and TX_MMD from HTS_TST and TX_CURR
  df_addtl <- df_keep %>%
    dplyr::filter(indicator %in% c("HTS_TST", "TX_CURR")) %>%
    dplyr::mutate(indicator = dplyr::recode(indicator,
                                            "HTS_TST" = "HTS_TST_POS",
                                            "TX_CURR" = "TX_MMD"))

  #bind df together and arrange for output
  df_sites <- df_keep %>%
    dplyr::bind_rows(df_addtl) %>%
    dplyr::arrange(orgunit, mech_code, indicator)

  #fill empty start/end with the first/last month of the fiscal year
  #curr_fy is not defined in this function; it is expected to be available
  #from the enclosing (package) environment
  fy_abbr <- paste0("FY", stringr::str_sub(curr_fy, -2))

  #coalesce keeps the columns as character even when there are zero rows
  df_sites <- df_sites %>%
    dplyr::mutate(pd_start = dplyr::coalesce(pd_start,
                                             paste0(fy_abbr, " Oct")),
                  pd_end = dplyr::coalesce(pd_end,
                                           paste0(fy_abbr, " Sep")))

  #force output, renaming only the two period columns back to start/end
  df_sites <- df_sites %>%
    dplyr::select(orgunit, orgunituid, type, operatingunit,
                  snu1, psnu, mech_code, mech_name, primepartner,
                  start = pd_start, end = pd_end,
                  indicator, expect_reporting)

  #return NULL if no values; exit before exporting so that hfr_export is
  #never handed a NULL data frame
  if (nrow(df_sites) == 0) {
    return(NULL)
  }

  #export
  if (!is.null(folderpath_output)) {
    hfr_export(df_sites, folderpath_output, type = "SiteList")
  }

  df_sites

}
USAID-OHA-SI/Wavelength documentation built on March 24, 2023, 10:07 a.m.