data-raw/config_update_analysis.R

#TODO
# check with Sam the general use of this script as it is missing variables and does not run in completion

library(magrittr)
library(tidyverse)
require(foreach)
library(datimutils)

# TODO function get_indicator_details is unused passed the stopped here line
# removed getMetadata from R file as it can be brought back in manually
# left datapackcommons library reference so that datimutils::getMetadata is not being reference here
get_indicator_details <- function(uid) {
  datapackcommons::getMetadata("indicators",
                               glue::glue("id:eq:{uid}"),
                               "name, id, numerator, denominator")
}

getFormDetails <- function(fiscal_yyyy_int, stream) {
  datapackr::getDatasetUids((fiscal_yyyy_int - 1), stream) %>%
    purrr::map(~datimutils::getSqlView(sql_view_uid = "DotdxKrNZxG",
                                       variable_keys = c("dataSets"),
                                       variable_values = c(.x))) %>%
    dplyr::bind_rows() %>%
    dplyr::select(-dataset) %>%
    distinct() %>%
    mutate(across(where(is.character), str_trim)) %>% #trimming implemented to clean white space
    as_tibble()
}

parseIndicators <- function(indicator_uids) {
  indicator_parts <- datimutils::getIndicators(indicator_uids,
                                               fields = "id,numerator,denominator") %>%
    dplyr::mutate(numerator_addends =
                    stringr::str_extract_all(numerator,
                                             "#\\{[a-zA-Z0-9]{11}([a-zA-Z0-9\\.]{12})?\\}"),
                  numerator_addends = purrr::map(numerator_addends,
                                                 stringr::str_remove_all, "#\\{"),
                  numerator_addends = purrr::map(numerator_addends,
                                                 stringr::str_remove_all, "\\}"),
                  numerator_addends_de = purrr::map(numerator_addends,
                                                    stringr::str_sub, 1, 11),
                  numerator_addends_coc = purrr::map(numerator_addends,
                                                     stringr::str_sub, 13, 23),
                  numerator_addends_coc = purrr::map(numerator_addends_coc,
                                                     na_if, ""),
                  denominator_addends =
                    stringr::str_extract_all(denominator,
                                             "#\\{[a-zA-Z0-9]{11}([a-zA-Z0-9\\.]{12})?\\}"),
                  denominator_addends = purrr::map(denominator_addends,
                                                   stringr::str_remove_all, "#\\{"),
                  denominator_addends = purrr::map(denominator_addends,
                                                   stringr::str_remove_all, "\\}"),
                  denominator_addends_de = purrr::map(denominator_addends,
                                                      stringr::str_sub, 1, 11),
                  denominator_addends_coc = purrr::map(denominator_addends,
                                                       stringr::str_sub, 13, 23),
                  denominator_addends_coc = purrr::map(denominator_addends_coc,
                                                       na_if, "")
    )


  return(indicator_parts)
}


doMC::registerDoMC(cores = 5)

datimutils::loginToDATIM(paste0(Sys.getenv("SECRETS_FOLDER"),
                                "datim.json"))

data_required <- datapackcommons::data_required


# get data element and category option combos

fy_20_r  <-  getFormDetails(2020, "mer_results") %>%
  dplyr::mutate(de.coc = paste(dataelementuid,
                               categoryoptioncombouid,
                               sep = "."),
                fy_20_r = TRUE)
fy_21_r  <-  getFormDetails(2021, "mer_results") %>%
  dplyr::mutate(de.coc = paste(dataelementuid,
                               categoryoptioncombouid,
                               sep = "."),
                fy_21_r = TRUE)
fy_21_t  <-  getFormDetails(2021, "mer_targets") %>%
  dplyr::mutate(de.coc = paste(dataelementuid,
                               categoryoptioncombouid,
                               sep = "."),
                fy_21_t = TRUE)
fy_22_t  <-  getFormDetails(2022, "mer_targets") %>%
  dplyr::mutate(de.coc = paste(dataelementuid,
                               categoryoptioncombouid,
                               sep = "."),
                fy_22_t = TRUE)

fy_23_t  <-  getFormDetails(2023, "mer_targets") %>%
  dplyr::mutate(de.coc = paste(dataelementuid,
                               categoryoptioncombouid,
                               sep = "."),
                fy_23_t = TRUE)

fy_22_t_fy_21_r_de_coc <- c(fy_22_t$dataelementuid,
                            fy_21_r$dataelementuid,
                            fy_22_t$de.coc,
                            fy_21_r$de.coc) %>% unique()

fy_21_r_to_fy_20_r_diff <- dplyr::full_join(fy_20_r, fy_21_r) %>%
  dplyr::arrange() %>%
  dplyr::filter(is.na(fy_21_r) | is.na(fy_20_r))

fy_22_t_to_fy_21_t_diff <- dplyr::full_join(fy_21_t, fy_22_t) %>%
  dplyr::arrange() %>%
  dplyr::filter(is.na(fy_21_t) | is.na(fy_22_t))

fy_23_t_to_fy_22_t_diff <- dplyr::full_join(fy_22_t, fy_23_t) %>%
  dplyr::arrange() %>%
  dplyr::filter(is.na(fy_22_t) | is.na(fy_23_t))

####################  stopped here


indicators_required <-
  c(data_required$A.dx_id, data_required$B.dx_id) %>%
  unique() %>%
  na.omit() %>%
  parseIndicators() %>%
  dplyr::mutate(matching_num_elements =
                  purrr::map(numerator_addends,
                             match,
                             fy_22_t_fy_21_r_de_coc),
                matching_den_elements =
                  purrr::map(denominator_addends,
                             match,
                             fy_22_t_fy_21_r_de_coc),
                missing_any_elements =
                  (purrr::map2_lgl(matching_num_elements,
                               matching_num_elements,
                    ~any(is.na(.x),
                         is.na(.y)))))

#TODO ask about because cannot run because missing fy_21_t_fy_20_r_de_coc variable
data_elements_only_missing <- dplyr::setdiff(c(data_required$A.dx_id,
                                           data_required$B.dx_id),
                                         indicators_required$id) %>%
  dplyr::setdiff(fy_21_t_fy_20_r_de_coc)


print(paste("These data elements are referenced but not available",
            datimutils::getDataElements(data_elements_only_missing)))

indicators_w_missing_elements <-
  dplyr::filter(indicators_required,
                missing_any_elements)

#TODO ask about because cannot run because missing fy_21_t_fy_20_r_de_coc variable
purrr::map(indicators_required$denominator_addends,
           match, fy_21_t_fy_20_r_de_coc)

de_coc <- c(indicators_required$numerator_addends,
            indicators_required$denominator_addends) %>%
  unlist() %>%
  unique()



#FY22 Update stopped here

extract_formula_components <- function(formula) {
  stringr::str_split(formula, "[+-]")[[1]] %>%
    stringr::str_remove_all(" ") %>%
    stringr::str_remove_all("\\#") %>%
    stringr::str_remove_all("\\{") %>%
    stringr::str_remove_all("\\}") %>%
    stringr::str_remove_all("\\(") %>%
    stringr::str_remove_all("\\)") %>%
    tibble::enframe(name = NULL) %>%
    dplyr::mutate(data_element_uid = stringr::str_sub(value, 1, 11)) %>%
    dplyr::mutate(co_combination_uid = dplyr::if_else(stringr::str_length(value) > 11,
                                                      stringr::str_sub(value, 13, 23),
                                                      NA_character_)) %>%
    dplyr::select(- value)
}

# TODO ask cannot run fy_19_r variable is missing
elements_fy19r_fy20t <- dplyr::bind_rows(fy_19_r, fy_20_t) %>% dplyr::distinct()


# -------------------------------------------------------------------------
compare_indicator_cocs <- function(indicator, dataset_details) {

  numerator_components <- indicator$numerator %>%
    extract_formula_components() %>%
    dplyr::bind_rows() %>%
    na.omit() %>%
    dplyr::mutate(dataelementuid = data_element_uid,
                  categoryoptioncombouid = co_combination_uid)

  numerator_component_des <- numerator_components$data_element_uid %>% unique()
  numerator_ind_to_dataset <- dataset_details %>%
    dplyr::filter(dataelementuid %in% numerator_component_des) %>%
    dplyr::full_join(numerator_components)

  denominator_components <<- indicator$denominator %>%
    extract_formula_components() %>%
    dplyr::bind_rows() %>%
    na.omit() %>%
    dplyr::mutate(dataelementuid = data_element_uid,
                  categoryoptioncombouid = co_combination_uid)

  denominator_component_des <- denominator_components$data_element_uid %>% unique()
  denominator_ind_to_dataset <- dataset_details %>%
    dplyr::filter(dataelementuid %in% denominator_component_des) %>%
    dplyr::full_join(denominator_components)

  return(list(numerator_ind_to_dataset = numerator_ind_to_dataset,
              denominator_ind_to_dataset = denominator_ind_to_dataset))
}

get_invalid_data_elements <- function(data_required_spec) {
  print(data_required_spec)
indicators  <-  c(data_required_spec$A.dx_id,
                  data_required_spec$B.dx_id) %>%
  unique() %>%
  na.omit() %>%
  purrr::map(get_indicator_details) %>%
  purrr::compact() %>%
  dplyr::bind_rows()

if (length(indicators) == 0) {
  return(NULL)
}

numerator_components <- indicators$numerator %>%
  purrr::map(extract_formula_components) %>%
  dplyr::bind_rows()

numerator_data_elements <- numerator_components %>%
  .[["data_element_uid"]] %>%
  unique() %>%
  purrr::map(~datapackcommons::getMetadata("dataElements",
                                           glue::glue("id:eq:{.x}"))) %>%
  dplyr::bind_rows()


denominator_components <- indicators$denominator[indicators$denominator != 1] %>%
  purrr::map(extract_formula_components) %>%
  dplyr::bind_rows()

denominator_data_elements <- denominator_components %>%
  .[["data_element_uid"]] %>%
  unique() %>%
  purrr::map(~datapackcommons::getMetadata("dataElements",
                                           glue::glue("id:eq:{.x}"))) %>%
  dplyr::bind_rows()

combined_data_elements <- dplyr::bind_rows(numerator_data_elements, denominator_data_elements)

dplyr::setdiff(combined_data_elements$displayName, elements_fy19r_fy20t$dataelement)

}




#TODO
# ask Sam about missing variables. The below function requires a missing fy variable
# current errors is: Error in do.ply(i) :
# task 1 failed - "object 'elements_fy19r_fy20t' not found"

plyr::alply(data_required, 1, get_invalid_data_elements, .parallel = TRUE)







indicators  <-  c(data_required$A.dx_id,
                  data_required$B.dx_id) %>%
  unique() %>%
  na.omit() %>%
  purrr::map(get_indicator_details) %>%
  purrr::compact() %>%
  dplyr::bind_rows()

indicators_with_co_combos <- dplyr::filter(indicators,
                                           stringr::str_detect(numerator, "\\.") |
                                             stringr::str_detect(denominator, "\\."))

#TODO
# ask Sam about missing variable "object 'elements_fy19r_fy20t' not found"
temp <- plyr::alply(indicators_with_co_combos, 1,
                    compare_indicator_cocs, elements_fy19r_fy20t) %>%
  set_names(indicators_with_co_combos$name)





numerator_components <- indicators$numerator %>%
  purrr::map(extract_formula_components) %>%
  dplyr::bind_rows() %>%
  .[["data_element_uid"]] %>%
  unique() %>%
  purrr::map(~datapackcommons::getMetadata("dataElements",
                                           glue::glue("id:eq:{.x}")))


denominator_components <- indicators$denominator[indicators$denominator != 1] %>%
  purrr::map(extract_formula_components) %>%
  dplyr::bind_rows() %>%
  .[["data_element_uid"]] %>%
  unique() %>%
  purrr::map(~datapackcommons::getMetadata("dataElements",
                                           glue::glue("id:eq:{.x}")))

#TODO
# ask Sam there is failure here with binding -- can be fixed but should confirm before fixing
combined_componenets <- dplyr::bind_rows(numerator_components, denominator_components) %>%
  distinct()

de_19r_20t <- c(fy_20_t$dataelement, fy_19_r$dataelement) %>% unique()

dplyr::setdiff(combined_componenets$id, de_19r_20t)





# vmmc_circ yield
fy_19_r %>%
  dplyr::select(-dataset) %>%
  distinct() %>%
  dplyr::filter(stringr::str_detect(dataelement, "VMMC_CIRC \\(N, ")) %>%
  dplyr::filter(stringr::str_detect(dataelement, "Age/Sex/HIVStatus")) %>%
  dplyr::filter(stringr::str_detect(categoryoptioncombo, "Positive") |
                  stringr::str_detect(categoryoptioncombo, "Negative")) %>%
  dplyr::transmute(element = paste0("#{", dataelementuid, ".", categoryoptioncombouid, "}")) %>%
  .[["element"]] %>%
  glue::glue_collapse(" + ")


fy_19_r %>%
  dplyr::select(-dataset) %>%
  distinct() %>%
  dplyr::filter(stringr::str_detect(dataelement, "VMMC_CIRC \\(N, ")) %>%
  dplyr::filter(stringr::str_detect(dataelement, "Age/Sex/HIVStatus")) %>%
  dplyr::filter(stringr::str_detect(categoryoptioncombo, "Positive")) %>%
  dplyr::transmute(element = paste0("#{", dataelementuid, ".", categoryoptioncombouid, "}")) %>%
  .[["element"]] %>%
  glue::glue_collapse(" + ")


# vmmc_circ indeterminate

fy_19_r %>%
  dplyr::select(-dataset) %>%
  distinct() %>%
  dplyr::filter(stringr::str_detect(dataelement, "VMMC_CIRC \\(N, ")) %>%
  dplyr::filter(stringr::str_detect(dataelement, "Age/Sex/HIVStatus")) %>%
  dplyr::filter(stringr::str_detect(categoryoptioncombo, "Unknown")) %>%
  dplyr::transmute(element = paste0("#{", dataelementuid, ".", categoryoptioncombouid, "}")) %>%
  .[["element"]] %>%
  glue::glue_collapse(" + ")


# ovc_serv target

temp <- fy_20_t %>%
  dplyr::select(-dataset) %>%
  distinct() %>%
  dplyr::filter(stringr::str_detect(dataelement, "OVC_SERV"))  %>%
  dplyr::transmute(element = paste0("#{", dataelementuid, ".", categoryoptioncombouid, "}")) %>%
  .[["element"]] %>%
  glue::glue_collapse(" + ")

# tx_tb_d results already

temp <- fy_19_r %>%
  dplyr::select(-dataset) %>%
  distinct() %>%
  dplyr::filter(stringr::str_detect(dataelement, "TX_TB \\(D"))  %>%
  dplyr::filter(stringr::str_detect(dataelement, "Age/Sex/TBScreen/NewExistingART/")) %>%
  dplyr::filter(stringr::str_detect(categoryoptioncombo, "Already"))  %>%
  dplyr::transmute(element = paste0("#{", dataelementuid, ".", categoryoptioncombouid, "}")) %>%
  .[["element"]] %>%
  glue::glue_collapse(" + ")

temp <- fy_19_r %>%
  dplyr::select(-dataset) %>%
  distinct() %>%
  dplyr::filter(stringr::str_detect(dataelement, "TX_TB \\(D"))  %>%
  dplyr::filter(stringr::str_detect(dataelement, "Age/Sex/TBScreen/NewExistingART/")) %>%
  dplyr::filter(stringr::str_detect(categoryoptioncombo, "Already"))  %>%
  dplyr::filter(stringr::str_detect(categoryoptioncombo, "TB Screen - Positive"))  %>%
  dplyr::transmute(element = paste0("#{", dataelementuid, ".", categoryoptioncombouid, "}")) %>%
  .[["element"]] %>%
  glue::glue_collapse(" + ")


# tx_tb_d results new

temp <- fy_19_r %>%
  dplyr::select(-dataset) %>%
  distinct() %>%
  dplyr::filter(stringr::str_detect(dataelement, "TX_TB \\(D"))  %>%
  dplyr::filter(stringr::str_detect(dataelement, "Age/Sex/TBScreen/NewExistingART/")) %>%
  dplyr::filter(stringr::str_detect(categoryoptioncombo, "New"))  %>%
  dplyr::transmute(element = paste0("#{", dataelementuid, ".", categoryoptioncombouid, "}")) %>%
  .[["element"]] %>%
  glue::glue_collapse(" + ")

temp <- fy_19_r %>%
  dplyr::select(-dataset) %>%
  distinct() %>%
  dplyr::filter(stringr::str_detect(dataelement, "TX_TB \\(D"))  %>%
  dplyr::filter(stringr::str_detect(dataelement, "Age/Sex/TBScreen/NewExistingART/")) %>%
  dplyr::filter(stringr::str_detect(categoryoptioncombo, "New"))  %>%
  dplyr::filter(stringr::str_detect(categoryoptioncombo, "TB Screen - Positive"))  %>%
  dplyr::transmute(element = paste0("#{", dataelementuid, ".", categoryoptioncombouid, "}")) %>%
  .[["element"]] %>%
  glue::glue_collapse(" + ")



# pmtct_stat d Percentage at Entry Known Positive

temp <- fy_19_r %>%
  dplyr::select(-dataset) %>%
  distinct() %>%
  dplyr::filter(stringr::str_detect(dataelement, "PMTCT_STAT \\(N"))  %>%
  dplyr::filter(stringr::str_detect(dataelement, "Age/Sex/KnownNewResult")) %>%
  dplyr::filter(stringr::str_detect(categoryoptioncombo, "Known at Entry Positive"))  %>%
  dplyr::transmute(element = paste0("#{", dataelementuid, ".", categoryoptioncombouid, "}")) %>%
  .[["element"]] %>%
  glue::glue_collapse(" + ")



# PMTCT_STAT.N.newYield denominator

temp <- fy_19_r %>%
  dplyr::select(-dataset) %>%
  distinct() %>%
  dplyr::filter(stringr::str_detect(dataelement, "PMTCT_STAT \\(N"))  %>%
  dplyr::filter(stringr::str_detect(dataelement, "Age/Sex/KnownNewResult")) %>%
  dplyr::filter(stringr::str_detect(categoryoptioncombo, "Newly Identified Positive") |
                  stringr::str_detect(categoryoptioncombo, "Newly Identified Negative"))  %>%
  dplyr::transmute(element = paste0("#{", dataelementuid, ".", categoryoptioncombouid, "}")) %>%
  .[["element"]] %>%
  glue::glue_collapse(" + ")

# PMTCT_STAT.N.newYield numerator

temp <- fy_19_r %>%
  dplyr::select(-dataset) %>%
  distinct() %>%
  dplyr::filter(stringr::str_detect(dataelement, "PMTCT_STAT \\(N"))  %>%
  dplyr::filter(stringr::str_detect(dataelement, "Age/Sex/KnownNewResult")) %>%
  dplyr::filter(stringr::str_detect(categoryoptioncombo, "Newly Identified Positive"))  %>%
  dplyr::transmute(element = paste0("#{", dataelementuid, ".", categoryoptioncombouid, "}")) %>%
  .[["element"]] %>%
  glue::glue_collapse(" + ")


# TB_STAT.N.newYield denominator

temp <- fy_19_r %>%
  dplyr::select(-dataset) %>%
  distinct() %>%
  dplyr::filter(stringr::str_detect(dataelement, "TB_STAT \\(N"))  %>%
  dplyr::filter(stringr::str_detect(categoryoptioncombo, "Newly Identified Positive"))  %>%
  dplyr::transmute(element = paste0("#{", dataelementuid, ".", categoryoptioncombouid, "}")) %>%
  .[["element"]] %>%
  glue::glue_collapse(" + ")

# TB_STAT Percentage With already known status numerator

temp <- fy_19_r %>%
  dplyr::select(-dataset) %>%
  distinct() %>%
  dplyr::filter(stringr::str_detect(dataelement, "TB_STAT \\(N"))  %>%
  dplyr::filter(stringr::str_detect(categoryoptioncombo, "Known at Entry Positive"))  %>%
  dplyr::transmute(element = paste0("#{", dataelementuid, ".", categoryoptioncombouid, "}")) %>%
  .[["element"]] %>%
  glue::glue_collapse(" + ")
pepfar-datim/data-pack-commons documentation built on April 26, 2024, 12:09 a.m.