R/who_method_cs_analysis.R

Defines functions short_svyby evaluate_args evaluate_df evaluate_drug_name estimate_cs_values

Documented in estimate_cs_values

#' Calculate CES estimates
#'
#' A function to obtain binary var (e.g., coverage: yes / no) estimates with CI from WHO method CS clean data.
#'
#' @param var A string denoting the name of the variable to analyse.
#' @param part A string denoting the name of the variable to use for partitioning the data .
#' @param design A survey package design object.
#' @param level Confidence level for interval
#' @param degf Degrees of freedom. By default NA, which call uses the svyciprop default `defg(design)` evaluation (see svyciprop documentation). For user inputted degrees of freedom enter a positive integer.
#'
#' @return A data frame containing the survey estimates for the inputted variable over the inputted administrative disaggregation (commonly the implementation unit) for the survey groups (SAC and/or adults) over all individuals, disaggregated by sex and (for SAC) by school attendance.
#'
#' @importFrom magrittr %>%
#' @importFrom rlang :=
#' @export
estimate_cs_values <- function(var, part, design, level = 0.95, degf = NA){

  #==================================================================================#
  # Step 1 - Do argument checking -----------------------------------------
  #==================================================================================#

  # Do argument checking
  evaluate_args(var = var, part = part, design = design)
  evaluate_drug_name(var = var, df = design$variables)
  # Last evaluates and creates object survey_groups
  survey_groups <- evaluate_df(df = design$variables)

  #==================================================================================#
  # Step 2 - Convert argument into formulas for svy functions -------------
  #==================================================================================#

  formula_var <- stats::as.formula(paste0("~", var))
  formula_part <- stats::as.formula(paste0("~", part))

  formula_part_sex <- stats::as.formula(paste0("~", part, "+ ind_sex"))
  formula_part_att <- stats::as.formula(paste0("~", part, "+ ind_child_attend_bin"))

  drug_name <- toupper(gsub('^(ind_)(.+)(_{1})(.+)(_bin)$',"\\4", var))
  question_name <- toupper(gsub('^(ind_)(.+)(_{1})(.+)(_bin)$',"\\2", var))
  question_name <- ifelse(question_name == "SWALLOWED", "coverage", "reach")

  # Convert strings to rlang sym objects to pass them thorugh piping, dplyr objects
  sex <- rlang::sym("sex")
  attendance <- rlang::sym("attendance")
  drug <- rlang::sym("drug")

  var_sym <- rlang::sym(var)
  part_sym <- rlang::sym(part)

  #==================================================================================#
  # Step 3 - Reduce design to relevant partitions ------------------------------------
  #==================================================================================#

  # Certain partitions may be available to one variable but not to another. For example, a survey may be for PZQ in 2
  # IUs, but only in one of them for ALB. In that case, all ALB var answers in the other partition are missing (NA).
  # Inversely, none is not NA. Use this to reduce the design to the relevant partitions

  relevant_partitions <- design$variables %>%
    dplyr::group_by(!!part_sym) %>% dplyr::summarise(count = sum(!is.na(!!var_sym))) %>%
    dplyr::filter(count > 0) %>% dplyr::pull(!!part_sym)

  design_f <- subset(design, eval(rlang::expr(!!part_sym)) %in% relevant_partitions)

  #==================================================================================#
  # Step 4 - Go through groups --------------------------------------------
  #==================================================================================#

  result <- list()
  for (group in sort(unique(survey_groups))){


    # Calculate overall result, by sex, and - if SAC - by attendance, give uniformity
    h_all <- short_svyby(formula_var, formula_part, design = design_f, group, level = level, degf = degf) %>%
      dplyr::mutate(!!sex := NA, !! attendance := NA, !!drug := drug_name, by = "overall") %>%
      dplyr::select(partition = 1, drug, by, sex, attendance, estimate = 2, lower = 3, upper = 4)


    h_sex <- short_svyby(formula_var, formula_part_sex, design = design_f, group, level = level, degf = degf) %>%
      dplyr::mutate(!!attendance := NA, !!drug := drug_name, by = "sex") %>%
      dplyr::select(partition = 1, drug, by, sex = 2, attendance, estimate = 3, lower = 4, upper = 5)

    helper <- rbind(h_all, h_sex)

    if (grepl("SAC", group)){
      h_att <- short_svyby(formula_var, formula_part_att, design = design_f, group, level = level, degf = degf) %>%
        dplyr::mutate(!!sex := NA, !!drug := drug_name, by = "attendance") %>%
        dplyr::select(partition = 1, drug, by, sex, attendance = 2, estimate = 3, lower = 4, upper = 5)
      helper <- rbind(helper, h_att)
    }

    # Use shortened group name (SAC, not 1_SAC)
    group_label <- gsub('^(\\d{1})(_{1})([A-Za-z]+)$',"\\3", group)
    result[[group_label]] <- helper

  }

  # Bind and put in standard order
  result <- dplyr::bind_rows(result, .id = "group") %>% dplyr::mutate(question = question_name) %>%
    dplyr::select(group, drug, partition, question, by, sex, attendance, estimate, lower, upper)

  return(result)

}


#==================================================================================#
#==================================================================================#
# Additional functions (used by large function, not exported) -------------
#==================================================================================#
#==================================================================================#

#==================================================================================#
# Add - Check the variable containing drug offering or taking conforms to data dictionary -----
#==================================================================================#

evaluate_drug_name <- function(var, df){

  if (! grepl('^(ind_)(.+)(_{1})(.+)(_bin)$', var)) {

    stop("Function based on the premise that variables of interest take the form ind_[string]_[3_letter_drug_abbreviation]_bin")

  }

  values <- stats::na.omit(df[[var]])

  if ((dplyr::n_distinct(values) > 2 | dplyr::n_distinct(values) < 1) | (max(values) > 1 | min(values) < 0)) {

    stop(sprintf("The answer codes should only be 0, 1 or NA. This is not the case for %s, where the values are %s",
                 var, paste(sort(unique(values)), collapse = ", ") ))

  }
}

#==================================================================================#
# Add - Check vars in data frame -----
#==================================================================================#

# Check the variables in the data frame used to group (SAC or adult) and disaggregate (sex, school attendance)
# conform to the data dictionary
evaluate_df <- function(df){

  # Check whether data contains variable informing of group (SAC & Adults)
  # Currently the data dicitonary has it as var "ind_group", as this changes, this function would need to change
  if (!("ind_group" %in% names(df))) stop("Function requires the variable ind_group denoting group (e.g., SAC, Adult)")

  # Further, the variable ind_group is written as code << 1_SAC >> for SAC, code << 2_Adult >> for adults.
  # Check at least one of these is contained. As the data dictionary evolves, this would need to reflect those changes
  survey_groups <- unique(stats::na.omit(df$ind_group))
  if (length(survey_groups) == 2){
    if (!all(c("1_SAC", "2_Adult") %in% survey_groups)) {
      stop(sprintf("The function assumes var ind_group is coded as << 1_SAC >> for SAC, code << 2_Adult >> for adults. Currently, var ind_group has answers: %s",
                   paste(survey_groups, collapse = ", ")))
    }
  } else {
    if (!any(c("1_SAC", "2_Adult") %in% survey_groups)) {
      stop(sprintf("The function assumes var ind_group is coded as << 1_SAC >> for SAC, code << 2_Adult >> for adults. Currently, var ind_group has answers: %s",
                   paste(survey_groups, collapse = ", ")))
    }
  }

  # Check for sex and school attendance variables
  if (!("ind_sex" %in% names(df))) stop("Function requires the variable ind_sex denoting sex")
  if (!("ind_child_attend_bin" %in% names(df))) stop("Function requires the variable ind_child_attend_bin denoting attendance")

  if (!any(c("1_Male", "2_Female") %in% unique(stats::na.omit(df$ind_sex)))) {
    stop(sprintf("The function assumes var ind_sex is coded as << 1_Male >> for males, << 2_Female >> for females. Currently, var ind_sex has answers: %s",
                 paste(unique(stats::na.omit(df$ind_sex)), collapse = ", ")))
  }

  if ("1_SAC" %in% survey_groups){ # Evaluate child attendance variable only if there are kids in the survey
    if (!any(0:1 %in% unique(stats::na.omit(df$ind_child_attend_bin)))) {
      stop(sprintf("The function assumes var ind_child_attend_bin is coded as 0 for non attending, 1 for attending. Currently, var ind_child_attend_bin has answers: %s",
                   paste(unique(stats::na.omit(df$ind_child_attend_bin)), collapse = ", ")))
    }
  }

  return(survey_groups)

}

#==================================================================================#
# Add - Check the arguments to the function ------
#==================================================================================#

evaluate_args <- function(var, part, design){

  # Check design object
  if(!(sort(unique(class(design)))[1] == "survey.design")) stop("Your design object is not a survey package design object")

  # Check var is string
  if (!(is.character(var) & length(var) == 1)) stop("The name of the variable to analyse is not a simple, length one string")

  # Check var is a column in df
  if(!var %in% names(design$variables)) stop(sprintf("Variable %s is not a column header name for the design object passed", var))

  # Check part is string
  if (!(is.character(part) & length(part) == 1)) stop("The name of the variable of the partition is not a simple, length one string")

  # Check part is a column in df
  if(!part %in% names(design$variables)) stop(sprintf("Partition argument %s is not a column header name for the design object passed", part))

}

#==================================================================================#
# Add - survey shorthand ------
#==================================================================================#

short_svyby <- function(formula_var, formula_part, design, group, level = level, degf = degf) {

  if (is.na(degf)){

    survey::svyby(formula_var, formula_part, design = subset(design, ind_group == group),
                  survey::svyciprop, vartype = "ci", method = "logit", na.rm = T, level = level) %>%
      tibble::remove_rownames()

  } else {

    survey::svyby(formula_var, formula_part, design = subset(design, ind_group == group),
                  survey::svyciprop, vartype = "ci", method = "logit", na.rm = T, level = level, df = degf) %>%
      tibble::remove_rownames()

  }

}
SCIFoundation/sciCoverageR documentation built on Oct. 31, 2024, 11:45 p.m.