R/disaggregate.R

Defines functions get_disagg_ get_disagg disagg_ndr.age_sex disagg_ndr.months_dispensed disagg_ndr.art_duration disagg_ndr.current_age disagg_ndr.pregnancy_status disagg_ndr.sex disagg_ndr disaggregate

Documented in disaggregate

#' Summarize an indicator into finer details by the specified variable
#'
#' Counts the number of occurrence of an outcome based on the category of
#' interest. It also provides "Totals" at the end of the columns or rows (or both)
#' where appropriate.
#'
#' @param data Data containing the indicator to be disaggregated.
#' @param by The variable of interest to be used for the disaggregation. The
#' options are any of: \code{"sex"}, \code{"current_age"}, \code{"pregnancy_status"}, \code{"art_duration"},
#' \code{"months_dispensed"} (of ARV), and \code{"age_sex"}.
#' @param ... Other parameters to be passed based on the \code{by} supplied. These are: 1) the \code{`level`} of disaggregation,
#' ("ip" or "country", "state", "lga" and "facility"). 2) \code{`pivot_wide`} TRUE or FALSE, used to determine how the tabular data should be presented.
#' The default options are "state" for  \code{by} and TRUE for \code{pivot_wide}.
#' value is "state".
#'
#' @return disaggregated data
#' @export disaggregate
#'
#' @examples
#' ### Disaggregate "TX_NEW" clients into age categories for each state
#' new_clients <- tx_new(ndr_example, from = "2021-01-01")
#' disaggregate(new_clients, by = "current_age") # default value of level is "state"
#'
#' ### Disaggregate "TX_CURR" by gender for each facility
#' curr_clients <- tx_curr(ndr_example)
#' disaggregate(curr_clients, by = "sex", level = "facility")
#'
disaggregate <- function(data, by, ...) {
  df <- structure(data, "class" = c(class(data), by))


  disagg_ndr(df, ...)
}



disagg_ndr <- function(df, ...) {
  UseMethod("disagg_ndr")
}


### sex disaggregation
disagg_ndr.sex <- function(df, level = "state", pivot_wide = TRUE, ...) {
  gender <- forcats::fct_collapse(df$sex,
    "Male" = "M",
    "Female" = "F",
    other_level = "unknown"
  )

  get_disagg(df, by = "sex", by_value = gender, pivot_wide = pivot_wide, level = level)
}




### pregnancy status
disagg_ndr.pregnancy_status <- function(df, level = "state", pivot_wide = TRUE, ...) {
  preg_status <- forcats::fct_collapse(df$pregnancy_status,
    pregnant = "P",
    breastfeeding = "BF",
    not_pregnant = "NP",
    other_level = "missing_or_unknown"
  )

  get_disagg(df, by = "pregnancy_status", by_value = preg_status, pivot_wide = pivot_wide, level = level)
}



### current_age
disagg_ndr.current_age <- function(df, level = "state", pivot_wide = TRUE, ...) {
  age <- dplyr::case_when(
    df$current_age < 1 ~ "<1",
    dplyr::between(df$current_age, 1, 4) ~ "1-4",
    dplyr::between(df$current_age, 5, 9) ~ "5-9",
    dplyr::between(df$current_age, 10, 14) ~ "10-14",
    dplyr::between(df$current_age, 15, 19) ~ "15-19",
    dplyr::between(df$current_age, 20, 24) ~ "20-24",
    dplyr::between(df$current_age, 25, 29) ~ "25-29",
    dplyr::between(df$current_age, 30, 34) ~ "30-34",
    dplyr::between(df$current_age, 35, 39) ~ "35-39",
    dplyr::between(df$current_age, 40, 44) ~ "40-44",
    dplyr::between(df$current_age, 45, 49) ~ "45-49",
    dplyr::between(df$current_age, 50, 54) ~ "50-54",
    dplyr::between(df$current_age, 55, 59) ~ "55-59",
    dplyr::between(df$current_age, 60, 64) ~ "60-64",
    df$current_age >= 65 ~ "65+", df$current_age <
      0 ~ "neg_age", is.na(df$current_age) ~
      "missing"
  )

  age <- factor(
    age,
    levels = c("<1", "1-4", "5-9", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40-44", "45-49", "50-54", "55-59", "60-64", "65+", "missing"),
    ordered = TRUE
  )

  get_disagg(df, by = "current_age", by_value = age, pivot_wide = pivot_wide, level = level)
}



### duration
disagg_ndr.art_duration <- function(df, level = "state", pivot_wide = TRUE, ...) {
  df$art_duration <- as.integer(df$appointment_date - df$art_start_date)

  art_duration <- dplyr::case_when(
    df$art_duration < 90 ~ "<3 months",
    dplyr::between(df$art_duration, 90, 179) ~ "3-5 months",
    df$art_duration >= 180 ~ "6 months+",
    is.na(df$art_duration) ~ "missing"
  )

  art_duration <- factor(art_duration, levels = c("missing", "<3 months", "3-5 months", "6 months+"), ordered = TRUE)

  get_disagg(df, by = "art_duration", by_value = art_duration, pivot_wide = pivot_wide, level = level)
}




### duration of ARV Dispensed
disagg_ndr.months_dispensed <- function(df, level = "state", pivot_wide = TRUE, ...) {
  if (!any(names(df) %in% "months_dispensed")) {
    df$months_dispensed <- floor(df$days_of_arv_refill / 28)
  }


  mmd <- dplyr::case_when(
    df$months_dispensed < 3 ~ "<3 months",
    dplyr::between(df$months_dispensed, 3, 5) ~ "3-5 months",
    df$months_dispensed >= 6 ~ "6 months+",
    TRUE ~ "missing"
  )

  mmd <- factor(mmd, levels = c("missing", "<3 months", "3-5 months", "6 months+"), ordered = TRUE)

  get_disagg(df, by = "months_dispensed", by_value = mmd, pivot_wide = pivot_wide, level = level)
}



### disaggregate by age and sex
disagg_ndr.age_sex <- function(df, level = "state", pivot_wide = TRUE) {
  age <- dplyr::case_when(
    df$current_age < 1 ~ "<1",
    dplyr::between(df$current_age, 1, 4) ~ "1-4",
    dplyr::between(df$current_age, 5, 9) ~ "5-9",
    dplyr::between(df$current_age, 10, 14) ~ "10-14",
    dplyr::between(df$current_age, 15, 19) ~ "15-19",
    dplyr::between(df$current_age, 20, 24) ~ "20-24",
    dplyr::between(df$current_age, 25, 29) ~ "25-29",
    dplyr::between(df$current_age, 30, 34) ~ "30-34",
    dplyr::between(df$current_age, 35, 39) ~ "35-39",
    dplyr::between(df$current_age, 40, 44) ~ "40-44",
    dplyr::between(df$current_age, 45, 49) ~ "45-49",
    dplyr::between(df$current_age, 50, 54) ~ "50-54",
    dplyr::between(df$current_age, 55, 59) ~ "55-59",
    dplyr::between(df$current_age, 60, 64) ~ "60-64",
    df$current_age >= 65 ~ "65+", df$current_age <
      0 ~ "neg_age", is.na(df$current_age) ~
      "missing"
  )

  age <- factor(
    age,
    levels = c("<1", "1-4", "5-9", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40-44", "45-49", "50-54", "55-59", "60-64", "65+", "missing"),
    ordered = TRUE
  )

  gender <- forcats::fct_collapse(df$sex,
    "Male" = "M",
    "Female" = "F",
    other_level = "unknown"
  )


  get_disagg_(df, by = c("sex", "current_age"), by_value = list(gender, age), pivot_wide = pivot_wide, level = level)
}


# for disaggregation by one variable
get_disagg <- function(df, by, by_value, level = "state", pivot_wide = TRUE, ...) {
  df[by] <- by_value

  by_c <- rlang::sym(by)
  dt <- switch(level,
    "ip" = dplyr::count(df, ip, !!by_c, .drop = FALSE, name = "number"),
    "country" = dplyr::count(df, ip, !!by_c, .drop = FALSE, name = "number"),
    "state" = dplyr::count(df, ip, state, !!by_c, .drop = FALSE, name = "number"),
    "lga" = dplyr::count(df, ip, state, lga, !!by_c, .drop = FALSE, name = "number"),
    "facility" = dplyr::count(df, ip, state, lga, facility, !!by_c, .drop = FALSE, name = "number")
  )

  dt <- structure(dt, "class" = c("spec_tbl_df", "tbl_df", "tbl", "data.frame"))

  if (pivot_wide) {
    dt <- tidyr::pivot_wider(
      dt,
      names_from = tidyselect::all_of(by),
      values_from = "number"
    )
  }

  dt[is.na(dt)] <- 0 ## replace NAs with Zero



  tibble::as_tibble(janitor::adorn_totals(dt))
}



## for disaggregation by two variables
get_disagg_ <- function(df, by, by_value, level = level, pivot_wide = pivot_wide) {
  df[by[1]] <- by_value[[1]]

  df[by[2]] <- by_value[[2]]

  by_c <- rlang::syms(by)

  dt <- switch(level,
    "ip" = dplyr::count(df, ip, !!!by_c, .drop = FALSE, name = "number"),
    "country" = dplyr::count(df, ip, !!!by_c, .drop = FALSE, name = "number"),
    "state" = dplyr::count(df, ip, state, !!!by_c, .drop = FALSE, name = "number"),
    "lga" = dplyr::count(df, ip, state, lga, !!!by_c, .drop = FALSE, name = "number"),
    "facility" = dplyr::count(df, ip, state, lga, facility, !!!by_c, .drop = FALSE, name = "number")
  )

  dt <- structure(dt, "class" = c("spec_tbl_df", "tbl_df", "tbl", "data.frame"))

  if (pivot_wide) {
    dt <- tidyr::pivot_wider(
      dt,
      names_from = by[2],
      values_from = "number"
    )
  }

  dt[is.na(dt)] <- 0 ## replace NAs with Zero



  tibble::as_tibble(janitor::adorn_totals(dt))
}



utils::globalVariables(
  c("lga", "pregnancy_status", "sex")
)

Try the tidyndr package in your browser

Any scripts or data that you put into this service are public.

tidyndr documentation built on April 8, 2022, 9:06 a.m.