R/utilities.R

Defines functions compute_coweights compute_for_all_age_segments calculate_component_survival_probabilities

#######################################################
# Utility Scripts
# Casey Breen
# 10/20/2017
#######################################################

globalVariables(".")

calculate_component_survival_probabilities <- function(df, grouping) {
  #Calculate component death probabilities
  . <- "quiet"
  df <- dplyr::summarise(
    df,
    cdpw = sum(cdpw_num) / sum(cdpw_denom)
  )

  #Calculate component survival probabilities
  df <- dplyr::mutate(df, csp = 1 - cdpw)
  df <- dplyr::group_by_at(df, c(grouping, "rate_type"))
  dplyr::summarise(
    df,
    mortality_rate = abs(prod(csp) - 1) * 1000
  )
}


compute_for_all_age_segments <- function(df, grouping) {
  . <- "quiet"
  df <- dplyr::group_by_at(df, c(grouping, "age_segment", "psu"))
  out <- dplyr::summarise(
    df,
    cdpw_num = sum(coweight_num_weight[numerator], na.rm = TRUE),
    cdpw_denom = sum(coweight_den_weight[denominator], na.rm = TRUE),
    cdp_num = sum(coweight_num[numerator], na.rm = TRUE),
    cdp_denom = sum(coweight_den[denominator], na.rm = TRUE)
  )
  rate_type_key <- list(`0-0` = c("neonatal", "infant", "underfive"),
                        `1-2` = c("postneonatal", "infant", "underfive"),
                        `3-5` = c("postneonatal", "infant", "underfive"),
                        `6-11` = c("postneonatal", "infant", "underfive"),
                        `12-23` = c("child", "underfive"),
                        `24-35` = c("child", "underfive"),
                        `36-47` = c("child", "underfive"),
                        `48-59` = c("child", "underfive"))
  dplyr::mutate(out, rate_type = rate_type_key[age_segment])
}

compute_coweights <- function(df, lower_age_segment, upper_age_segment) {
  #Set lower and upper limits of age interval
  . <- "quiet"
  df$al <- lower_age_segment
  df$au <- upper_age_segment

  #Set lower and upper limits of of time period
  df$tu <- df$intdatecmc
  df$tl <- df$intdatecmc - df$period

  #Calculate cohort limits
  df$tlau <- df$tl - df$au
  df$tlal <- df$tl - df$al
  df$tuau <- df$tu - df$au
  df$tual <- df$tu - df$al

  #Create the 3 cohorts by full exposure (1) or partial exposure (0.5)
  df$coweight_num[df$kiddobcmc >= df$tlau - 1 & df$kiddobcmc < df$tlal] <- 0.5
  df$coweight_num[df$kiddobcmc >= df$tlal & df$kiddobcmc < df$tuau-1] <- 1
  df$coweight_num[df$kiddobcmc >= df$tuau - 1 & df$kiddobcmc < df$tual] <-
    ifelse(upper_age_segment == 0, 1, 1)


  df$coweight_den[df$kiddobcmc >= df$tlau - 1 & df$kiddobcmc < df$tlal] <- 0.5
  df$coweight_den[df$kiddobcmc >= df$tlal & df$kiddobcmc < df$tuau-1] <- 1
  df$coweight_den[df$kiddobcmc >= df$tuau - 1 & df$kiddobcmc < df$tual] <- 0.5

  #Weight numerator by person weight
  df$coweight_num_weight <- df$coweight_num * (df$perweight*1000000)
  df$coweight_den_weight <- df$coweight_den * (df$perweight*1000000)


  df$numerator <- !is.na(df$kidagediedimp) & df$kidagediedimp >= lower_age_segment &
    df$kidagediedimp <= upper_age_segment

  df$denominator <- is.na(df$kidagediedimp) | df$kidagediedimp >= lower_age_segment


  df$age_segment <- paste0(lower_age_segment, "-", upper_age_segment)

  df[ , c("unique_id", "age_segment", "coweight_num", "coweight_den",
          "coweight_num_weight", "coweight_den_weight", "numerator",
          "denominator")]
}
caseybreen/childhoodmortality documentation built on June 8, 2020, 7:03 p.m.