R/data_reshape_wpp_output_long.R

Defines functions data_reshape_wpp_output_long

Documented in data_reshape_wpp_output_long

# this function takes the list output by the cmmppWPP_workflow_one_country_variant and reshapes
# it into a single long data frame 
# This is the preferred way to transfer the output data from R into Eagle

#------------------------------------------------------------

#' Reshape ccmppWPP output into a single long data frame with all indicators
#'
#' @description this function takes the list output by the cmmppWPP_workflow_one_country_variant and reshapes
#' it into a single long data frame 
#'
#' @author Sara Hertog
#'
#' @param wpp_output list of output indicators generated by ccmppWPP_workflow_one_country_variant()
#'
#' @details NA
#'
#' @return a data frame with revision, variant, indicator name, time, sex, age and value
#'
#' @export
#'

data_reshape_wpp_output_long <- function(wpp_output) {
  
  # identify global parameters from attributes
  atr <- attributes(wpp_output)
  atr <- atr[!(names(atr) == "names")]
  atr <- atr[names(atr) == "locid"] # for now only keep locid on this output to ensure that all values written to csv are numeric
  
  global <- data.frame(indicator = rep("global_parameters", length(atr)),
                       subindicator = names(atr),
                       time_start = as.numeric(NA),
                       time_span = as.numeric(NA),
                       sex = as.character(NA),
                       age_start = as.numeric(NA),
                       age_span = as.numeric(NA),
                       value = do.call(rbind, atr))
  
  wpp_output <- wpp_output[!(names(wpp_output) %in% c("mig_parameter", "mig_net_count_age_sex_override"))]
  
  out_list <- list()
  
  for (i in 1:length(wpp_output)) {
    df <- wpp_output[[i]]
    nm <- names(df)
    if ("indicator" %in% nm) {
      names(df)[names(df) == "indicator"] <- "subindicator"
      df$subindicator <- ifelse(substr(df$subindicator,1,3) == "lt_", 
                                substr(df$subindicator,4,nchar(df$subindicator)),
                                df$subindicator)
    } else if (!("indicator" %in% nm)) {
      df$subindicator <- ""
    }
    
    df$indicator <- names(wpp_output)[i]
    df$subindicator[substr(df$indicator, nchar(df$indicator) - 3, nchar(df$indicator)) == "_1x1"] <- "1x1"
    df$subindicator[substr(df$indicator, nchar(df$indicator) - 3, nchar(df$indicator)) == "_5x1"] <- "5x1"
    df$indicator[substr(df$indicator, nchar(df$indicator) - 3, nchar(df$indicator)) %in% c("_1x1", "_5x1")] <- 
      substr(df$indicator[substr(df$indicator, nchar(df$indicator) - 3, nchar(df$indicator)) %in% c("_1x1", "_5x1")],
             1,
             nchar(df$indicator[substr(df$indicator, nchar(df$indicator) - 3, nchar(df$indicator)) %in% c("_1x1", "_5x1")])-4)
    
                           
    if (!("sex" %in% nm)) {
      df$sex <- "both"
    }
    if (!("age_start" %in% nm)) {
      df$age_start <- 0
      df$age_span  <- -1
    }
    
    out_list[[i]] <- df
  }
  wpp_output_long <- rbind(global, do.call(rbind, out_list))
  
  # reduce size of file by shortening some fields
  wpp_output_long$sex <- substr(wpp_output_long$sex,1,1)
  wpp_output_long$age_span[which(wpp_output_long$age_span == 1000)] <- -1
  
  # remove youngest and oldest ages from asfr and pct output
  wpp_output_long <- wpp_output_long[!(wpp_output_long$indicator %in% c("fert_rate_age", "fert_pct_age") &
                                         (wpp_output_long$age_start < 10 | wpp_output_long$age_start > 59)),]

  # do some rounding

  # round to integer
  wpp_output_long$value <- ifelse(wpp_output_long$indicator %in% c("birth_count_age", "birth_count_tot_sex", "death_count_age_sex",
                                                         "death_count_cohort_sex", "death_count_tot_sex", "exposure_count_age_sex",
                                                         "mig_net_count_age_sex", "mig_net_count_tot_sex"), 
                                  round(wpp_output_long$value,0), wpp_output_long$value)
  # round to 3 places
  wpp_output_long$value <- ifelse(wpp_output_long$indicator %in% c("birth_rate_crude", "death_rate_crude", "fert_mean_age",
                                                                   "mig_net_rate_crude", "pop_change_rate_natural",
                                                                   "pop_change_rate_tot", "pop_pct_age_sex", "srb"), 
                                  round(wpp_output_long$value,3), wpp_output_long$value)
  
  # round to 6 places
  wpp_output_long$value <- ifelse(wpp_output_long$indicator %in% c("fert_pct_age", "fert_rate_age", "fert_rate_gross",
                                                                   "fert_rate_net", "fert_rate_tot"), 
                                  round(wpp_output_long$value,6), wpp_output_long$value)
  
  # life table values
  wpp_output_long$value <- ifelse(wpp_output_long$subindicator %in% c("lx", "ndx", "nLx", "Tx"),
                                  round(wpp_output_long$value, 3), wpp_output_long$value)
  wpp_output_long$value <- ifelse(wpp_output_long$subindicator %in% c("nMx", "nqx", "Sx", "Tx", "10q15", "10q25", "15q35",
                                                                      "1q0", "35q15", "45q15", "4q1", "1q0"),
                                  round(wpp_output_long$value, 8), wpp_output_long$value)
  wpp_output_long$value <- ifelse(wpp_output_long$subindicator %in% c("ex", "e0", "e100", "e15", "e50", "e60", "e65", "e80", "e85"),
                                  round(wpp_output_long$value, 4), wpp_output_long$value)
  wpp_output_long$value <- ifelse(wpp_output_long$subindicator %in% c("nAx"),
                                  round(wpp_output_long$value, 6), wpp_output_long$value)

  return(wpp_output_long)
  
}

# wpp_output_example <- ccmppWPP_workflow_one_country_variant(wpp_input = wpp_input_example)
# 
# wpp_output_long_eagle <- data_reshape_wpp_output_long(wpp_output = wpp_output_example)
markalava/ccmppWPP documentation built on April 21, 2022, 12:36 a.m.