R/ccmppWPP_input_file.R

Defines functions project_backwards_no_mig census_back_project_1950 basepop_adjust_1950_population ccmpp_input_file_proj_variants ccmppWPP_input_file_medium ccmppWPP_input_file_extend ccmppWPP_input_file_estimates

Documented in basepop_adjust_1950_population ccmpp_input_file_proj_variants census_back_project_1950

# read ccmppWPP inputs from country-specific excel input files

#' @export
ccmppWPP_input_file_estimates <- function(input_file_path) {


  # read metadata from parameters sheet of excel input file
  meta <-   readxl::read_xlsx(path = input_file_path,
                              sheet = "parameters")

  meta <- meta %>%
    dplyr::select(parameter, value) %>%
    dplyr::filter(!is.na(parameter))

  meta.list <- list()
  for (i in 1:nrow(meta)) {
    meta.list[[i]] <- ifelse(!is.na(suppressWarnings(as.numeric(meta$value[i]))), as.numeric(meta$value[i]), meta$value[i])
    names(meta.list)[i] <- gsub(" ", "_", meta$parameter[i])
  }
  rm(meta)

  base_year <- as.numeric(meta.list$Base_Year)
  begin_proj_year <- as.numeric(meta.list$Projection_First_Year)

  # base year population by sex and single year of age from 0:100
  pop_count_age_sex_base <- readxl::read_xlsx(path = input_file_path,
                                              sheet = "pop_count_age_sex_base",
                                              n_max = 1048576)

  # replace any NA values with 0
  pop_count_age_sex_base$value[is.na(pop_count_age_sex_base$value)] <- 0

  # asfr by single year of mother's age and single year of time (jan 1 to dec 31)
  fert_rate_age_f <- readxl::read_xlsx(path = input_file_path,
                                       sheet = "fert_rate_age_f",
                                              n_max = 1048576)
  fert_rate_age_f <- fert_rate_age_f[fert_rate_age_f$time_start >= base_year & fert_rate_age_f$time_start < begin_proj_year,]
  # ensure sort by time_start and age_start
  fert_rate_age_f <- fert_rate_age_f[order(fert_rate_age_f$time_start, fert_rate_age_f$age_start),]

  # srb estimates by single year of time (jan 1 to dec 31)
  srb <- readxl::read_xlsx(path = input_file_path,
                           sheet = "srb",
                           n_max = 1048576)
  srb <- srb[srb$time_start >= base_year & srb$time_start < begin_proj_year,]
  # ensure sort by time_start
  srb <- srb[order(srb$time_start),]

  # read the mortality estimation parameters
  MORT_PARAMS <- readxl::read_xlsx(path = input_file_path, sheet = "MORT_PARAMS",
                                              n_max = 1048576)
  mp <- MORT_PARAMS[MORT_PARAMS$type == "Estimation", c(2,3)]

  # for classic model life table families, we use the Coale-Demeny a0 rule, otherwise we use Andreev-Kinkaid
  a0rule <- ifelse(mp$value[mp$parameter == "Age_Specific_Mortality_Type"] == "Model-based" &
                     !is.na(mp$value[mp$parameter == "Age_Specific_MLT_Region"]) &
                     mp$value[mp$parameter == "Age_Specific_MLT_Region"] %in%
                     c("CD_West","CD_East","CD_North","CD_South", "UN_Chilean","UN_Far_Eastern",
                       "UN_General","UN_Latin_American","UN_South_Asian"), "cd", "ak")

  # asfr by single year of mother's age and single year of time (jan 1 to dec 31)
  life_table_age_sex <- readxl::read_xlsx(path = input_file_path,
                                       sheet = "life_table_age_sex",
                                              n_max = 1048576)
  life_table_age_sex <- life_table_age_sex[life_table_age_sex$time_start >= base_year & life_table_age_sex$time_start < begin_proj_year,]
  # ensure sort by time_start, sex and age_start
  life_table_age_sex <- life_table_age_sex[order(life_table_age_sex$time_start, life_table_age_sex$sex, life_table_age_sex$age_start),]

  # net international migration
  # estimate of counts by sex and single year of age
  mig_net_count_age_sex <- readxl::read_xlsx(path = input_file_path,
                                             sheet = "mig_net_count_age_sex",
                                              n_max = 1048576)
  mig_net_count_age_sex$value[is.na(mig_net_count_age_sex$value)] <- 0
  mig_net_count_age_sex <- mig_net_count_age_sex[mig_net_count_age_sex$time_start >= base_year & mig_net_count_age_sex$time_start < begin_proj_year,]
  # ensure sort by time_start, sex and age_start
  mig_net_count_age_sex <- mig_net_count_age_sex[order(mig_net_count_age_sex$time_start, mig_net_count_age_sex$sex, mig_net_count_age_sex$age_start),]

  # totals (in case need to apply age distribution)
  mig_net_count_tot_b <- readxl::read_xlsx(path = input_file_path,
                                           sheet = "mig_net_count_tot_b",
                                              n_max = 1048576)
  mig_net_count_tot_b$value[is.na(mig_net_count_tot_b$value)] <- 0
  mig_net_count_tot_b <- mig_net_count_tot_b[mig_net_count_tot_b$time_start >= base_year & mig_net_count_tot_b$time_start < begin_proj_year,]
  # ensure sort by time_start
  mig_net_count_tot_b <- mig_net_count_tot_b[order(mig_net_count_tot_b$time_start),]

  # rates -- THIS IS NOT AN OPTION FOR THE 2022 REVISION SO WE CREATE A PLACE HOLDER FOR FUTURE REVISIONS
  mig_net_rate_age_sex <- mig_net_count_age_sex
  mig_net_rate_age_sex$value <- 0

  # parameters
  mig_parameter <- readxl::read_xlsx(path = input_file_path,
                                     sheet = "mig_parameter",
                                              n_max = 1048576)
  # ensure sort by time_start
  mig_parameter <- mig_parameter[order(mig_parameter$time_start),]


  ccmppWPP_inputs_estimates <- list(pop_count_age_sex_base = pop_count_age_sex_base,
                                    life_table_age_sex     = life_table_age_sex,
                                    fert_rate_age_f        = fert_rate_age_f,
                                    srb                    = srb,
                                    mig_net_count_age_sex  = mig_net_count_age_sex,
                                    mig_net_rate_age_sex   = mig_net_rate_age_sex,
                                    mig_net_count_tot_b    = mig_net_count_tot_b,
                                    mig_parameter          = mig_parameter)

  # set attributes
  attr(ccmppWPP_inputs_estimates, "locid")    <- meta.list$LocationID
  attr(ccmppWPP_inputs_estimates, "locname")    <- meta.list$Location
  attr(ccmppWPP_inputs_estimates, "variant")  <- "estimates"
  attr(ccmppWPP_inputs_estimates, "a0rule")  <- a0rule

  return(ccmppWPP_inputs_estimates)

}

# this function takes 1x1 inputs to the ccmppWPP and extends them to a new open age group
# we use this to extend all inputs to OAG = 130+, which then ensures consistency in life tables across 3 sex categories (female, male, both)
# This will not _reduce_ the input to OAnew if it already has a higher open age interval.

#' @export
ccmppWPP_input_file_extend <- function(ccmppWPP_inputs, OAnew = 130, a0rule = "ak") {

  ############
  # life_table
  ############
  lt_in <- ccmppWPP_inputs$life_table_age_sex
  lt_in <- lt_in[order(lt_in$time_start, lt_in$sex, lt_in$indicator, lt_in$age_start),]
  ages  <- unique(lt_in$age_start)
  maxage <- max(ages)

  
  lts <- NULL
  # if the OA is less than OAnew, then extend the life tables to OAnew
  if (maxage < OAnew) {
    for (i in unique(lt_in$time_start)) {
      mxM <- lt_in$value[lt_in$indicator == "lt_nMx" & lt_in$sex == "male" & lt_in$time_start == i]
      names(mxM) <- ages
      mxF <- lt_in$value[lt_in$indicator == "lt_nMx" & lt_in$sex == "female" & lt_in$time_start == i]
      names(mxF) <- ages

      # extend to new OA using two sex coherent kannisto
      # use sex coherent kannisto to extend rather than sex-independent extension in lt_single_mx
      ext <- MortCast::cokannisto(mxM, mxF, est.ages = 80:(maxage-1), proj.ages = maxage:OAnew)
      mxM <- ext$male
      mxF <- ext$female

      lt_m <- DemoTools::lt_single_mx(nMx = mxM, Age = 0:OAnew, a0rule = a0rule, Sex = "m", OAG = TRUE, OAnew = OAnew)
      lt_m$sex <- "male"
      lt_f <- DemoTools::lt_single_mx(nMx = mxF, Age = 0:OAnew, a0rule = a0rule, Sex = "f", OAG = TRUE, OAnew = OAnew)
      lt_f$sex <- "female"

      lt <- rbind(lt_m, lt_f)
      lt$time_start <- i

      lts <- rbind(lts,lt)
    }
    }
    
    # if the OA is greater than OAnew, then truncate life tables to OAnew
    if (maxage > OAnew) {
      for (i in unique(lt_in$time_start)) {
        mxM <- lt_in$value[lt_in$indicator == "lt_nMx" & lt_in$sex == "male" & lt_in$time_start == i]
        names(mxM) <- ages
        mxF <- lt_in$value[lt_in$indicator == "lt_nMx" & lt_in$sex == "female" & lt_in$time_start == i]
        names(mxF) <- ages
        
        lt_m <- DemoTools::lt_single_mx(nMx = mxM, Age = 0:maxage, a0rule = a0rule, Sex = "m", OAG = TRUE, OAnew = OAnew)
        lt_m$sex <- "male"
        lt_f <- DemoTools::lt_single_mx(nMx = mxF, Age = 0:maxage, a0rule = a0rule, Sex = "f", OAG = TRUE, OAnew = OAnew)
        lt_f$sex <- "female"
        
        lt <- rbind(lt_m, lt_f)
        lt$time_start <- i
        
        lts <- rbind(lts,lt)
      }

    lts$AgeInt[is.na(lts$AgeInt)] <- 1000

    life_table_age_sex <-reshape(lts,
                                 direction = "long",
                                 times = paste0("lt_", names(lts)[3:11]),
                                 timevar = "indicator",
                                 idvar = c("time_start", "sex", "Age", "AgeInt"),
                                 varying = list(names(lts)[3:11]),
                                 v.names = "value")
    names(life_table_age_sex)[c(1,2)] <- c("age_start", "age_span")
    life_table_age_sex$time_span <- 1

    } 
  
  if (maxage == OAnew) {

    # check to see that all of the life table columns are present
    ltnames <- c("lt_nMx","lt_nqx","lt_lx","lt_nAx","lt_ndx","lt_nLx","lt_Tx","lt_Sx","lt_ex")

    ispresent <- function(x) {
      result <- x %in% lt_in$indicator
    }
    lt_indicators_present <- sapply(ltnames, FUN = ispresent )

    # if they are present, then just return the original input life tables
    if (all(lt_indicators_present)) {

      life_table_age_sex <- lt_in

    } else { # otherwise, re-compute the life table columns

      lt_f <- lt_complete_loop_over_time(lt_in[lt_in$indicator == "lt_nMx" & lt_in$sex == "female",],
                                         sex = "female", a0rule = a0rule, OAnew = 130)
      lt_m <- lt_complete_loop_over_time(lt_in[lt_in$indicator == "lt_nMx" & lt_in$sex == "male",],
                                         sex = "male", a0rule = a0rule, OAnew = 130)

      life_table_age_sex <- rbind(lt_f, lt_m)

    }
  }
  rm(ages,maxage)

  ############
  # base population
  ############
  pop_in <- ccmppWPP_inputs$pop_count_age_sex_base
  pop_in <- pop_in[order(pop_in$sex, pop_in$age_start),]
  ages <- unique(pop_in$age_start)
  maxage <- max(ages)

  if (maxage < OAnew) {
    ext_f <- DemoTools::OPAG(Pop = DemoTools::groupAges(pop_in$value[pop_in$sex == "female"]),
                             Age_Pop = seq(0,maxage,5),
                             nLx = DemoTools::groupAges(life_table_age_sex$value[life_table_age_sex$indicator=="lt_nLx" &
                                                                                   life_table_age_sex$sex == "female" &
                                                                                   life_table_age_sex$time_start == pop_in$time_start[1]]),
                             Age_nLx = seq(0,max(life_table_age_sex$age_start),5),
                             Redistribute_from = maxage,
                             OAnew = OAnew,
                             method = "mono")
    ext_f$Pop_out <- ifelse(is.na(ext_f$Pop_out), 0, ext_f$Pop_out)
    ext_f <- DemoTools::graduate_mono(ext_f$Pop_out, Age = ext_f$Age_out, AgeInt = DemoTools::age2int(ext_f$Age_out), OAG = TRUE)

    pop_count_age_sex_f <- c(pop_in$value[pop_in$sex == "female" & pop_in$age_start < maxage], ext_f[as.numeric(names(ext_f)) >= maxage])
    names(pop_count_age_sex_f) <- 0:OAnew

    ext_m <- DemoTools::OPAG(Pop = DemoTools::groupAges(pop_in$value[pop_in$sex == "male"]),
                             Age_Pop = seq(0,maxage,5),
                             nLx = DemoTools::groupAges(life_table_age_sex$value[life_table_age_sex$indicator=="lt_nLx" &
                                                                                   life_table_age_sex$sex == "male" &
                                                                                   life_table_age_sex$time_start == pop_in$time_start[1]]),
                             Age_nLx = seq(0,max(life_table_age_sex$age_start),5),
                             Redistribute_from = maxage,
                             OAnew = OAnew,
                             method = "mono")
    ext_m$Pop_out <- ifelse(is.na(ext_m$Pop_out), 0, ext_m$Pop_out)
    ext_m <- DemoTools::graduate_mono(ext_m$Pop_out, Age = ext_m$Age_out, AgeInt = DemoTools::age2int(ext_m$Age_out), OAG = TRUE)

    pop_count_age_sex_m <- c(pop_in$value[pop_in$sex == "male" & pop_in$age_start < maxage], ext_m[as.numeric(names(ext_m)) >= maxage])
    names(pop_count_age_sex_m) <- 0:OAnew

    pop_count_age_sex_base <- data.frame(time_start = rep(pop_in$time_start[1], (OAnew + 1) * 2),
                                         time_span = rep(0,(OAnew + 1) * 2),
                                         age_start = rep(0:OAnew,2),
                                         age_span = rep(1,(OAnew + 1) * 2),
                                         sex = c(rep("female", OAnew + 1),rep("male", OAnew + 1)),
                                         value = c(pop_count_age_sex_f, pop_count_age_sex_m))
    pop_count_age_sex_base$age_span[pop_count_age_sex_base$age_start == OAnew] <- 1000
  } 
  
  if (maxage > OAnew) {
    # truncate back to OAnew, aggregating OAG
    pop_count_age_sex_m <- DemoTools::groupOAG(Value = pop_in$value[pop_in$sex == "male"], Age = pop_in$age_start[pop_in$sex == "male"], OAnew = OAnew)
    pop_count_age_sex_f <- DemoTools::groupOAG(Value = pop_in$value[pop_in$sex == "female"], Age = pop_in$age_start[pop_in$sex == "female"], OAnew = OAnew)
    pop_count_age_sex_base <- data.frame(time_start = rep(pop_in$time_start[1], (OAnew + 1) * 2),
                                         time_span = rep(0,(OAnew + 1) * 2),
                                         age_start = rep(0:OAnew,2),
                                         age_span = rep(1,(OAnew + 1) * 2),
                                         sex = c(rep("female", OAnew + 1),rep("male", OAnew + 1)),
                                         value = c(pop_count_age_sex_f, pop_count_age_sex_m))
    pop_count_age_sex_base$age_span[pop_count_age_sex_base$age_start == OAnew] <- 1000
    
  }
  
  if (maxage == OAnew) {
    pop_count_age_sex_base <- pop_in
  }
  rm(ages,maxage)

  # replace any zero values with a very small number
  pop_count_age_sex_base$value[pop_count_age_sex_base$value == 0] <- 0.00001

  ############
  # age-specific fertility rates
  ############
  fert_in <- ccmppWPP_inputs$fert_rate_age_f
  fert_in <- fert_in [order(fert_in$time_start, fert_in$age_start),]
  ages <- unique(fert_in$age_start)
  maxage <- max(ages)

  # here we simply add zeros to the extra age groups
  if (maxage < OAnew) {

    asfr_add <- as.data.frame(matrix(0.0, nrow=OAnew - maxage, ncol = length(unique(fert_in$time_start)),
                                     dimnames = list((maxage+1):OAnew, unique(fert_in$time_start))))
    asfr_add$age_start = as.numeric(row.names(asfr_add))
    asfr_add <- reshape(asfr_add,
                        direction = "long",
                        times = as.numeric(names(asfr_add[1:(ncol(asfr_add)-1)])),
                        timevar = "time_start",
                        idvar = "age_start",
                        varying = list(names(asfr_add)[1:(ncol(asfr_add)-1)]),
                        v.names = "value")
    asfr_add$age_span <- 1
    asfr_add$time_span <- 1

    fert_rate_age_f <- rbind(fert_in, asfr_add)
    fert_rate_age_f$age_span <- ifelse(fert_rate_age_f$age_start == OAnew, 1000, 1)
    fert_rate_age_f <- fert_rate_age_f[order(fert_rate_age_f$time_start, fert_rate_age_f$age_start),]

  } 
  
  if (maxage > OAnew) {
    
    fert_rate_age_f <- fert_in[fert_in$age_start <= OAnew,]
    
  }
  if (maxage == OAnew) {
    
    fert_rate_age_f <- fert_in
  }
  rm(ages,maxage)

  ############
  # migration
  ############
  mig_in <- ccmppWPP_inputs$mig_net_count_age_sex
  mig_in <- mig_in [order(mig_in$time_start, mig_in$sex, mig_in$age_start),]
  ages <- unique(mig_in$age_start)
  maxage <- max(ages)

  if(maxage < OAnew) {
    mig_add <- as.data.frame(matrix(0.0, nrow=OAnew - maxage, ncol = length(unique(mig_in$time_start)),
                                    dimnames = list((maxage+1):OAnew, unique(mig_in$time_start))))

    mig_add$age_start = as.numeric(row.names(mig_add))
    mig_add <- reshape(mig_add,
                       direction = "long",
                       times = as.numeric(names(mig_add[1:(ncol(mig_add)-1)])),
                       timevar = "time_start",
                       idvar = "age_start",
                       varying = list(names(mig_add)[1:(ncol(mig_add)-1)]),
                       v.names = "value")
    mig_add$age_span <- 1
    mig_add$time_span <- 1

    mig_add_m <- mig_add
    mig_add_m$sex <- "male"
    mig_add_f <- mig_add
    mig_add_f$sex <- "female"

    mig_net_count_age_sex <- rbind(mig_in, mig_add_m, mig_add_f)
    mig_net_count_age_sex$age_span <- ifelse(mig_net_count_age_sex$age_start == OAnew, 1000, 1)
    mig_net_count_age_sex <- mig_net_count_age_sex[order(mig_net_count_age_sex$time_start, mig_net_count_age_sex$sex, mig_net_count_age_sex$age_start),]

    # for now set rates to 0 since we are not using these for 2022 revision
    mig_net_rate_age_sex <- mig_net_count_age_sex
    mig_net_rate_age_sex$value <- 0.0

  } 
  if (maxage > OAnew) {
    mig_net_count_age_sex <- mig_in[mig_in$age_start <= OAnew,]
    mig_net_rate_age_sex <- ccmppWPP_inputs$mig_net_rate_age_sex[ccmppWPP_inputs$mig_net_rate_age_sex$age_start <= OAnew,]
  }
  
  if (maxage == OAnew){
    mig_net_count_age_sex <- mig_in
    mig_net_rate_age_sex <- ccmppWPP_inputs$mig_net_rate_age_sex
  }
  rm(ages,maxage)


  ccmppWPP_inputs_extended <- list(pop_count_age_sex_base = pop_count_age_sex_base,
                                   life_table_age_sex = life_table_age_sex,
                                   fert_rate_age_f = fert_rate_age_f,
                                   srb = ccmppWPP_inputs$srb,
                                   mig_net_count_age_sex = mig_net_count_age_sex,
                                   mig_net_rate_age_sex = mig_net_rate_age_sex,
                                   mig_net_count_tot_b = ccmppWPP_inputs$mig_net_count_tot_b,
                                   mig_parameter = ccmppWPP_inputs$mig_parameter)

  attr(ccmppWPP_inputs_extended, "revision") <- attributes(ccmppWPP_inputs)$revision
  attr(ccmppWPP_inputs_extended, "locid")    <- attributes(ccmppWPP_inputs)$locid
  attr(ccmppWPP_inputs_extended, "locname")    <- attributes(ccmppWPP_inputs)$locname
  attr(ccmppWPP_inputs_extended, "variant")  <- attributes(ccmppWPP_inputs)$variant
  attr(ccmppWPP_inputs_extended, "a0rule")  <- attributes(ccmppWPP_inputs)$a0rule


  return(ccmppWPP_inputs_extended)

}


# THIS FUNCTION COMPILES THE INPUT FILE NEEDED FOR MEDIUM VARIANT PROJECTIONS

#' @export
ccmppWPP_input_file_medium <- function(tfr_median_all_locs, # medium tfr from bayesian model
                                       srb_median_all_locs, # projected srb
                                       e0_median_all_locs, # medium e0 from bayesian model
                                       mig_net_count_proj_all_locs, # projected net migration by age and sex
                                       PasfrGlobalNorm, # pasfr global norm from pasfr_global_model() function
                                       input_file_path, # file path name for Excel input file
                                       ccmpp_estimates_130_folder) { # file path to folder where ccmpp intermediate outputs for ages 0 to 130 are stored

  # read metadata from parameters sheet of excel input file
  meta <-   readxl::read_xlsx(path = input_file_path,
                              sheet = "parameters",
                              n_max = 1048576)

  meta <- meta %>%
    dplyr::select(parameter, value) %>%
    dplyr::filter(!is.na(parameter))

  meta.list <- list()
  for (i in 1:nrow(meta)) {
    meta.list[[i]] <- ifelse(!is.na(suppressWarnings(as.numeric(meta$value[i]))), as.numeric(meta$value[i]), meta$value[i])
    names(meta.list)[i] <- gsub(" ", "_", meta$parameter[i])
  }
  rm(meta)

  locid <- meta.list$LocationID
  projection_years <- meta.list$Projection_First_Year:meta.list$Projection_Last_Year

  # load the intermediate outputs file for estimates that contains population by single year of age from 0 to 130+
  load(paste0(ccmpp_estimates_130_folder,locid,"_ccmpp_output.RData"))

  # base year population by sex and single year of age from 0:100
  pop_count_age_sex_base <- ccmpp_output$pop_count_age_sex[ccmpp_output$pop_count_age_sex$time_start == meta.list$Projection_First_Year &
                                                             ccmpp_output$pop_count_age_sex$sex %in% c("female","male"),]

  ############################
  ############################
  # compute asfr for the medium variant TFR

  # get asfr observed from estimates
  asfr_observed <- ccmpp_output$fert_rate_age_f[ccmpp_output$fert_rate_age_f$age_start %in% c(10:54), # bayesPop requires single year of age from 10 to 54
                                                c("time_start", "age_start", "value")]
  # compute observed tfr from asfr
  tfr_observed <- sum_last_column(asfr_observed[, c("time_start", "value")])

  # compute observed pasfr
  pasfr_observed <- merge(asfr_observed, tfr_observed, by = "time_start")
  pasfr_observed$value <- pasfr_observed$value.x/pasfr_observed$value.y

  # transform to matrix
  pasfr_observed <- reshape(pasfr_observed[,c("time_start","age_start","value")],
                            direction = "wide", idvar = "age_start", timevar = "time_start")
  pasfr_observed_matrix <- as.matrix(pasfr_observed[,2:ncol(pasfr_observed)])
  rownames(pasfr_observed_matrix) <- unique(pasfr_observed$age_start)
  colnames(pasfr_observed_matrix) <- tfr_observed$time_start

  # get location-specific vectors of tfr estimates and projections, names are years
  tfr_median_all_locs <- tfr_median_all_locs[order(tfr_median_all_locs$time_start),]
  tfr_observed_projected <- tfr_median_all_locs$value[tfr_median_all_locs$LocID == locid]
  names(tfr_observed_projected) <- unique(tfr_median_all_locs$time_start)

  # project pasfr given projected tfr and historical observed pasfr
  pasfr_medium <- pasfr_given_tfr(PasfrGlobalNorm,
                                  pasfr_observed = pasfr_observed,
                                  tfr_observed_projected = tfr_observed_projected,
                                  years_projection = projection_years)

  # multiply projected pasfr by projected tfr to get projected asfr
  tfr_projected <- tfr_observed_projected[names(tfr_observed_projected) %in% projection_years]
  asfr_medium <- t(tfr_projected  * (t(pasfr_medium /100)))
  rownames(asfr_medium) <- 10:54

  # fill-in zero fertility for ages < 10 and > 54
  asfr_0_9 <- matrix(0.0, nrow=length(0:9), ncol = ncol(asfr_medium), dimnames = list(0:9, colnames(asfr_medium)))
  asfr_55_130 <- matrix(0.0, nrow=length(55:130), ncol = ncol(asfr_medium), dimnames = list(55:130, colnames(asfr_medium)))
  asfr_medium <- rbind(asfr_0_9,  asfr_medium, asfr_55_130)

  # transform to long data frame
  asfr_medium <- as.data.frame(asfr_medium)
  asfr_medium$age_start <- as.numeric(row.names(asfr_medium))
  asfr_medium <- reshape(asfr_medium,
                         idvar = "age_start",
                         direction = "long",
                         varying = list(names(asfr_medium)[1:(ncol(asfr_medium)-1)]),
                         times = names(asfr_medium)[1:(ncol(asfr_medium)-1)],
                         timevar = "time_start",
                         v.names = "value")
  asfr_medium$age_span <- ifelse(asfr_medium$age_start < 130, 1, 1000)
  asfr_medium$time_span <- 1

  ############################
  ############################
  # get projected srb from input file for now (later let's read this from somewhere else that analysts can't accidentally edit)

  srb <-   srb_median_all_locs[srb_median_all_locs$LocID == locid & srb_median_all_locs$time_start %in% projection_years,]

  ############################
  ############################
  # get projected 1mx from projected e0

  # get mx estimates by single year of age from 0 to 130
  life_table_age_sex_mx <- ccmpp_output$lt_complete_age_sex[ccmpp_output$lt_complete_age_sex$indicator=="lt_nMx" &
                                                              ccmpp_output$lt_complete_age_sex$sex %in% c("female", "male"),]

  # transform life_table_age_sex_mx into the matrices needed by the MortCast functions
  mxM <- life_table_age_sex_mx[life_table_age_sex_mx$sex == "male", c("age_start", "time_start", "value")]
  mxMr <- reshape(mxM, idvar = "age_start", timevar = "time_start", direction = "wide")
  mx_mat_m <- as.matrix(mxMr[,2:ncol(mxMr)])
  rownames(mx_mat_m) <- unique(mxM$age_start)
  colnames(mx_mat_m) <- unique(mxM$time_start)

  mxF <- life_table_age_sex_mx[life_table_age_sex_mx$sex == "female", c("age_start", "time_start", "value")]
  mxFr <- reshape(mxF, idvar = "age_start", timevar = "time_start", direction = "wide")
  mx_mat_f <- as.matrix(mxFr[,2:ncol(mxFr)])
  rownames(mx_mat_f) <- unique(mxF$age_start)
  colnames(mx_mat_f) <- unique(mxF$time_start)

  # parse median projected e0f and e0m
  e0_median_all_locs <- e0_median_all_locs[order(e0_median_all_locs$time_start),]
  e0f_projected <- e0_median_all_locs$value[e0_median_all_locs$LocID == locid &
                                             e0_median_all_locs$sex == "female" &
                                              e0_median_all_locs$time_start %in% projection_years]
  names(e0f_projected) <- projection_years

  e0m_projected <- e0_median_all_locs$value[e0_median_all_locs$LocID == locid &
                                              e0_median_all_locs$sex == "male" &
                                              e0_median_all_locs$time_start %in% projection_years]
  names(e0m_projected) <- projection_years

  # extract mortality parameters from Excel input file
  MORT_PARAMS <- readxl::read_xlsx(path = input_file_path, sheet = "MORT_PARAMS",
                                              n_max = 1048576)

  Age_Mort_Proj_arguments <- list(Age_Mort_Proj_Method1 = MORT_PARAMS$value[MORT_PARAMS$parameter=="Age_Mort_Proj_Method1"],
                                  Age_Mort_Proj_Method2 = MORT_PARAMS$value[MORT_PARAMS$parameter=="Age_Mort_Proj_Method2"],
                                  Age_Mort_Proj_Pattern = MORT_PARAMS$value[MORT_PARAMS$parameter=="Age_Mort_Proj_Pattern"],
                                  Age_Mort_Proj_Method_Weights = eval(parse(text = MORT_PARAMS$value[MORT_PARAMS$parameter=="Age_Mort_Proj_Method_Weights"])),
                                  Age_Mort_Proj_Adj_SR = eval(parse(text = MORT_PARAMS$value[MORT_PARAMS$parameter=="Age_Mort_Proj_Adj_SR"])),
                                  Latest_Age_Mortality_Pattern = eval(parse(text = MORT_PARAMS$value[MORT_PARAMS$parameter=="Latest_Age_Mortality_Pattern"])),
                                  Latest_Age_Mortality_Pattern_Years = eval(parse(text = MORT_PARAMS$value[MORT_PARAMS$parameter=="Latest_Age_Mortality_Pattern_Years"])),
                                  Smooth_Latest_Age_Mortality_Pattern = eval(parse(text = MORT_PARAMS$value[MORT_PARAMS$parameter=="Smooth_Latest_Age_Mortality_Pattern"])),
                                  Smooth_Latest_Age_Mortality_Pattern_Degree = eval(parse(text = MORT_PARAMS$value[MORT_PARAMS$parameter=="Smooth_Latest_Age_Mortality_Pattern_Degree"])))
  
   # mx_medium <- mx_given_e0(mx_mat_m = mx_mat_m, # matrix of mx estimates (age in rows, years in columns)
   #                         mx_mat_f = mx_mat_f,
   #                         e0m = e0m_projected, # vector of projected e0 (named with years)
   #                         e0f = e0f_projected,
   #                         Age_Mort_Proj_Method1 = tolower(Age_Mort_Proj_arguments$Age_Mort_Proj_Method1),
   #                         Age_Mort_Proj_Method2 = tolower(Age_Mort_Proj_arguments$Age_Mort_Proj_Method2),  # only used if first method is "pmd"
   #                         Age_Mort_Proj_Pattern = Age_Mort_Proj_arguments$Age_Mort_Proj_Pattern,
   #                         Age_Mort_Proj_Method_Weights = Age_Mort_Proj_arguments$Age_Mort_Proj_Method_Weights,
   #                         Age_Mort_Proj_Adj_SR = Age_Mort_Proj_arguments$Age_Mort_Proj_Adj_SR,
   #                         Latest_Age_Mortality_Pattern = Age_Mort_Proj_arguments$Latest_Age_Mortality_Pattern,
   #                         Latest_Age_Mortality_Pattern_Years = Age_Mort_Proj_arguments$Latest_Age_Mortality_Pattern_Years,
   #                         Smooth_Latest_Age_Mortality_Pattern = Age_Mort_Proj_arguments$Smooth_Latest_Age_Mortality_Pattern,
   #                         Smooth_Latest_Age_Mortality_Pattern_Degree = Age_Mort_Proj_arguments$Smooth_Latest_Age_Mortality_Pattern_Degree) # a number between 1 and nrow(mx_mat). Higher numbers give less smoothing

  # old
   mx_medium <- mx_given_e0(mx_mat_m = mx_mat_m, # matrix of mx estimates (age in rows, years in columns)
                            mx_mat_f = mx_mat_f,
                            e0m = e0m_projected, # vector of projected e0 (named with years)
                            e0f = e0f_projected,
                            Age_Mort_Proj_Method1 = tolower(Age_Mort_Proj_arguments$Age_Mort_Proj_Method1),
                            Age_Mort_Proj_Method2 = tolower(Age_Mort_Proj_arguments$Age_Mort_Proj_Method2),  # only used if first method is "pmd"
                            Age_Mort_Proj_Pattern = Age_Mort_Proj_arguments$Age_Mort_Proj_Pattern,
                            Age_Mort_Proj_Method_Weights = Age_Mort_Proj_arguments$Age_Mort_Proj_Method_Weights,
                            Age_Mort_Proj_Adj_SR = Age_Mort_Proj_arguments$Age_Mort_Proj_Adj_SR,
                            Latest_Age_Mortality_Pattern = Age_Mort_Proj_arguments$Latest_Age_Mortality_Pattern,
                            Smooth_Latest_Age_Mortality_Pattern = Age_Mort_Proj_arguments$Smooth_Latest_Age_Mortality_Pattern) # a number between 1 and nrow(mx_mat). Higher numbers give less smoothing
   
  # organize into a long file required by ccmppWPP

  mx_f <- mx_medium$female$mx[,colnames(mx_medium$female$mx) %in% projection_years]
  mx_f <- as.data.frame(mx_f)
  mx_f$age_start <- as.numeric(row.names(mx_f))
  mx_f <- reshape(mx_f,
                  idvar = "age_start",
                  direction = "long",
                  varying = list(names(mx_f)[1:(ncol(mx_f)-1)]),
                  times = names(mx_f)[1:(ncol(mx_f)-1)],
                  timevar = "time_start",
                  v.names = "value")
  mx_f$age_span <- ifelse(mx_f$age_start < max(mx_f$age_start), 1, 1000)
  mx_f$time_span <- 1
  mx_f$sex <- "female"

  mx_m <- mx_medium$male$mx[,colnames(mx_medium$male$mx) %in% projection_years]
  mx_m <- as.data.frame(mx_m)
  mx_m$age_start <- as.numeric(row.names(mx_m))
  mx_m <- reshape(mx_m,
                  idvar = "age_start",
                  direction = "long",
                  varying = list(names(mx_m)[1:(ncol(mx_m)-1)]),
                  times = names(mx_m)[1:(ncol(mx_m)-1)],
                  timevar = "time_start",
                  v.names = "value")
  mx_m$age_span <- ifelse(mx_m$age_start < max(mx_m$age_start), 1, 1000)
  mx_m$time_span <- 1
  mx_m$sex <- "male"

  mx_all_long <- rbind(mx_f, mx_m)
  mx_all_long$time_start <- as.numeric(mx_all_long$time_start)
  
  lts_all <- list()
  for (i in 1:(length(projection_years))) {
    ltf <- DemoTools::lt_single_mx(nMx = mx_all_long$value[mx_all_long$sex == "female" & mx_all_long$time_start == projection_years[i]],
                                   Age = mx_all_long$age_start[mx_all_long$sex == "female" & mx_all_long$time_start == projection_years[i]],
                                   Sex = "f")
    ltf$sex <- "female"
    ltf$time_start <- projection_years[i]
    ltm <- DemoTools::lt_single_mx(nMx = mx_all_long$value[mx_all_long$sex == "male" & mx_all_long$time_start == projection_years[i]],
                                   Age = mx_all_long$age_start[mx_all_long$sex == "male" & mx_all_long$time_start == projection_years[i]],
                                   Sex = "m")
    ltm$sex <- "male"
    ltm$time_start <- projection_years[i]
    
    lts_all[[i]] <- rbind(ltf, ltm)
  }
  lts_all <- do.call(rbind, lts_all)
  
  lts_all_long <- reshape(lts_all,
                          idvar = c("time_start", "sex", "Age"),
                          drop = c("AgeInt"),
                          direction = "long",
                          varying = list(names(lts_all)[3:11]),
                          times = names(lts_all)[3:11],
                          timevar = "indicator",
                          v.names = "value")
  
  lts_all_long$age_start <- lts_all_long$Age
  lts_all_long$age_span <- 1
  lts_all_long$age_span[lts_all_long$age_start == 130] <- 1000
  lts_all_long$time_span <- 1
  lts_all_long$indicator <- paste0("lt_", lts_all_long$indicator)
  lts_all_long <- lts_all_long[,c("indicator", "time_start", "time_span", "sex", "age_start", "age_span", "value")]
  
  # net international migration inputs
  mig_net_count_age_sex <- list()
  for (i in 1:length(projection_years)) {
    mig_net_count_age_sex[[i]] <- data.frame(time_start = rep(projection_years[i], 262),
                                             age_start = rep(0:130,2),
                                             sex = c(rep("male", 131), rep("female", 131)))
  }
  mig_net_count_age_sex <- do.call(rbind, mig_net_count_age_sex)
  mig_net_count_age_sex <- merge(mig_net_count_age_sex,
                                 mig_net_count_proj_all_locs[mig_net_count_proj_all_locs$LocID == locid & 
                                                               mig_net_count_proj_all_locs$time_start %in% projection_years,
                                                             c("time_start", "sex", "age_start", "value")],
                                 by = c("time_start", "sex", "age_start"), all.x = TRUE, all.y= FALSE)
  mig_net_count_age_sex$time_span <- 1
  mig_net_count_age_sex$age_span <- 1
  mig_net_count_age_sex$age_span[mig_net_count_age_sex$age_start == 130] <- 1000
  mig_net_count_age_sex$value[is.na(mig_net_count_age_sex$value)] <- 0
  mig_net_count_age_sex <- mig_net_count_age_sex[mig_net_count_age_sex$time_start <= max(projection_years),
                                                 c("time_start", "time_span", "sex", "age_start", "age_span", "value")]
  
  mig_net_rate_age_sex <- mig_net_count_age_sex
  mig_net_rate_age_sex$value <- 0 # This is a placeholder -- not in use for WPP 2021
  mig_net_count_tot_b <- sum_last_column(mig_net_count_age_sex[,c("time_start", "time_span", "value")]) # This is a placeholder -- not in use for WPP 2022

  mig_parameter <- data.frame(indicator = c(rep("mig_type", length(projection_years)), rep("mig_assumption", length(projection_years))),
                              time_start = rep(projection_years,2),
                              time_span = rep(1, length(projection_years)*2),
                              value = c(rep("counts", length(projection_years)), rep("end", length(projection_years))))

  # assemble the ccmppWPP input file needed to carry out the deterministic projection for the medium variant
  inputs <- list(pop_count_age_sex_base = pop_count_age_sex_base,
                 life_table_age_sex     = lts_all_long[,c("indicator","time_start","time_span","sex","age_start","age_span","value")],
                 fert_rate_age_f        = asfr_medium[,c("time_start", "time_span", "age_start", "age_span", "value")],
                 srb                    = srb,
                 mig_net_count_age_sex  = mig_net_count_age_sex,
                 mig_net_rate_age_sex   = mig_net_rate_age_sex,
                 mig_net_count_tot_b    = mig_net_count_tot_b,
                 mig_parameter          = mig_parameter)

  # set attributes
  attributes <- attributes(ccmpp_output)
  attr(inputs, "locid") <- attributes$locid
  attr(inputs, "locname") <- attributes$locname
  attr(inputs, "variant") <- "medium"
  attr(inputs, "a0rule") <- attributes$a0rule

  return(inputs)

}

# THIS FUNCTION PREPARES THE INPUT FILES FOR EACH OF THE DETERMINISTIC PROJECTION VARIANT SCENARIOS

#' Derive inputs needed for deterministic projection variants
#'
#' @description Returns a list of age specific fertility rates and life table values needed as inputs to the
#' deterministic projection variants, including low, high and constant fertility, instant replacement fertility and
#' momentum, and constant mortality
#'
#' @author Sara Hertog
#'
#' @param medium_variant_outputs list of data frames. medium variant output from the ccmppWPP_workflow_one_country_variant function
#'
#' @details accesses the global model in the data frame "pasfr_global_model" needed to infer pasfr from tfr
#'
#' @return a list of data frames
#' @export
#'

ccmpp_input_file_proj_variants <- function(ccmppWPP_estimates,
                                           ccmppWPP_medium,
                                           PasfrGlobalNorm) {


  # create vector of projection time_starts
  projection_times <- unique(ccmppWPP_medium$fert_rate_age_f$time_start)
  projection_start_year <- projection_times[1]

  # extract asfr estimates
  asfr_df <- ccmppWPP_estimates$fert_rate_age_f[, c("time_start", "age_start", "value")]
  
  # extract tfr estimates
  tfr_est <- sum_last_column(asfr_df[,c("time_start", "value")])$value
  names(tfr_est) <- unique(asfr_df$time_start)
  
  # compute pasfr
  pasfr_est <- merge(asfr_df, tfr_est, by = "time_start")
  pasfr_est$value <- pasfr_est$value.x/pasfr_est$value.y
  pasfr_est <- pasfr_est[, c("time_start", "age_start", "value")]

  # reshape to matrix
  pasfr_estimates <- reshape(pasfr_est, idvar = "age_start", timevar = "time_start", direction = "wide")
  pasfr_estimates <- as.matrix(pasfr_estimates[,c(2:ncol(pasfr_estimates))])
  rownames(pasfr_estimates) <- unique(pasfr_est$age_start)
  colnames(pasfr_estimates) <- unique(pasfr_est$time_start)

  ############################
  ############################
  # compute asfr inputs for low/high fertility variants

  # extract medium variant tfr
  tfr_med <- sum_last_column(ccmppWPP_medium$fert_rate_age_f[, c("time_start", "value")])$value
  names(tfr_med) <- projection_times

  #    for tfr adjustment, subtract/add 0.25 children in first five years, 0.4 children in next five years, 0.5 children thereafter
  tfr_adj <- rep(0.5, length(projection_times))
  tfr_adj[which(projection_times >= projection_start_year & projection_times < projection_start_year+5)] <- 0.25
  tfr_adj[which(projection_times >= projection_start_year+5 & projection_times < projection_start_year+10)] <- 0.40

  #    compute asfr for low-fertility variant
  tfr_low <- tfr_med - tfr_adj

  pasfr_low <- pasfr_given_tfr(PasfrGlobalNorm = PasfrGlobalNorm,
                               pasfr_observed = pasfr_estimates[11:55,],
                               tfr_observed_projected = c(tfr_est, tfr_low),
                               years_projection = projection_times,
                               num_points = 15)

  asfr_low <- t(tfr_low * t(pasfr_low/100))
  
  asfr_0_9 <- matrix(0, nrow = 10, ncol = ncol(asfr_low))
  asfr_0_9 <- matrix(0, nrow = 10, ncol = ncol(asfr_low))
  
  asfr_low <- rbind(as.matrix[])
  asfr_low <- rbind(asfr_0_9,  asfr_low, asfr_55_100)

  # transform to long data frame
  fert_rate_age_f_low <- as.data.frame(asfr_low)
  fert_rate_age_f_low$age_start <- as.numeric(row.names(asfr_low))
  fert_rate_age_f_low <- reshape(fert_rate_age_f_low,
                                 idvar = "age_start",
                                 direction = "long",
                                 varying = list(names(fert_rate_age_f_low)[1:(ncol(fert_rate_age_f_low)-1)]),
                                 times = names(fert_rate_age_f_low)[1:(ncol(fert_rate_age_f_low)-1)],
                                 timevar = "time_start",
                                 v.names = "value")
  fert_rate_age_f_low$age_span <- ifelse(fert_rate_age_f_low$age_start < 100, 1, 1000)
  fert_rate_age_f_low$time_span <- 1


  #    compute asfr for high-fertility variant
  tfr_high <- tfr_med + tfr_adj

  pasfr_high <- pasfr_given_tfr(PasfrGlobalNorm = PasfrGlobalNorm,
                                pasfr_observed = pasfr_estimates,
                                tfr_observed_projected = c(tfr_est, tfr_high),
                                years_projection = projection_times,
                                num_points = 15)

  asfr_high <- t(tfr_high * t(pasfr_high/100))
  asfr_high <- rbind(asfr_0_9,  asfr_high, asfr_55_100)

  # transform to long data frame
  fert_rate_age_f_high <- as.data.frame(asfr_high)
  fert_rate_age_f_high$age_start <- as.numeric(row.names(asfr_high))
  fert_rate_age_f_high <- reshape(fert_rate_age_f_high,
                                  idvar = "age_start",
                                  direction = "long",
                                  varying = list(names(fert_rate_age_f_high)[1:(ncol(fert_rate_age_f_high)-1)]),
                                  times = names(fert_rate_age_f_high)[1:(ncol(fert_rate_age_f_high)-1)],
                                  timevar = "time_start",
                                  v.names = "value")
  fert_rate_age_f_high$age_span <- ifelse(fert_rate_age_f_high$age_start < 100, 1, 1000)
  fert_rate_age_f_high$time_span <- 1

  ############################
  ############################
  # extract asfr inputs for constant-fertility variant (constant at last estimated)

  asfr_last_observed <- ccmppWPP_estimates$fert_rate_age_1x1[ccmppWPP_estimates$fert_rate_age_1x1$time_start == projection_start_year-1,]
  fert_rate_age_f_constant <- NULL
  for (i in 1:length(projection_times)) {
    asfr_add <- asfr_last_observed
    asfr_add$time_start <- projection_times[i]
    fert_rate_age_f_constant <- rbind(fert_rate_age_f_constant, asfr_add)
  }

  ############################
  ############################
  #    compute asfr for instant replacement fertility (NRR = 1)

  fert_rate_age_f_instant <- list()
  for (i in 1:length(projection_times)) {

    srb_time              <- ccmppWPP_medium$srb[ccmppWPP_medium$srb$time_start == projection_times[i],]
    fert_rate_age_f_time  <- ccmppWPP_medium$fert_rate_age_1x1[ccmppWPP_medium$fert_rate_age_1x1$time_start == projection_times[i],]
    lx_f_time             <- ccmppWPP_medium$lt_complete_age_sex[ccmppWPP_medium$lt_complete_age_sex$time_start == projection_times[i] &
                                                                   ccmppWPP_medium$lt_complete_age_sex$indicator == "lt_lx" &
                                                                   ccmppWPP_medium$lt_complete_age_sex$sex == "female",]
    # interpolate lx to middle of each age group
    lx_f_mid           <- (lx_f_time$value + c(lx_f_time$value[2:length(lx_f_time$value)],NA))/2
    # compute female births implied by lx_f_mid, period asfr and period srb
    births_f_age       <- (lx_f_mid * fert_rate_age_f_time$value) * (1/(1+srb_time$value))
    births_f_age       <- births_f_age[!is.na(births_f_age)]
    # compute scaling factor needed to achieve NRR=1
    scaling_factor    <- 100000/sum(births_f_age)
    # compute instant replacement asfr by multiplying period asfr by scaling factor
    fert_rate_age_f_time$value   <- fert_rate_age_f_time$value * scaling_factor
    fert_rate_age_f_instant[[i]] <- fert_rate_age_f_time

  }
  fert_rate_age_f_instant <- do.call(rbind, fert_rate_age_f_instant)

  ############################
  ############################
  # extract life table inputs for constant-mortality variant

  # take sex-specific life tables from last observed period
  lt_last_observed <- ccmppWPP_estimates$lt_complete_age_sex[ccmppWPP_estimates$lt_complete_age_sex$time_start == projection_start_year-1 &
                                                               ccmppWPP_estimates$lt_complete_age_sex$sex %in% c("male","female"),]
  # initialize constant-mortality output
  lt_constant <- list()
  # repeat starting period sex-specific life table for each period in projection horizon
  for (i in 1:length(projection_times)) {

    lt_time            <- lt_last_observed
    lt_time$time_start <- projection_times[i]
    lt_constant[[i]]   <- lt_time

  }
  # assemble life table outputs into one data frame
  life_table_age_sex_constant <- do.call(rbind, lt_constant)

  ############################
  ############################
  #    compute asfr for momentum (constant mortality and NRR = 1)

  fert_rate_age_f_momentum <- list()
  for (i in 1:length(projection_times)) {

    srb_time              <- ccmppWPP_medium$srb[ccmppWPP_medium$srb$time_start == projection_times[i],]
    fert_rate_age_f_time  <- ccmppWPP_medium$fert_rate_age_1x1[ccmppWPP_medium$fert_rate_age_1x1$time_start == projection_times[i],]
    lx_f_time             <- life_table_age_sex_constant[life_table_age_sex_constant$time_start == projection_times[i] &
                                                           life_table_age_sex_constant$indicator=="lt_lx" &
                                                           life_table_age_sex_constant$sex=="female",]
    # interpolate lx to middle of each age group
    lx_f_mid            <- (lx_f_time$value + c(lx_f_time$value[2:length(lx_f_time$value)],NA))/2
    # compute female births implied by lx_mid, period asfr and period srb
    births_f_age       <- (lx_f_mid * fert_rate_age_f_time$value) * (1/(1+srb_time$value))
    births_f_age       <- births_f_age[!is.na(births_f_age)]
    # compute scaling factor needed to achieve NRR=1
    scaling_factor    <- 100000/sum(births_f_age)
    # compute instant replacement asfr by multiplying period asfr by scaling factor
    fert_rate_age_f_time$value   <- fert_rate_age_f_time$value * scaling_factor
    fert_rate_age_f_momentum[[i]] <- fert_rate_age_f_time

  }
  fert_rate_age_f_momentum <- do.call(rbind, fert_rate_age_f_momentum)

  ### QUESTION: DO WE HOLD SRB CONSTANT AT 2020 LEVEL OVER INSTANT REPLACEMENT AND MOMENTUM SCENARIOS
  ### OR USE MEDIUM VARIANT SRB EVEN IF IT CHANGES OVER THE PROJECTION HORIZON?
  ### CURRENT IMPLEMENTATION IS USING THE MEDIUM VARIANT SRB

  # assemble the ccmppWPP input objects needed for deterministic variants

  pop_count_age_sex_base <- ccmppWPP_estimates$pop_count_age_sex_1x1[ccmppWPP_estimates$pop_count_age_sex_1x1$time_start == projection_start_year &
                                                                       ccmppWPP_estimates$pop_count_age_sex_1x1$sex %in% c("male", "female"),]

  # make a dummy filler for migration rates since these are not operationalized yet
  mig_net_rate_age_sex = ccmppWPP_medium$mig_net_count_age_sex_1x1[ccmppWPP_medium$mig_net_count_age_sex_1x1$sex %in% c("male","female"),]
  mig_net_rate_age_sex$value <- 0
  mig_net_count_tot_b <- ccmppWPP_medium$mig_net_count_tot_sex[ccmppWPP_medium$mig_net_count_tot_sex$sex == "both",
                                                               names(ccmppWPP_medium$mig_net_count_tot_sex) != "sex"]

  # all inputs same as medium variant, except asfr, which are low
  inputs_low <- list(pop_count_age_sex_base = pop_count_age_sex_base,
                     life_table_age_sex = ccmppWPP_medium$lt_complete_age_sex[ccmppWPP_medium$lt_complete_age_sex$sex %in% c("male","female"),],
                     fert_rate_age_f = fert_rate_age_f_low,
                     srb = ccmppWPP_medium$srb,
                     mig_net_count_age_sex = ccmppWPP_medium$mig_net_count_age_sex_1x1[ccmppWPP_medium$mig_net_count_age_sex_1x1$sex %in% c("male","female"),],
                     mig_net_rate_age_sex = mig_net_rate_age_sex,
                     mig_net_count_tot_b = mig_net_count_tot_b,
                     mig_parameter = ccmppWPP_medium$mig_parameter)

  # assign attributes
  attr(inputs_low, "locid") <- attributes(ccmppWPP_estimates)$locid
  attr(inputs_low, "locname") <- attributes(ccmppWPP_estimates)$locname
  attr(inputs_low, "variant") <- "low fertility"
  attr(inputs_low, "a0rule")  <- attributes(ccmppWPP_estimates)$a0rule


  # same as medium variant, but with high asfr
  inputs_high <- inputs_low
  inputs_high$fert_rate_age_f <- fert_rate_age_f_high
  # assign attributes
  attr(inputs_high, "locid") <- attributes(ccmppWPP_estimates)$locid
  attr(inputs_high, "locname") <- attributes(ccmppWPP_estimates)$locname
  attr(inputs_high, "variant") <- "high fertility"
  attr(inputs_high, "a0rule")  <- attributes(ccmppWPP_estimates)$a0rule

  # same as medium variant, but with constant asfr
  inputs_constant <- inputs_low
  inputs_constant$fert_rate_age_f <- fert_rate_age_f_constant
  # assign attributes
  attr(inputs_constant, "locid") <- attributes(ccmppWPP_estimates)$locid
  attr(inputs_constant, "locname") <- attributes(ccmppWPP_estimates)$locname
  attr(inputs_constant, "variant") <- "constant fertility"
  attr(inputs_constant, "a0rule")  <- attributes(ccmppWPP_estimates)$a0rule

  # instant replacement same as medium variant, but with instant replacement asfr
  inputs_instant <- inputs_low
  inputs_instant$fert_rate_age_f <- fert_rate_age_f_instant
  # assign attributes
  attr(inputs_instant, "locid") <- attributes(ccmppWPP_estimates)$locid
  attr(inputs_instant, "locname") <- attributes(ccmppWPP_estimates)$locname
  attr(inputs_instant, "variant") <- "instant replacement"
  attr(inputs_instant, "a0rule")  <- attributes(ccmppWPP_estimates)$a0rule

  # momentum is instant replacement asfr, constant mortality, zero migration
  inputs_momentum <- inputs_instant
  inputs_instant$life_table_age_sex <- life_table_age_sex_constant
  inputs_instant$mig_net_count_age_sex$value <- 0
  inputs_instant$mig_net_count_tot_b$value <- 0
  # assign attributes
  attr(inputs_momentum, "locid") <- attributes(ccmppWPP_estimates)$locid
  attr(inputs_momentum, "locname") <- attributes(ccmppWPP_estimates)$locname
  attr(inputs_momentum, "variant") <- "momentum"
  attr(inputs_momentum, "a0rule")  <- attributes(ccmppWPP_estimates)$a0rule

  # no change is medium inputs with constant fertility and constant mortality
  inputs_nochange <- inputs_constant
  inputs_nochange$life_table_age_sex <- life_table_age_sex_constant
  # assign attributes
  attr(inputs_nochange, "locid") <- attributes(ccmppWPP_estimates)$locid
  attr(inputs_nochange, "locname") <- attributes(ccmppWPP_estimates)$locname
  attr(inputs_nochange, "variant") <- "no change"
  attr(inputs_nochange, "a0rule")  <- attributes(ccmppWPP_estimates)$a0rule

  # constant mortality is same as medium variant but with constant life tables
  inputs_constant_mort <- inputs_nochange
  inputs_constant_mort$fert_rate_age_f <- ccmppWPP_medium$fert_rate_age_1x1
  # assign attributes
  attr(inputs_constant_mort, "locid") <- attributes(ccmppWPP_estimates)$locid
  attr(inputs_constant_mort, "locname") <- attributes(ccmppWPP_estimates)$locname
  attr(inputs_constant_mort, "variant") <- "constant mortality"
  attr(inputs_constant_mort, "a0rule")  <- attributes(ccmppWPP_estimates)$a0rule

  # zero migration is medium inputs but zero migration
  inputs_nomig <- inputs_low
  inputs_nomig$fert_rate_age_f <- ccmppWPP_medium$fert_rate_age_1x1
  inputs_nomig$mig_net_count_age_sex$value <- 0
  inputs_nomig$mig_net_count_tot_b$value <- 0
  # assign attributes
  attr(inputs_nomig, "locid") <- attributes(ccmppWPP_estimates)$locid
  attr(inputs_nomig, "locname") <- attributes(ccmppWPP_estimates)$locname
  attr(inputs_nomig, "variant") <- "zero migration"
  attr(inputs_nomig, "a0rule")  <- attributes(ccmppWPP_estimates)$a0rule



  variant_inputs <- list(low_fert = inputs_low,
                         high_fert = inputs_high,
                         constant_fert = inputs_constant,
                         instant_replace = inputs_instant,
                         momentum = inputs_momentum,
                         no_change = inputs_nochange,
                         constant_mort = inputs_constant,
                         zero_mig = inputs_nomig)

  return(variant_inputs)
}



# THIS FUNCTION PERFORMS A BASEPOP ADJUSTMENT TO 1950 POPULATION SO THAT POP COUNTS ARE CONSISTENT WITH FERT AND MORT

#' Adjust base population children for consistency with fertility and mortality
#'
#' @description Invokes DemoTools::basepop_five to adjust counts of children under age 10 to be consistent with 1950
#' fertility and mortality
#'
#' @author Sara Hertog
#'
#' @param input_file_path character string pointing to excel input file
#'
#' @details uses life tables, asfr and srb from the excel input file
#'
#' @return data frame with adjusted 1950 base population
#' @export
#'
basepop_adjust_1950_population <- function(pop_count_age_sex_base,
                                           input_file_path) {

  # read metadata from parameters sheet of excel input file
  meta <-   readxl::read_xlsx(path = input_file_path,
                              sheet = "parameters",
                                              n_max = 1048576)

  meta <- meta %>%
    dplyr::select(parameter, value) %>%
    dplyr::filter(!is.na(parameter))

  meta.list <- list()
  for (i in 1:nrow(meta)) {
    meta.list[[i]] <- ifelse(!is.na(suppressWarnings(as.numeric(meta$value[i]))), as.numeric(meta$value[i]), meta$value[i])
    names(meta.list)[i] <- gsub(" ", "_", meta$parameter[i])
  }
  rm(meta)

  # import life tables from excel input file
  life_table_age_sex <-   readxl::read_xlsx(path = input_file_path, sheet = "life_table_age_sex",
                                              n_max = 1048576)
  # import age specific fertility rates from excel input file
  fert_rate_age_f <-   readxl::read_xlsx(path = input_file_path, sheet = "fert_rate_age_f",
                                              n_max = 1048576)
  # import sex ratios at birth from excel input file
  srb <-   readxl::read_xlsx(path = input_file_path, sheet = "srb",
                                              n_max = 1048576)

  # parse nLx for males and females transform into the matrices needed for basepop
  nLxDatesIn <- 1950.0 - c(0.5, 2.5, 7.5)

  nLxMatMale <- life_table_age_sex %>%
    dplyr::filter(indicator == "lt_nLx" & sex == "male" & time_start == 1950) %>%
    select(value) %>%
    apply(MARGIN = 2, FUN = function(S) {DemoTools::single2abridged(Age = S)}) %>%
    as.data.frame() %>%
    mutate(value2 = value,
           value3 = value) %>%
    as.matrix()
  colnames(nLxMatMale) <- nLxDatesIn

  nLxMatFemale <- life_table_age_sex %>%
    dplyr::filter(indicator == "lt_nLx" & sex == "female" & time_start == 1950) %>%
    select(value) %>%
    apply(MARGIN = 2, FUN = function(S) {DemoTools::single2abridged(Age = S)}) %>%
    as.data.frame() %>%
    mutate(value2 = value,
           value3 = value) %>%
    as.matrix()
  colnames(nLxMatFemale) <- nLxDatesIn

  radix <- life_table_age_sex$value[life_table_age_sex$indicator=="lt_lx" &
                                      life_table_age_sex$age_start == 0][1]

  # parse ASFR and transform into the matrix needed for basepop

  AsfrMat <- fert_rate_age_f[, c("time_start", "age_start", "value")]

  AsfrMat <- AsfrMat %>%
    dplyr::filter(time_start == 1950) %>%
    mutate(age5 = 5 * floor(age_start/5)) %>%
    group_by(age5) %>%
    summarise(asfr = mean(value)) %>%
    dplyr::filter(age5 >=15 & age5 <= 45) %>%
    mutate(asfr2 = asfr,
           asfr3 = asfr) %>%
    select(-age5) %>%
    as.matrix()

  colnames(AsfrMat) <- nLxDatesIn
  rownames(AsfrMat) <- seq(15,45,5)
  AsfrDatesIn <- nLxDatesIn

  # get SRB
  SRBDatesIn <- floor(1950.5 - c(0.5, 2.5, 7.5))

  parse_columns <- ifelse(SRBDatesIn < 1950, 1950, SRBDatesIn)

  SRB <- NULL
  for (k in 1:length(parse_columns)) {
    SRB <- c(SRB, srb$value[srb$time_start == parse_columns[k]])
  }
  SRBDatesIn <- SRBDatesIn + 0.5


  popin <- pop_count_age_sex_base %>%
    mutate(value = replace(value, is.na(value), 0)) %>%
    arrange(sex, age_start)
  Age1 <- popin$age_start[popin$sex == "male"]
  popM <- popin$value[popin$sex=="male"]
  popF <- popin$value[popin$sex=="female"]

  # group to abridged age groups
  popM_abr <- DemoTools::single2abridged(popM)
  popF_abr <- DemoTools::single2abridged(popF)
  Age_abr  <- as.numeric(row.names(popM_abr))

  # run basepop_five()
  BP1 <- DemoTools::basepop_five(location = meta.list$LocationID,
                                 refDate = 1950.0,
                                 Age = Age_abr,
                                 Females_five = popF_abr,
                                 Males_five = popM_abr,
                                 nLxFemale = nLxMatFemale,
                                 nLxMale   = nLxMatMale,
                                 nLxDatesIn = nLxDatesIn,
                                 AsfrMat = AsfrMat,
                                 AsfrDatesIn = AsfrDatesIn,
                                 SRB = SRB,
                                 SRBDatesIn = SRBDatesIn,
                                 radix = radix,
                                 verbose = FALSE)

  # graduate result to single year of age
  popM_BP1 <- DemoTools::graduate_mono(Value = BP1[[2]], Age = Age_abr, AgeInt = DemoTools::age2int(Age_abr), OAG = TRUE)
  popF_BP1 <- DemoTools::graduate_mono(Value = BP1[[1]], Age = Age_abr, AgeInt = DemoTools::age2int(Age_abr), OAG = TRUE)

  # define childhood ages to be adjusted with basepop
  adjust_basepop_1950_maxage <- as.numeric(meta.list$adjust_basepop_1950_maxage)
  adjust_basepop_1950_maxage <- ifelse(length(adjust_basepop_1950_maxage)==0, 9, adjust_basepop_1950_maxage)
  adjust_basepop_1950_maxage <- ifelse(is.na(adjust_basepop_1950_maxage), 9, adjust_basepop_1950_maxage)

  popM_out <- c(popM_BP1[1:(adjust_basepop_1950_maxage+1)],popM[(adjust_basepop_1950_maxage+2):length(popM)])
  popF_out <- c(popF_BP1[1:(adjust_basepop_1950_maxage+1)],popF[(adjust_basepop_1950_maxage+2):length(popF)])

  popout <- popin %>%
    mutate(value = replace(value, sex == "male", round(popM_out)),
           value = replace(value, sex == "female", round(popF_out)))

 return(popout)

}

# THIS FUNCTION BACK PROJECTS A CENSUS POPULATION TO JANUARY 1 1950

#' Adjust base population children for consistency with fertility and mortality
#'
#' @description Back projects from census one year at a time using input survival ratios, then interpolates to 1 Jan 1950
#'
#' @author Sara Hertog
#'
#' @param census_protocol_adjusted data frame with census population by single year of age and sex output from census adjustment protocol
#' @param census_reference_date numeric. census reference date as decimal
#' @param life_table_age_sex data frame. the life_table_age_sex table from input file
#'
#' @details extends both population and life tables to oag = 130+; NO INTERNATIONAL MIGRATION
#'
#' @return data frame with 1950 base population back projected from the earliest census
#' @export
#'

census_back_project_1950 <- function(census_protocol_adjusted, census_reference_date, life_table_age_sex) {

  lts <- life_table_age_sex[life_table_age_sex$time_start <= census_reference_date,]
  lts <- lts[order(lts$time_start, lts$sex, lts$indicator, lts$age_start),]
  age_max <- max(lts$age_start)
  if (age_max < 130) {
    lts$id <- paste(lts$time_start, lts$sex, sep = " - ")
    ids <- unique(lts$id)

    lts_new <- list()
    for (i in 1:length(ids)) {
      df <- lts[lts$id == ids[i] & lts$indicator == "lt_nMx",]
      df_ext <- DemoTools::lt_single_mx(nMx = df$value, Age = df$age_start, Sex = substr(df$sex[1],1,1), OAnew = 130,
                                        extrapFit = 90:age_max, extrapFrom = age_max, extrapLaw = "Kannisto")

      nMx_max1 <- ifelse(df_ext$nMx<1, df_ext$nMx, 1)
      df_ext1 <- DemoTools::lt_single_mx(nMx = nMx_max1, Age = df_ext$Age, Sex = substr(df$sex[1],1,1), OAnew = 130)

      df_ext1 <- reshape(df_ext1, idvar = c("Age", "AgeInt"),
                         direction = "long",
                         times = paste0("lt_",names(df_ext1)[3:11]), timevar = "indicator",
                         varying = list(names(df_ext1)[3:11]), v.names = "value")

      df_ext1$time_start <- df$time_start[1]
      df_ext1$time_span <- df$time_span[1]
      df_ext1$sex <- df$sex[1]
      df_ext1$age_start <- df_ext1$Age
      df_ext1$age_span <- df_ext1$AgeInt
      df_ext1 <- df_ext1[,names(df)[1:7]]

      lts_new[[i]] <- df_ext1
    }
    lts <- do.call(rbind,lts_new)
  }

  # get reference years for back projection
  back_dts <- census_reference_date-(1:(census_reference_date-1949))

  # extend census population to OAG 130+

  redist_from_age <- max(65, census_adj_all[[1]]$age_redist_start - (length(back_dts) + 10))

  census_extended_M <- DemoTools::OPAG(Pop = census_protocol_adjusted$DataValue[census_protocol_adjusted$SexID == 1],
                                       Age_Pop = census_protocol_adjusted$AgeStart[census_protocol_adjusted$SexID == 1],
                                       nLx = lts$value[lts$time_start == max(floor(census_reference_date),1950) &
                                                         lts$sex == "male" & lts$indicator == "lt_nLx"],
                                       Age_nLx = 0:130,
                                       Redistribute_from = redist_from_age,
                                       OAnew = 130)$Pop_out
  
  # smooth the join point of the extension a bit
  
  # if the value at the join point is lower than that of the previous age
  if (census_extended_M[redist_from_age+1] < census_extended_M[redist_from_age]) {
    census_extended_M[redist_from_age+1] <- (census_extended_M[redist_from_age] + census_extended_M[redist_from_age+2])/2
    if (census_extended_M[redist_from_age+2] < census_extended_M[redist_from_age+1]) {
      census_extended_M[redist_from_age+2] <- (census_extended_M[redist_from_age+1] + census_extended_M[redist_from_age+3])/2
      if (census_extended_M[redist_from_age+3] < census_extended_M[redist_from_age+2]) {
        census_extended_M[redist_from_age+3] <- (census_extended_M[redist_from_age+2] + census_extended_M[redist_from_age+4])/2
      }
    }
  } else {
  
  # identify the minimum age above the redist_from_age at which the difference between the value from extended and the 
  # previous age group value from original census is negative (such that population shrinks with age)
  nages <- length(census_protocol_adjusted$DataValue[census_protocol_adjusted$SexID == 1])
  compare <- data.frame(age_ext = 1:(nages-1),
                        ext = census_extended_M[2:nages],
                        age_cen = 0:(nages-2),
                        cen = census_protocol_adjusted$DataValue[census_protocol_adjusted$SexID == 1][1:(nages-1)])
  compare$diff <- compare$ext - compare$cen
  compare$pct <- compare$diff/compare$cen *100
  age_for_blend <- min(compare$age_ext[compare$age_ext >= redist_from_age & compare$pct <= -9])
  rm(compare)
  
  census_extended_M <- c(census_protocol_adjusted$DataValue[census_protocol_adjusted$SexID == 1][1:age_for_blend],
                         census_extended_M[(age_for_blend+1):131])
  names(census_extended_M) <- 0:130
  
  }
  

  census_extended_F <- DemoTools::OPAG(Pop = census_protocol_adjusted$DataValue[census_protocol_adjusted$SexID == 2],
                                       Age_Pop = census_protocol_adjusted$AgeStart[census_protocol_adjusted$SexID == 2],
                                       nLx = lts$value[lts$time_start == max(floor(census_reference_date),1950) &
                                                         lts$sex == "female" & lts$indicator == "lt_nLx"],
                                       Age_nLx = 0:130,
                                       Redistribute_from = redist_from_age,
                                       OAnew = 130)$Pop_out
  
  # smooth the join point of the extension a bit
  
  # if the value at the join point is lower than that of the previous age
  if (census_extended_F[redist_from_age+1] < census_extended_F[redist_from_age]) {
    census_extended_F[redist_from_age+1] <- (census_extended_F[redist_from_age] + census_extended_F[redist_from_age+2])/2
    if (census_extended_F[redist_from_age+2] < census_extended_F[redist_from_age+1]) {
      census_extended_F[redist_from_age+2] <- (census_extended_F[redist_from_age+1] + census_extended_F[redist_from_age+3])/2
      if (census_extended_F[redist_from_age+3] < census_extended_F[redist_from_age+2]) {
        census_extended_F[redist_from_age+3] <- (census_extended_F[redist_from_age+2] + census_extended_F[redist_from_age+4])/2
      }
    }
  } else {
  
  # identify the minimum age above the redist_from_age at which the difference between the value from extended and the 
  # previous age group value from original census is negative (such that population shrinks with age)
  nages <- length(census_protocol_adjusted$DataValue[census_protocol_adjusted$SexID == 2])
  compare <- data.frame(age_ext = 1:(nages-1),
                        ext = census_extended_F[2:nages],
                        age_cen = 0:(nages-2),
                        cen = census_protocol_adjusted$DataValue[census_protocol_adjusted$SexID == 2][1:(nages-1)])
  compare$diff <- compare$ext - compare$cen
  compare$pct <- compare$diff/compare$cen *100
  age_for_blend <- min(compare$age_ext[compare$age_ext >= redist_from_age & compare$pct <= -9])
  rm(compare)
  
  census_extended_F <- c(census_protocol_adjusted$DataValue[census_protocol_adjusted$SexID == 2][1:age_for_blend],
                         census_extended_F[(age_for_blend+1):131])
  names(census_extended_F) <- 0:130

  }


  # create a matrix to store population by age back projected
  popM_back <- as.matrix(census_extended_M)
  colnames(popM_back) <- census_reference_date

  popF_back <- as.matrix(census_extended_F)
  colnames(popF_back) <- census_reference_date

  # initialize starting population
  popM <- popM_back[,1]
  popF <- popF_back[,1]

  # loop thru back dates, one year back at a time
  for (i in 1:length(back_dts)) {

    # extract male Sx
    Sx_age_m <- lts$value[lts$sex == "male" & lts$time_start == max(1950,floor(back_dts[i])) &
                            lts$indicator == "lt_Sx"]

    # back project males one step
    popM <- project_backwards_no_mig(pop_count_age_start = popM, Sx_age = Sx_age_m)
    # add a 0 for the last age group (fine because there's no one this old in 1950)
    popM <- c(popM,0)
    names(popM) <- 0:130

    # extract female Sx
    Sx_age_f <- lts$value[lts$sex == "female" & lts$time_start == max(1950,floor(back_dts[i])) &
                            lts$indicator == "lt_Sx"]

    # back project females one step
    popF <- project_backwards_no_mig(pop_count_age_start = popF, Sx_age = Sx_age_f)
    # add a 0 for the last age group
    popF <- c(popF,0)
    names(popF) <- 0:130

    popM_back <- cbind(popM_back, popM)
    colnames(popM_back)[i+1] <- back_dts[i]

    popF_back <- cbind(popF_back, popF)
    colnames(popF_back)[i+1] <- back_dts[i]

  }

  # interpolate to 1 January 1950

  interpM <- DemoTools::interpolatePop(Pop1 = popM_back[,ncol(popM_back)],
                                       Pop2 = popM_back[,ncol(popM_back)-1],
                                       Date1 = as.numeric(colnames(popM_back)[ncol(popM_back)]),
                                       Date2 = as.numeric(colnames(popM_back)[ncol(popM_back)-1]),
                                       DesiredDate = 1950.0,
                                       method = "linear")

  interpF <- DemoTools::interpolatePop(Pop1 = popF_back[,ncol(popF_back)],
                                       Pop2 = popF_back[,ncol(popF_back)-1],
                                       Date1 = as.numeric(colnames(popF_back)[ncol(popF_back)]),
                                       Date2 = as.numeric(colnames(popF_back)[ncol(popF_back)-1]),
                                       DesiredDate = 1950.0,
                                       method = "linear")

  # assemble into a data frame

  pop_out <- data.frame(time_start = rep(1950, 262),
                        time_span = rep(0,262),
                        sex = c(rep("male", 131), rep("female", 131)),
                        age_start = rep(0:130,2),
                        age_span = rep(c(rep(1,130),1000),2),
                        value = c(interpM, interpF))

  return(pop_out)

}


project_backwards_no_mig <- function(pop_count_age_start,
                                     Sx_age) {

  # check that lengths of inputs agree
  check_length <- length(pop_count_age_start) == length(Sx_age)
  if (isFALSE(check_length)) { stop("Input columns are not all the same length")}

  # get input ages
  ages <- names(pop_count_age_start)
  nage <- length(ages) # number of age groups

  # get backwards population
  pop_count_age_end <- pop_count_age_start[2:nage]/ Sx_age[1:(nage-1)]
  names(pop_count_age_end) <- ages[1:nage-1]

  return(pop_count_age_end)

}
markalava/ccmppWPP documentation built on April 21, 2022, 12:36 a.m.