R/functions.R

#-------------------------------------------------------------------------------
# functions.R - This program includes the functions used in the PRIME model.
#-------------------------------------------------------------------------------


#-------------------------------------------------------------------------------
#' Simulation log reporting
#'
#' Appends message of simulation run (\code{x}) to log file (\code{logname}).
#'
#' @param logname log filename
#' @param x message of simulation run
#'
#' @return None
#' @export
#'
#' @examples #
#'
writelog <- function (logname,
                      x) {

  # wait until log file is yours
  Sys.sleep (0.02)

  while (file.exists (paste0 (logname, "_locked") ) ) {
    Sys.sleep (0.02)
  }

  # lock log file
  file.create (paste0 (logname,"_locked"))

  # write to log file
  write ( paste0 (format (Sys.time(), "%Y/%m/%d %H:%M:%S"), " ", x),
          file   = logname,
          append = TRUE )

  # unlock log file
  file.remove (paste0 (logname,"_locked") )

} # end of function -- writelog
#-------------------------------------------------------------------------------


#-------------------------------------------------------------------------------
#' Creates .data.batch for running multiple birth cohorts
#'
#' Creates .data.batch which is used when running/looping over multiple birth
#'   cohorts ( runCohort() ) at once.
#'
#' .data.batch is based on the data.table (DT) coverage_data, which is a DT with
#'   columns country_code (ISO3), year (of vaccination),
#'   age_first (age at vaccination), age_last (age at vaccination),
#'   coverage (in proportion, for all the agegroups specified).
#'
#' If you only want to run 1 age in this country/coverage combination,
#'   age_first==age_last
#'
#' @param coverage_data Data table with columns country_code,
#'        year (of vaccination), age_first, age_last, coverage.
#' @param reporting_years Numeric_vector, years that should be reported
#'        (parameter: not required)
#' @param force Logical, whether .data.batch should be overwritten if it already
#'        exists (parameter: not required)
#'
#' @return batch data of cohorts with vaccination coverage
#' @export
#'
#' @examples #
RegisterBatchData <- function (coverage_data,
                               reporting_years = -1,
                               force           = FALSE) {

  if (exists(".data.batch") & !force) {
    stop ("'.data.batch' already exists.")
  }

  macs   <- coverage_data[age_first != age_last]
  nomacs <- coverage_data[age_first == coverage_data$age_last]

  if (nrow(macs) > 0) {
    for(m in 1:nrow(macs)) {
      ages <- macs[m,age_first]:macs[m,age_last]
      for(a in ages){
        nomacs <- rbindlist(
          list(
            nomacs,
            macs[m,]
          )
        )
        nomacs[nrow(nomacs),"age_first"] <- a
        nomacs[nrow(nomacs),"age_last"]  <- a
      }
    }
    coverage_data <- nomacs
  }

  coverage_data [, "birthcohort"] <-
    coverage_data [, year] - coverage_data [, age_first]

  setorder (coverage_data, country_code, birthcohort, age_first)

  demographic_years <- sort (as.numeric (
    unique (data.pop [country_code %in%
                        unique (coverage_data [, country_code]), year])
    ))
  coverage_years <- sort (as.numeric (
    unique (coverage_data$birthcohort)
    ))

  if (length(reporting_years) > 1 && reporting_years != -1) {
    reporting_years <- sort (
      unique (
        c (
          min (coverage_data [, year] - coverage_data [, age_last]):
            max ((coverage_data [, year] - coverage_data [, age_first]) + 100)
        )
      )
    )
    min_year <- min (reporting_years)
    max_year <- max (reporting_years)
  } else {
    min_year <- min (coverage_years)
    max_year <- max (coverage_years)
  }

  if (min(coverage_years) > min_year) {
    year_first <- min (coverage_years)
  } else {
    year_first <- min_year
  }
  if (max(demographic_years) < max_year) {
    year_last <- max (demographic_years)
  } else {
    year_last <- max_year
  }

  birthcohorts <- year_first:year_last
  template     <- coverage_data [birthcohort==coverage_years[1]]

  # remove birthcohorts which should not be modelled
  coverage_data <- coverage_data [birthcohort %in% birthcohorts]

  # select birthcohorts which are not yet in the coverage-data
  birthcohorts <- birthcohorts [!(birthcohorts %in% coverage_data[,birthcohort])]

  # add birthcohorts to coverage data
  for (b in birthcohorts) {
    t_coverage_data <- template
    t_coverage_data [, "birthcohort"] <- b
    t_coverage_data [, "coverage"]    <- 0
    coverage_data <- rbindlist (
      list(
        coverage_data,
        t_coverage_data
      )
    )
  }

  colnames (coverage_data) [which (colnames (coverage_data) == "age_first")] <-
    "agevac"

  coverage_data [, "activity_type"] <- "routine"
  coverage_data [, "target"] <- NA

  coverage_data <- coverage_data[ , c("country_code",
                                      "birthcohort",
                                      "coverage",
                                      "agevac",
                                      "activity_type",
                                      "target")
                                  ]

  setorder (coverage_data, country_code, birthcohort, agevac)

  .data.batch <<- coverage_data

  # return batch data of cohorts with vaccination coverage
  return (.data.batch)

} # end of function -- RegisterBatchData
#-------------------------------------------------------------------------------


#-------------------------------------------------------------------------------
#' Creates .data.batch for running multiple birth cohorts (VIMC runs)
#'
#' Creates .data.batch which is used when running/looping over multiple birth
#'   cohorts ( runCohort() ) at once. Similar to RegisterBatchData, but for when
#'   we make runs for VIMC.
#'
#' .data.batch is based on the data.table (DT) coverage_data, which is a DT with
#'   columns country_code (ISO3), year (of vaccination),
#'   age_first (age at vaccination), age_last (age at vaccination),
#'   coverage (in proportion, for all the age groups specified).
#'
#' @param vimc_coverage data table with coverage estimates as downloaded from
#'   VIMC montagu
#' @param vimc_template data table with reporting template as downloaded from
#'   VIMC montagu
#' @param use_campaigns logical, whether campaigns as stated in coverage files
#'   should be modelled
#' @param use_routine logical, whether routine vaccination as stated in coverage
#'   file should be modelled
#' @param restrict_to_coverage_data logical, whether the first birth-cohort
#'   should be the first cohort that is mentioned in the coverage data.
#'     If TRUE, restrict to coverage data.
#'     If FALSE, restrict to cohorts provided in vimc_template.
#' @param force logical, whether .data.batch should be overwritten if it already
#'   exists
#' @param psa integer, indicating how many runs for probabilistic sensitivity
#'   analysis (PSA). 0 to run no PSA.
#'
#' @return batch data of cohorts with vaccination coverage
#' @export
#'
#' @examples #
RegisterBatchDataVimc <- function (vimc_coverage,
                                   vimc_template,
                                   use_campaigns,
                                   use_routine,
                                   restrict_to_coverage_data = FALSE,
                                   force                     = FALSE,
                                   psa                       = 0) {

  if (exists(".data.batch") & !force) {
    stop("'.data.batch' already exists.")
  }

  if (use_campaigns) {
    coverage_data <- vimc_coverage[activity_type=="routine" |
                                     (activity_type=="campaign" &
                                        coverage > 0)]
  } else {
    coverage_data <- vimc_coverage[activity_type=="routine"]
  }
  if (!use_routine) {
    coverage_data[activity_type=="routine" & coverage > 0, "coverage"] <- 0
  }

  coverage_data [target=="<NA>","target"] <- NA
  class (coverage_data$target) <- "numeric"
  coverage_data <- coverage_data [country_code %in% unique(vimc_template[,country])]
  macs <- coverage_data [age_first != age_last]
  nomacs <- coverage_data [age_first == coverage_data$age_last]

  if (nrow(macs) > 0) {
    for (m in 1:nrow(macs)) {
      ages <- macs[m,age_first]:macs[m,age_last]
      for (a in ages) {
        nomacs <- rbindlist(
          list(
            nomacs,
            macs[m,]
          )
        )
        if(a == ages[1]) {
          target <- as.numeric (nomacs[nrow(nomacs),target])
        }
        nomacs [nrow(nomacs),"age_first"] <- a
        nomacs [nrow(nomacs),"age_last"]  <- a
        #spread size of target evenly over age-strata targetted
        nomacs [nrow(nomacs),"target"] <- target/length(ages)
      }
    }
    coverage_data <- nomacs
  }
  coverage_data[,"birthcohort"] <- coverage_data[,year] - coverage_data[,age_first]
  setorder(coverage_data, country_code, birthcohort, age_first)

  # get years that should be reported in template
  reporting_years <- sort(as.numeric(unique(vimc_template$year)))

  # get years for which there is demographic data
  demographic_years <- sort(as.numeric(unique(data.pop[country_code %in% unique(coverage_data[, country_code]), year])))

  # get birthcohorts for which we have coverage data
  coverage_years <- sort(as.numeric(unique(coverage_data$birthcohort)))

  # get min and max birthcohorts that should be modelled, following template
  min_year <- min(reporting_years) -
    max(vimc_template[year==min(reporting_years),age])
  max_year <- max(reporting_years) -
    min(vimc_template[year==max(reporting_years),age])

  # restrict to years for which we have demographic data, or to years for which
  # we have coverage data
  if (!restrict_to_coverage_data) {
    if (min(demographic_years) > min_year) {
      year_first <- min(demographic_years)
    } else {
      year_first <- min_year
    }
  } else {
    if (min(coverage_years) > min_year) {
      year_first <- min(coverage_years)
    } else {
      year_first <- min_year
    }
  }

  # restrict to years for which we have demographic data
  if (max(demographic_years) < max_year) {
    year_last <- max(demographic_years)
  } else {
    year_last <- max_year
  }

  birthcohorts <- year_first:year_last

  # remove birthcohorts which should not be modelled
  coverage_data <- coverage_data [birthcohort %in% birthcohorts]

  # sort by country code
  countries <- sort ( unique( coverage_data[,country_code] ) )

  for(c in countries) {

    # create template for data not yet in coverage-data
    template <- coverage_data [
      country_code  == c &
      activity_type == "routine" &
      birthcohort   == min (coverage_data [country_code == c &
                                             activity_type == "routine",
                                           birthcohort] ) ]

    # select birthcohorts which are not yet in the coverage-data and add to data
    missing_birthcohorts <- birthcohorts[!(birthcohorts %in% coverage_data[country_code == c,birthcohort])]

    if (length(missing_birthcohorts) > 0 ) {
      for (b in missing_birthcohorts) {
        t_coverage_data <- template
        t_coverage_data [,"birthcohort"] <- b
        t_coverage_data [,"coverage"]    <- 0
        coverage_data <- rbindlist (
          list(
            coverage_data,
            t_coverage_data
          )
        )
      }
    }
  }
  colnames (coverage_data)[which(colnames(coverage_data)=="age_first")] <- "agevac"
  coverage_data <- coverage_data [, c("country_code",
                                      "birthcohort",
                                      "coverage",
                                      "agevac",
                                      "activity_type",
                                      "target",
                                      "vaccine")
                                  ]

  # model countries without coverage data but in template (with coverage-level of 0)
  countries <- sort (unique(vimc_template[, country]))
  countries <- countries [!(countries %in% unique(coverage_data[, country_code]))]

  if (length(countries) > 0) {
    for (c in countries) {
      t_coverage_data <- coverage_data[country_code == unique (coverage_data[,country_code])[1]]
      t_coverage_data [,"target"] <- 0
      t_coverage_data [,"coverage"] <- 0
      t_coverage_data [,"country_code"] <- c

      coverage_data <- rbindlist (
        list(
          t_coverage_data,
          coverage_data
        )
      )
    }
  }

  setorder (coverage_data, country_code, birthcohort, agevac)

  # sum coverage for each birth cohort (even if vaccinated across multiple years)
  coverage_data <- coverage_data [, .(country_code,
                                      agevac,
                                      cov = sum (coverage),
                                      activity_type,
                                      target,
                                      vaccine),
                                  by = .(birthcohort, country_code)]

  # use single row per birth cohort
  coverage_data <- coverage_data [, .SD [which.max (agevac)], by = .(birthcohort, country_code)]

  # rename coverage column
  colnames(coverage_data) [colnames(coverage_data) == "cov"] <- "coverage"

  # check if coverage exceeds 100% for any cohort
  coverage_data [(coverage > 1), coverage := 1]

  .data.batch <<- coverage_data

  # sort by country code
  countries <- sort (unique(.data.batch[,country_code]))

  # ----------------------------------------------------------------------------
  # ----------------------------------------------------------------------------
  # Note: The following code chunk for PSA is redundant -- grandfathered
  # from older version. The newer version does not use data.quality to conduct
  # psa. Instead it uses uncertainty intervals in burden estimates and
  # disability weights to conduct probabilistic sensitivity analysis.
  # ----------------------------------------------------------------------------
  # sensitivity analysis
  if (psa > 1) {
    psadat <- data.table(
      country = character(0),
      run_id = numeric(0),
      incidence = numeric(0),
      mortality = numeric(0)
    )
    for (c in countries){
      # select proxy country if data not available
      tc <- switch(
        c,
        "XK"  = "ALB",  # demography data available for XK but no burden & cost
        "PSE" = "JOR",  # burden & demography data available for PSE but no cost
        # "MHL"="KIR",
        # "TUV"="FJI",
        # "SSD"="SDN",
        c
      )
      inc_quality  <- data.quality [iso3==tc, Incidence]
      mort_quality <- data.quality [iso3==tc, Mortality]
      if (inc_quality == 0){
        inc_quality <- c(0.5,1.5)
      } else {
        inc_quality <- c(0.8,1.2)
      }
      if(mort_quality == 0){
        mort_quality <- c(0.5,1.5)
      } else {
        mort_quality <- c(0.8,1.2)
      }
      psadat <- rbindlist(
        list(
          psadat,
          data.table(
            country = rep(c, psa),
            psa = c(1:psa),
            incidence = runif (n=psa, min=inc_quality[1], max=inc_quality[2]),
            mortality = runif (n=psa, min=inc_quality[1], max=mort_quality[2])
          )
        )
      )
    }

    if (exists(".data.batch.psa") & !force) {
      warning("'.data.batch.psa' already exists and is NOT overwritten.")
    } else {
      .data.batch.psa <<- psadat
    }
  }
  # ----------------------------------------------------------------------------
  # ----------------------------------------------------------------------------

  # return batch data of cohorts with vaccination coverage
  return (.data.batch)

} # end of function -- RegisterBatchDataVimc
#-------------------------------------------------------------------------------


#-------------------------------------------------------------------------------
#' Run multiple cohorts in a batch
#'
#' Runs multiple cohorts in one batch, based on the data in .data.batch
#'
#' @param countries ignore, read from .data.batch
#' @param coverage ignore, read from .data.batch
#' @param agevac ignore, read from .data.batch
#' @param agecohort ignore, read from .data.batch
#' @param canc.inc year from where incidence data is read
#'        (2018; old data: 2012) --
#'        with updated 2018 Globocan data,
#'        DALY weights from GBD,
#'        and DALY estimation based on prevalence instead of age of incidence
#' @param sens ignore, does not do anything anymore
#' @param unwpp_mortality logical, whether to create lifetables based on
#'        UNWPP mortality estimates or WHO data
#' @param year_born ignore
#' @param year_vac ignore
#' @param runs ignore
#' @param vaccine_efficacy_beforesexdebut vaccine efficacy before sexual debut
#' @param vaccine_efficacy_aftersexdebut vaccine efficacy after sexual debut
#' @param log name of log file
#' @param by_calendaryear logical, output values by calendar year or
#'        by year of birth cohort
#' @param use_proportions logical, output data as rates per capita or in totals
#' @param analyseCosts logical, directly run cost-effectiveness analysis
#'        on output or not
#' @param canc.cost Character (optional): Is cost of cancer adjusted
#'        ("adj" for International $) or not ("unadj" for US$)
#' @param discounting Logical (optional): If TRUE, run cost-effectiveness analysis
#'        undiscounted and discounted. If FALSE, only uses undiscounted
#' @param disc.cost Number (optional): Discounting for health costs
#'        (only if discounting=TRUE)
#' @param disc.ben Number (optional): Discounting for health outcomes
#'        (only if discounting=TRUE)
#' @param psa integer, number of runs for probabilistic sensitivity analysis (PSA)
#' @param psa_vals data table with values to use in probabilistic sensitivity
#'        analysis, usually .data.batch.psa, generated by
#'        RegisterBatchData* functions (currently only RegisterBatchDataVimc)
#' @param disability.weights character, disability weights for cervical cancer
#'        from GBD 2017 or GBD 2001
#' @param wb.indicator character, World Bank indicator for GDP/GNI per capita
#'        in I$/US$ and current/constant data
#' @param wb.year numeric, year of the World Bank indicator value
#' @param vaccine character, bivalent/quadrivalent (4vHPV) or
#'        nonavalent (9vHPV) vaccine
#' @return Returns combined results
#' @export
#'
#' @examples #
BatchRun <- function (countries                       = -1,
                      coverage                        = -1,
                      agevac                          = -1,
                      agecohort                       = -1,
                      canc.inc                        = "2020",
                      sens                            = -1,
                      unwpp_mortality                 = TRUE,
                      year_born                       = -1,
                      year_vac                        = -1,
                      runs                            = 1,
                      vaccine_efficacy_beforesexdebut = 1,
                      vaccine_efficacy_aftersexdebut  = 0,
                      log                             = -1,
                      by_calendaryear                 = FALSE,
                      use_proportions                 = TRUE,
                      analyseCosts                    = FALSE,
                      canc.cost                       = "unadj",
                      discounting                     = FALSE,
                      disc.cost                       = 0.03,
                      disc.ben                        = 0.03,
                      psa                             = 0,
                      psa_vals                        = ".data.batch.psa",
                      disability.weights              = "gbd_2017",
                      wb.indicator                    = "NY.GDP.PCAP.PP.CD",
                      wb.year                         = 2017,
                      vaccine                         = "4vHPV"
                      ) {

  ages <- as.numeric (colnames (data.incidence) [!grepl("\\D", colnames(data.incidence) ) ] )
  ages <- ages [!is.na(ages)]

  if (countries == -1) {
    countries <- sort (unique (.data.batch [, country_code] ) )
  }

  if (year_vac == -1 & year_born == -1) {
    years <- sort (unique (.data.batch [, birthcohort] ) )
  }

  if (psa > 1) {
    psadat <- get(".data.batch.psa")
    if(length(unique(psadat[,run_id])) != psa){
      stop ("Number of specified PSA does not correspond to number of run ids in PSA file")
    } else {
      runs  <- psa
      dopsa <- TRUE
    }
  } else {
    dopsa  <- FALSE
    runs   <- 1
    psadat <- -1
  }

  # create initial variables that won't be overwritten in foreach loops
  init_coverage  <- coverage
  init_agevac    <- agevac
  init_agecohort <- agecohort

  combine <- foreach (
    cn = 1:length(countries),
    .packages = c("data.table", "prime"),
    # .errorhandling="pass",
    .export = c(".data.batch", "data.pop", "writelog")
  ) %:% foreach(
    y = 1:length(years)
  ) %:% foreach(
    r=1:runs
  ) %dopar% {
    #) %do% {

    # --------------------------------------------------------------------------
    # single cohort
    .t_data.batch <- .data.batch [country_code == countries[cn] &
                                    birthcohort == years[y]]

    # adjust vaccine efficacy for 1-dose
    if (.t_data.batch$vaccine == "HPV_1D") {

      vaccine_efficacy_beforesexdebut <- 0.975

      # Single-dose efficacy: bivalent VE was 97.5% (95% CI 90.0-99.4%)
      # Barnabas RV, et al (KEN SHE Study Team).
      # Durability of single-dose HPV vaccination in young Kenyan women: randomized controlled trial 3-year results.
      # Nat Med. 2023 Dec;29(12):3224-3232. doi: 10.1038/s41591-023-02658-0

    }

    # --------------------------------------------------------------------------

    if (init_coverage == -1){
      coverage <- .t_data.batch [1, coverage]
    } else {
      coverage <- init_coverage
    }
    if (init_agevac==-1){
      agevac <- .t_data.batch [1, agevac]
    } else {
      agevac <- init_agevac
    }
    if ( (init_agecohort == -1) & (agevac >= 0) ) {
    # if ( (init_agecohort == -1) & (agevac > 1) ) {
      # agecohort <- agevac - 1
      agecohort <- agevac
    } else if (init_agecohort == -1){
      agecohort <- 0
      # agecohort <- 1
    } else {
      agecohort <- init_agecohort
    }



    # --------------------------------------------------------------------------
    # get cohort size from UNWPP data table -- data.pop

    # get cohort size for a specific age
    # if year > 2100, then use cohort size for the specific age in 2100
    if ((years[y] + agecohort) <= 2100) {

      cohort <- data.pop [country_code == countries[cn] &
                            year       == (years[y] + agecohort) &
                            age_from   == agecohort,
                          value]
    } else {

      cohort <- data.pop [country_code == countries[cn] &
                            year       == 2100 &
                            age_from   == agecohort,
                          value]
    }
    # --------------------------------------------------------------------------




    if(is.character(log)){
      writelog(
        log,
        paste0(
          "country: '",
          countries[cn],
          "'; birthcohort: '",
          years[y],
          "'; run: '",
          r,
          "'; agecohort: '",
          agecohort,
          "'; agevac: '",
          agevac,
          "'; coverage: '",
          coverage,
          "'; cohort_size: '",
          cohort,
          "'; .t_data.batch (rows): '",
          nrow(.t_data.batch),
          "';"
        )
      )
    }

    if (nrow (.t_data.batch) > 1) {
      campaigns <- list()
      for (cmp in 2:nrow(.t_data.batch)) {
        campaigns[[length(campaigns)+1]] <- list(
          "year" = .t_data.batch [cmp, birthcohort],
          "ages" = .t_data.batch [cmp, agevac],
          # add type and coverage data -
          # if type is 'routine', coverage is already a proportion;
          # if type is 'campaign', proportion will be calculated from target population
          "type" = .t_data.batch [cmp, activity_type],
          #"coverage" = switch(
          #  .t_data.batch[cmp,activity_type],
          #  "routine" = .t_data.batch[cmp,coverage],
          #  "campaign" = .t_data.batch[cmp,coverage]*.t_data.batch[cmp,target]
          #)
          "coverage" = .t_data.batch [cmp, coverage]
        )
      }
    } else {
      campaigns <- -1
    }

    if (dopsa) {
      cpsadat <- list(
        incidence = psadat[country == countries[cn] & run_id == r, incidence],
        mortality = psadat[country == countries[cn] & run_id == r, mortality]
      )
    } else {
      cpsadat <- list(
        incidence = 1,
        mortality = 1
      )
    }

    data <- RunCountry (
      country_iso3          = countries[cn],
      vaceff_beforesexdebut = vaccine_efficacy_beforesexdebut,
      vaceff_aftersexdebut  = vaccine_efficacy_aftersexdebut,
      cov                   = coverage,
      agevac                = agevac,
      agecohort             = agecohort,
      cohort                = cohort,
      canc.inc              = canc.inc,
      unwpp_mortality       = unwpp_mortality,
      year_born             = years[y],
      campaigns             = campaigns,
      run_batch             = TRUE,
      analyseCosts          = analyseCosts,
      canc.cost             = canc.cost,
      discounting           = discounting,
      disc.cost             = disc.cost,
      disc.ben              = disc.ben,
      psadat                = cpsadat,
      disability.weights    = disability.weights,
      wb.indicator          = wb.indicator,
      wb.year               = wb.year,
      vaccine               = vaccine
    )

    data [,"country"]     <- countries [cn]
    data [,"birthcohort"] <- years [y]

    if (dopsa) {
      data [,"run_id"] <- r
    }
    return(data)

  }  # end of foreach

  for (l in 1:length(combine)) {
    for (i in 1:length(combine[[l]])) {
      combine[[l]][[i]] <- rbindlist(combine[[l]][[i]])
    }
    combine[[l]] <- rbindlist(combine[[l]])
  }
  combine <- rbindlist(combine)

  if (by_calendaryear) {
    combine[, "birthcohort"] <- combine[,birthcohort] + combine[,age]
    colnames(combine)[colnames(combine) == "birthcohort"] <- "year"
  }
  if (!use_proportions) {

    # estimate absolute values from proportions
    combine [, "vaccinated"] <- combine [, vaccinated] * combine [, cohort_size]
    combine [, "immunized"]  <- combine [, immunized]  * combine [, cohort_size]

    combine [, "inc.cecx"]   <- combine [, inc.cecx]   * combine [, cohort_size]
    combine [, "mort.cecx"]  <- combine [, mort.cecx]  * combine [, cohort_size]
    combine [, "prev.cecx"]  <- combine [, prev.cecx]  * combine [, cohort_size]

    combine [, "lifey"]      <- combine [, lifey]      * combine [, cohort_size]
    combine [, "disability"] <- combine [, disability] * combine [, cohort_size]
    combine [, "cost.cecx"]  <- combine [, cost.cecx]  * combine [, cohort_size]

    # change column names to more meaningful names
    colnames (combine) [colnames (combine) == "inc.cecx"]  <- "cases"
    colnames (combine) [colnames (combine) == "mort.cecx"] <- "deaths"
    colnames (combine) [colnames (combine) == "prev.cecx"] <- "prevalence"
    colnames (combine) [colnames (combine) == "cost.cecx"] <- "costs"
  }

  return(combine)

} # end of function -- BatchRun
#-------------------------------------------------------------------------------


#-------------------------------------------------------------------------------
#' Formatting output for VIMC Montagu
#'
#' \code{OutputVimc} takes result of BatchRun and outputs it in format to be
#'   uploaded to VIMC Montagu.
#'
#' @param DT data table with results
#' @param age_stratified logical, whether output should be stratified by age
#' @param calendar_year logical, whether output should be given by calendar year
#'        of event OR by year of birth of cohort
#' @param vimc_template data table with template file downloaded from montagu
#'
#' @return #
#' @export
#'
#' @examples #
OutputVimc <- function (DT,
                        age_stratified = TRUE,
                        calendar_year  = FALSE,
                        vimc_template  = -1) {

  #check if data by calendar_year
  if ("year" %in% colnames(DT)) {
    is_by_calendar_year <- TRUE
  } else {
    is_by_calendar_year <- FALSE
    colnames(DT)[which(colnames(DT) == "birthcohort")] <- "year"
  }

  #check if values are proportions
  if (!("cases" %in% colnames(DT))) {
    DT [, "inc.cecx"]   <- DT [, cohort_size] * DT [, inc.cecx]
    DT [, "mort.cecx"]  <- DT [, cohort_size] * DT [, mort.cecx]
    DT [, "disability"] <- DT [, cohort_size] * DT [, disability] +
      DT [, cohort_size] * DT[, lifey]
    DT [, "lifey"]  <- DT [, cohort_size] * DT [, lifey]

    if ("run_id" %in% colnames(DT)) {
      DT <- DT[, c("scenario",
                   "run_id",
                   "country",
                   "year",
                   "age",
                   "cohort_size",
                   "inc.cecx",
                   "mort.cecx",
                   "disability",
                   "lifey") ]

      colnames(DT) <- c("scenario",
                        "run_id",
                        "country",
                        "year",
                        "age",
                        "cohort_size",
                        "cases",
                        "deaths",
                        "dalys",
                        "yll")
    } else {
      DT <- DT[, c("scenario",
                   "country",
                   "year",
                   "age",
                   "cohort_size",
                   "inc.cecx",
                   "mort.cecx",
                   "disability",
                   "lifey") ]

      colnames(DT) <- c("scenario",
                        "country",
                        "year",
                        "age",
                        "cohort_size",
                        "cases",
                        "deaths",
                        "dalys",
                        "yll")
    }
  } else {
    DT[,"dalys"] <- DT[,lifey] + DT[,disability]
    if ("run_id" %in% colnames(DT)) {
      DT <- DT[, c("scenario",
                   "run_id",
                   "country",
                   "year",
                   "age",
                   "cohort_size",
                   "cases",
                   "deaths",
                   "dalys",
                   "lifey")]

      colnames(DT) <- c("scenario",
                        "run_id",
                        "country",
                        "year",
                        "age",
                        "cohort_size",
                        "cases",
                        "deaths",
                        "dalys",
                        "yll")

    } else {
      DT <- DT[, c("scenario",
                   "country",
                   "year",
                   "age",
                   "cohort_size",
                   "cases",
                   "deaths",
                   "dalys",
                   "lifey") ]

      colnames(DT) <- c("scenario",
                        "country",
                        "year",
                        "age",
                        "cohort_size",
                        "cases",
                        "deaths",
                        "dalys",
                        "yll")
    }
  }

  if (is.data.table(vimc_template)) {
    #calendar year is needed to select data in vimc_template
    if (!is_by_calendar_year) {
      DT[,"year"] <- DT[,year] + DT[,age]
    }
    DT <- DT[year %in% vimc_template[,year]]
    for (y in sort(unique(DT[,year]))) {
      DT <- DT[year!=y | (year==y & age %in% vimc_template[year==y,age])]
    }
    for (c in unique(DT[,country])) {
      DT [country == c, "country_name"] <- unique (vimc_template[country == c,
                                                                 country_name])
    }
    DT[,"disease"] <- unique(vimc_template[,"disease"])

    #revert back to impact year
    if (!is_by_calendar_year) {
      DT[,"year"] <- DT[,year] - DT[,age]
    }
    if ("run_id" %in% colnames(DT)) {
      DT <- DT[,c("scenario","run_id",colnames(vimc_template)),with=F]
    } else {
      DT <- DT[,c("scenario",colnames(vimc_template)),with=F]
    }
  }

  #return correct year
  if (calendar_year & !is_by_calendar_year) {
    DT[,"year"] <- DT[,year] + DT[,age]
  } else if (!calendar_year & is_by_calendar_year) {
    DT[,"year"] <- DT[,year] - DT[,age]
  }

  if ("run_id" %in% colnames(DT)) {
    if (!age_stratified) {
      DT <- dtAggregate (DT, "age", c("cohort_size",
                                      "cases",
                                      "deaths",
                                      "dalys",
                                      "yll",
                                      "run_id"))
    }
    setorder (DT, scenario, run_id, country, year, age)
  } else {
    if (!age_stratified) {
      DT <- dtAggregate(DT, "age", c("cohort_size",
                                     "cases",
                                     "deaths",
                                     "dalys",
                                     "yll"))
    }
    setorder(DT, scenario, country, year, age)
  }

  return(DT)

} # end of function -- OutputVimc
#-------------------------------------------------------------------------------


#-------------------------------------------------------------------------------
#' Formatting output for VIMC Montagu
#'
#' \code{OutputVimc_old} takes result of BatchRun and outputs it in format to be
#'   uploaded to VIMC Montagu.
#'
#' @param DT data table with results
#' @param age_stratified logical, whether output should be stratified by age
#' @param calendar_year logical, whether output should be given by calendar year
#'        of event OR by year of birth of cohort
#' @param vimc_template data table with template file downloaded from montagu
#'
#' @return #
#' @export
#'
#' @examples #
OutputVimc_old <- function (DT,
                        age_stratified = TRUE,
                        calendar_year  = FALSE,
                        vimc_template  = -1) {

  #check if data by calendar_year
  if ("year" %in% colnames(DT)) {
    is_by_calendar_year <- TRUE
  } else {
    is_by_calendar_year <- FALSE
    colnames(DT)[which(colnames(DT) == "birthcohort")] <- "year"
  }

  #check if values are proportions
  if (!("cases" %in% colnames(DT))) {
    DT [, "inc.cecx"]   <- DT [, cohort_size] * DT [, inc.cecx]
    DT [, "mort.cecx"]  <- DT [, cohort_size] * DT [, mort.cecx]
    DT [, "disability"] <- DT [, cohort_size] * DT [, disability] +
      DT [, cohort_size] * DT[, lifey]

    if ("run_id" %in% colnames(DT)) {
      DT <- DT[, c("scenario",
                   "run_id",
                   "country",
                   "year",
                   "age",
                   "cohort_size",
                   "inc.cecx",
                   "mort.cecx",
                   "disability")]

      colnames(DT) <- c("scenario",
                        "run_id",
                        "country",
                        "year",
                        "age",
                        "cohort_size",
                        "cases",
                        "deaths",
                        "dalys")
    } else {
      DT <- DT[, c("scenario",
                   "country",
                   "year",
                   "age",
                   "cohort_size",
                   "inc.cecx",
                   "mort.cecx",
                   "disability")
               ]

      colnames(DT) <- c("scenario",
                        "country",
                        "year",
                        "age",
                        "cohort_size",
                        "cases",
                        "deaths",
                        "dalys")
    }
  } else {
    DT[,"dalys"] <- DT[,lifey] + DT[,disability]
    if ("run_id" %in% colnames(DT)) {
      DT <- DT[, c("scenario",
                   "run_id",
                   "country",
                   "year",
                   "age",
                   "cohort_size",
                   "cases",
                   "deaths",
                   "dalys")]
    } else {
      DT <- DT[, c("scenario",
                   "country",
                   "year",
                   "age",
                   "cohort_size",
                   "cases",
                   "deaths",
                   "dalys")]
    }
  }

  if (is.data.table(vimc_template)) {
    #calendar year is needed to select data in vimc_template
    if (!is_by_calendar_year) {
      DT[,"year"] <- DT[,year] + DT[,age]
    }
    DT <- DT[year %in% vimc_template[,year]]
    for (y in sort(unique(DT[,year]))) {
      DT <- DT[year!=y | (year==y & age %in% vimc_template[year==y,age])]
    }
    for (c in unique(DT[,country])) {
      DT [country == c, "country_name"] <- unique (vimc_template[country == c,
                                                                 country_name])
    }
    DT[,"disease"] <- unique(vimc_template[,"disease"])

    #revert back to impact year
    if (!is_by_calendar_year) {
      DT[,"year"] <- DT[,year] - DT[,age]
    }
    if ("run_id" %in% colnames(DT)) {
      DT <- DT[,c("scenario","run_id",colnames(vimc_template)),with=F]
    } else {
      DT <- DT[,c("scenario",colnames(vimc_template)),with=F]
    }
  }

  #return correct year
  if (calendar_year & !is_by_calendar_year) {
    DT[,"year"] <- DT[,year] + DT[,age]
  } else if (!calendar_year & is_by_calendar_year) {
    DT[,"year"] <- DT[,year] - DT[,age]
  }

  if ("run_id" %in% colnames(DT)) {
    if (!age_stratified) {
      DT <- dtAggregate (DT, "age", c("cohort_size",
                                      "cases",
                                      "deaths",
                                      "dalys",
                                      "run_id"))
    }
    setorder (DT, scenario, run_id, country, year, age)
  } else {
    if (!age_stratified) {
      DT <- dtAggregate(DT, "age", c("cohort_size",
                                     "cases",
                                     "deaths",
                                     "dalys"))
    }
    setorder(DT, scenario, country, year, age)
  }

  return(DT)

} # end of function -- OutputVimc_old
#-------------------------------------------------------------------------------


#-------------------------------------------------------------------------------
#' Run PRIME for a single birth-cohort
#'
#' Runs PRIME for one birth-cohort. Usually called by another function such as
#'   RunCountry().
#'
#' @param lifetab Data.table: The life-table for this cohort. Can be created
#'        using the lifeTable() function.
#' @param cohort Number: The cohort-size of this birth-cohort at the time where
#'        the lifetable starts.
#' @param incidence Numeric vector: Age-specific CeCx(16/18) incidence-rates.
#' @param mortality_cecx Numeric vector: Age-specific CeCx(16/18) mortality-rates.
#' @param prevalence Numeric vector: Age-specific CeCx(16/18) prevalence rates
#'        (5-year prevalence) -- referring to people who are alive within
#'        5 years of diagnosis.
#' @param agevac Number: Age at which the cohort is vaccinated.
#' @param coverage Number: Proportion of the cohort that will receive a vaccination.
#' @param campaigns List or number: MAC cohort-vaccinations (needs to be changed).
#' @param vaccine_efficacy_nosexdebut Number: proportion indicating
#'        vaccine-efficacy before sexual debut.
#' @param vaccine_efficacy_sexdebut Number: proportion indicating
#'        vaccine-efficacy after sexual debut.
#' @param cost_cancer Number: total per capita cost of cancer.
#' @param discounting Logical (optional): If TRUE, run cost-effectiveness analysis
#'        undiscounted and discounted. If FALSE, only uses undiscounted
#' @param disc.cost Number (optional): Discounting for health costs
#'        (only if discounting=TRUE)
#' @param disc.ben Number (optional): Discounting for health outcomes
#'        (only if discounting=TRUE)
#'
#' @return Returns a data.table with size of the birth-cohort and age-specific
#'   incidence-rates, mortality-rates, years-of-life-lost, years-of-healthy-life-lost,
#'   and cancer-costs before and after vaccination.
#'   Also displays whether discounting has been used ("type" column).
#'
#' @examples
#' lifetab <- lifeTable(unlist(data.mortall[iso3=="AFG",
#'   as.character(0:100), with=FALSE], use.names=FALSE), 9)
#' cohort <- -1
#' incidence <- unlist(data.incidence[iso3=="AFG", as.character(0:100), with=FALSE],
#'   use.names=FALSE)
#' mortality_cecx <- unlist(data.mortall[iso3=="AFG", as.character(0:100), with=FALSE],
#'   use.names=FALSE)
#' prevalence <- unlist(data.cecx_5y_prevalence[iso3=="AFG",
#'   as.character(0:100), with=FALSE], use.names=FALSE)
#' agevac <- 9
#' coverage <- 0.8
#' campaigns <- -1
#' vaccine_efficacy_nosexdebut <- 0.95
#' vaccine_efficacy_sexdebut <- 0
#' cost_cancer <- 100
#'
#' RunCohort(lifetab, cohort, incidence, mortality_cecx, prevalence, agevac,
#'   coverage, campaigns, vaccine_efficacy_nosexdebut, vaccine_efficacy_sexdebut,
#'   cost_cancer, disc.cost=0.03, disc.ben=0.03,
#'   discounting=FALSE, country_iso3="AFG", run_country=FALSE)
#'
#' @export
#' @import data.table
#' @import foreach
#'
RunCohort <- function (lifetab,
                       cohort,
                       incidence,
                       mortality_cecx,
                       prevalence,
                       agevac,
                       coverage,
                       campaigns,
                       vaccine_efficacy_nosexdebut,
                       vaccine_efficacy_sexdebut,
                       cost_cancer,
                       discounting        = FALSE,
                       disc.cost          = 0.03,
                       disc.ben           = 0.03,
                       country_iso3       = NULL,
                       run_country        = FALSE,
                       disability.weights = "gbd_2017") {

  #check if required variables are present
  if (sum (!sapply (ls(), function(x) {checkSize(get(x))} ) ) > 0) {
    stop("Not all values have the required length")
  }

  ages <- lifetab [, age]
  lexp <- lifetab [, ex]

  ##############################################################################
  # Calculate weights for DALYs. This calculation of daly.canc.nonfatal and
  # daly.canc.fatal is specific for GBD 2001 disability weights.
  # daly.canc.nonfatal <- daly.canc.diag + daly.canc.seq * 4
  # daly.canc.fatal <- daly.canc.diag + daly.canc.terminal
  #
  ##############################################################################
  #
  # Calculate yld using GBD 2001 disability weights
  if (disability.weights == "gbd_2001") {

    ##############################################################################
    # daly.canc.seq <- switch(
    #   data.global[iso3==country_iso3,`WHO Mortality Stratum`],
    #   "A"=0.04,
    #   "B"=0.11,
    #   "C"=0.13,
    #   "D"=0.17,
    #   "E"=0.17
    # )
    ##############################################################################
    # disability weight for long term sequela based on WHO mortality stratum
    # this is specific for gbd_2001
    stratum       <- data.global [iso3==country_iso3, `WHO Mortality Stratum`]
    daly.canc.seq <- data.disability_weights [Source == "gbd_2001" &
                                                WHO_MortalityStratum == stratum,
                                              Mid]
    ##############################################################################

    # diagnosis, therapy and control over 1 year
    diag <- data.disability_weights [Source == disability.weights &
                                       Sequela == "diagnosis",
                                     Mid]

    # metastatic stage for 6 months and terminal stage for 6 months (for a total of 1 year)
    # note: disability weights for metastatic and terminals stages are equal
    terminal <- data.disability_weights [Source == disability.weights &
                                           Sequela == "terminal",
                                         Mid]

    # long term sequela (for 4 years)
    # note: duration of long-term sequela is same irrespective of
    # WHO mortality startum (A/B/C/D/C)
    control <- daly.canc.seq *
      data.disability_weights [Source == disability.weights &
                                 Sequela == "control" &
                                 WHO_MortalityStratum == "E",
                               Duration]

    # yld associated with a non fatal case & a fatal case
    daly.canc.nonfatal <- diag + control
    daly.canc.fatal    <- diag + terminal

    # combine yld contribution from non-fatal and fatal cases
    yld <- ((incidence - mortality_cecx) * daly.canc.nonfatal) +
      (mortality_cecx * daly.canc.fatal)

    if (discounting) {
      ############################################################################
      # estimate yld discounted (taking into account for morbidity in future years)
      # in gbd 2001, yld is attributed to age of incidence
      # diagnosis, therapy and control over 1 year + long term sequela for next 4 years
      daly.canc.nonfatal.disc <- diag +
        daly.canc.seq * ( 1/(1+disc.ben)   + 1/(1+disc.ben)^2 +
                          1/(1+disc.ben)^3 + 1/(1+disc.ben)^4 )

      # diagnosis, therapy and control in 1st year followed by
      # metastatic stage for 6 months and terminal stage for 6 months (in 2nd year)
      daly.canc.fatal.disc <- diag + terminal * 1/(1+disc.ben)

      yld.disc <- ((incidence - mortality_cecx) * daly.canc.nonfatal.disc) +
        (mortality_cecx * daly.canc.fatal.disc)

      # daly.canc.nonfatal.disc <- daly.canc.diag +
      #   daly.canc.seq * ( 1/(1+disc.ben) +
      #                       1/(1+disc.ben)^2 +
      #                       1/(1+disc.ben)^3 +
      #                       1/(1+disc.ben)^4 )
      #
      # daly.canc.fatal.disc <- daly.canc.diag + daly.canc.terminal * 1/(1+disc.ben)
      ############################################################################
    }

  } else if (disability.weights == "gbd_2017") {

    # calculate yld using GBD 2017 disability weights
    ##########################################################################
    # diability weights for different phases of cervical cancer
    # (diagnosis & therapy, controlled, metastatic, terminal)
    # dw = list (diag       = daly.canc.diag,
    #            control    = daly.canc.control,
    #            metastatic = daly.canc.metastatic,
    #            terminal   = daly.canc.terminal)
    #
    # duration of different phases of cervical cancer
    # (diagnosis & therapy, controlled, metastatic, terminal) -- unit in years
    # cecx_duration = list (diag = 4.8/12, metastatic = 9.21/12, terminal = 1/12)
    # duration of controlled phases is based on remainder of time after attributing to other phases
    #
    ##########################################################################

    # disability weights for different phases of cervical cancer
    # (diagnosis & therapy, controlled, metastatic, terminal)
    dw = list (diag       = data.disability_weights [Source == disability.weights &
                                                       Sequela == "diagnosis",
                                                     Mid],
               control    = data.disability_weights [Source == disability.weights &
                                                       Sequela == "control",
                                                     Mid],
               metastatic = data.disability_weights [Source == disability.weights &
                                                       Sequela == "metastatic",
                                                     Mid],
               terminal   = data.disability_weights [Source == disability.weights &
                                                       Sequela == "terminal",
                                                     Mid])

    # duration of different phases of cervical cancer
    # (diagnosis & therapy, controlled, metastatic, terminal) -- unit in years
    cecx_duration = list (diag       = data.disability_weights [Source == disability.weights &
                                                                  Sequela=="diagnosis",
                                                                Duration],
                          metastatic = data.disability_weights [Source == disability.weights &
                                                                  Sequela=="metastatic",
                                                                Duration],
                          terminal   = data.disability_weights [Source == disability.weights &
                                                                  Sequela=="terminal",
                                                                Duration])
    # duration of controlled phases is based on remainder of time after attributing to other phases

    # combine yld contribution from (incidence, prevalence and mortality) cases
    yld <-
      (incidence  * dw$diag * cecx_duration$diag) +
      (prevalence * dw$control) +
      (mortality_cecx * ( (dw$metastatic * cecx_duration$metastatic) +
                           (dw$terminal * cecx_duration$terminal) ) )

    if (discounting) {
      # estimate yld discounted (taking into account for morbidity in future years)
      # in gbd 2017, since yld is attributed to age of prevalence,
      # yld (discounted) = yld (undiscounted)
      yld.disc <- yld
    }

  }


  ##############################################################################

  # discounting
  if (discounting) {

    # daly.canc.nonfatal.disc <- daly.canc.diag +
    #   daly.canc.seq * ( 1/(1+disc.ben) +
    #                       1/(1+disc.ben)^2 +
    #                       1/(1+disc.ben)^3 +
    #                       1/(1+disc.ben)^4 )
    #
    # daly.canc.fatal.disc <- daly.canc.diag + daly.canc.terminal * 1/(1+disc.ben)

    disc.cost.yr <- rep(0, length(ages))
    disc.ben.yr  <- rep(0, length(ages))

    disc.cost.yr [1:(which(ages == agevac))] <- 1
    disc.ben.yr  [1:(which(ages == agevac))] <- 1

    # age of vaccination is base year for discounting of this cohort
    # estimate cumulative discount rates for each year beyond age of vaccination
    for (a in ages [which (ages > agevac) ]) {
      disc.cost.yr [which (ages == a)] <- 1 / (1 + disc.cost)^(a - agevac)
      disc.ben.yr  [which (ages == a)] <- 1 / (1 + disc.ben)^(a - agevac)
    }


    ##############################################################################
    # estimate remaining life expectancy (discounted)
    # lexp.disc <- rep(0,length(ages))
    #
    # for (a in ages[-which(ages==max(ages))]) {
    #   lexp.disc [which (ages==a)] <- sum(
    #     disc.ben.yr [1:floor (
    #       lexp [which (ages==a)]
    #     )]
    #   ) + (
    #     lexp [which (ages==a)]
    #     -floor(
    #       lexp [which (ages==a)]
    #     )
    #   ) * disc.ben.yr [floor(
    #     lexp [which (ages==a)]
    #   ) + 1]
    # }

  # YLL discounting based on formula = (1 - exp^( -r * L )) / r
  # https://www.who.int/quantifying_ehimpacts/publications/en/9241546204chap3.pdf

  lexp.disc <- (1 - exp (-1 * disc.ben * lexp)) / disc.ben

  }
  ##############################################################################


  coverage <- ageCoverage (ages,
                           coverage,
                           vaccine_efficacy_nosexdebut,
                           vaccine_efficacy_sexdebut,
                           campaigns,
                           lifetab,
                           cohort,
                           agevac,
                           country_iso3 = country_iso3)

  # In out.pre data table, 'lifey' refers to YLL and 'disability' refers to YLD
  # YLL - Years of Life Lost due to premature mortality
  # YLD - Years of Life lost due to Disability

  # expected number of cases, deaths, dalys, and costs (static)
  out.pre <- data.table (
    age         = ages,
    cohort_size = cohort * lifetab [, lx.adj],
    vaccinated  = rep (0, length(ages)),
    immunized   = rep (0, length(ages)),
    inc.cecx    = incidence,
    mort.cecx   = mortality_cecx,
    lifey       = mortality_cecx * lexp,
    # disability  = (incidence - mortality_cecx)*daly.canc.nonfatal + mortality_cecx*daly.canc.fatal,
    # disability  = (incidence  * daly.canc.diag * 4.8/12) + (prevalence * daly.canc.control) + (mortality_cecx * (daly.canc.metastatic * 9.21/12 + daly.canc.terminal * 1/12) ),
    # disability  = (incidence  * dw$diag * cecx_duration$diag) + (prevalence * dw$control) + (mortality_cecx * (dw$metastatic * cecx_duration$metastatic + dw$terminal * cecx_duration$terminal) ),
    disability  = yld,
    cost.cecx   = incidence * cost_cancer,
    prev.cecx   = prevalence
  )

  out.post                   <- out.pre
  out.post                   <- out.post * (1 - coverage [, effective_coverage])
  out.post [, "age"]         <- ages
  out.post [, "cohort_size"] <- cohort * lifetab [, lx.adj]
  out.post [, "vaccinated"]  <- coverage [, coverage]
  out.post [, "immunized"]   <- coverage [, effective_coverage]

  # discounted
  if (discounting) {
    out.pre.disc                  <- out.pre
    out.pre.disc [, "inc.cecx"]   <- out.pre.disc [, inc.cecx]   * disc.ben.yr
    out.pre.disc [, "mort.cecx"]  <- out.pre.disc [, mort.cecx]  * disc.ben.yr
    out.pre.disc [, "prev.cecx"]  <- out.pre.disc [, prev.cecx]  * disc.ben.yr
    out.pre.disc [, "lifey"]      <- out.pre [,mort.cecx] * lexp.disc * disc.ben.yr

    ############################################################################
    # out.pre.disc [, "disability"] <- (
    #   (out.pre[, inc.cecx] -
    #      out.pre [, mort.cecx]) * daly.canc.nonfatal.disc +
    #      out.pre [, mort.cecx]  * daly.canc.fatal.disc
    # ) * disc.ben.yr

    out.pre.disc [, "disability"] <- yld.disc * disc.ben.yr
    ############################################################################

    out.pre.disc [, "cost.cecx"]    <- out.pre [, cost.cecx] * disc.cost.yr

    out.post.disc                   <- out.pre.disc
    out.post.disc                   <- out.pre.disc * (1-coverage[, effective_coverage])

    out.post.disc [, "age"]         <- ages
    out.post.disc [, "cohort_size"] <- cohort * lifetab[, lx.adj]
    out.post.disc [, "vaccinated"]  <- coverage [, coverage]
    out.post.disc [, "immunized"]   <- coverage [, effective_coverage]

    out.pre.disc  [, "scenario"]    <- "pre-vaccination"
    out.pre.disc  [, "type"]        <- "discounted"
    out.post.disc [, "scenario"]    <- "post-vaccination"
    out.post.disc [, "type"]        <- "discounted"
  }

  out.pre  [, "scenario"] <- "pre-vaccination"
  out.pre  [, "type"]     <- "undiscounted"
  out.post [, "scenario"] <- "post-vaccination"
  out.post [, "type"]     <- "undiscounted"

  # combine results
  if (discounting) {
    out.pre <- rbindlist(
      list(
        out.pre,
        out.pre.disc
      )
    )
    out.post <- rbindlist(
      list(
        out.post,
        out.post.disc
      )
    )
  }

  out <- rbindlist(
    list(
      out.pre,
      out.post
    )
  )

  out <- out [ , c ("scenario",
                    "type",
                    "age",
                    "cohort_size",
                    "vaccinated",
                    "immunized",
                    "inc.cecx",
                    "mort.cecx",
                    "lifey",
                    "disability",
                    "cost.cecx",
                    "prev.cecx"),
               with = FALSE]

  return (out)

} # end of function -- RunCohort
#-------------------------------------------------------------------------------


#-------------------------------------------------------------------------------
#' Run PRIME for a specific country
#'
#' Runs RunCohort() using country-specific estimates.
#' If year_born and year_vac are not provided, assumes vaccination occurs in
#'   the current year.
#'
#' @param country_iso3 Character string (required): ISO3 code of the country
#' @param vaceff Number (optional): Proportion indicating vaccine-efficacy
#' @param cov Number (optional): Proportion with routine coverage
#' @param agevac Integer (optional): Target age for HPV vaccination
#' @param agecohort Integer (optional): Reference age for cohort-size
#'        (only used when 'cohort' is not provided)
#' @param cohort Integer (optional): Cohort-size. -1 if unknown
#' @param canc.inc Integer (optional): Reference year for cancer incidence rates
#'        (Globocan: 2020 or 2018 or 2012)
#' @param sens Numeric-vector (optional): Specific values to be used in a PSA.
#'        -1 if PSA's are not used
#' @param unwpp_mortality Logical (optional): If TRUE, uses year-specific UNWPP
#'        mortality estimates to construct life-tables.
#'        If FALSE, use WHO based mortality estimates.
#' @param year_born Integer (optional): Year in which cohort is born
#' @param year_vac Integer (optional): Year in which cohort is vaccinated
#' @param campaigns List (optional): Multi-Age-Cohort campaigns
#'        (needs to be changed)
#' @param analyseCosts Logical (optional): If FALSE, returns result from
#'        RunCohort() function.
#'        If TRUE, runs analyseCosts() with country-specific results.
#' @param canc.cost Character (optional): Is cost of cancer adjusted
#'        ("adj" for International $) or not ("unadj" for US$)
#' @param discounting Logical (optional): If TRUE, run cost-effectiveness analysis
#'        undiscounted and discounted. If FALSE, only uses undiscounted
#' @param disc.cost Number (optional): Discounting for health costs
#'        (only if discounting=TRUE)
#' @param disc.ben Number (optional): Discounting for health outcomes
#'        (only if discounting=TRUE)
#' @param disability.weights character, disability weights for cervical cancer
#'        from GBD 2017 or GBD 2001
#' @param wb.indicator character, World Bank indicator for GDP/GNI per capita
#'        in I$/US$ and current/constant data
#' @param wb.year numeric, year of the World Bank indicator value
#' @param vaccine character, bivalent/quadrivalent (4vHPV) or
#'        nonavalent (9vHPV) vaccine
#'
#' @return data.table with country-specific results of HPV vaccination.
#'         Returns cost-analysis if analyseCosts=TRUE
#' @export
#' @importFrom wbstats wb
#'
#' @examples RunCountry("AFG")
#' @examples RunCountry("AFG", year_vac=2020, agevac=10, cov=0.75, vaceff_beforesexdebut =0.88)
#' @examples RunCountry("AFG", year_vac=2020, agevac=10, cov=0.75, vaceff_beforesexdebut =0.88,
#'           analyseCosts=TRUE)
RunCountry <- function (country_iso3,
                        vaceff_beforesexdebut = 1,
                        vaceff_aftersexdebut  = 0,
                        cov                   = 1,
                        agevac                = 10,
                        agecohort             = 10,
                        cohort                = -1,
                        canc.inc              = "2020",
                        sens                  = -1,
                        unwpp_mortality       = TRUE,
                        year_born             = -1,
                        year_vac              = -1,
                        campaigns             = -1,
                        analyseCosts          = FALSE,
                        canc.cost             = "unadj",
                        discounting           = FALSE,
                        disc.cost             = 0.03,
                        disc.ben              = 0.03,
                        run_batch             = FALSE,
                        psadat                = -1,
                        disability.weights    = "gbd_2017",
                        wb.indicator          = "NY.GDP.PCAP.PP.CD",
                        wb.year               = 2017,
                        vaccine               = "4vHPV") {

  ## check if all required data is present in the global environment
  # if(sum(!(c("data.incidence", "data.global", "data.costcecx", "data.mortcecx", "data.mortall", "data.mortall.unwpp") %in% ls(name=.GlobalEnv, all.names=T))) > 0){
  #	stop("Not all required datafiles seem to be present in your environment. Please load all datafiles required.")
  # }

  # check if required variables are present (sanity check, sees if any variables
  # passed to function have a length of 0)
  if (sum (!sapply (ls(), function(x) {checkSize(get(x))} )) > 0) {
    stop ("Not all values have the required length")
  }

  # retrieve year of birthcohort
  if (year_vac!=-1 & year_born!=-1 ) {
    # check if year of vaccination corresponds with age of vaccination
    # for this birth-cohort
    if (year_vac-agevac != year_born) {
      stop (
        paste0 ("Year of vaccination (",
                year_vac,
                ") and age of vaccination (",
                agevac,
                ") do not correspond with chosen birthcohort (",
                year_born,
                "). Please change accordingly or omit 'year_born' or 'year_vac'."
        )
      )
    }
  } else if (year_vac==-1 & year_born!=-1) {
    year_vac <- year_born+agevac

  } else if (year_vac!=-1 & year_born==-1) {
    year_born <- year_vac-agevac

  } else if (year_vac==-1 & year_born==-1) {
    #assume vaccination in current year
    year_vac <- as.numeric(format(Sys.time(),format="%Y"))
    year_born <- year_vac-agevac
  }

  ages <- as.numeric (colnames (data.incidence) [!grepl ("\\D",
                                                    colnames (data.incidence))])
  ages <- ages[!is.na(ages)]


  # ----------------------------------------------------------------------------
  # Before using DISEASE BURDEN data (check for countries with unavailable data)
  # ----------------------------------------------------------------------------
  # If no data available, switch to another country as proxy
  if ( (country_iso3 %in% c("XK")) |
       ( (country_iso3 == "PSE") & (canc.inc == "2012") ) ) {
    proxy <- TRUE
    country_iso3 <- switch (
      country_iso3,
      "XK"  = "ALB",  # demography (unwpp) available for XK
                      # no data for burden, demography (who), cost for XK
      "PSE" = "JOR",  # burden (2018) & demography (unwpp) available for PSE but no cost
                      # no data for burden (2012), demography (who), cost for PSE
      country_iso3
    )
  } else {
    proxy <- FALSE
  }
  # ----------------------------------------------------------------------------
  # The following commented code snippet is redundant -- can be removed
  # # ----------------------------------------------------------------------------
  # # If no data available, use other country as proxy
  # # if ( country_iso3 %in% c("XK","MHL","TUV","PSE") ) {
  # if ( country_iso3 %in% c("XK") ) {
  #   proxy <- TRUE
  #   country_iso3 <- switch (
  #     country_iso3,
  #     "XK"  = "ALB",  # demography data available for XK but no burden & cost
  #     # "PSE" = "JOR",  # burden & demography data available for PSE but no cost
  #                       # no cancer burden data for globocan 2012
  #     # "MHL" = "KIR",
  #     # "TUV" = "FJI",
  #     country_iso3
  #   )
  # } else {
  #   proxy <- FALSE
  # }
  #
  # # If no data available, use other country as proxy
  # if ( (country_iso3 %in% c("PSE")) & (canc.inc == "2012")) {
  #   proxy <- TRUE
  #   country_iso3 <- switch (
  #     country_iso3,
  #     "PSE" = "JOR",  # burden & demography data available for PSE but no cost and
  #                     # no cancer burden data for globocan 2012
  #     country_iso3
  #   )
  # } else {
  #   proxy <- FALSE
  # }
  #
  # # ----------------------------------------------------------------------------


  ######################### Look into this
  # # burden & demography data available for PSE (switch back from JOR)
  # if (proxy) {
  #   country_iso3 <- switch (
  #     country_iso3,
  #     "JOR" = "PSE",  # burden & demography data available for PSE but no cost
  #     country_iso3
  #   )
  # }
  ######################### Look into this

  # ----------------------------------------------------------------------------
  # # % of CeCx due to 16/18
  # p1618 <- data.global [iso3==country_iso3, `% CeCx due to 16/18`] / 100

  # Percentage (%) of cervical cancer due to HPV high risk types contained in
  # bivalent/quadrivalent/nonavalent HPV vaccine
  # 4vHPV - bivalent/quadrivalent HPV vaccine
  # 9vHPV - nonavalent HPV vaccine
  #
  if (vaccine == "4vHPV") {

    p1618 <- data.hpv_distribution [iso3 == country_iso3, hpv_4v] / 100

  } else if (vaccine == "9vHPV") {

    p1618 <- data.hpv_distribution [iso3 == country_iso3, hpv_9v] / 100

  }
  # ----------------------------------------------------------------------------

  # age-dependent parameters
  if (canc.inc == "2020") {
    inc <- unlist (data.incidence2020 [iso3 == country_iso3,
                                       as.character(ages),
                                       with = F],
                   use.names=F) * p1618

    mort.cecx <- unlist (data.mortcecx2020 [iso3 == country_iso3,
                                            as.character(ages),
                                            with = F],
                         use.names=F) * p1618

    cecx_5y_prev <- unlist (data.cecx_5y_prevalence2020 [iso3==country_iso3,
                                                         as.character(ages),
                                                         with = F],
                            use.names=F) * p1618
  }
  else if (canc.inc == "2018") {
    inc <- unlist (data.incidence [iso3==country_iso3,
                                   as.character(ages),
                                   with=F],
                   use.names=F) * p1618

    mort.cecx <- unlist (data.mortcecx [iso3==country_iso3,
                                        as.character(ages),
                                        with=F],
                         use.names=F) * p1618

    cecx_5y_prev <- unlist (data.cecx_5y_prevalence [iso3==country_iso3,
                                                     as.character(ages),
                                                     with=F],
                            use.names=F) * p1618
  }
  else if (canc.inc == "2012") {
    inc <- unlist (data.incidence2012 [iso3==country_iso3,
                                       as.character(ages),
                                       with=F],
                   use.names=F) * p1618

    mort.cecx    <- unlist (data.mortcecx2012 [iso3==country_iso3,
                                               as.character(ages),
                                               with=F],
                            use.names=F) * p1618

    cecx_5y_prev <- unlist (data.cecx_5y_prevalence [iso3==country_iso3,
                                                     as.character(ages),
                                                     with=F],
                            use.names=F) * p1618

    # 5-year cervical cancer prevalence data for 2012 is not available
    cecx_5y_prev <- cecx_5y_prev * 0
  }
  else if (canc.inc == "2008") {
    #inc=as.numeric(data.incidence08[c,2:(maxage+2)])*p1618
    #mort.cecx=as.numeric(data.mortcecx08[c,2:(maxage+2)])*p1618
  }

  # set incidence and mortality and prevalence values to 0 if they are missing
  inc          [which (is.na (inc)          )] <- 0
  mort.cecx    [which (is.na (mort.cecx)    )] <- 0
  cecx_5y_prev [which (is.na (cecx_5y_prev) )] <- 0


  # ----------------------------------------------------------------------------
  # After using DISEASE BURDEN data
  # ----------------------------------------------------------------------------
  # Switch back to original country for which proxy was set due to unavailable data
  if (proxy) {
    proxy <- FALSE
    country_iso3 <- switch (
      country_iso3,
      "ALB" = "XK",
      "JOR" = "PSE",
      country_iso3
    )
  }
  # ----------------------------------------------------------------------------


  ##############################################################################
  # daly.canc.seq <- switch(
  #   data.global[iso3==country_iso3,`WHO Mortality Stratum`],
  #   "A"=0.04,
  #   "B"=0.11,
  #   "C"=0.13,
  #   "D"=0.17,
  #   "E"=0.17
  # )
  ##############################################################################
  # disability weight for long term sequela based on WHO mortality stratum
  # this is specific for gbd_2001
  # stratum       <- data.global [iso3==country_iso3, `WHO Mortality Stratum`]
  # daly.canc.seq <- data.disability_weights [Source=="gbd_2001" &
  #                                             WHO_MortalityStratum==stratum, Mid]
  ##############################################################################

  # Calculate total and vaccinated cohort size
  # If UN population projections unavailable, cohort size = 1 otherwise
  # cohort size = number of 10-14y/5
  if (cohort==-1) {

    # --------------------------------------------------------------------------
    # get cohort size from UNWPP data table -- data.pop

    # get cohort size at vaccination age
    # if year > 2100, then use cohort size for vaccination age in 2100
    if ((year_born + agecohort) <= 2100) {

      cohort <- data.pop [country_code == country_iso3 &
                            year == (year_born + agecohort) &
                            age_from == agecohort,
                          value]
    } else {

      cohort <- data.pop [country_code == country_iso3 &
                            year == 2100 &
                            age_from == agecohort,
                          value]
    }
    # --------------------------------------------------------------------------

    # data.popproj -- not used anymore (to be removed from prime package)
    # cohort <- unlist (data.popproj [iso3 == country_iso3,
    #                                 as.character(year_born + agecohort),
    #                                 with=F],
    #                   use.names=F)/5

    if (length(cohort)==0) {
      cohort <- 1
    }
    # --------------------------------------------------------------------------
    # The following code snippet is redundant (proxy is FALSE at this stage) -- start
    # It can be removed.
    else if (proxy) {
      cohort <- switch (
        country_iso3,
        "ALB" = cohort * 1824000/2774000,
        # "KIR" = cohort * 52634/102351,
        # "FJI" = cohort * 9876/881065,
        "JOR" = cohort * 4170000/6459000,
        cohort
      )
      # The above code snippet is redundant (proxy is FALSE at this stage) -- end
      # --------------------------------------------------------------------------
    }
  }


  # ----------------------------------------------------------------------------
  # Before using DEMOGRAPHY data (check for countries with unavailable data)
  # ----------------------------------------------------------------------------
  # If no data available, switch to another country as proxy
  if ( (country_iso3 %in% c("XK", "PSE")) &
       (unwpp_mortality == FALSE) )  {
    proxy <- TRUE
    country_iso3 <- switch (
      country_iso3,
      "XK"  = "ALB",  # demography (unwpp) available for XK
                      # no data for burden, demography (who), cost for XK
      "PSE" = "JOR",  # burden (2018) & demography (unwpp) available for PSE but no cost
                      # no data for burden (2012), demography (who), cost for PSE
      country_iso3
    )
  } else {
    proxy <- FALSE
  }
  # ----------------------------------------------------------------------------
  # The following commented code snippet is redundant -- can be removed
  # # ----------------------------------------------------------------------------
  # # If no data available, use other country as proxy
  # # if ( country_iso3 %in% c("XK","MHL","TUV","PSE") ) {
  # if ( country_iso3 %in% c("PSE") ) {
  #   proxy <- TRUE
  #   country_iso3 <- switch (
  #     country_iso3,
  #     # "XK"  = "ALB",  # demography data available for XK but no burden & cost
  #     "PSE" = "JOR",  # burden & demography data available for PSE but no cost
  #     # "MHL" = "KIR",
  #     # "TUV" = "FJI",
  #     country_iso3
  #   )
  # } else {
  #   # proxy <- FALSE
  # }
  # # ----------------------------------------------------------------------------


  # create lifetables
  if (!unwpp_mortality) {
    # Use WHO mortality estimates
    mort.all <- unlist (data.mortall[iso3==country_iso3,
                                     as.character(ages),
                                     with=F],
                        use.names=F)
    lifetab  <- lifeTable (qx=mort.all, agecohort)
  } else {

    # Use UNWPP mortality estimates
    # if year is outside of scope of mortality estimates, use last available data
    mx <- numeric(length(ages))

    for (a in ages) {
      if (year_born+a > max (data.mortall.unwpp.nqx [country_code == country_iso3, year])) {
        lookup.yr <- max (data.mortall.unwpp.nqx [country_code == country_iso3, year])
      } else if (year_born+a < min (data.mortall.unwpp.nqx [country_code == country_iso3, year])) {
        lookup.yr <- min (data.mortall.unwpp.nqx [country_code == country_iso3, year])
      } else {
        lookup.yr <- year_born + a
      }

      # ------------------------------------------------------------------------
      #### UNWPP changes

      # read nqx value and start and end ages of interval
      # note: this is nqx (and not mx)
      mortality <- data.mortall.unwpp.nqx [(country_code == country_iso3) &
                                            (age_from   <= a) &
                                            (age_to     >= a) &
                                            (year - (lookup.yr) <  1) &
                                            (year - (lookup.yr) > -5),
                                          list (value, age_from, age_to)]

      # set mortality to 1 if no data is found
      # if (length(mortality) < 1) {
      if (nrow (mortality) < 1) {
        mortality <- 1
      } else {
        # calculate central death rate (mx)
        # Calculate central death rate for specific age intervals
        age_interval <- (mortality$age_to - mortality$age_from + 1)

        # 'mx' = ('nqx' / (1 - 0.5 * 'nqx') ) / n
        mortality <- (mortality$value / (1 - 0.5 * mortality$value) ) / age_interval
      }

      mx[which(ages==a)] <- mortality
    }

    lifetab <- lifeTable (mx = mx,
                          agecohort = agecohort)
    # lifetab <- lifeTable (mx=mx, agecohort=agecohort)


    #### UNWPP changes
    # --------------------------------------------------------------------------

  #   # ------------------------------------------------------------------------
  #   #### UNWPP changes
    # if data.mortall.unwpp.mx is used instead of data.mortall.unwpp.nqx,
    # then uncomment the below code snippet and comment the above code snippet.
    #
    # if (year_born+a > max(data.mortall.unwpp.mx[country_code==country_iso3,year])) {
    #   lookup.yr <- max(data.mortall.unwpp.mx[country_code==country_iso3,year])
    # } else if (year_born+a < min(data.mortall.unwpp.mx[country_code==country_iso3,year])) {
    #   lookup.yr <- min(data.mortall.unwpp.mx[country_code==country_iso3,year])
    # } else {
    #   lookup.yr <- year_born + a
    # }
    #
  #   mortality <- data.mortall.unwpp.mx [(country_code == country_iso3) &
  #                                         (age_from   <= a) &
  #                                         (age_to     >= a) &
  #                                         (year - (lookup.yr) <  1) &
  #                                         (year - (lookup.yr) > -5),
  #                                       value]
  #
  #   # set mortality to 1 if no data is found
  #   if (length(mortality) < 1) {
  #     mortality <- 1
  #   }
  #   mx[which(ages==a)] <- mortality
  # }
  #
  # lifetab <- lifeTable (mx=mx, agecohort=agecohort)
  # #### UNWPP changes
  # # --------------------------------------------------------------------------

  }  # end of -- if (!unwpp_mortality)  # create lifetables

  # ----------------------------------------------------------------------------
  # After using DEMOGRAPHY data
  # ----------------------------------------------------------------------------
  # Switch back to original country for which proxy was set due to unavailable data
  if (proxy) {
    proxy <- FALSE
    country_iso3 <- switch (
      country_iso3,
      "ALB" = "XK",
      "JOR" = "PSE",
      country_iso3
    )
  }
  # ----------------------------------------------------------------------------



  # ----------------------------------------------------------------------------
  # Before using COST data (check for countries with unavailable data)
  # ----------------------------------------------------------------------------
  # If no data available, switch to another country as proxy
  if ( country_iso3 %in% c("XK", "PSE") ) {
    proxy <- TRUE
    country_iso3 <- switch (
      country_iso3,
      "XK"  = "ALB",  # demography (unwpp) available for XK
                      # no data for burden, demography (who), cost for XK
      "PSE" = "JOR",  # burden (2018) & demography (unwpp) available for PSE but no cost
                      # no data for burden (2012), demography (who), cost for PSE
      country_iso3
    )
  } else {
    proxy <- FALSE
  }
  # ----------------------------------------------------------------------------


  # ----------------------------------------------------------------------------
  # set country specific variables
  # cost per FVG
  cost.vac <- monetary_to_number (
    data.global [iso3 == country_iso3,
                 `Vaccine price (USD) [4]`]
  ) + monetary_to_number (
    data.global [iso3 == country_iso3,
                 `Vaccine delivery/ operational/ admin costs (USD) [5]`]
  )

  # cost per cancer episode
  if (canc.cost == "unadj") {
    # cost.canc <- monetary_to_number(data.costcecx[iso3==country_iso3, cancer_cost])
    cost.canc <- data.costcecx [iso3==country_iso3, cancer_cost]
  } else if (canc.cost == "adj") {
    # cost.canc <- monetary_to_number(data.costcecx[iso3==country_iso3, cancer_cost_adj])
    cost.canc <- data.costcecx [iso3==country_iso3, cancer_cost_adj]
  }

  # if cost unavailable and cost analysis is not required, then set cost to 0
  # in this way, health impact analysis can still be run for such countries
  # (GLP, REU, NCL, MTQ, PYF, GUM, PRI, GUF)
  if ( (length(cost.vac)  == 0) & (analyseCosts == FALSE) ) {
    cost.vac <- 0
  }
  if ( (length(cost.canc) == 0) & (analyseCosts == FALSE) ) {
    cost.canc <- 0
  }

  # ----------------------------------------------------------------------------


  # ----------------------------------------------------------------------------
  # After using COST data
  # ------------------------------------------------------------------------------
  # Switch back to original country for which proxy was set due to unavailable data
  if (proxy) {
    proxy <- FALSE
    country_iso3 <- switch (
      country_iso3,
      "ALB" = "XK",
      "JOR" = "PSE",
      country_iso3
    )
  }
  # ----------------------------------------------------------------------------


  ## UPDATE: To be updated -- PSA for cecx_5y_prev

  if (is.list(psadat)) {
    #apply psa multipliers for incidence and mortality
    if ("incidence" %in% names(psadat)) {
      inc <- inc * psadat[["incidence"]]
    }
    if ("mortality" %in% names(psadat)) {
      mort.cecx <- mort.cecx * psadat[["mortality"]]
    }
    if ("vaccine_efficacy" %in% names(psadat)) {
      vaceff <- vaceff * psadat[["vaccine_efficacy"]]
    }
    if ("vaccine_cost" %in% names(psadat)) {
      cost.vac <- cost.vac * psadat[["vaccine_cost"]]
    }
    if ("cancer_cost" %in% names(psadat)) {
      cost.canc <- cost.canc * psadat[["cancer_cost"]]
    }
    if ("discounting_cost" %in% names(psadat)) {
      disc.cost <- disc.cost * psadat[["discounting_cost"]]
    }
    if ("discounting_ben" %in% names(psadat)) {
      disc.ben <- disc.ben * psadat[["discounting_ben"]]
    }
  }


  # calling RunCohort function
  result_cohort <- RunCohort (
    lifetab                     = lifetab,
    cohort                      = cohort,
    incidence                   = inc,
    mortality_cecx              = mort.cecx,
    prevalence                  = cecx_5y_prev,
    agevac                      = agevac,
    coverage                    = cov,
    campaigns                   = campaigns,
    vaccine_efficacy_nosexdebut = vaceff_beforesexdebut,
    vaccine_efficacy_sexdebut   = vaceff_aftersexdebut,
    cost_cancer                 = cost.canc,
    discounting                 = discounting,
    disc.cost                   = disc.cost,
    disc.ben                    = disc.ben,
    country_iso3                = country_iso3,
    run_country                 = TRUE,
    disability.weights          = disability.weights
  )

  if (analyseCosts) {
    # gdp_per_capita <- monetary_to_number (
    #   data.global [iso3==country_iso3, `GDP per capita (2011 i$) [7]`] )

    # Note: Refer to World Bank indicators at https://data.worldbank.org/indicator
    # For example, GDP per capita, PPP (current international $) - 2017
    # https://data.worldbank.org/indicator/NY.GDP.PCAP.PP.CD

    # GDP/GNI per capita
    # wb.indicator: World Bank indicator for GDP/GNI per capita in I$/US$ and current/constant data
    # wb.year: year of the World Bank indicator value
    # Note: Data available for PSE and XK -- thereby, no need to set proxy to another country
    gdp_per_capita <- wb (country   = country_iso3,
                          indicator = wb.indicator,
                          startdate = wb.year,
                          enddate   = wb.year)$value

    return (analyseCosts (result_cohort, cost.vac, gdp_per_capita))
  }
  else {
    return (result_cohort)
  }

} # end of function -- RunCountry
#-------------------------------------------------------------------------------


#-------------------------------------------------------------------------------
#' Retrieve ISO3-code of country
#'
#' @param countryname Character string (required): Full name of the country
#' @param name Logical (optional): If TRUE, returns full name and alternative
#'        names of returned country (may be useful to double-check that it is
#'        the correct country)
#'
#' @return Character string with ISO3 code. Will also return full name
#'         if name=TRUE.
#' @export
#'
#' @examples
#' getISO3("Afghanistan")
#' getISO3("Congo",name=TRUE)
getISO3 <- function (countryname,
                     name = FALSE) {

  countryname <- data.table (country=countryname)
  if (name) {
    country_iso3 <- dtColMatch (countryname,
                                c("country"),
                                data.countryname,
                                c("name1", "name2", "name3", "name4"),
                                "iso3")

    name     <- data.countryname [iso3==country_iso3, name1]

    name_alt <- unique (unlist (data.countryname [iso3==country_iso3,
                                                  c("name2", "name3", "name4"),
                                                  with=FALSE],
                                use.names=FALSE))

    name_alt <- name_alt[!(name_alt %in% c(""," "))]

    if (length(name_alt) > 0) {
      name <- paste0(
        name," (",
        paste0(name_alt,collapse="; "),
        ")"
      )
    }

    return ( paste0(name, ": ", country_iso3) )
  } else {

    return (dtColMatch (countryname,
                        c("country"),
                        data.countryname,
                        c("name1", "name2", "name3", "name4"),
                        "iso3")
            )
  }

} # end of function -- getISO3
#-------------------------------------------------------------------------------


#-------------------------------------------------------------------------------
#' Returns cost-effectiveness for a single birthcohort in a single country
#'
#' Usually called using RunCountry(..., analyseCosts=TRUE)
#'
#' @param results Data.table (required): results from RunCohort()
#' @param vaccine_cost Number (required): cost of a single vaccine
#' @param gdp_per_capita Number (required): GDP per capita
#'
#' @return Data.table with cost-analysis
#' @export
#'
#' @examples analyseCosts(RunCountry("AFG"), 100, 561)
analyseCosts <- function (results,
                          vaccine_cost,
                          gdp_per_capita) {

  #check if required variables are present
  if (sum (!sapply(ls(), function(x) {checkSize(get(x))})) > 0) {
    stop ("Not all values have the required length")
  }

  results [, "vaccinated"] <- results [, vaccinated] * results [, cohort_size]
  results [, "immunized"]  <- results [, immunized]  * results [, cohort_size]
  results [, "inc.cecx"]   <- results [, inc.cecx]   * results [, cohort_size]
  results [, "mort.cecx"]  <- results [, mort.cecx]  * results [, cohort_size]
  results [, "lifey"]      <- results [, lifey]      * results [, cohort_size]
  results [, "disability"] <- results [, disability] * results [, cohort_size]
  results [, "cost.cecx"]  <- results [, cost.cecx]  * results [, cohort_size]

  costvariables <- c ("Cohort size",
                      "Vac cohort size",
                      "Vaccine cost",
                      "Costs saved",
                      "Net cost",
                      "CeCx prevented",
                      "Deaths prevented",
                      "Life years saved",
                      "Nonfatal DALYs prevented",
                      "Cost/death prevented",
                      "Cost/life year saved",
                      "Cost/DALY prevented",
                      "GDP/capita",
                      "CE at 1xGDP/capita?",
                      "CE at 3xGDP/capita?",
                      "Cut-off price"
  )

  costeffect <- data.table (variable = costvariables)

  types <- unique (results[, type])

  for (d in types) {
    costeffect [,d] <- numeric(length(costvariables))
  }

  # get agevac
  vaccinated <- 0
  a          <- -1

  while (vaccinated==0) {
    a <- a+1
    vaccinated <- results [scenario == "post-vaccination" &
                             type   == "undiscounted" &
                             age    == a,
                           vaccinated]
  }
  agevac <- a

  cohort_size <- 0
  a           <- -1

  while (cohort_size==0) {
    a <- a+1
    cohort_size <- results [scenario == "post-vaccination" &
                              type   == "undiscounted" &
                              age    == a,
                            cohort_size]
  }
  agecohort <- a

  aggregated <- dtAggregate (results,
                             "age",
                             id.vars = c("scenario", "type") )

  difference <- aggregated [scenario == "pre-vaccination"]

  difference [, "scenario"] <- "difference"

  difference [, "cohort_size"] <-
    abs (aggregated [scenario=="pre-vaccination", cohort_size] -
           aggregated [scenario=="post-vaccination", cohort_size])

  difference [, "vaccinated"] <-
    abs (aggregated [scenario=="pre-vaccination", vaccinated] -
           aggregated [scenario=="post-vaccination", vaccinated])

  difference [, "immunized"] <-
    abs (aggregated [scenario=="pre-vaccination", immunized] -
           aggregated [scenario=="post-vaccination", immunized])

  difference [, "inc.cecx"] <-
    aggregated [scenario=="pre-vaccination",  inc.cecx] -
    aggregated [scenario=="post-vaccination", inc.cecx]

  difference [, "mort.cecx"] <-
    aggregated [scenario=="pre-vaccination",  mort.cecx] -
    aggregated [scenario=="post-vaccination", mort.cecx]

  difference [, "lifey"] <-
    aggregated [scenario=="pre-vaccination",  lifey] -
    aggregated [scenario=="post-vaccination", lifey]

  difference [, "disability"] <-
    aggregated [scenario=="pre-vaccination",  disability] -
    aggregated [scenario=="post-vaccination", disability]

  difference [, "cost.cecx"] <-
    aggregated [scenario=="pre-vaccination",  cost.cecx] -
    aggregated [scenario=="post-vaccination", cost.cecx]

  for (d in types) {

    costeffect [variable=="Cohort size", d] <- cohort_size

    costeffect [variable == "Vac cohort size", d] <-
      results [age        == agevac &
                 scenario == "post-vaccination" &
                 type     == d,
              vaccinated]

    costeffect [variable == "Vaccine cost", d] <-
      unlist (costeffect [variable == "Vac cohort size",
                          d,
                          with = FALSE],
              use.names=FALSE) * vaccine_cost

    costeffect [variable == "Costs saved", d] <-
      difference [type == d, cost.cecx]

    costeffect [variable == "Net cost", d] <-
      unlist (costeffect [variable == "Vaccine cost",
                          d,
                          with = FALSE],
              use.names = FALSE) -
      unlist (costeffect [variable == "Costs saved",
                          d,
                          with = FALSE],
              use.names = FALSE)

    costeffect [variable == "CeCx prevented", d] <-
      difference [type == d, inc.cecx]

    costeffect [variable == "Deaths prevented", d] <-
      difference [type == d, mort.cecx]

    costeffect [variable == "Life years saved", d] <-
      difference [type == d, lifey]

    costeffect [variable == "Nonfatal DALYs prevented", d] <-
      difference [type == d, disability]

    costeffect [variable == "Cost/death prevented", d] <-
      unlist (costeffect [variable == "Net cost",
                          d,
                          with = FALSE],
              use.names = FALSE) /
      unlist (costeffect [variable == "Deaths prevented",
                          d,
                          with = FALSE],
              use.names = FALSE)

    costeffect [variable == "Cost/life year saved", d] <-
      unlist (costeffect [variable == "Net cost",
                          d,
                          with = FALSE],
              use.names = FALSE) /
      unlist (costeffect [variable == "Life years saved",
                          d,
                          with = FALSE],
              use.names = FALSE)

    costeffect [variable == "Cost/DALY prevented", d] <-
      unlist (costeffect [variable == "Net cost",
                          d,
                          with = FALSE],
              use.names = FALSE) /
      (unlist (costeffect [variable == "Life years saved",
                           d,
                           with = FALSE],
               use.names = FALSE) +
         unlist (costeffect [variable == "Nonfatal DALYs prevented",
                             d,
                             with = FALSE],
                 use.names = FALSE) )

    costeffect [variable == "GDP/capita", d] <- gdp_per_capita

    costeffect [variable == "CE at 1xGDP/capita?", d] <-
      as.numeric (unlist (costeffect [variable == "Cost/DALY prevented",
                                      d,
                                      with = FALSE],
                          use.names = FALSE) <
                    gdp_per_capita)

    costeffect [variable == "CE at 3xGDP/capita?", d] <-
      as.numeric (unlist (costeffect [variable == "Cost/DALY prevented",
                                      d,
                                      with = FALSE],
                          use.names = FALSE) <
                    (gdp_per_capita * 3) )

    costeffect [variable == "Cut-off price", d] <-
      (unlist (costeffect [variable == "Costs saved",
                           d,
                           with = FALSE],
               use.names = FALSE) +
         (unlist (costeffect [variable == "Life years saved",
                              d,
                              with = FALSE],
                  use.names = FALSE) +
            unlist (costeffect [variable == "Nonfatal DALYs prevented",
                                d,
                                with = FALSE],
                    use.names = FALSE) ) *
         gdp_per_capita) /
      unlist (costeffect [variable == "Vac cohort size",
                          d,
                          with = FALSE],
              use.names = FALSE)

    costeffect [, d] <- round (unlist (costeffect [, d, with=FALSE],
                                       use.names = FALSE), 2)
  }

  return (costeffect)

} # end of function -- analyseCosts
#-------------------------------------------------------------------------------


#-------------------------------------------------------------------------------
#' Checks whether the size of a variable is larger than 0
#'
#' Used to determine that all required variables are passed to a function
#' Checks whether a vector has length > 0 or a data.table/data.frame has nrow > 0
#'
#' @param v Variable (required)
#'
#' @return Logical: TRUE if size is not 0, false if size is 0
#' @export
#'
#' @examples
#' x <- c()
#' checkSize(x)
#'
#' x <- c(2,5)
#' checkSize(x)
#'
#' A <- c()
#' B <- c(1,2,3)
#' sapply(c("A","B"),function(x){checkSize(get(x))})
checkSize <- function (v) {

  if (is.vector(v)) {
    size <- length(v)
  } else if (is.data.frame(v) | is.data.table(v)) {
    size <- nrow(v)
  } else {
    size <- 0
  }

  if (size > 0) {
    return (TRUE)
  } else {
    return (FALSE)
  }

} # end of function -- checkSize
#-------------------------------------------------------------------------------


#-------------------------------------------------------------------------------
#' Construct lifetable based on qx-column
#'
#' qx = age-specific probability of dying
#'
#' @param qx Numeric vector (required): Age-specific probabilities of dying
#' @param agecohort Number (optional): Age at which cohort is started
#'
#' @return Data.table with lifetable
#' @export
#'
#' @examples
#'
#' qx <- unlist(data.mortall[iso3=="AFG", as.character(0:100), with=FALSE], use.names=FALSE)
#' lifeTable(qx, 9)
lifeTable <- function (qx        = NULL,
                       mx        = NULL,
                       agecohort = 0) {

  if (is.null(qx) & is.null(mx)) {
    stop ("Provide qx or mx values")
  } else if (is.null(qx)) {
    # convert central mortality rate to qx
    qx <- 2*mx/(2+mx)
  }

  ages <- c(0:(length(qx)-1))

  lifetab <- data.table (
    age    = ages,
    qx     = qx,
    px     = 1 - qx,
    lx     = rep (0, length(ages)),
    lx.adj = rep (0, length(ages)),
    llx    = rep (0, length(ages)),
    ttx    = rep (0, length(ages)),
    ex     = rep (0, length(ages))
  )

  # proportion of people that have survived up until this year
  lifetab [age==0, "lx"] <- 1
  lifetab [age>0,  "lx"] <- sapply (ages [-which(ages==0)],
                                    function (a) {prod (lifetab [age<a, px])} )

  # ((proportion of people that has survived up until this year) +
  # (proportion of people that will survive this year)) / 2
  lifetab [age <  max(ages), "llx"] <- sapply (ages [-which (ages == max(ages))],
                                               function (a) {
                                                 mean (lifetab [age %in% c(a,a+1), lx])
                                                 }
                                               )
  lifetab [age == max(ages), "llx"] <- lifetab [age == max(ages), lx] / 2

  # ttx is summed proportion that survives - llx (years of life left)
  lifetab [age==0, "ttx"] <- sum (lifetab [, llx])
  lifetab [age>0,  "ttx"] <- sapply (ages [which (ages>0)],
                                     function (a) {
                                       lifetab [age==0, ttx] -
                                         sum (lifetab [age<a, llx])
                                       }
                                     )

  # if any ttx is extremely small (as a result of very small fractions and
  # floating-point errors) set to zero
  lifetab[ttx < 1e-13,"ttx"] <- 0

  # ttx/lx = (years of life left)
  lifetab [, "ex"]                 <- lifetab [, ttx] / lifetab [, lx]
  lifetab [age == max(ages), "ex"] <- 0

  # Now create a new lx which starts at age agecohort for use in calculating
  # impact of vaccinating people at age agecohort
  lifetab [age >= agecohort, "lx.adj"] <- lifetab [age >= agecohort, lx] /
                                          lifetab [age == agecohort, lx]

  lifetab [age <  agecohort, "lx.adj"] <- lifetab [age <  agecohort, lx] /
                                          lifetab [age == agecohort, lx]
  # lx at age less than agecohort will be larger than lx at age of agecohort
  # lx - general definition: number of persons surviving to exact age x
  # lifetab [age < agecohort,  "lx.adj"] <- 0

  return (lifetab)

} # end of function -- lifeTable
#-------------------------------------------------------------------------------


#-------------------------------------------------------------------------------
#' Get age-specific coverage-rates
#'
#' @param ages Numeric vector (required): ages in model
#' @param routine_coverage Number (required): proportion of population that
#'        receives routine vaccination
#' @param vaccine_efficacy Number (required): proportion indicating
#'        vaccine-efficacy
#' @param campaigns List or number (required): if a list, applies MAC
#'        vaccination (needs to change)
#' @param lifetab Data.table (required): lifetable generated with lifeTable()
#' @param cohort Number (required): cohort-size (only used in MAC campaigns)
#' @param agevac Number (required): target age for vaccination
#'
#' @return Data.table with coverage and effective coverage by age. Used in RunCohort()
#' @export
#'
#' @examples
#' ages <- c(0:100)
#' routine_coverage <- 0.75
#' vaccine_efficacy <- 0.8
#' lifetab <- lifeTable(unlist(data.mortall[iso3=="AFG", as.character(0:100),
#'   with=FALSE], use.names=FALSE), 9)
#' agevac <- 9
#' ageCoverage (ages, routine_coverage, vaccine_efficacy, -1,
#'   lifetab, cohort, agevac)
ageCoverage <- function (ages,
                         routine_coverage,
                         vaccine_efficacy_nosexdebut,
                         vaccine_efficacy_sexdebut,
                         campaigns,
                         lifetab,
                         cohort,
                         agevac,
                         country_iso3 = NULL) {

  coverage <- data.table (
    age      = ages,
    coverage = rep (0, length(ages))
  )

  coverage[age >= agevac,"coverage"] <- routine_coverage
  coverage[,"effective_coverage"] <-
    (coverage [, coverage] * vaccine_efficacy_nosexdebut *
       (1 - propSexDebut (agevac, country_iso3))) +
    (coverage [, coverage] * vaccine_efficacy_sexdebut *
       (propSexDebut(agevac, country_iso3)))

  if (is.list(campaigns)) {
    for (y in 1:length(campaigns)) {
      campaign_age      <- campaigns[[y]][["ages"]]
      campaign_coverage <- campaigns[[y]][["coverage"]]
      # if activity type is campaign, coverage proportion still needs to be calculated
      # if(campaigns[[y]][["type"]] == "campaign"){
      #	campaign_coverage <- campaign_coverage/(lifetab[age==campaign_age,"lx.adj"]*cohort)
      #}

      if (campaign_coverage > 1) {
        campaign_coverage <- 1
      }

      # ------------------------------------------------------------------------
      # older version: campaign coverage is spread randomly among the previous vaccinated cohort
      #
      # # coverage increases for all subsequent age-strata
      # init_cov <- coverage[age >= campaign_age, coverage]
      # coverage[age >= campaign_age, "coverage"] <- init_cov +
      #   (1-init_cov) * campaign_coverage
      #
      # # vaccine not efficacious for girls that have sexually debuted
      # coverage [age >= campaign_age, "effective_coverage"] <-
      #   (coverage [age >= campaign_age, effective_coverage]) +
      #   ((1 - init_cov) *
      #      campaign_coverage *
      #      (1 - propSexDebut (campaign_age, country_iso3)) *
      #      vaccine_efficacy_nosexdebut) +
      #   ((1 - init_cov) *
      #      campaign_coverage *
      #      (propSexDebut (campaign_age, country_iso3)) *
      #      vaccine_efficacy_sexdebut)
      # ------------------------------------------------------------------------


      # ------------------------------------------------------------------------
      # updated version: campaign coverage goes only to unvaccinated girls among the previous vaccinated cohort
      #
      # coverage increases for all subsequent age-strata
      init_cov <- coverage[age >= campaign_age, coverage]

      # check if campaign coverage will take cohort coverage beyond 100%;
      # if yes, cap cohort coverage to 100%
      if ( (init_cov [1] + campaign_coverage) > 1) {
        campaign_coverage <- 1 - init_cov [1]
      }

      coverage [age >= campaign_age, "coverage"] <- init_cov + campaign_coverage

      # vaccine not efficacious for girls that have sexually debuted
      coverage [age >= campaign_age, "effective_coverage"] <-
        (coverage [age >= campaign_age, effective_coverage]) +
        (campaign_coverage *
           (1 - propSexDebut (campaign_age, country_iso3)) *
           vaccine_efficacy_nosexdebut) +
        (campaign_coverage *
           (propSexDebut (campaign_age, country_iso3)) *
           vaccine_efficacy_sexdebut)
      # ------------------------------------------------------------------------

    }
  }

  return (coverage)

} # end of function -- ageCoverage
#-------------------------------------------------------------------------------


#-------------------------------------------------------------------------------
#' Proportion of girls sexually debuted
#'
#' \code{propSexDebut} returns proportion of girls sexually debuted in
#'   country \code{country_iso3} at age \code{age}.
#'
#' @param age age of girls
#' @param country_iso3 ISO3 country code
#'
#' @return Returns proportion of girls in a given country that has
#'   sexually debuted at a given age.
#'
#' @examples
#' propSexDebut (20, "IND")
#' propSexDebut (30, "ETH")
#'
#' @export
propSexDebut <- function (age,
                          country_iso3) {

  if (age < 12) {
    prop_sexdebut <- 0
  } else {
    # calculate proportion of girls that have sexually debuted at age a + 1
    if (nrow (data.sexual_debut  [iso3 == country_iso3]) != 1 ||
        is.na (data.sexual_debut [iso3 == country_iso3, a]) ||
        is.na (data.sexual_debut [iso3 == country_iso3, b]) ||
        is.na (data.sexual_debut [iso3 == country_iso3, cluster.id]) ) {

      # cannot estimate data.sexual_debut, use prop_sexdebut of 0
      prop_sexdebut <- 0

    } else if (!is.na (data.sexual_debut [iso3 == country_iso3, a]) &
               !is.na (data.sexual_debut [iso3 == country_iso3, b]) ) {

      # estimate proportion sexual debut with country specific parameters
      prop_sexdebut <- pgamma (
        # prop will be 0 for ages lower than 12
        (age + 1 - 12),
        shape = data.sexual_debut [iso3 == country_iso3, a],
        scale = data.sexual_debut [iso3 == country_iso3, b]
      )
    } else {

      # estimate proportion sexual debut at 1+age-of-vaccination,
      # based on parameters of country with highest proportion of girls
      # sexually debuting at age 15
      cluster_id  <- data.sexual_debut [iso3 == country_iso3, cluster.id]
      cluster_max <- data.sexual_debut [cluster.id == cluster_id &
                                          X15 == data.sexual_debut [
                                            cluster.id == cluster_id,
                                            max (X15)],
                                        iso3]
      prop_sexdebut <- pgamma (
        #prop will be 0 for ages lower than 12
        (age + 1 - 12),
        shape = data.sexual_debut [iso3 == cluster_max, a],
        scale = data.sexual_debut [iso3 == cluster_max, b]
      )
    }
  }

  return (prop_sexdebut)

} # end of function -- propSexDebut
#-------------------------------------------------------------------------------


#-------------------------------------------------------------------------------
#' Match two data-tables on multiple columns
#'
#' Returns vector with column-of-interest where columns match
#'
#' If at least one value in any of the input_match_on columns matches with a
#'   value in any of the reference_match_on columns, the two rows will match
#'
#' @param input Data.table (required): input-table to match
#' @param input_match_on Character vector (required): column-names in
#'        input-table to match
#' @param reference Data.table (required): reference-table to match
#' @param reference_match_on Character vector (required): column-names in
#'        reference-table to match
#' @param reference_return Character string (required): column-name in
#'        reference-table that is returned (where values match)
#'
#' @return Character vector with values from reference_return column in
#'         reference_match_on data.table where values match
#' @export
#'
#' @examples
#' dtColMatch (data.global, c("Country"), data.countryname,
#'   c("name1", "name2", "name3", "name4"), "iso3")
#'
# Extend data.table library
# Used to match multiple columns of different data-tables.
# Return variable of interest.
dtColMatch <- function (input,
                        input_match_on,
                        reference,
                        reference_match_on,
                        reference_return) {

  rows <- rep (NA, nrow(input))

  for (imatch in input_match_on) {
    for (rmatch in reference_match_on) {
      rows [is.na(rows)] <- pmatch (
        tolower (
          unlist (input [which(is.na(rows)), imatch, with=F], use.names=F)
        ),
        tolower (
          unlist (reference [, rmatch, with=F], use.names=F)
        )
      )
    }
  }

  return (unlist (data.countryname [rows,
                                    reference_return,
                                    with = FALSE],
                  use.names = FALSE))

} # end of function -- dtColMatch
#-------------------------------------------------------------------------------


#-------------------------------------------------------------------------------
#' Collapse data-tables
#'
#' @param DT Data-table (required)
#' @param aggr_on Character string (required): column-name that will be used to
#'        collapse on (i.e. combine all age-strata)
#' @param measure.vars Character string (optional): column-names that will be
#'        collapsed (function will be applied to all these columns)
#' @param id.vars Character string (optional): column-names that will remain
#'        stratified
#'
#' N.b. if measure.vars is not provided, all columns that are not in id.vars
#'   and aggr_on will be assumed to be assumed
#'
#' @param func Character string (optional): function that will be applied to
#'        data (if optional, values will be summed)
#' @param na.rm Logical (optional): if TRUE, removes NA from measure.vars
#'        columns before applying function (or passes na.rm=TRUE to function)
#'
#' @return Returns collapsed data.table
#' @export
#'
dtAggregate <- function (DT,
                         aggr_on,
                         measure.vars = c(),
                         id.vars      = c(),
                         func         = "sum",
                         na.rm        = TRUE) {

  # private function
  dtAggregateSingle <- function (DT,
                                 aggr_on,
                                 measure.vars,
                                 id.vars,func="sum") {
    return (switch (
      func,
      "sum" = DT[
        ,
        .(
          list (
            unique (
              get (aggr_on)
            )
          ),
          sum (
            get (measure.vars),
            na.rm = na.rm
          )
        ),
        by = eval(id.vars)
        ],
      DT
    ))
  }

  available_funcs <- c("sum")

  if (!(func %in% available_funcs)) {
    stop (
      paste0(
        "Not possible to aggregate using function '",
        func,
        "'."
      )
    )
  }

  # Use all other columns if measure or id vars are not provided
  if (length(measure.vars) == 0 & length(id.vars) == 0) {
    stop (
      "Please provide measure.vars and/or id.vars"
      )
  } else if (length(measure.vars) == 0) {
    measure.vars <- colnames(DT) [!(colnames(DT) %in% c(id.vars, aggr_on))]
  } else if (length(id.vars) == 0) {
    id.vars <- colnames(DT) [!(colnames(DT) %in% c(measure.vars, aggr_on))]
  }

  c <- 1
  dt_main <- dtAggregateSingle (DT,
                                aggr_on,
                                measure.vars[c],
                                id.vars,func)
  class (dt_main$V1) <- "character"
  dt_main [, "V1"]   <- "aggregated"

  colnames (dt_main) [colnames(dt_main) == "V1"] <- aggr_on
  colnames (dt_main) [colnames(dt_main) == "V2"] <- measure.vars[c]

  for (c in 1:length(measure.vars)) {
    dt_current <- dtAggregateSingle (DT,
                                     aggr_on,
                                     measure.vars[c],
                                     id.vars,
                                     func)
    dt_main [, measure.vars[c]] <- dt_current[, V2]
  }

  return (dt_main)

} # end of function -- dtAggregate
#-------------------------------------------------------------------------------


#-------------------------------------------------------------------------------
#' Convert monetary character-strings to numeric values
#'
#' @param x Character string to convert
#'
#' @return Returns number with value, stripped from any currency symbols and
#'   thousand-seperators (i.e. "B#2,010.50" becomes 2010.5)
#' @export
#'
#' @examples
#'
#' monetary_to_number ("$2,200.20")
#'
#' # Note that values using German or Dutch notation (i.e. using a comma to
#'   separate decimals and a dot to seperate thousands) are converted as well.
#'   monetary_to_number ("$2.200,20")
#'
monetary_to_number <- function (x) {

  if (!is.character (x)) {

    # return value since incoming value is not a character-string
    return (x)

  } else {

    # remove any valuta_signs
    valuta <- c ("$", "B#", "B%", "b,")

    for (v in valuta) {
      x <- gsub (paste0 ("\\",v), "", x)
    }

    # check what the decimal sign is
    dot   <- sum (strsplit (x,"")[[1]] == ".")
    comma <- sum (strsplit (x,"")[[1]] == ",")

    if (dot>1 & comma>1) {
      stop ("Value has multiple comma's and dots")

    } else if (comma>1 & dot<=1) {

      # assume that comma is used to separate thousands
      # (i.e. English notation)
      x <- gsub (",", "", x, fixed=T)

    } else if (dot>1 & comma<=1) {

      # assume that dot is used to separate thousands
      # (i.e. Dutch or German notation)
      x <- gsub (".", "",  x, fixed=T)
      x <- gsub (",", ".", x, fixed=T)

    } else if (comma==1 & dot==1) {

      # check whether comma or dot comes first
      chars <- strsplit (x, "")[[1]]
      i     <- 0

      while (i < length (chars)) {

        i <- i+1
        if (chars[i] %in% c (".", ",")) {

          thousand_sep <- chars [i]
          i            <- length (chars)
        }
      }

      if (thousand_sep == ",") {

        # comma is used to separate thousands
        x <- gsub (",", "", x, fixed=T)

      } else {

        # dot is used to separate thousands
        x <- gsub (".", "",  x, fixed=T)
        x <- gsub (",", ".", x, fixed=T)
      }

    } else {

      # assume that comma is used to separate thousands
      # (i.e. English notation)
      x <- gsub (",", "", x, fixed=T)
    }

    # fix to keep decimalvalues with big numbers
    x <- as.numeric (x)

    # return monetary value in numeric format
    return (x)
  }

} # end of function -- monetary_to_number
#-------------------------------------------------------------------------------


#-------------------------------------------------------------------------------
#' Generate vaccine impact estimates - HPV 16/18 types (VIMC central run)
#'
#' Generate vaccine impact estimates (HPV 16/18 types) for VIMC central runs.
#'   The inputs are vaccine coverage and disease burden template files and
#'   outputs are disease burden estimates (pre-vaccination and post-vaccination).
#'
#' Three disease burden estimates (HPV 16/18 types) are generated.
#'   (i)   disease burden estimates for no vaccination (vimc format)
#'   (ii)  disease burden estimates for vaccination (vimc format)
#'   (iii) disease burden estimates for vaccination (pre- and post-vaccination)
#'   and includes YLDs and YLLs
#'
#' @param vaccine_coverage_file csv file (input), vaccine coverage data
#'   (vimc format)
#' @param disease_burden_template_file csv file (input), disease burden template
#'   (vimc format)
#' @param disease_burden_no_vaccination_file csv file (output), disease burden estimates
#'   for no vaccination (vimc format)
#' @param disease_burden_vaccination_file csv file (output), disease burden estimates
#'   for vaccination (vimc format)
#' @param disease_burden_results_file csv file (output), disease burden estimates
#'   pre-vaccination and post-vaccination
#' @param campaign_vaccination logical, indicates campaign vaccination
#' @param routine_vaccination logical, indicates routine vaccination
#' @param vaccine character, bivalent/quadrivalent/nonovalent HPV vaccine
#' @param canc.inc character, year of GLOBOCAN estimates
#' @param country_set character, run for all countries specified in
#'   disease burden template file (or) run for a set of countries
#'
#' @return Null return value; disease burden estimates are saved to corresponding files
#' @export
#'
#' @examples
#'   EstimateVaccineImpactVimcCentral (
#'     vaccine_coverage_file              = "coverage_hpv-routine-default.csv",
#'     disease_burden_template_file       = "central-burden-template.csv",
#'     disease_burden_no_vaccination_file = "central_burden_no_vaccination.csv",
#'     disease_burden_vaccination_file    = "central_burden_vaccination.csv",
#'     disease_burden_results_file        = "central_burden_results.csv",
#'     routine_vaccination                = TRUE,
#'     campaign_vaccination               = TRUE)
#'
EstimateVaccineImpactVimcCentral <- function (vaccine_coverage_file,
                                              disease_burden_template_file,
                                              disease_burden_no_vaccination_file,
                                              disease_burden_vaccination_file,
                                              disease_burden_results_file,
                                              campaign_vaccination,
                                              routine_vaccination,
                                              vaccine      = "4vHPV",
                                              canc.inc     = "2020",
                                              country_set  = "all") {

  # read files -- vaccination coverage
  vimc_coverage <- fread (vaccine_coverage_file)

  # ----------------------------------------------------------------------------
  # consider activity type "routine-intensified" as "campaign"
  vimc_coverage [activity_type == "routine-intensified", activity_type := "campaign"]
  # ----------------------------------------------------------------------------

  # read file -- central disease burden template
  vimc_template <- fread (disease_burden_template_file)

  # run for all countries or a specific country
  if (country_set [1] != "all")
  {
    vimc_template <- vimc_template [country %in% country_set]
  }

  # register batch data for vimc runs
  RegisterBatchDataVimc (vimc_coverage             = vimc_coverage,
                         vimc_template             = vimc_template,
                         use_campaigns             = campaign_vaccination,
                         use_routine               = routine_vaccination,
                         restrict_to_coverage_data = FALSE,
                         force                     = TRUE,
                         psa                       = 0)

  # log file to keep track of simulation run
  log_file <- "log/prime_log.log"

  # start of parallelisation
  cl <- makeCluster (detectCores())   # registering number of cores
  registerDoParallel (cl)             # start of parallelisation

  # simulation runs through the batch of cohorts
  results <- BatchRun(countries                       = -1,
                      coverage                        = -1,
                      agevac                          = -1,
                      agecohort                       = -1,
                      sens                            = -1,
                      year_born                       = -1,
                      year_vac                        = -1,
                      runs                            = 1,
                      vaccine_efficacy_beforesexdebut = 1,
                      vaccine_efficacy_aftersexdebut  = 0,
                      log                             = log_file,
                      by_calendaryear                 = TRUE,
                      use_proportions                 = TRUE,
                      analyseCosts                    = FALSE,
                      psa                             = 0,
                      psa_vals                        = ".data.batch.psa",
                      unwpp_mortality                 = TRUE,
                      disability.weights              = "gbd_2017",
                      canc.inc                        = canc.inc,
                      vaccine                         = vaccine
  )

  # end of parallelisation
  stopCluster (cl)

  # convert results to vimc format
  convert_results <- OutputVimc (DT            = results,
                                 calendar_year = TRUE,
                                 vimc_template = vimc_template)

  # save full results
  fwrite (x    = results,
          file = disease_burden_results_file)

  # Saving output for no vaccination scenario (vimc format)
  no_vaccination <- convert_results [scenario == "pre-vaccination"]
  no_vaccination <- no_vaccination  [, colnames (vimc_template), with=FALSE]
  no_vaccination <- no_vaccination  [!is.na(deaths)]
  fwrite (x    = no_vaccination,
          file = disease_burden_no_vaccination_file)

  # Saving output for vaccination scenario (vimc format)
  vaccination <- convert_results [scenario == "post-vaccination"]
  vaccination <- vaccination     [, colnames (vimc_template), with=FALSE]
  vaccination <- vaccination     [!is.na(deaths)]
  fwrite (x    = vaccination,
          file = disease_burden_vaccination_file)

  return (NULL)

} # end of function -- EstimateVaccineImpactVimcCentral
#-------------------------------------------------------------------------------


# ------------------------------------------------------------------------------
#' Generate/read Latin hyper cube sample of parameters for sensitivity analysis
#'
#' Generate Latin hyper cube sample of input parameters based on their
#'   distributions for probabilistic sensitivity analysis.
#'   If file (sample of parameters) already exists,
#'   then read Latin hyper cube sample of input parameters.
#'
#' @param country_codes ISO3 country codes of countries
#' @param vaccine bivalent/quadrivalent or nonavalent HPV vaccine
#' @param psa_runs integer, simulation runs for sensitivity analysis
#' @param seed_state integer, seed value for random number generator
#' @param psadat_file character string, file to save Latin hyper cube
#'   sample of input parameters
#' @param psadat_vimc_file character string, file to save Latin hyper cube
#'   sample of input parameters (VIMC format)
#' @param run_lhs logical, if TRUE create new sample of input parameters,
#'   if FALSE, read sample of input parameters from file
#'
#' @return Null return value; disease burden estimates are saved to corresponding files
#' @export
#' @import lhs
#' @import stats
#' @importFrom prevalence betaExpert
#'
#' @examples
#'   old_CreatePsaData (
#'     country_codes    = c("AFG", "ALB"),
#'     vaccine          = "4vHPV",
#'     psa_runs         = 200,
#'     seed_state       = 1,
#'     psadat_file      = "psadat.csv",
#'     psadat_vimc_file = "psadat_vimc.csv",
#'     run_lhs          = TRUE)
# ------------------------------------------------------------------------------
# This old function can be removed and not used further
# ------------------------------------------------------------------------------
old_CreatePsaData <- function (country_codes,
                           vaccine          = "4vHPV",
                           psa_runs         = 0,
                           seed_state       = 1,
                           psadat_file      = "psadat.csv",
                           psadat_vimc_file = "psadat_vimc.csv",
                           run_lhs          = FALSE) {

  # create new sample of input parameters (or)
  # read sample of input parameters from file
  if (run_lhs) {

    # set seed for reproducibility
    set.seed (seed = seed_state)

    # sensitivity analysis
    if (psa_runs > 1) {

      # create empty psa data table (8 + 1 psa parameters)
      psadat <- data.table (
        country                            = character (),  # iso3 country code
        run_id                             = numeric   (),  # run id (psa run)
        dw_diagnosis                       = numeric   (),  # disability weight - diagnosis phase
        dw_control                         = numeric   (),  # disability weight - control phase
        dw_metastatic                      = numeric   (),  # disability weight - metastatic phase
        dw_terminal                        = numeric   (),  # disability weight - terminal phase
        incidence_ratio                    = numeric   (),  # cervical cancer incidence ratio
        mortality_ratio                    = numeric   (),  # cervical cancer mortality ratio
        prevalence_ratio                   = numeric   (),  # cervical cancer prevalence ratio
        hpv_distribution_ratio             = numeric   (),  # hpv (types in vaccine) distribution ratio
        hpv_distribution_non_vaccine_ratio = numeric   ()   # hpv (non-vaccine types) distribution ratio
      )
    }

    # cervical cancer phases
    # 1 - diagnosis; 2 - control, 3 - metastatic; 4 - terminal
    cecx_phases <- c ("diagnosis", "control", "metastatic", "terminal")

    # disease burden metrics
    burden_metrics <- c ("incidence", "mortality", "prevalence")

    # hpv distribution
    # HPV 16/18 for bivalent or quadrivalent vaccine  (4vHPV)
    # HPV 16/18/31/31/45/52/58 for nonavalent vaccine (9vHPV)

    # hpv (types in vaccine) distribution ratio (and) hpv (non-vaccine types) distribution ratio
    hpv_distribution_ratios <- 2

    # number of parameters
    parameters <- length (cecx_phases) + length (burden_metrics) + hpv_distribution_ratios

    # create data table specific for each country with parameter values for
    # probabilistic sensitivity analysis
    for (country_code in country_codes) {

      # construct a random Latin hypercube design
      cube <- randomLHS (n = psa_runs,
                         k = parameters)

      # --------------------------------------------------------------------------
      # If no data available for a country, switch to another country as proxy
      if ( country_code %in% c ("XK") ) {
        proxy <- TRUE
        country_code <- switch (
          country_code,
          "XK"  = "ALB",  # demography (unwpp) available for XK
          # no data for burden, demography (who), cost for XK
          country_code
        )
      } else {
        proxy <- FALSE
      }
      # --------------------------------------------------------------------------


      #---------------------------------------------------------------------------
      # disability weights -- psa values
      #---------------------------------------------------------------------------

      # create data table to store psa values of
      # disability weights of different sequelaes (cancer phases)
      dw_psa_DT <- data.table (run_id = c (1:psa_runs))

      # loop through disability weights of different sequelaes (cancer phases)
      for (i in 1:length (cecx_phases)) {

        # disability weight for a cancer phase -- mean and 95% uncertainty intervals
        dw <- data.disability_weights [Source == "gbd_2017" & Sequela == cecx_phases [i],
                                       c(Mid, Low, High)]
        names (dw) <- c("mid", "low", "high")

        # get shape parameters of beta distribution
        shape_param <- betaExpert (best   = dw ["mid"],
                                   lower  = dw ["low"],
                                   upper  = dw ["high"],
                                   p      = 0.95,
                                   method = "mean")

        # sample input parameter values (latin hyper cube sampling)
        # based on their distributions for psa runs
        dw_psa_values <- qbeta (p      = cube [, i],
                                shape1 = shape_param$alpha,
                                shape2 = shape_param$beta)

        # data table of psa values containg
        # disability weights of different sequelaes (cancer phases)
        dw_psa_DT <- cbind (dw_psa_DT, dw_psa_values)

      } # end of loop -- for (i in 1:length (cecx_phases))

      # set column names for psa table of disability weights
      names (dw_psa_DT) <- c ("run_id",
                              "dw_diagnosis",
                              "dw_control",
                              "dw_metastatic",
                              "dw_terminal")

      #---------------------------------------------------------------------------
      # (incidence, mortality, prevalence) burden ratios -- psa values
      #---------------------------------------------------------------------------

      # create data table to store psa values of
      # (incidence, mortality, prevalence) ratios
      burden_psa_DT <- data.table ()


      # loop through burden ratios
      for (i in 1:length (burden_metrics)) {

        # mean value and 95% uncertainty intervals -- incidence, mortality, prevalence
        #
        # incidence
        if (burden_metrics [i] == "incidence") {
          burden  <- data.incidence_ui  [iso3 == country_code, c(Mid, Low, High)]

          # set proxy values (incidence) for missing countries
          # based around the median of available data for other countries
          if (length (burden) == 0) {
            burden <- c (1,
                         median (data.incidence_ui$Low  / data.incidence_ui$Mid),
                         median (data.incidence_ui$High / data.incidence_ui$Mid))
          }
        }
        # mortality
        else if (burden_metrics [i] == "mortality") {
          burden  <- data.mortcecx_ui   [iso3 == country_code, c(Mid, Low, High)]

          # set proxy values (mortality) for missing countries
          # based around the median of available data for other countries
          if (length (burden) == 0) {
            burden <- c (1,
                         median (data.mortcecx_ui$Low  / data.mortcecx_ui$Mid),
                         median (data.mortcecx_ui$High / data.mortcecx_ui$Mid))
          }
        }
        # prevalence
        else if (burden_metrics [i] == "prevalence") {
          burden <- data.prevalence_ui [iso3 == country_code, c(Mid, Low, High)]

          # set proxy values (prevalence) for missing countries
          # based around the median of available data for other countries
          if (length (burden) == 0) {
            burden <- c (1,
                         median (data.prevalence_ui$Low  / data.prevalence_ui$Mid),
                         median (data.prevalence_ui$High / data.prevalence_ui$Mid))
          }
        }

        names (burden)  <- c("mid", "low", "high")

        # log of burden values
        log_burden  <- log (burden)

        # Estimating the global cancer incidence and mortality in 2018:
        # GLOBOCAN sources and methods
        # (refer to paper for defintion of uncertainty intervals)
        # https://www.ncbi.nlm.nih.gov/pubmed/30350310

        # standard error in log scale
        log_burden_se  <- (log_burden  ["high"] - log_burden ["low"]) / 3.92

        # sample input parameter values (latin hyper cube sampling)
        # based on their distributions for psa runs
        burden_psa_values <- qlnorm (p       = cube [, (length (cecx_phases) + i)],
                                     meanlog = log_burden ["mid"],
                                     sdlog   = log_burden_se)

        # ratio of psa values (incidence, mortality, prevalence) to mean values
        burden_ratio  <- burden_psa_values / burden ["mid"]

        # data table of psa values containg
        # (incidence, mortality, prevalence) ratios
        burden_psa_DT <- cbind (burden_psa_DT, burden_ratio)

      } # end of loop -- for (i in 1:length (burden_metrics))

      # set column names for psa table of disability weights
      names (burden_psa_DT) <- c ("incidence_ratio",
                                  "mortality_ratio",
                                  "prevalence_ratio")

      #---------------------------------------------------------------------------
      # hpv distribution ratios -- psa values
      #---------------------------------------------------------------------------

      # create data table to store psa values of
      # hpv distribution
      hpv_distribution_ratio_psa_DT <- data.table ()
      # data.table (hpv_distribution_ratio             = numeric (),
      #             hpv_distribution_non_vaccine_ratio = numeric ())

      # hpv distribution -- mean and 95% uncertainty intervals
      # HPV 16/18 for bivalent or quadrivalent vaccine  (4vHPV)
      # HPV 16/18/31/31/45/52/58 for nonavalent vaccine (9vHPV)
      if (vaccine == "4vHPV") {

        # Relative contribution of HPV 16/18 in ICC HPV-positive cases
        hpv_distribution <- data.hpv_distribution [iso3 == country_code,
                                                   c("hpv_4v", "hpv_4v_low", "hpv_4v_high")]
      } else if (vaccine == "9vHPV") {

        # Relative contribution of HPV 16/18/31/33/45/52/58 in ICC HPV-positive cases
        hpv_distribution <- data.hpv_distribution [iso3 == country_code,
                                                   c("hpv_9v", "hpv_9v_low", "hpv_9v_high")]
      }

      # change hpv distribution values from percentage to proportion
      hpv_distribution <- hpv_distribution / 100

      # set names for values
      names (hpv_distribution) <- c("mid", "low", "high")

      # get shape parameters of beta distribution
      shape_param <- betaExpert (best   = hpv_distribution$mid,
                                 lower  = hpv_distribution$low,
                                 upper  = hpv_distribution$high,
                                 p      = 0.95,
                                 method = "mean")

      # sample input parameter values (latin hyper cube sampling)
      # based on their distributions for psa runs
      # hpv (types in vaccine) distribution
      hpv_distribution_psa_values <- qbeta (p      = cube [, length (cecx_phases) +
                                                             length (burden_metrics) +
                                                             hpv_distribution_ratios - 1],
                                            shape1 = shape_param$alpha,
                                            shape2 = shape_param$beta)

      # hpv (non-vaccine types) distribution
      hpv_distribution_non_vaccine_psa_values <- 1 - hpv_distribution_psa_values

      # ratio of hpv distribution values to mean values (types in vaccine)
      hpv_distribution_ratio <- hpv_distribution_psa_values / hpv_distribution$mid

      # ratio of hpv distribution values to mean values (non-vaccine types)
      hpv_distribution_non_vaccine_ratio <-
        hpv_distribution_non_vaccine_psa_values / (1 - hpv_distribution$mid)

      # data table of psa values containing
      # hpv distribution ratios
      hpv_distribution_ratio_psa_DT <- cbind (hpv_distribution_ratio,
                                              hpv_distribution_non_vaccine_ratio)

      # # set column names for psa table of hpv distribution ratios
      # names (hpv_distribution_ratio_psa_DT) <- c ("hpv_distribution_ratio")

      #---------------------------------------------------------------------------

      # --------------------------------------------------------------------------
      # Switch back to original country for which proxy was set due to unavailable data
      if (proxy) {
        proxy <- FALSE
        country_code <- switch (
          country_code,
          "ALB" = "XK",
          country_code
        )
      }
      # --------------------------------------------------------------------------

      # create data table specific for this country with psa parameter values
      country_psa <- data.table (
        country = rep (x = country_code, times = psa_runs),
        dw_psa_DT,
        burden_psa_DT,
        hpv_distribution_ratio_psa_DT
      )

      # add country specific table to full psa data table
      psadat <- rbindlist (list (psadat, country_psa))


    } # end of loop -- for (country_code in countries)

    # drop hpv_distribution_non_vaccine_ratio column for vimc format file
    psadat_temp <- psadat [, - ("hpv_distribution_non_vaccine_ratio")]

    # reshape psa data table to wide format (for VIMC)
    psadat_vimc <- dcast (data      = psadat_temp,
                          formula   = run_id ~ country,
                          sep       = ":",
                          value.var = colnames (psadat_temp [, -c("run_id", "country")]))

    # create list of psa data for internal runs and VIMC upload
    psadat_list <- list (psadat      = psadat,
                         psadat_vimc = psadat_vimc)

    # save parameters values for sensitivity analysis -- internal
    fwrite (x    = psadat_list [["psadat"]],
            file = psadat_file)

    # save parameters values for sensitivity analysis -- VIMC upload
    fwrite (x    = psadat_list [["psadat_vimc"]],
            file = psadat_vimc_file)

  } else {

    # read sample of input parameters from file
    psadat_list <- list (psadat      = fread (psadat_file),
                         psadat_vimc = fread (psadat_vimc_file))

  } # end of -- if (create_new)

  # return psa data for probabilistic sensitivity analysis
  # return (list (psadat      = psadat,
  #               psadat_vimc = psadat_vimc))
  return (psadat_list)

} # end of function -- old_CreatePsaData
#-------------------------------------------------------------------------------
# This old function can be removed and not used further
# ------------------------------------------------------------------------------


# ------------------------------------------------------------------------------
#' Generate/read Latin hyper cube sample of parameters for sensitivity analysis
#'
#' Generate Latin hyper cube sample of input parameters based on their
#'   distributions for probabilistic sensitivity analysis.
#'   If file (sample of parameters) already exists,
#'   then read Latin hyper cube sample of input parameters.
#'
#' @param country_codes ISO3 country codes of countries
#' @param vaccine bivalent/quadrivalent or nonavalent HPV vaccine
#' @param psa_runs integer, simulation runs for sensitivity analysis
#' @param seed_state integer, seed value for random number generator
#' @param psadat_file character string, file to save Latin hyper cube
#'   sample of input parameters
#' @param psadat_vimc_file character string, file to save Latin hyper cube
#'   sample of input parameters (VIMC format)
#' @param run_lhs logical, if TRUE create new sample of input parameters,
#'   if FALSE, read sample of input parameters from file
#'
#' @return Null return value; disease burden estimates are saved to corresponding files
#' @export
#' @import lhs
#' @import stats
#' @importFrom prevalence betaExpert
#'
#' @examples
#'   CreatePsaData (
#'     country_codes    = c("AFG", "ALB"),
#'     vaccine          = "4vHPV",
#'     psa_runs         = 200,
#'     seed_state       = 1,
#'     psadat_file      = "psadat.csv",
#'     psadat_vimc_file = "psadat_vimc.csv",
#'     run_lhs          = TRUE)
# ------------------------------------------------------------------------------
CreatePsaData <- function (country_codes,
                               vaccine          = "4vHPV",
                               psa_runs         = 0,
                               seed_state       = 1,
                               psadat_file      = "psadat.csv",
                               psadat_vimc_file = "psadat_vimc.csv",
                               run_lhs          = FALSE) {

  # create new sample of input parameters (or)
  # read sample of input parameters from file
  if (run_lhs) {

    # set seed for reproducibility
    set.seed (seed = seed_state)

    # sensitivity analysis
    if (psa_runs > 1) {

      # create empty psa data table (8 + 1 psa parameters)
      psadat <- data.table (
        country                            = character (),  # iso3 country code
        run_id                             = numeric   (),  # run id (psa run)
        dw_diagnosis                       = numeric   (),  # disability weight - diagnosis phase
        dw_control                         = numeric   (),  # disability weight - control phase
        dw_metastatic                      = numeric   (),  # disability weight - metastatic phase
        dw_terminal                        = numeric   (),  # disability weight - terminal phase
        incidence_ratio                    = numeric   (),  # cervical cancer incidence ratio
        mortality_ratio                    = numeric   (),  # cervical cancer mortality ratio
        prevalence_ratio                   = numeric   (),  # cervical cancer prevalence ratio
        hpv_distribution_ratio             = numeric   (),  # hpv (types in vaccine) distribution ratio
        hpv_distribution_non_vaccine_ratio = numeric   (),  # hpv (non-vaccine types) distribution ratio
        one_dose_vaccine_efficacy_ratio    = numeric   ()   # one-dose vaccine efficacy ratio
      )
    }

    # cervical cancer phases
    # 1 - diagnosis; 2 - control, 3 - metastatic; 4 - terminal
    cecx_phases <- c ("diagnosis", "control", "metastatic", "terminal")

    # disease burden metrics
    burden_metrics <- c ("incidence", "mortality", "prevalence")

    # hpv distribution
    # HPV 16/18 for bivalent or quadrivalent vaccine  (4vHPV)
    # HPV 16/18/31/31/45/52/58 for nonavalent vaccine (9vHPV)

    # hpv (types in vaccine) distribution ratio (and) hpv (non-vaccine types) distribution ratio
    hpv_distribution_ratios <- 2

    # vaccine efficacy for one-dose vaccination
    vaccine_efficacy_parameter <- 1

    # number of parameters
    parameters <- length (cecx_phases) + length (burden_metrics) + hpv_distribution_ratios + vaccine_efficacy_parameter

    # create data table specific for each country with parameter values for
    # probabilistic sensitivity analysis
    for (country_code in country_codes) {

      # construct a random Latin hypercube design
      cube <- randomLHS (n = psa_runs,
                         k = parameters)

      # ------------------------------------------------------------------------
      # If no data available for a country, switch to another country as proxy
      if ( country_code %in% c ("XK") ) {
        proxy <- TRUE
        country_code <- switch (
          country_code,
          "XK"  = "ALB",  # demography (unwpp) available for XK
          # no data for burden, demography (who), cost for XK
          country_code
        )
      } else {
        proxy <- FALSE
      }
      # ------------------------------------------------------------------------


      #-------------------------------------------------------------------------
      # disability weights -- psa values
      #-------------------------------------------------------------------------

      # create data table to store psa values of
      # disability weights of different sequelaes (cancer phases)
      dw_psa_DT <- data.table (run_id = c (1:psa_runs))

      # loop through disability weights of different sequelaes (cancer phases)
      for (i in 1:length (cecx_phases)) {

        # disability weight for a cancer phase -- mean and 95% uncertainty intervals
        dw <- data.disability_weights [Source == "gbd_2017" & Sequela == cecx_phases [i],
                                       c(Mid, Low, High)]
        names (dw) <- c("mid", "low", "high")

        # get shape parameters of beta distribution
        shape_param <- betaExpert (best   = dw ["mid"],
                                   lower  = dw ["low"],
                                   upper  = dw ["high"],
                                   p      = 0.95,
                                   method = "mean")

        # sample input parameter values (latin hyper cube sampling)
        # based on their distributions for psa runs
        dw_psa_values <- qbeta (p      = cube [, i],
                                shape1 = shape_param$alpha,
                                shape2 = shape_param$beta)

        # data table of psa values containg
        # disability weights of different sequelaes (cancer phases)
        dw_psa_DT <- cbind (dw_psa_DT, dw_psa_values)

      } # end of loop -- for (i in 1:length (cecx_phases))

      # set column names for psa table of disability weights
      names (dw_psa_DT) <- c ("run_id",
                              "dw_diagnosis",
                              "dw_control",
                              "dw_metastatic",
                              "dw_terminal")

      #-------------------------------------------------------------------------
      # (incidence, mortality, prevalence) burden ratios -- psa values
      #-------------------------------------------------------------------------

      # create data table to store psa values of
      # (incidence, mortality, prevalence) ratios
      burden_psa_DT <- data.table ()


      # loop through burden ratios
      for (i in 1:length (burden_metrics)) {

        # mean value and 95% uncertainty intervals -- incidence, mortality, prevalence
        #
        # incidence
        if (burden_metrics [i] == "incidence") {
          burden  <- data.incidence_ui  [iso3 == country_code, c(Mid, Low, High)]

          # set proxy values (incidence) for missing countries
          # based around the median of available data for other countries
          if (length (burden) == 0) {
            burden <- c (1,
                         median (data.incidence_ui$Low  / data.incidence_ui$Mid),
                         median (data.incidence_ui$High / data.incidence_ui$Mid))
          }
        }
        # mortality
        else if (burden_metrics [i] == "mortality") {
          burden  <- data.mortcecx_ui   [iso3 == country_code, c(Mid, Low, High)]

          # set proxy values (mortality) for missing countries
          # based around the median of available data for other countries
          if (length (burden) == 0) {
            burden <- c (1,
                         median (data.mortcecx_ui$Low  / data.mortcecx_ui$Mid),
                         median (data.mortcecx_ui$High / data.mortcecx_ui$Mid))
          }
        }
        # prevalence
        else if (burden_metrics [i] == "prevalence") {
          burden <- data.prevalence_ui [iso3 == country_code, c(Mid, Low, High)]

          # set proxy values (prevalence) for missing countries
          # based around the median of available data for other countries
          if (length (burden) == 0) {
            burden <- c (1,
                         median (data.prevalence_ui$Low  / data.prevalence_ui$Mid),
                         median (data.prevalence_ui$High / data.prevalence_ui$Mid))
          }
        }

        names (burden)  <- c("mid", "low", "high")

        # log of burden values
        log_burden  <- log (burden)

        # Estimating the global cancer incidence and mortality in 2018:
        # GLOBOCAN sources and methods
        # (refer to paper for defintion of uncertainty intervals)
        # https://www.ncbi.nlm.nih.gov/pubmed/30350310

        # standard error in log scale
        log_burden_se  <- (log_burden  ["high"] - log_burden ["low"]) / 3.92

        # sample input parameter values (latin hyper cube sampling)
        # based on their distributions for psa runs
        burden_psa_values <- qlnorm (p       = cube [, (length (cecx_phases) + i)],
                                     meanlog = log_burden ["mid"],
                                     sdlog   = log_burden_se)

        # ratio of psa values (incidence, mortality, prevalence) to mean values
        burden_ratio  <- burden_psa_values / burden ["mid"]

        # data table of psa values containg
        # (incidence, mortality, prevalence) ratios
        burden_psa_DT <- cbind (burden_psa_DT, burden_ratio)

      } # end of loop -- for (i in 1:length (burden_metrics))

      # set column names for psa table of disability weights
      names (burden_psa_DT) <- c ("incidence_ratio",
                                  "mortality_ratio",
                                  "prevalence_ratio")

      #-------------------------------------------------------------------------
      # hpv distribution ratios -- psa values
      #-------------------------------------------------------------------------

      # create data table to store psa values of
      # hpv distribution
      hpv_distribution_ratio_psa_DT <- data.table ()
      # data.table (hpv_distribution_ratio             = numeric (),
      #             hpv_distribution_non_vaccine_ratio = numeric ())

      # hpv distribution -- mean and 95% uncertainty intervals
      # HPV 16/18 for bivalent or quadrivalent vaccine  (4vHPV)
      # HPV 16/18/31/31/45/52/58 for nonavalent vaccine (9vHPV)
      if (vaccine == "4vHPV") {

        # Relative contribution of HPV 16/18 in ICC HPV-positive cases
        hpv_distribution <- data.hpv_distribution [iso3 == country_code,
                                                   c("hpv_4v", "hpv_4v_low", "hpv_4v_high")]
      } else if (vaccine == "9vHPV") {

        # Relative contribution of HPV 16/18/31/33/45/52/58 in ICC HPV-positive cases
        hpv_distribution <- data.hpv_distribution [iso3 == country_code,
                                                   c("hpv_9v", "hpv_9v_low", "hpv_9v_high")]
      }

      # change hpv distribution values from percentage to proportion
      hpv_distribution <- hpv_distribution / 100

      # set names for values
      names (hpv_distribution) <- c("mid", "low", "high")

      # get shape parameters of beta distribution
      shape_param <- betaExpert (best   = hpv_distribution$mid,
                                 lower  = hpv_distribution$low,
                                 upper  = hpv_distribution$high,
                                 p      = 0.95,
                                 method = "mean")

      # sample input parameter values (latin hyper cube sampling)
      # based on their distributions for psa runs
      # hpv (types in vaccine) distribution
      hpv_distribution_psa_values <- qbeta (p      = cube [, length (cecx_phases) +
                                                             length (burden_metrics) +
                                                             hpv_distribution_ratios - 1],
                                            shape1 = shape_param$alpha,
                                            shape2 = shape_param$beta)

      # hpv (non-vaccine types) distribution
      hpv_distribution_non_vaccine_psa_values <- 1 - hpv_distribution_psa_values

      # ratio of hpv distribution values to mean values (types in vaccine)
      hpv_distribution_ratio <- hpv_distribution_psa_values / hpv_distribution$mid

      # ratio of hpv distribution values to mean values (non-vaccine types)
      hpv_distribution_non_vaccine_ratio <-
        hpv_distribution_non_vaccine_psa_values / (1 - hpv_distribution$mid)

      # data table of psa values containing
      # hpv distribution ratios
      hpv_distribution_ratio_psa_DT <- cbind (hpv_distribution_ratio,
                                              hpv_distribution_non_vaccine_ratio)

      # # set column names for psa table of hpv distribution ratios
      # names (hpv_distribution_ratio_psa_DT) <- c ("hpv_distribution_ratio")

      #-------------------------------------------------------------------------


      #-------------------------------------------------------------------------
      # one-dose vaccine efficacy

      # Barnabas, R.V., Brown, E.R., Onono, M.A. et al.
      # Durability of single-dose HPV vaccination in young Kenyan women: randomized controlled trial 3-year results.
      # Nat Med 29, 3224–3232 (2023). https://doi.org/10.1038/s41591-023-02658-0
      #
      # bivalent VE was 97.5% (95% CI 90.0–99.4%, P<0.0001)
      #-------------------------------------------------------------------------

      # create data table to store psa values of
      # one-dose vaccine efficacy
      one_dose_vaccine_efficacy_psa_DT <- data.table ()

      # one-dose efficacy
      one_dose_efficacy_value <- c (mid = 0.975, low = 0.9, high = 0.994)

      # get shape parameters of beta distribution
      shape_param <- betaExpert (best   = one_dose_efficacy_value ["mid"],
                                 lower  = one_dose_efficacy_value ["low"],
                                 upper  = one_dose_efficacy_value ["high"],
                                 p      = 0.95,
                                 method = "mean")

      # sample input parameter values (latin hyper cube sampling)
      # based on their distributions for psa runs
      # one-dose vaccine efficacy
      one_dose_vaccine_efficacy <- qbeta (p      = cube [, length (parameters)],
                                          shape1 = shape_param$alpha,
                                          shape2 = shape_param$beta)

      # one-dose efficacy ratio
      one_dose_vaccine_efficacy_ratio <- one_dose_vaccine_efficacy / one_dose_efficacy_value ["mid"]

      # data table of psa values containing
      # one-dose vaccine efficacy
      one_dose_vaccine_efficacy_psa_DT <- cbind (one_dose_vaccine_efficacy_ratio)

      #-------------------------------------------------------------------------


      # ------------------------------------------------------------------------
      # Switch back to original country for which proxy was set due to unavailable data
      if (proxy) {
        proxy <- FALSE
        country_code <- switch (
          country_code,
          "ALB" = "XK",
          country_code
        )
      }
      # ------------------------------------------------------------------------

      # create data table specific for this country with psa parameter values
      country_psa <- data.table (
        country = rep (x = country_code, times = psa_runs),
        dw_psa_DT,
        burden_psa_DT,
        hpv_distribution_ratio_psa_DT,
        one_dose_vaccine_efficacy_psa_DT
      )

      # add country specific table to full psa data table
      psadat <- rbindlist (list (psadat, country_psa))

    } # end of loop -- for (country_code in countries)

    # drop hpv_distribution_non_vaccine_ratio column for vimc format file
    psadat_temp <- psadat [, - ("hpv_distribution_non_vaccine_ratio")]

    # reshape psa data table to wide format (for VIMC)
    psadat_vimc <- dcast (data      = psadat_temp,
                          formula   = run_id ~ country,
                          sep       = ":",
                          value.var = colnames (psadat_temp [, -c("run_id", "country")]))

    # create list of psa data for internal runs and VIMC upload
    psadat_list <- list (psadat      = psadat,
                         psadat_vimc = psadat_vimc)

    # save parameters values for sensitivity analysis -- internal
    fwrite (x    = psadat_list [["psadat"]],
            file = psadat_file)

    # save parameters values for sensitivity analysis -- VIMC upload
    fwrite (x    = psadat_list [["psadat_vimc"]],
            file = psadat_vimc_file)

  } else {

    # read sample of input parameters from file
    psadat_list <- list (psadat      = fread (psadat_file),
                         psadat_vimc = fread (psadat_vimc_file))

  } # end of -- if (create_new)

  # return psa data for probabilistic sensitivity analysis
  # return (list (psadat      = psadat,
  #               psadat_vimc = psadat_vimc))
  return (psadat_list)

} # end of function -- CreatePsaData
#-------------------------------------------------------------------------------


#-------------------------------------------------------------------------------
#' Emulate vaccine impact estimates (VIMC stochastic runs)
#'
#' Emulate vaccine impact estimates for VIMC stochastic runs/sensitivity
#'   analysis. The inputs are central disease burden estimates, input
#'   parameter distributions (latin hyper sampling), runs for sensitivity analysis,
#'   and filename for stochastic burden estimates. The outputs are stochastic
#'   disease burden estimates (full results file plus a file per country).
#'
#'   Stochastic disease burden estimates are generated.
#'     (i)  full results files
#'            1 full results file for pre-vaccination (optional)
#'            1 full results file for post-vaccination
#'     (ii) 1 file per country for all runs
#'            1 file per country for all runs -- pre-vaccination (optional)
#'            1 file per country for all runs -- post-vaccination
#'
#' @param disease_burden_template_file csv file (required),
#'   central disease burden template (vimc format); add column with run_id to get
#'   stochastic disease burden template
#' @param centralBurdenResultsFile csv file (required), central disease burden
#'   estimates pre and post vaccination
#' @param psaData data table (required), latin hyper cube sample of input parameters
#' @param diseaseBurdenStochasticFolder character string (required),
#'   stochastic disease burden estimates folder (output)
#' @param diseaseBurdenStochasticFile character string (required),
#'   stochastic disease burden estimates file(s) (output)
#' @param psa_runs integer (required), simulation runs for sensitivity analysis
#' @param countryCodes list (optional), If country codes are provided,
#'   stochastic burden estimates are generated for these countries.
#'   If set to -1, then stochastic burden estimates are generated for the
#'   countries included in the central burden estimates.
#' @param vaccination_scenario logical (required), generate stochastic burden
#'   estimates for (vaccination) or (no vaccination) scenario
#'
#' @return Null return value; disease burden estimates are saved to corresponding files
#' @export
#'
EmulateVaccineImpactVimcStochastic <- function (disease_burden_template_file,
                                                centralBurdenResultsFile,
                                                psaData,
                                                diseaseBurdenStochasticFolder,
                                                diseaseBurdenStochasticFile,
                                                psa_runs,
                                                countryCodes = -1,
                                                vaccination_scenario) {

  # read file -- central disease burden template
  vimc_template <- fread (disease_burden_template_file)

  # read in central burden results
  central_burden <- fread (centralBurdenResultsFile)

  # initialise empty table to save stochastic results in vimc format
  header <- vimc_template [0, ]                 # empty table with requisite
  #   columns
  header [, run_id := numeric ()]               # add column for run id
  setcolorder (header, c("disease", "run_id"))  # reorder columns

  # save header to file for stochastic estimates of disease burden
  stochastic_file <- paste0 (diseaseBurdenStochasticFolder,
                             diseaseBurdenStochasticFile,
                             ".csv")
  fwrite (x      = header,
          file   = stochastic_file,
          append = FALSE)


  # extract burden estimates for pre- or post-vaccination
  if (vaccination_scenario) {

    # vaccination scenario
    central_burden <- central_burden [scenario == "post-vaccination"]

  } else {

    # no vaccination scenario
    central_burden <- central_burden [scenario == "pre-vaccination"]
  }

  # get country codes
  if (countryCodes [1] == -1) {
    countryCodes <- unique (central_burden [, country])
  }

  # generate stochastic burden estimates for each country
  for (country_code in countryCodes) {

    # get disease burden template for current country (of this loop)
    vimc_template_country <- vimc_template [country == country_code]

    # get central burden estimates for current country
    central_burden_country <- central_burden [country == country_code]

    # get latin hyper cube sample of input parameters for curent country
    psadat_country <- psaData [country == country_code]

    # file to save stochastic burden estimates of current country
    stochastic_file_country <- paste0 (diseaseBurdenStochasticFolder,
                                       diseaseBurdenStochasticFile,
                                       "_", country_code,
                                       ".csv")

    # initialise header for stochastic burden estimates
    stochastic_burden_country <- header

    # generate post-hoc stochastic estimates for each run
    for (run_number in 1:psa_runs) {

      # ------------------------------------------------------------------------
      # initialise burden estimate to central burden estimates
      # burden <- central_burden_country  # in this way, data table will work by reference
      # make a copy of data table to avoid reference
      burden <- copy (central_burden_country)
      # ------------------------------------------------------------------------

      # ------------------------------------------------------------------------
      # probabilistic sensitivity analysis to generate stochastic estimates

      # cases -- incidence
      burden [, inc.cecx := (inc.cecx * psadat_country [run_id == run_number,
                                                        incidence_ratio]
                             * psadat_country [run_id == run_number,
                                               hpv_distribution_ratio])]

      # deaths -- mortality
      burden [, mort.cecx := (mort.cecx * psadat_country [run_id == run_number,
                                                          mortality_ratio]
                              * psadat_country [run_id == run_number,
                                                hpv_distribution_ratio])]
      # prevalence
      burden [, prev.cecx := (prev.cecx * psadat_country [run_id == run_number,
                                                          prevalence_ratio]
                              * psadat_country [run_id == run_number,
                                                hpv_distribution_ratio])]

      # YLL -- premature mortality
      burden [, lifey := (lifey * psadat_country [run_id == run_number,
                                                  mortality_ratio]
                          * psadat_country [run_id == run_number,
                                            hpv_distribution_ratio])]

      # ------------------------------------------------------------------------
      # estimation of YLD -- disability

      # disability weights for different phases of cervical cancer
      # (diagnosis & therapy, controlled, metastatic, terminal)
      dw = list (diag       = psadat_country [run_id == run_number,
                                              dw_diagnosis],
                 control    = psadat_country [run_id == run_number,
                                              dw_control],
                 metastatic = psadat_country [run_id == run_number,
                                              dw_metastatic],
                 terminal   = psadat_country [run_id == run_number,
                                              dw_terminal]
      )

      # duration of different phases of cervical cancer
      # (diagnosis & therapy, controlled, metastatic, terminal) -- unit in years
      disability.weights <- "gbd_2017"

      cecx_duration = list (diag       = data.disability_weights [Source == disability.weights &
                                                                    Sequela=="diagnosis",
                                                                  Duration],
                            metastatic = data.disability_weights [Source == disability.weights &
                                                                    Sequela=="metastatic",
                                                                  Duration],
                            terminal   = data.disability_weights [Source == disability.weights &
                                                                    Sequela=="terminal",
                                                                  Duration])
      # duration of controlled phases is based on remainder of
      # time after attributing to other phases

      # YLD -- disability
      # combine yld contribution from (incidence, prevalence and mortality) cases
      burden [, disability := (inc.cecx  * dw$diag * cecx_duration$diag) +
                (prev.cecx * dw$control) +
                (mort.cecx * ( (dw$metastatic * cecx_duration$metastatic) +
                                 (dw$terminal   * cecx_duration$terminal) ) ) ]

      # ------------------------------------------------------------------------

      # ------------------------------------------------------------------------

      # convert results to vimc format
      convert_results <- OutputVimc (DT            = burden,
                                     calendar_year = TRUE,
                                     vimc_template = vimc_template_country)

      # drop scenario (post-vaccination or pre-vaccination) column
      convert_results [, scenario := NULL]

      # add column for run id
      convert_results [, run_id := run_number]

      # add stochastic burden estimate adjusted by sensitivity analysis to the
      # stochastic burden table of all runs
      stochastic_burden_country <- rbind (stochastic_burden_country,
                                          convert_results,
                                          use.names = TRUE)

    } # end of -- for (run_id in 1:psa_runs)

    # save stochastic burden estimates of current country
    # fwrite (x      = stochastic_burden_country,
    #         file   = stochastic_file_country,
    #         append = FALSE)

    # save stochastic burden estimates of current country to larger file with
    # stochastic burden estimates of all countries
    fwrite (x      = stochastic_burden_country,
            file   = stochastic_file,
            append = TRUE)

  } # end of -- for (country_code in countryCodes)

  return (NULL)

} # end of function -- EmulateVaccineImpactVimcStochastic
#-------------------------------------------------------------------------------


#-------------------------------------------------------------------------------
#' Generate vaccine impact estimates - HPV all types (VIMC central run)
#'
#' Generate vaccine impact estimates (HPV all types) for VIMC central runs.
#'   The inputs are cecx burden estimates pre- and post-vaccination with
#'     4vHPV / 9vHPV at different ages.
#'   The outputs are cecx burden estimates pre- and post-vaccination with
#'     HPV all types at different ages.
#'
#' Three disease burden estimates (HPV all types) are generated.
#'   (i)   disease burden estimates for no vaccination (vimc format)
#'   (ii)  disease burden estimates for vaccination (vimc format)
#'   (iii) disease burden estimates for vaccination (pre- and post-vaccination)
#'           and includes YLDs and YLLs
#'
#' @param cecx_burden_file csv file (input), disease burden estimates
#'   (cecx burden attributable to HPV 16/18)
#' @param cecx_burden_file_all csv file (output), disease burden estimates
#'   (cecx burden attributable to HPV all types)
#' @param vaccine character, bivalent/quadrivalent/nonovalent HPV vaccine
#' @param disease_burden_template_file csv file (input), disease burden template
#'   (vimc format)
#' @param disease_burden_no_vaccination_file_all csv file (output), disease burden estimates
#'   for no vaccination (cecx burden attributable to HPV all types - vimc format)
#' @param disease_burden_vaccination_file_all csv file (output), disease burden estimates
#'   for vaccination (cecx burden attributable to HPV all types - vimc format)
#' @param disease_burden_vaccination_file_all csv file (output), disease burden estimates
#'   for vaccination (cecx burden attributable to HPV all types - vimc format)
#'
#' @return Null return value; disease burden estimates are saved to corresponding files
#' @export
#'
#' @examples
#'     Estimate_all_cecx_burden_central (
#'       cecx_burden_file                       = "central_burden_results_file.csv",
#'       cecx_burden_file_all                   = "central_burden_results_file_all.csv",
#'       vaccine                                = "4vHPV",
#'       disease_burden_template_file           = "central_burden_template_file.csv",
#'       disease_burden_no_vaccination_file_all = "central_burden_no_vaccination_file_all_vimc_format.csv",
#'       disease_burden_vaccination_file_all    = "central_burden_vaccination_file_all_vimc_format.csv",
#'       vimc_output                            = TRUE)
#'
Estimate_all_cecx_burden_central <- function (cecx_burden_file,
                                              cecx_burden_file_all,
                                              vaccine,
                                              disease_burden_template_file           = "",
                                              disease_burden_no_vaccination_file_all = "",
                                              disease_burden_vaccination_file_all    = "",
                                              vimc_output                            = TRUE) {

  # read cecx burden estimates (vaccine HPV types) pre- and post-vaccination
  cecx_burden <- fread (file             = cecx_burden_file,
                        header           = "auto",
                        stringsAsFactors = F)

  # split cecx burden estimates by pre- and post-vaccination
  cecx_burden_prevac  <- cecx_burden [scenario == "pre-vaccination" ]
  cecx_burden_postvac <- cecx_burden [scenario == "post-vaccination"]

  # combine data tables by matched columns
  # cecx_burden_prevac & cecx_burden_postvac
  cecx_burden_prepostvac <- cecx_burden_prevac [cecx_burden_postvac,
                                                on = .(type    = type,
                                                       age     = age,
                                                       country = country,
                                                       year    = year)]

  # create data table of country (iso3) and HPV vaccine type distribution
  if (vaccine == "4vHPV") {
    hpv_distribution <- data.hpv_distribution [, c ("iso3", "hpv_4v")]

    # rename hpv distribution column to hpv
    setnames (hpv_distribution, old = c("hpv_4v"), new = c ("hpv"))

  } else if (vaccine == "9vHPV") {

    hpv_distribution <- data.hpv_distribution [, c ("iso3", "hpv_9v")]

    # rename hpv distribution column to hpv
    setnames (hpv_distribution, old = c("hpv_9v"), new = c ("hpv"))
  }

  # add a row for XK (Kosovo)
  new_row          <- data.table ("iso3" = "XK", "hpv" = hpv_distribution [iso3 == "ALB", hpv])
  hpv_distribution <- rbindlist (list (hpv_distribution, new_row))
  # ----------------------------------------------------------------------------

  # ----------------------------------------------------------------------------

  # combine data tables -- cecx_burden_prepostvac & hpv_distribution
  cecx_burden_prepostvac <- merge (x     = cecx_burden_prepostvac,
                                   y     = hpv_distribution,
                                   by.x  = "country",
                                   by.y  = "iso3",
                                   all.x = TRUE)

  # ----------------------------------------------------------------------------
  # add additional burden due to non-vaccine hpv types causing cervical cancer to
  # both pre- and post-vaccination burden due to vaccine hpv types causing cervical cancer

  # incidence
  # cecx_burden_prepostvac [, i.inc.cecx := i.inc.cecx + (inc.cecx * (100/hpv - 1)) ]
  # cecx_burden_prepostvac [, inc.cecx   := inc.cecx * (100/hpv) ]

  # burden -- incidence, mortality, yll, yld, cost
  for (burden_type in c("inc.cecx", "mort.cecx", "lifey", "disability", "cost.cecx", "prev.cecx")) {

    cecx_burden_prepostvac [, paste0("i.",burden_type) :=
                              get (paste0("i.",burden_type)) +
                              (get(burden_type) * (100/hpv - 1)) ]

    cecx_burden_prepostvac [, paste0(burden_type) := get(burden_type) * 100/hpv]
  }
  # ----------------------------------------------------------------------------

  # ----------------------------------------------------------------------------
  # (i) split the table into 2 tables for pre- and post-vaccination with burden
  #     estimates for all hpv types causing cervical cancer
  # (ii) combine the 2 tables for pre- and post-vaccination

  cecx_burden_prevac_all <- cecx_burden_prepostvac [, c("country",
                                                        "scenario",
                                                        "type",
                                                        "age",
                                                        "cohort_size",
                                                        "vaccinated",
                                                        "immunized",
                                                        "inc.cecx",
                                                        "mort.cecx",
                                                        "lifey",
                                                        "disability",
                                                        "cost.cecx",
                                                        "prev.cecx",
                                                        "year")]

  cecx_burden_postvac_all <- cecx_burden_prepostvac [, c("country",
                                                         "i.scenario",
                                                         "type",
                                                         "age",
                                                         "i.cohort_size",
                                                         "i.vaccinated",
                                                         "i.immunized",
                                                         "i.inc.cecx",
                                                         "i.mort.cecx",
                                                         "i.lifey",
                                                         "i.disability",
                                                         "i.cost.cecx",
                                                         "i.prev.cecx",
                                                         "year")]


  # rename column names
  setnames (cecx_burden_postvac_all,
            old = c("i.scenario",
                    "i.cohort_size",
                    "i.vaccinated",
                    "i.immunized",
                    "i.inc.cecx",
                    "i.mort.cecx",
                    "i.lifey",
                    "i.disability",
                    "i.cost.cecx",
                    "i.prev.cecx"),
            new = c("scenario",
                    "cohort_size",
                    "vaccinated",
                    "immunized",
                    "inc.cecx",
                    "mort.cecx",
                    "lifey",
                    "disability",
                    "cost.cecx",
                    "prev.cecx"))

  # combine the 2 tables for pre- and post-vaccination
  cecx_burden_all <- rbind (cecx_burden_prevac_all,
                            cecx_burden_postvac_all,
                            use.names = TRUE)
  # ----------------------------------------------------------------------------

  # save file cecx burden estimates (all HPV types) pre- and post-vaccination
  fwrite (x         = cecx_burden_all,
          file      = cecx_burden_file_all,
          col.names = T,
          row.names = F)

  # ----------------------------------------------------------------------------
  # if output is to be saved in vimc format
  if (vimc_output) {

    # read vimc disease burden template file
    vimc_template <- fread (file             = disease_burden_template_file,
                            header           = "auto",
                            stringsAsFactors = F)

    # convert results to vimc format
    convert_results <- OutputVimc (DT            = cecx_burden_all,
                                   calendar_year = TRUE,
                                   vimc_template = vimc_template)

    # Saving output for no vaccination scenario (vimc format)
    no_vaccination <- convert_results [scenario == "pre-vaccination"]
    no_vaccination <- no_vaccination  [, colnames (vimc_template), with=FALSE]
    no_vaccination <- no_vaccination  [!is.na(deaths)]
    fwrite (x    = no_vaccination,
            file = disease_burden_no_vaccination_file_all)

    # Saving output for vaccination scenario (vimc format)
    vaccination <- convert_results [scenario == "post-vaccination"]
    vaccination <- vaccination     [, colnames (vimc_template), with=FALSE]
    vaccination <- vaccination     [!is.na(deaths)]
    fwrite (x    = vaccination,
            file = disease_burden_vaccination_file_all)
  }
  # ----------------------------------------------------------------------------


  return ()

} # end of function -- Estimate_all_cecx_burden_central
#-------------------------------------------------------------------------------


#-------------------------------------------------------------------------------
#' Generate diagnostic plots of vaccine coverage and burden estimates
#'
#' Generate diagnostic plots of vaccine coverage and
#'   burden estimates (cases, deaths, dalys)
#'
#' Diagnostic plots of vaccine coverage and burden estimates (cases, deaths, dalys)
#'   are generated for each country and scenario. Optionally, comparative plots
#'   across all scenarios can be generated.
#'
#' @param vaccine_coverage_folder character, folder to read vaccine coverage of
#'          different scenarios
#' @param coverage_prefix character, prefix of coverage file
#' @param touchstone character, touchstone (VIMC)
#' @param scenarios character, names of vaccination scenarios
#' @param no_vaccine_scenario character, name of no vaccination scenario
#' @param plot_no_vaccine_scenario, logical, if TRUE then also generate plots
#'          for no vaccine scenario
#' @param burden_estimate_folder character, folder to read burden estimates of
#'          different scenarios
#' @param plot_folder character, diagnostic plot folder
#' @param plot_file character, diagnostic plot file
#' @param plot_type character, diagnostic plot type (central or stochastic)
#' @param countries, character, "all" countries or specific countries (iso3 codes)
#' @param start_year numeric, start year of plot
#' @param end_year numeric, end year of plot
#' @param compare_plots logical, if TRUE then generate comparative plots
#' @param vaccine_prefix character, prefix of burden estimates file (vaccination scenarios)
#' @param no_vaccine_prefix character, prefix of burden estimates file (no vaccination scenario)
#'
#' @return Null return value; diagnostic plots are saved to file
#' @export
#'
#' @examples
#' Generate_diagnostic_plots (
#'     vaccine_coverage_folder    = "vaccine_coverage",
#'     coverage_prefix            = "coverage_",
#'     touchstone                 = "touchstone",
#'     scenarios                  = c ("hpv-routine-default", "hpv-routine-best"),
#'     no_vaccine_scenario        = "hpv-no-vaccination",
#'     plot_no_vaccine_scenario   = TRUE,
#'     burden_estimate_folder     = "output_all",
#'     plot_folder                = "plots"
#'     plot_file                  = "plot_file.pdf",
#'     plot_type                  = "central"
#'     countries                  = "all",
#'     start_year                 = 2000,
#'     end_year                   = 2100,
#'     compare_plots              = TRUE,
#'     vaccine_prefix             = "central-burden-vaccination_all_",
#'     no_vaccine_prefix          = "central-burden-novaccination_all_" )
#'
Generate_diagnostic_plots<- function (vaccine_coverage_folder,
                                      coverage_prefix,
                                      touchstone,
                                      scenarios,
                                      no_vaccine_scenario,
                                      plot_no_vaccine_scenario = TRUE,
                                      burden_estimate_folder,
                                      plot_folder,
                                      plot_file,
                                      plot_type,
                                      countries,
                                      start_year               = -1,
                                      end_year                 = -1,
                                      compare_plots            = FALSE,
                                      vaccine_prefix,
                                      no_vaccine_prefix
) {

  # if also plotting no vaccination scenario (in addition to vaccination scenarios),
  # then add no vaccination scenario in generation of diagnostic plots
  if (plot_no_vaccine_scenario) {
    scenarios <- c (no_vaccine_scenario, scenarios)
  }

  # diagnostic plots filename
  pdf (file.path (plot_folder, plot_file))

  # burden estimates of all scenarios
  all_burden <- NULL

  # scenarios
  for (scenario_name in scenarios) {

    # vaccine coverage file
    vaccine_coverage_file <- file.path (vaccine_coverage_folder,
                                        paste0 (coverage_prefix,
                                                touchstone,
                                                "_",
                                                scenario_name,
                                                ".csv"))

    # burden estimate filename
    if (scenario_name == no_vaccine_scenario) {

      burden_estimate_file <- paste0 (no_vaccine_prefix,
                                      touchstone,
                                      "_",
                                      scenario_name,
                                      ".csv")

    } else {

      burden_estimate_file <- paste0 (vaccine_prefix,
                                      touchstone,
                                      "_",
                                      scenario_name,
                                      ".csv")
    }


    # burden file with folder
    burden_file <- file.path (burden_estimate_folder,
                              burden_estimate_file)

    # read data -- vaccine coverage and burden estimates
    vaccine_coverage <- fread (vaccine_coverage_file)
    burden_estimate  <- fread (burden_file)

    # set start and end year if not set
    if (start_year == -1) {
      start_year <- min (burden_estimate [, year])
    }
    if (end_year == -1) {
      end_year <- max (burden_estimate [, year])
    }

    # extract vaccine coverage and burden estimates between start and end years
    vaccine_coverage <- vaccine_coverage [year >= start_year & year <= end_year]
    burden_estimate  <- burden_estimate  [year >= start_year & year <= end_year]

    # add scenario name to burden estimate data table
    burden_estimate [, scenario := scenario_name]

    # ------------------------------------------------------------------------
    # If comparative plots across all scenarios is desired, than combine
    # burden estimates across all scenario. Make sure the combined data table
    # size is within the size of RAM.
    if (compare_plots) {
      # combine burden estimates of all scenarios
      if (is.null (all_burden)) {
        all_burden <- burden_estimate
      } else {
        all_burden <- rbindlist (list (all_burden,
                                       burden_estimate),
                                 use.names = TRUE)
      }
    }
    # ------------------------------------------------------------------------

    # if countries are specified to all, then set countries to all countries in coverage file
    if (countries[1] == "all") {
      countries	<- as.character (unique (burden_estimate [, country] ) )
    }

    # iso3 country codes
    country_iso3_codes <- countries

    # --------------------------------------------------------------------------
    # central_stochastic specific summary burden estimates
    if (plot_type == "central_stochastic") {

      # summary of burden estimates (sum burden by calendar year)
      burden_estimate_summary <-
        burden_estimate [, lapply (.SD, sum),
                         .SDcols = c ("cases",        "dalys",        "deaths",
                                      "cases.median", "dalys.median", "deaths.median",
                                      "cases.low",    "dalys.low",    "deaths.low",
                                      "cases.high",   "dalys.high",   "deaths.high"),
                         by = .(year, country, scenario)]
    }
    # --------------------------------------------------------------------------


    # plot for each country
    for (country_iso3_code in country_iso3_codes) {

      # plot vaccine coverage
      coverage_plot <- ggplot (data = vaccine_coverage [country_code == country_iso3_code],
                               aes (x = year,
                                    y = coverage * 100,
                                    color = factor (activity_type))) +
        scale_x_continuous (breaks = pretty_breaks ()) +
        geom_point () +
        labs (title = countrycode (sourcevar    = country_iso3_code,
                                   origin       = "iso3c",
                                   destination  = "country.name",
                                   custom_match = c('XK' = 'Kosovo') ),
              x = "Year",
              y = "Vaccine coverage (%)",
              colour = "vaccine") +
        theme_bw ()


      # ------------------------------------------------------------------------
      # central specific plot
      if (plot_type == "central") {

        # plot burden -- cases, deaths, dalys
        plotwhat       <- c("cases", "deaths", "dalys")
        plotwhat_label <- c("Cases", "Deaths", "DALYs")

        burden_plot_list <- lapply (1:length(plotwhat), function (i) {

          toplot = plotwhat [i]

          p <- ggplot(burden_estimate [country == country_iso3_code],
                      aes(x = year, y = get(toplot))) +
            scale_x_continuous (breaks = pretty_breaks ()) +
            stat_summary (fun = sum, geom = "line") +
            ylab (toplot) +
            labs (title = countrycode (sourcevar    = country_iso3_code,
                                       origin       = "iso3c",
                                       destination  = "country.name",
                                       custom_match = c('XK' = 'Kosovo') ),
                  x = "Year",
                  y = plotwhat_label [i]) +
            theme_bw ()
        })

      }
      # central_stochastic specific plot
      else if (plot_type == "central_stochastic") {


        # plot burden -- cases, deaths, dalys
        plotwhat        <- c ("cases", "deaths", "dalys")
        plotwhat_label  <- c ("Cases", "Deaths", "DALYs")

        plotwhat.median <- c ("cases.median", "deaths.median", "dalys.median")
        plotwhat.low    <- c ("cases.low",    "deaths.low",    "dalys.low")
        plotwhat.high   <- c ("cases.high",   "deaths.high",   "dalys.high")

        burden_plot_list <- lapply (1:length(plotwhat), function (i) {

          toplot = plotwhat [i]

          p <- ggplot (burden_estimate_summary [country == country_iso3_code],
                       aes (x = year)) +
            geom_line (aes (y = get ( plotwhat        [i] )), color = "black") +
            geom_line (aes (y = get ( plotwhat.median [i] )), color = "red") +
            geom_line (aes (y = get ( plotwhat.low    [i] )), color = "pink") +
            geom_line (aes (y = get ( plotwhat.high   [i] )), color = "pink") +
            scale_x_continuous (breaks = pretty_breaks ()) +
            # stat_summary (fun = sum, geom = "line") +
            ylab (toplot) +
            labs (title = countrycode (sourcevar   = country_iso3_code,
                                       origin      = "iso3c",
                                       destination = "country.name",
                                       custom_match = c('XK' = 'Kosovo') ),
                  x = "Year",
                  y = plotwhat_label [i]) +
            theme_bw ()
        })
      }
      # ------------------------------------------------------------------------

      # list of plots
      plot_list <- list (coverage_plot,
                         burden_plot_list [[1]],
                         burden_plot_list [[2]],
                         burden_plot_list [[3]])

      # arrange plots in a single page
      plots <- ggarrange (plotlist = plot_list,
                          ncol = 2,
                          nrow = 2)

      # print plots
      print (annotate_figure (plots,
                              top = text_grob (scenario_name,
                                               color = "black",
                                               size = 9)))

    } # end of loop -- for (country_iso3_code in country_iso3_codes)

  } # end of loop -- for (scenario_name in scenarios)


  # --------------------------------------------------------------------------
  # comparative plots across all scenarios
  if (compare_plots) {

    # add comparative plot across all scenarios for each country
    for (country_iso3_code in country_iso3_codes) {

      # plot burden -- cases, deaths, dalys
      plotwhat       <- c("cases", "deaths", "dalys")
      plotwhat_label <- c("Cases", "Deaths", "DALYs")

      for (i in 1:length (plotwhat)) {

        toplot = plotwhat [i]

        p <- ggplot(all_burden [country == country_iso3_code],
                    aes(x     = year,
                        y     = get (toplot),
                        group = scenario,
                        color = factor (scenario))) +
          scale_x_continuous (breaks = pretty_breaks ()) +
          stat_summary (fun = sum, geom = "line") +
          ylab (toplot) +
          labs (title = countrycode (sourcevar   = country_iso3_code,
                                     origin      = "iso3c",
                                     destination = "country.name",
                                     custom_match = c('XK' = 'Kosovo') ),
                x = "Year",
                y = plotwhat_label [i],
                colour = "Scenario") +
          theme_bw ()

        print (p)
      }

    } # end of loop -- for (country_iso3_code in country_iso3_codes)

  } # end of -- if (compare_plots)
  # --------------------------------------------------------------------------

  dev.off ()

  return ()

} # end of function -- Generate_diagnostic_plots
# ------------------------------------------------------------------------------


#-------------------------------------------------------------------------------
#' Emulate vaccine impact estimates (VIMC stochastic runs) - all cervical cancer burden
#'
#' Emulate vaccine impact estimates for VIMC stochastic runs/sensitivity
#'   analysis. The inputs are central disease burden estimates, input
#'   parameter distributions (latin hyper sampling), runs for sensitivity analysis,
#'   and filename for stochastic burden estimates.
#'   The outputs are stochastic disease burden estimates -- all cervical cancer burden.
#'   (full results file plus a file per country).
#'
#'
#'   Stochastic disease burden estimates are generated.
#'     (i)  full results files
#'            1 full results file for pre-vaccination (optional)
#'            1 full results file for post-vaccination
#'     (ii) 1 file per country for all runs
#'            1 file per country for all runs -- pre-vaccination (optional)
#'            1 file per country for all runs -- post-vaccination
#'
#' @param disease_burden_template_file csv file (required),
#'   central disease burden template (vimc format); add column with run_id to get
#'   stochastic disease burden template
#' @param centralBurdenResultsFile csv file (required), central disease burden
#'   estimates pre and post vaccination (HPV 16/18 types)
#' @param centralBurdenResultsFile_all_cecx_burden csv file (required), central disease burden
#'   estimates pre and post vaccination (all HPV types)
#' @param central_disease_burden_file_all csv file (required),
#'   central disease burden estimates (vimc format)
#' @param psaData data table (required), latin hyper cube sample of input parameters
#' @param diseaseBurdenStochasticFile_all_cecx_burden character string (required),
#'   stochastic disease burden estimates file(s) (output) - all cervical cancer burden
#' @param diseaseBurden_central_StochasticFile_all_cecx_burden character string (required),
#'   (central + 95% uncertainty interval) disease burden estimates file(s)
#'   (output) - all cervical cancer burden
#' @param psa_runs integer (required), simulation runs for sensitivity analysis
#' @param countryCodes list (optional), If country codes are provided,
#'   stochastic burden estimates are generated for these countries.
#'   If set to -1, then stochastic burden estimates are generated for the
#'   countries included in the central burden estimates.
#' @param vaccination_scenario logical (required), generate stochastic burden
#'   estimates for (vaccination) or (no vaccination) scenario
#'
#' @return Null return value; disease burden estimates are saved to corresponding files
#' @export
#'
EmulateVaccineImpactVimcStochastic_all_cecx_burden <-
  function (disease_burden_template_file,
            centralBurdenResultsFile,
            centralBurdenResultsFile_all_cecx_burden,
            central_disease_burden_file_all,
            batch_cohort_file,
            psaData,
            diseaseBurdenStochasticFile_all_cecx_burden,
            diseaseBurden_central_StochasticFile_all_cecx_burden,
            psa_runs,
            countryCodes = -1,
            vaccination_scenario) {

    # ----------------------------------------------------------------------------
    # read file -- central disease burden template
    vimc_template <- fread (disease_burden_template_file)

    # read batch cohort file
    batch_cohort_dt <- fread (batch_cohort_file)

    # central burden estimates (vimc format -- all cecx burden)
    burden_central_dt <- fread (central_disease_burden_file_all)

    # read in central burden results -- HPV vaccine types (and) HPV all types
    central_burden     <- fread (centralBurdenResultsFile)
    central_burden_all <- fread (centralBurdenResultsFile_all_cecx_burden)

    # initialise empty table to save stochastic results in vimc format
    header <- vimc_template [0, ]                 # empty table with requisite
    #   columns
    header [, run_id := numeric ()]               # add column for run id
    setcolorder (header, c("disease", "run_id"))  # reorder columns

    # save header to file for stochastic estimates of disease burden
    # all cecx burden
    stochastic_file_all <- diseaseBurdenStochasticFile_all_cecx_burden

    fwrite (x      = header,
            file   = stochastic_file_all,
            append = FALSE)
    # ----------------------------------------------------------------------------

    # initialise empty data table -- central + stochastic estimates (median + 95% uncertainty intervals)
    central_stochastic_dt <- vimc_template [0, ]

    # create new columns for burden (median + 95% uncertainty intervals)
    central_stochastic_dt [, ':=' (cases.median  = numeric (), cases.low  = numeric (), cases.high  = numeric (),
                                   dalys.median  = numeric (), dalys.low  = numeric (), dalys.high  = numeric (),
                                   deaths.median = numeric (), deaths.low = numeric (), deaths.high = numeric ())]

    # ----------------------------------------------------------------------------


    # extract burden estimates for pre- or post-vaccination
    if (vaccination_scenario) {

      # vaccination scenario
      central_burden     <- central_burden     [scenario == "post-vaccination"]
      central_burden_all <- central_burden_all [scenario == "post-vaccination"]

    } else {

      # no vaccination scenario
      central_burden     <- central_burden     [scenario == "pre-vaccination"]
      central_burden_all <- central_burden_all [scenario == "pre-vaccination"]
    }

    # keep only needed columns in central burden data table (HPV types in vaccine)
    central_burden <-
      central_burden [, c ("scenario", "type", "age",
                           "immunized",
                           "inc.cecx", "mort.cecx", "lifey", "disability", "prev.cecx",
                           "country", "year")]

    # get country codes
    if (countryCodes [1] == -1) {
      countryCodes <- unique (central_burden [, country])
    }

    # generate stochastic burden estimates for each country
    for (country_code in countryCodes) {

      # get disease burden template for current country (of this loop)
      vimc_template_country <- vimc_template [country == country_code]

      # read batch cohort file for current country (of this loop)
      batch_cohort_country <- batch_cohort_dt [country_code == country_code]

      # keep requisite columns in batch cohort
      batch_cohort_country <-
        batch_cohort_country [, c ("birthcohort", "country_code", "vaccine")]

      # get central burden estimates for current country
      central_burden_country     <- central_burden     [country == country_code]
      central_burden_all_country <- central_burden_all [country == country_code]

      # add column -- birthcohort
      central_burden_country [, birthcohort := year - age]

      # add vaccine type especially for 1-dose (or not)
      central_burden_country <- merge (x    = central_burden_country,
                                       y    = batch_cohort_country,
                                       by.x = c("country",      "birthcohort"),
                                       by.y = c("country_code", "birthcohort"),
                                       all  = TRUE)

      # add column -- vaccine efficacy, especially for 1-dose
      central_burden_country [                   , vaccine_efficacy := 1    ]
      central_burden_country [vaccine == "HPV_1D", vaccine_efficacy := 0.975]

      # get latin hyper cube sample of input parameters for current country
      psadat_country <- psaData [country == country_code]

      # initialise header for stochastic burden estimates
      stochastic_burden_country <- header

      # generate post-hoc stochastic estimates for each run
      for (run_number in 1:psa_runs) {

        # ----------------------------------------------------------------------
        # initialise burden estimate to central burden estimates
        # make a copy of data table to avoid direct reference to original data table
        burden     <- copy (central_burden_country)
        burden_all <- copy (central_burden_all_country)
        # ----------------------------------------------------------------------

        # combine columns burden_all (all cecx burden) and burden (HPV types in vaccine) data tables
        burden_dt <- merge (x    = burden_all,
                            y    = burden,
                            by.x = c("country", "age", "year", "scenario", "type"),
                            by.y = c("country", "age", "year", "scenario", "type"),
                            all  = TRUE)

        # estimate cost of cervical cancer per case
        burden_dt [inc.cecx.x > 0, cost.cecx.percase := cost.cecx / inc.cecx.x]

        # estimate cervical cancer burden for non-vaccine HPV types
        burden_dt [, inc.cecx.z   := inc.cecx.x   - inc.cecx.y]
        burden_dt [, mort.cecx.z  := mort.cecx.x  - mort.cecx.y]
        burden_dt [, lifey.z      := lifey.x      - lifey.y]
        burden_dt [, disability.z := disability.x - disability.y]
        burden_dt [, prev.cecx.z  := prev.cecx.x  - prev.cecx.y]

        # ----------------------------------------------------------------------
        # adjust burden for sensitivity analysis, for 1-dose
        burden_dt [vaccine == "HPV_1D",
                   inc.cecx.y := (inc.cecx.y / (1 - immunized.y * vaccine_efficacy) ) *
                     (1 - immunized.y * vaccine_efficacy * psadat_country [run_id == run_number, one_dose_vaccine_efficacy_ratio])]

        burden_dt [vaccine == "HPV_1D",
                   mort.cecx.y := (mort.cecx.y / (1 - immunized.y * vaccine_efficacy) ) *
                     (1 - immunized.y * vaccine_efficacy * psadat_country [run_id == run_number, one_dose_vaccine_efficacy_ratio])]

        burden_dt [vaccine == "HPV_1D",
                   lifey.y := (lifey.y / (1 - immunized.y * vaccine_efficacy) ) *
                     (1 - immunized.y * vaccine_efficacy * psadat_country [run_id == run_number, one_dose_vaccine_efficacy_ratio])]

        burden_dt [vaccine == "HPV_1D",
                   disability.y := (disability.y / (1 - immunized.y * vaccine_efficacy) ) *
                     (1 - immunized.y * vaccine_efficacy * psadat_country [run_id == run_number, one_dose_vaccine_efficacy_ratio])]

        burden_dt [vaccine == "HPV_1D",
                   prev.cecx.y := (prev.cecx.y / (1 - immunized.y * vaccine_efficacy) ) *
                     (1 - immunized.y * vaccine_efficacy * psadat_country [run_id == run_number, one_dose_vaccine_efficacy_ratio])]

        # ----------------------------------------------------------------------

        # ----------------------------------------------------------------------
        # probabilistic sensitivity analysis to generate stochastic estimates
        # ----------------------------------------------------------------------

        # burden (HPV types in vaccine)

        # cases -- incidence
        burden_dt [, inc.cecx.y := (inc.cecx.y * psadat_country [run_id == run_number,
                                                                 incidence_ratio]
                                    * psadat_country [run_id == run_number,
                                                      hpv_distribution_ratio])]

        # deaths -- mortality
        burden_dt [, mort.cecx.y := (mort.cecx.y * psadat_country [run_id == run_number,
                                                                   mortality_ratio]
                                     * psadat_country [run_id == run_number,
                                                       hpv_distribution_ratio])]
        # prevalence
        burden_dt [, prev.cecx.y := (prev.cecx.y * psadat_country [run_id == run_number,
                                                                   prevalence_ratio]
                                     * psadat_country [run_id == run_number,
                                                       hpv_distribution_ratio])]

        # YLL -- premature mortality
        burden_dt [, lifey.y := (lifey.y * psadat_country [run_id == run_number,
                                                           mortality_ratio]
                                 * psadat_country [run_id == run_number,
                                                   hpv_distribution_ratio])]

        # ----------------------------------------------------------------------
        # burden (non-vaccine HPV types)

        # cases -- incidence
        burden_dt [, inc.cecx.z := (inc.cecx.z * psadat_country [run_id == run_number,
                                                                 incidence_ratio]
                                    * psadat_country [run_id == run_number,
                                                      hpv_distribution_non_vaccine_ratio])]

        # deaths -- mortality
        burden_dt [, mort.cecx.z := (mort.cecx.z * psadat_country [run_id == run_number,
                                                                   mortality_ratio]
                                     * psadat_country [run_id == run_number,
                                                       hpv_distribution_non_vaccine_ratio])]
        # prevalence
        burden_dt [, prev.cecx.z := (prev.cecx.z * psadat_country [run_id == run_number,
                                                                   prevalence_ratio]
                                     * psadat_country [run_id == run_number,
                                                       hpv_distribution_non_vaccine_ratio])]

        # YLL -- premature mortality
        burden_dt [, lifey.z := (lifey.z * psadat_country [run_id == run_number,
                                                           mortality_ratio]
                                 * psadat_country [run_id == run_number,
                                                   hpv_distribution_non_vaccine_ratio])]

        # ----------------------------------------------------------------------
        # estimation of YLD -- disability

        # disability weights for different phases of cervical cancer
        # (diagnosis & therapy, controlled, metastatic, terminal)
        dw = list (diag       = psadat_country [run_id == run_number,
                                                dw_diagnosis],
                   control    = psadat_country [run_id == run_number,
                                                dw_control],
                   metastatic = psadat_country [run_id == run_number,
                                                dw_metastatic],
                   terminal   = psadat_country [run_id == run_number,
                                                dw_terminal]
        )

        # duration of different phases of cervical cancer
        # (diagnosis & therapy, controlled, metastatic, terminal) -- unit in years
        disability.weights <- "gbd_2017"

        cecx_duration = list (diag       = data.disability_weights [Source == disability.weights &
                                                                      Sequela=="diagnosis",
                                                                    Duration],
                              metastatic = data.disability_weights [Source == disability.weights &
                                                                      Sequela=="metastatic",
                                                                    Duration],
                              terminal   = data.disability_weights [Source == disability.weights &
                                                                      Sequela=="terminal",
                                                                    Duration])
        # duration of controlled phases is based on remainder of
        # time after attributing to other phases

        # YLD -- disability
        # combine yld contribution from (incidence, prevalence and mortality) cases

        # burden (HPV types in vaccine)
        burden_dt [, disability.y := (inc.cecx.y  * dw$diag * cecx_duration$diag) +
                     (prev.cecx.y * dw$control) +
                     (mort.cecx.y * ( (dw$metastatic * cecx_duration$metastatic) +
                                        (dw$terminal   * cecx_duration$terminal) ) ) ]

        # burden (non-vaccine HPV types)
        burden_dt [, disability.z := (inc.cecx.z  * dw$diag * cecx_duration$diag) +
                     (prev.cecx.z * dw$control) +
                     (mort.cecx.z * ( (dw$metastatic * cecx_duration$metastatic) +
                                        (dw$terminal   * cecx_duration$terminal) ) ) ]

        # ------------------------------------------------------------------------

        # sum burden (HPV types in vaccine) and burden (non-vaccine HPV types)
        burden_dt [, inc.cecx.x   := inc.cecx.y   + inc.cecx.z   ]
        burden_dt [, mort.cecx.x  := mort.cecx.y  + mort.cecx.z  ]
        burden_dt [, prev.cecx.x  := prev.cecx.y  + prev.cecx.z  ]
        burden_dt [, lifey.x      := lifey.y      + lifey.z      ]
        burden_dt [, disability.x := disability.y + disability.z ]

        # estimate updated cost
        burden_dt [inc.cecx.x  > 0, cost.cecx := inc.cecx.x * cost.cecx.percase]
        burden_dt [inc.cecx.x == 0, cost.cecx := 0]
        # ------------------------------------------------------------------------

        # keep columns of interest
        burden_dt <- burden_dt [, .(country, scenario, type, age, cohort_size, vaccinated,
                                    immunized.x,
                                    inc.cecx.x, mort.cecx.x, lifey.x, disability.x, cost.cecx, prev.cecx.x, year)]

        # rename burden columns
        setnames (burden_dt,
                  old = c ("inc.cecx.x", "mort.cecx.x", "lifey.x", "disability.x", "prev.cecx.x", "immunized.x"),
                  new = c ("inc.cecx",   "mort.cecx",   "lifey",   "disability",   "prev.cecx"  , "immunized"  ) )

        # ------------------------------------------------------------------------

        # convert results to vimc format
        convert_results <- OutputVimc (DT            = burden_dt,
                                       calendar_year = TRUE,
                                       vimc_template = vimc_template_country)

        # drop scenario (post-vaccination or pre-vaccination) column
        convert_results [, scenario := NULL]

        # add column for run id
        convert_results [, run_id := run_number]

        # add stochastic burden estimate adjusted by sensitivity analysis to the
        # stochastic burden table of all runs
        stochastic_burden_country <- rbind (stochastic_burden_country,
                                            convert_results,
                                            use.names = TRUE)

      } # end of -- for (run_id in 1:psa_runs)

      # save stochastic burden estimates of current country to larger file with
      # stochastic burden estimates of all countries
      fwrite (x      = stochastic_burden_country,
              file   = stochastic_file_all,
              append = TRUE)

      # --------------------------------------------------------------------------
      # generate burden for current country -- central + median + 95% uncertainty intervals

      # burden -- median
      burden_median_dt <- stochastic_burden_country [, lapply (.SD, quantile, probs = c(0.5), na.rm = TRUE),
                                                     .SDcols = c ("cases", "dalys", "deaths"),
                                                     by = .(year, age, country)]

      # burden -- low 95% uncertainty interval
      burden_low_dt <- stochastic_burden_country [, lapply (.SD, quantile, probs = c(0.025), na.rm = TRUE),
                                                  .SDcols = c ("cases", "dalys", "deaths"),
                                                  by = .(year, age, country)]

      # burden -- high 95% uncertainty interval
      burden_high_dt <- stochastic_burden_country [, lapply (.SD, quantile, probs = c(0.975), na.rm = TRUE),
                                                   .SDcols = c ("cases", "dalys", "deaths"),
                                                   by = .(year, age, country)]

      # rename burden columns
      setnames (burden_median_dt,
                old = c ("cases",        "dalys",        "deaths" ),
                new = c ("cases.median", "dalys.median", "deaths.median" ) )

      setnames (burden_low_dt,
                old = c ("cases",        "dalys",        "deaths" ),
                new = c ("cases.low",    "dalys.low",    "deaths.low" ) )

      setnames (burden_high_dt,
                old = c ("cases",        "dalys",        "deaths" ),
                new = c ("cases.high",   "dalys.high",   "deaths.high" ) )

      # central burden estimates for current country (vimc format -- all cecx burden)
      burden_central_country_dt <- burden_central_dt [country == country_code]


      # combine central, median, low, and high burden estimates
      burden_central_country_dt <- merge (x = burden_central_country_dt,
                                          y = burden_median_dt,
                                          by.x = c ("year", "age", "country"),
                                          by.y = c ("year", "age", "country"),
                                          all  = TRUE)

      burden_central_country_dt <- merge (x = burden_central_country_dt,
                                          y = burden_low_dt,
                                          by.x = c ("year", "age", "country"),
                                          by.y = c ("year", "age", "country"),
                                          all  = TRUE)

      burden_central_country_dt <- merge (x = burden_central_country_dt,
                                          y = burden_high_dt,
                                          by.x = c ("year", "age", "country"),
                                          by.y = c ("year", "age", "country"),
                                          all  = TRUE)

      # add central + stochastic estimates (median + 95% uncertainty intervals)
      # for current country to table for multiple countries
      # all cervical cancer burden
      central_stochastic_dt <- rbind (central_stochastic_dt,
                                      burden_central_country_dt,
                                      use.names = TRUE)
      # --------------------------------------------------------------------------


    } # end of -- for (country_code in countryCodes)

    # write streamlined stochastic estimates to file (central + median + 95% uncertainty intervals)
    fwrite (x      = central_stochastic_dt,
            file   = diseaseBurden_central_StochasticFile_all_cecx_burden,
            append = FALSE)

    return (NULL)

  } # end of function -- EmulateVaccineImpactVimcStochastic_all_cecx_burden
# ------------------------------------------------------------------------------


#-------------------------------------------------------------------------------
#' Emulate vaccine impact estimates (VIMC stochastic runs) - all cervical cancer burden
#'
#' Emulate vaccine impact estimates for VIMC stochastic runs/sensitivity
#'   analysis. The inputs are central disease burden estimates, input
#'   parameter distributions (latin hyper sampling), runs for sensitivity analysis,
#'   and filename for stochastic burden estimates.
#'   The outputs are stochastic disease burden estimates -- all cervical cancer burden.
#'   (full results file plus a file per country).
#'
#'
#'   Stochastic disease burden estimates are generated.
#'     (i)  full results files
#'            1 full results file for pre-vaccination (optional)
#'            1 full results file for post-vaccination
#'     (ii) 1 file per country for all runs
#'            1 file per country for all runs -- pre-vaccination (optional)
#'            1 file per country for all runs -- post-vaccination
#'
#' @param disease_burden_template_file csv file (required),
#'   central disease burden template (vimc format); add column with run_id to get
#'   stochastic disease burden template
#' @param centralBurdenResultsFile csv file (required), central disease burden
#'   estimates pre and post vaccination (HPV 16/18 types)
#' @param centralBurdenResultsFile_all_cecx_burden csv file (required), central disease burden
#'   estimates pre and post vaccination (all HPV types)
#' @param central_disease_burden_file_all csv file (required),
#'   central disease burden estimates (vimc format)
#' @param psaData data table (required), latin hyper cube sample of input parameters
#' @param diseaseBurdenStochasticFile_all_cecx_burden character string (required),
#'   stochastic disease burden estimates file(s) (output) - all cervical cancer burden
#' @param diseaseBurden_central_StochasticFile_all_cecx_burden character string (required),
#'   (central + 95% uncertainty interval) disease burden estimates file(s)
#'   (output) - all cervical cancer burden
#' @param psa_runs integer (required), simulation runs for sensitivity analysis
#' @param countryCodes list (optional), If country codes are provided,
#'   stochastic burden estimates are generated for these countries.
#'   If set to -1, then stochastic burden estimates are generated for the
#'   countries included in the central burden estimates.
#' @param vaccination_scenario logical (required), generate stochastic burden
#'   estimates for (vaccination) or (no vaccination) scenario
#'
#' @return Null return value; disease burden estimates are saved to corresponding files
#' @export
#'
# ------------------------------------------------------------------------------
# This old function can be removed and not used further
# ------------------------------------------------------------------------------
old_EmulateVaccineImpactVimcStochastic_all_cecx_burden <-
  function (disease_burden_template_file,
            centralBurdenResultsFile,
            centralBurdenResultsFile_all_cecx_burden,
            central_disease_burden_file_all,
            psaData,
            diseaseBurdenStochasticFile_all_cecx_burden,
            diseaseBurden_central_StochasticFile_all_cecx_burden,
            psa_runs,
            countryCodes = -1,
            vaccination_scenario) {

    # ----------------------------------------------------------------------------
    # read file -- central disease burden template
    vimc_template <- fread (disease_burden_template_file)

    # central burden estimates (vimc format -- all cecx burden)
    burden_central_dt <- fread (central_disease_burden_file_all)

    # read in central burden results -- HPV vaccine types (and) HPV all types
    central_burden     <- fread (centralBurdenResultsFile)
    central_burden_all <- fread (centralBurdenResultsFile_all_cecx_burden)

    # initialise empty table to save stochastic results in vimc format
    header <- vimc_template [0, ]                 # empty table with requisite
    #   columns
    header [, run_id := numeric ()]               # add column for run id
    setcolorder (header, c("disease", "run_id"))  # reorder columns

    # save header to file for stochastic estimates of disease burden
    # all cecx burden
    stochastic_file_all <- diseaseBurdenStochasticFile_all_cecx_burden

    fwrite (x      = header,
            file   = stochastic_file_all,
            append = FALSE)
    # ----------------------------------------------------------------------------

    # initialise empty data table -- central + stochastic estimates (median + 95% uncertainty intervals)
    central_stochastic_dt <- vimc_template [0, ]

    # create new columns for burden (median + 95% uncertainty intervals)
    central_stochastic_dt [, ':=' (cases.median  = numeric (), cases.low  = numeric (), cases.high  = numeric (),
                                   dalys.median  = numeric (), dalys.low  = numeric (), dalys.high  = numeric (),
                                   deaths.median = numeric (), deaths.low = numeric (), deaths.high = numeric ())]

    # ----------------------------------------------------------------------------


    # extract burden estimates for pre- or post-vaccination
    if (vaccination_scenario) {

      # vaccination scenario
      central_burden     <- central_burden     [scenario == "post-vaccination"]
      central_burden_all <- central_burden_all [scenario == "post-vaccination"]

    } else {

      # no vaccination scenario
      central_burden     <- central_burden     [scenario == "pre-vaccination"]
      central_burden_all <- central_burden_all [scenario == "pre-vaccination"]
    }

    # keep only needed columns in central burden data table (HPV types in vaccine)
    central_burden <-
      central_burden [, c ("scenario", "type", "age",
                           "inc.cecx", "mort.cecx", "lifey", "disability", "prev.cecx",
                           "country", "year")]

    # get country codes
    if (countryCodes [1] == -1) {
      countryCodes <- unique (central_burden [, country])
    }

    # generate stochastic burden estimates for each country
    for (country_code in countryCodes) {

      # get disease burden template for current country (of this loop)
      vimc_template_country <- vimc_template [country == country_code]

      # get central burden estimates for current country
      central_burden_country     <- central_burden     [country == country_code]
      central_burden_all_country <- central_burden_all [country == country_code]

      # get latin hyper cube sample of input parameters for curent country
      psadat_country <- psaData [country == country_code]

      # initialise header for stochastic burden estimates
      stochastic_burden_country <- header

      # generate post-hoc stochastic estimates for each run
      for (run_number in 1:psa_runs) {

        # ------------------------------------------------------------------------
        # initialise burden estimate to central burden estimates
        # make a copy of data table to avoid direct reference to original data table
        burden     <- copy (central_burden_country)
        burden_all <- copy (central_burden_all_country)
        # ------------------------------------------------------------------------

        # combine columns burden_all (all cecx burden) and burden (HPV types in vaccine) data tables
        burden_dt <- merge (x    = burden_all,
                            y    = burden,
                            by.x = c("country", "age", "year", "scenario", "type"),
                            by.y = c("country", "age", "year", "scenario", "type"),
                            all  = TRUE)

        # estimate cost of cervical cancer per case
        burden_dt [inc.cecx.x > 0, cost.cecx.percase := cost.cecx / inc.cecx.x]

        # estimate cervical cancer burden for non-vaccine HPV types
        burden_dt [, inc.cecx.z   := inc.cecx.x   - inc.cecx.y]
        burden_dt [, mort.cecx.z  := mort.cecx.x  - mort.cecx.y]
        burden_dt [, lifey.z      := lifey.x      - lifey.y]
        burden_dt [, disability.z := disability.x - disability.y]
        burden_dt [, prev.cecx.z  := prev.cecx.x  - prev.cecx.y]




        # ------------------------------------------------------------------------
        # probabilistic sensitivity analysis to generate stochastic estimates
        # ------------------------------------------------------------------------

        # burden (HPV types in vaccine)

        # cases -- incidence
        burden_dt [, inc.cecx.y := (inc.cecx.y * psadat_country [run_id == run_number,
                                                                 incidence_ratio]
                                    * psadat_country [run_id == run_number,
                                                      hpv_distribution_ratio])]

        # deaths -- mortality
        burden_dt [, mort.cecx.y := (mort.cecx.y * psadat_country [run_id == run_number,
                                                                   mortality_ratio]
                                     * psadat_country [run_id == run_number,
                                                       hpv_distribution_ratio])]
        # prevalence
        burden_dt [, prev.cecx.y := (prev.cecx.y * psadat_country [run_id == run_number,
                                                                   prevalence_ratio]
                                     * psadat_country [run_id == run_number,
                                                       hpv_distribution_ratio])]

        # YLL -- premature mortality
        burden_dt [, lifey.y := (lifey.y * psadat_country [run_id == run_number,
                                                           mortality_ratio]
                                 * psadat_country [run_id == run_number,
                                                   hpv_distribution_ratio])]

        # ------------------------------------------------------------------------
        # burden (non-vaccine HPV types)

        # cases -- incidence
        burden_dt [, inc.cecx.z := (inc.cecx.z * psadat_country [run_id == run_number,
                                                                 incidence_ratio]
                                    * psadat_country [run_id == run_number,
                                                      hpv_distribution_non_vaccine_ratio])]

        # deaths -- mortality
        burden_dt [, mort.cecx.z := (mort.cecx.z * psadat_country [run_id == run_number,
                                                                   mortality_ratio]
                                     * psadat_country [run_id == run_number,
                                                       hpv_distribution_non_vaccine_ratio])]
        # prevalence
        burden_dt [, prev.cecx.z := (prev.cecx.z * psadat_country [run_id == run_number,
                                                                   prevalence_ratio]
                                     * psadat_country [run_id == run_number,
                                                       hpv_distribution_non_vaccine_ratio])]

        # YLL -- premature mortality
        burden_dt [, lifey.z := (lifey.z * psadat_country [run_id == run_number,
                                                           mortality_ratio]
                                 * psadat_country [run_id == run_number,
                                                   hpv_distribution_non_vaccine_ratio])]

        # ------------------------------------------------------------------------
        # estimation of YLD -- disability

        # disability weights for different phases of cervical cancer
        # (diagnosis & therapy, controlled, metastatic, terminal)
        dw = list (diag       = psadat_country [run_id == run_number,
                                                dw_diagnosis],
                   control    = psadat_country [run_id == run_number,
                                                dw_control],
                   metastatic = psadat_country [run_id == run_number,
                                                dw_metastatic],
                   terminal   = psadat_country [run_id == run_number,
                                                dw_terminal]
        )

        # duration of different phases of cervical cancer
        # (diagnosis & therapy, controlled, metastatic, terminal) -- unit in years
        disability.weights <- "gbd_2017"

        cecx_duration = list (diag       = data.disability_weights [Source == disability.weights &
                                                                      Sequela=="diagnosis",
                                                                    Duration],
                              metastatic = data.disability_weights [Source == disability.weights &
                                                                      Sequela=="metastatic",
                                                                    Duration],
                              terminal   = data.disability_weights [Source == disability.weights &
                                                                      Sequela=="terminal",
                                                                    Duration])
        # duration of controlled phases is based on remainder of
        # time after attributing to other phases

        # YLD -- disability
        # combine yld contribution from (incidence, prevalence and mortality) cases

        # burden (HPV types in vaccine)
        burden_dt [, disability.y := (inc.cecx.y  * dw$diag * cecx_duration$diag) +
                     (prev.cecx.y * dw$control) +
                     (mort.cecx.y * ( (dw$metastatic * cecx_duration$metastatic) +
                                        (dw$terminal   * cecx_duration$terminal) ) ) ]

        # burden (non-vaccine HPV types)
        burden_dt [, disability.z := (inc.cecx.z  * dw$diag * cecx_duration$diag) +
                     (prev.cecx.z * dw$control) +
                     (mort.cecx.z * ( (dw$metastatic * cecx_duration$metastatic) +
                                        (dw$terminal   * cecx_duration$terminal) ) ) ]

        # ------------------------------------------------------------------------

        # sum burden (HPV types in vaccine) and burden (non-vaccine HPV types)
        burden_dt [, inc.cecx.x   := inc.cecx.y   + inc.cecx.z   ]
        burden_dt [, mort.cecx.x  := mort.cecx.y  + mort.cecx.z  ]
        burden_dt [, prev.cecx.x  := prev.cecx.y  + prev.cecx.z  ]
        burden_dt [, lifey.x      := lifey.y      + lifey.z      ]
        burden_dt [, disability.x := disability.y + disability.z ]

        # estimate updated cost
        burden_dt [inc.cecx.x  > 0, cost.cecx := inc.cecx.x * cost.cecx.percase]
        burden_dt [inc.cecx.x == 0, cost.cecx := 0]
        # ------------------------------------------------------------------------

        # keep columns of interest
        burden_dt <- burden_dt [, .(country, scenario, type, age, cohort_size, vaccinated, immunized,
                                    inc.cecx.x, mort.cecx.x, lifey.x, disability.x, cost.cecx, prev.cecx.x, year)]

        # rename burden columns
        setnames (burden_dt,
                  old = c ("inc.cecx.x", "mort.cecx.x", "lifey.x", "disability.x", "prev.cecx.x"),
                  new = c ("inc.cecx",   "mort.cecx",   "lifey",   "disability",   "prev.cecx"  ) )

        # ------------------------------------------------------------------------

        # convert results to vimc format
        convert_results <- OutputVimc (DT            = burden_dt,
                                       calendar_year = TRUE,
                                       vimc_template = vimc_template_country)

        # drop scenario (post-vaccination or pre-vaccination) column
        convert_results [, scenario := NULL]

        # add column for run id
        convert_results [, run_id := run_number]

        # add stochastic burden estimate adjusted by sensitivity analysis to the
        # stochastic burden table of all runs
        stochastic_burden_country <- rbind (stochastic_burden_country,
                                            convert_results,
                                            use.names = TRUE)

      } # end of -- for (run_id in 1:psa_runs)

      # save stochastic burden estimates of current country to larger file with
      # stochastic burden estimates of all countries
      fwrite (x      = stochastic_burden_country,
              file   = stochastic_file_all,
              append = TRUE)

      # --------------------------------------------------------------------------
      # generate burden for current country -- central + median + 95% uncertainty intervals

      # burden -- median
      burden_median_dt <- stochastic_burden_country [, lapply (.SD, quantile, probs = c(0.5), na.rm = TRUE),
                                                     .SDcols = c ("cases", "dalys", "deaths"),
                                                     by = .(year, age, country)]

      # burden -- low 95% uncertainty interval
      burden_low_dt <- stochastic_burden_country [, lapply (.SD, quantile, probs = c(0.025), na.rm = TRUE),
                                                  .SDcols = c ("cases", "dalys", "deaths"),
                                                  by = .(year, age, country)]

      # burden -- high 95% uncertainty interval
      burden_high_dt <- stochastic_burden_country [, lapply (.SD, quantile, probs = c(0.975), na.rm = TRUE),
                                                   .SDcols = c ("cases", "dalys", "deaths"),
                                                   by = .(year, age, country)]

      # rename burden columns
      setnames (burden_median_dt,
                old = c ("cases",        "dalys",        "deaths" ),
                new = c ("cases.median", "dalys.median", "deaths.median" ) )

      setnames (burden_low_dt,
                old = c ("cases",        "dalys",        "deaths" ),
                new = c ("cases.low",    "dalys.low",    "deaths.low" ) )

      setnames (burden_high_dt,
                old = c ("cases",        "dalys",        "deaths" ),
                new = c ("cases.high",   "dalys.high",   "deaths.high" ) )

      # central burden estimates for current country (vimc format -- all cecx burden)
      burden_central_country_dt <- burden_central_dt [country == country_code]


      # combine central, median, low, and high burden estimates
      burden_central_country_dt <- merge (x = burden_central_country_dt,
                                          y = burden_median_dt,
                                          by.x = c ("year", "age", "country"),
                                          by.y = c ("year", "age", "country"),
                                          all  = TRUE)

      burden_central_country_dt <- merge (x = burden_central_country_dt,
                                          y = burden_low_dt,
                                          by.x = c ("year", "age", "country"),
                                          by.y = c ("year", "age", "country"),
                                          all  = TRUE)

      burden_central_country_dt <- merge (x = burden_central_country_dt,
                                          y = burden_high_dt,
                                          by.x = c ("year", "age", "country"),
                                          by.y = c ("year", "age", "country"),
                                          all  = TRUE)

      # add central + stochastic estimates (median + 95% uncertainty intervals)
      # for current country to table for multiple countries
      # all cervical cancer burden
      central_stochastic_dt <- rbind (central_stochastic_dt,
                                      burden_central_country_dt,
                                      use.names = TRUE)
      # --------------------------------------------------------------------------


    } # end of -- for (country_code in countryCodes)

    # write streamlined stochastic estimates to file (central + median + 95% uncertainty intervals)
    fwrite (x      = central_stochastic_dt,
            file   = diseaseBurden_central_StochasticFile_all_cecx_burden,
            append = FALSE)

    return (NULL)

  } # end of function -- old_EmulateVaccineImpactVimcStochastic_all_cecx_burden
# ------------------------------------------------------------------------------
# This old function can be removed and not used further
# ------------------------------------------------------------------------------
lshtm-vimc/prime documentation built on April 21, 2024, 3:21 a.m.