R/mig_estimate_rc.R

Defines functions mig_estimate_rc

Documented in mig_estimate_rc

#' Estimate Rogers-Castro migration age schedule

#' @description Given a set of ages and observed age-specific migrants, estimate the parameters of a Roger-Castro model migration schedule.
#' Choose between a 7, 9, 11 or 13 parameter model.

#' @param ages numeric. A vector of integers for ages.
#' @param migrants numeric. A vector of integers for observed age-specific migrants.
#' @param pop numeric. A vector of integers for age-specific population or sample sizes, of which "migrants" experienced a migration event.
#' @param mx numeric. A vector of age-specific migration rates.
#' @param sigma numeric. Standard deviation of migration rates for Normal model. Argument is option, standard deviation is estimated if Normal model is run without being specified.
#' @param pre_working_age logical (TRUE/FALSE). Whether or not to include pre working age component.
#' @param working_age logical (TRUE/FALSE). Whether or not to include working age component.
#' @param retirement logical (TRUE/FALSE). Whether or not to include retirement age component.
#' @param post_retirement logical (TRUE/FALSE). Whether or not to include post retirement age component.
#' @param net_mig numeric. Deprecated argument, use migrants instead.
#' @param ... additional inputs to stan, see ?rstan::stan for details.
#' @importFrom rstan sampling extract
#' @import Rcpp
#' @importFrom stats quantile median
#' @importFrom magrittr %>%
#' @importFrom dplyr group_by summarise rename mutate ungroup
#' @importFrom rlang sym
#' @importFrom tibble tibble
#' @importFrom tibble as.tibble
#' @importFrom tidybayes gather_draws
#' @return A list of length 3. The first element, \code{pars_df}, is a data frame that provides parameter estimates with 95% credible intervals.
#' The second element, \code{fit_df}, is a data frame that shows the data and estimated migration rates at each age.
#' The third element, \code{check_converge}, is a data frame that provides the R-hat values and effective sample sizes.
#' @export
#' @examples
#' # Ex 1: Run poisson model using ages, migrants, and population
#' ages <- 0:80
#'migrants <- c(202,215,167,188,206,189,164,
#'             158,197,185,176,173,167,198,
#'             203,237,249,274,319,345,487,
#'             491,521,505,529,527,521,529,
#'             507,484,467,439,399,399,380,
#'             368,310,324,289,292,270,269,
#'             285,254,245,265,257,258,263,
#'             253,346,293,332,346,349,355,
#'             386,346,344,352,331,320,307,
#'             320,310,258,254,243,256,263,
#'             183,169,172,160,166,113,132,
#'             111,130,110,113)
#'pop <- c(105505,105505,105505,105505,105505,
#'         106126,106126,106126,106126,106126,
#'         100104,100104,100104,100104,100104,
#'         114880,114880,114880,114880,114880,
#'         136845,136845,136845,136845,136845,
#'         136582,136582,136582,136582,136582,
#'         141935,141935,141935,141935,141935,
#'         134097,134097,134097,134097,134097,
#'         130769,130769,130769,130769,130769,
#'         133718,133718,133718,133718,133718,
#'         154178,154178,154178,154178,154178,
#'         145386,145386,145386,145386,145386,
#'         126270,126270,126270,126270,126270,
#'         108314,108314,108314,108314,108314,
#'         79827,79827,79827,79827,79827,59556,
#'         59556,59556,59556,59556,59556)
#'
#'
#' # fit the model
#' res <- mig_estimate_rc(ages = ages, migrants = migrants, pop = pop,
#'                        pre_working_age = TRUE,
#'                        working_age = TRUE,
#'                        retirement = TRUE,
#'                        post_retirement = FALSE,
#'                        #optional inputs into stan
#'                        control = list(adapt_delta = 0.95, max_treedepth = 10),
#'                        iter = 10, chains = 1 #to speed up example
#'                        )
#' # plot the results and data
#' plot(ages, migrants/pop, ylab = "migration rate", xlab = "age")
#' lines(ages, res[["fit_df"]]$median, col = "red")
#' legend("topright", legend=c("data", "fit"), col=c("black", "red"), lty=1, pch = 1)
#'
#' # Ex 2: Run normal model using ages and mx
#' ages <- 0:80
#' mx <- c(0.001914601, 0.002037818, 0.001582863, 0.001781906,
#'         0.001952514, 0.001780902, 0.001545333, 0.001488796,
#'         0.001856284, 0.001743211, 0.001758172, 0.001728203,
#'         0.001668265, 0.001977943, 0.002027891, 0.002063022,
#'         0.002167479, 0.002385097, 0.002776811, 0.003003134,
#'         0.003558771, 0.003588001, 0.003807227, 0.003690307,
#'         0.003865687, 0.003858488, 0.003814558, 0.003873131,
#'         0.003712056, 0.003543659, 0.003290238, 0.003092965,
#'         0.002811146, 0.002811146, 0.002677282, 0.002744282,
#'         0.002311759, 0.002416161, 0.002155156, 0.002177528,
#'         0.002064710, 0.002057062, 0.002179416, 0.001942356,
#'         0.001873533, 0.001981783, 0.001921955, 0.001929434,
#'         0.001966826, 0.001892041, 0.002244159, 0.001900401,
#'         0.002153355, 0.002244159, 0.002263617, 0.002441776,
#'         0.002655001, 0.002379872, 0.002366115, 0.002421141,
#'         0.002621367, 0.002534252, 0.002431298, 0.002534252,
#'         0.002455057, 0.002381964, 0.002345034, 0.002243477,
#'         0.002363499, 0.002428126, 0.002292457, 0.002117078,
#'         0.002154659, 0.002004334, 0.002079497, 0.001897374,
#'         0.002216401, 0.001863792, 0.002182820, 0.001847001,
#'         0.001897374)
#'
#' # fit the model
#' res <- mig_estimate_rc(ages = ages, mx = mx,
#'                        pre_working_age = TRUE,
#'                        working_age = TRUE,
#'                        retirement = TRUE,
#'                        post_retirement = FALSE,
#'                        #optional inputs into stan
#'                        control = list(adapt_delta = 0.95, max_treedepth = 10),
#'                        iter = 10, chains = 1 #to speed up example
#'                        )
#'
mig_estimate_rc <- function(ages,
                            migrants,
                            pop,
                            mx,
                            sigma,
                            pre_working_age,
                            working_age,
                            retirement,
                            post_retirement,
                            net_mig,
                            ...){

  # initial checks
  stopifnot(any(pre_working_age, working_age, retirement, post_retirement))
  if(!missing(net_mig)){
    warning("argument net_mig is deprecated; please use migrants instead.", call. = FALSE)
    migrants <- net_mig
  }

  if(missing(ages)) stop("ages is missing")

  run_poisson = 0
  run_normal_sigma_given = 0
  run_normal_sigma_estimated = 0

  if(!missing(migrants) & !missing(pop)) {

    run_poisson = 1
    message("mig_estimate_rc is running poisson model, Using arguments ages, migrants, and pop")
    if(length(ages)!=length(migrants) | length(migrants)!=length(pop))
      stop("length of arguments ages, migrants and pop must be equal")
    if ( !all(migrants == floor(migrants)) ) stop("migrants must be integers")
    if (sum(migrants > pop)>0) stop("must have migrants <= pop")
    if (!missing(mx) & !missing(sigma))
      warning("arguments mx and sigma are ignored")

  } else if (!missing(mx) & !missing(sigma)) {

    run_normal_sigma_given = 1
    message("mig_estimate_rc is running normal model, Using arguments ages, mx, and sigma")
    if(length(ages)!=length(mx))
      stop("length of arguments ages and mx must be equal")
    if (sum(mx<0) > 0) stop("mx must be non-negative")
    if (length(sigma) != 1) stop("sigma must have length 1")
    if (sigma <= 0) stop("sigma must be positive")
    if (!missing(migrants) & !missing(pop))
      warning("arguments migrants and pop are ignored")

  } else if (!missing(mx) ) {

    run_normal_sigma_estimated = 1
    message("mig_estimate_rc is running normal model, Using arguments ages and mx")
    if(length(ages)!=length(mx))
      stop("length of arguments ages and mx must be equal")
    if (sum(mx<0) > 0) stop("mx must be non-negative")
    if (!missing(migrants) & !missing(pop))
      warning("arguments migrants and pop are ignored")

  } else {

    stop("mig_estimate_rc requires either data for migrants and pop, or data for mx")

  }

  # organize data for model input and fit the model
  if (run_poisson == 1) {
    x <- ages
    pop <- pop
    y <- migrants

    mig_data <- list(
      N = length(x),
      y = y,
      x = x,
      pop = pop,
      pre_working_age = as.numeric(pre_working_age), #binary
      working_age = as.numeric(working_age),
      retirement = as.numeric(retirement),
      post_retirement = as.numeric(post_retirement)
    )

    rc_fit <- rstan::sampling(stanmodels$rcmodel_poisson, data = mig_data, ...)

  } else if (run_normal_sigma_estimated == 1){
    x <- ages
    y <- mx

    mig_data <- list(
      N = length(x),
      y = y,
      x = x,
      pre_working_age = as.numeric(pre_working_age), #binary
      working_age = as.numeric(working_age),
      retirement = as.numeric(retirement),
      post_retirement = as.numeric(post_retirement)
    )

    rc_fit <- rstan::sampling(stanmodels$rcmodel_normal, data = mig_data, ...)

  } else if (run_normal_sigma_given == 1){
    x <- ages
    y <- mx

    mig_data <- list(
      N = length(x),
      y = y,
      x = x,
      sigma = sigma,
      pre_working_age = as.numeric(pre_working_age), #binary
      working_age = as.numeric(working_age),
      retirement = as.numeric(retirement),
      post_retirement = as.numeric(post_retirement)
    )

    rc_fit <- rstan::sampling(stanmodels$rcmodel_normal_sigma_given, data = mig_data, ...)

  } else {
    stop("Require either data for migrants and pop, or data for mx")
  }


  # extract the posterior samples
  list_of_draws <- rstan::extract(rc_fit)

  # create a matrix to store fitted values
  if (run_poisson == 1) {mx = y/pop}
  y_hat      <- matrix(nrow = length(list_of_draws[[1]]), ncol = length(x))
  these_pars <- list()
  parnames   <- names(list_of_draws)[grep("alpha|a[0-9]|mu[0-9]|lambda|^c$",names(list_of_draws))]
  for(j in 1:length(list_of_draws[[1]])){
    for(i in 1:length(parnames)){
      these_pars[[names(list_of_draws)[i]]] <- list_of_draws[[names(list_of_draws)[i]]][j]
    }
    y_hat[j,] <- mig_calculate_rc(ages = ages, pars = these_pars)
  }

  dfit <- tibble(age = x,
                 data = mx, median = apply(y_hat, 2, median),
                 lower = apply(y_hat, 2, quantile,0.025),
                 upper = apply(y_hat, 2, quantile, 0.975),
                 diff_sq = (!!sym("median") - !!sym("data"))^2)

  #TR: experimenting rm pipes re segfault error on osx...
  pars_df <- gather_draws(rc_fit, !!sym("a[0-9]\\[1\\]"),
                          !!sym("alpha[0-9]\\[1\\]"),
                          !!sym("mu[0-9]\\[1\\]"),
                          !!sym("lambda[0-9]\\[1\\]"),
                          !!sym("^c$"),
                          regex = TRUE) %>%
    dplyr::group_by(!!sym(".variable")) %>%
    summarise(median = median(!!sym(".value")),
              lower = quantile(!!sym(".value"), 0.025),
              upper = quantile(!!sym(".value"), 0.975)) %>%
    dplyr::ungroup() %>%
    dplyr::rename("variable" = !!sym(".variable")) %>%
    dplyr::mutate(variable = gsub("\\[1\\]", "", variable))

  check_converge <- rstan::summary(rc_fit)$summary[1:length(parnames),c("mean", "se_mean", "n_eff", "Rhat")]

  return(list(pars_df = pars_df, fit_df = dfit, check_converge = check_converge))

}
jessieyeung/rcbayes documentation built on Jan. 3, 2024, 8:38 p.m.