vignettes/internal_doc/stan_doc/small-run-vignette.R

  library(fpemdata)
  library(fpemmodeling)
 

## This data part really needs review by somebody who is familiary with how
## models in FPET will really need to be run w.r.t. marital status, ages, etc...

### Filter the country-data from fpemdata for just sub-region 957
  country_data = fpemdata:::get_divisions() %>%
    dplyr::filter(is_country == "Y") %>%
    dplyr::left_join(
      y = fpemdata:::get_division_classifications(),
      by = 'division_numeric_code') %>%
    dplyr::filter(!is.na(sub_region_numeric_code)) %>%
    dplyr::filter(sub_region_numeric_code == 957) %>%
    dplyr::mutate(is_developing = factor(is_developed_region))
  

## This obs_data gets cut down to just include the selected sub-region
  obs_data = fpemdata:::long_format_observations() %>% 
    dplyr::filter(age_range == "15-49", is_in_union == "Y") %>%
    dplyr::filter(!is.na(measurement)) %>%
    dplyr::mutate(measurement = 
      dplyr::if_else(measurement == 0, measurement + 1e-4, measurement)) %>%
    dplyr::mutate(measurement = 
      dplyr::if_else(measurement == 1, measurement - 1e-4, measurement)) %>%
    dplyr::filter(
      division_numeric_code %in% country_data[['division_numeric_code']]) %>%
    dplyr::mutate(
      has_se = !is.na(se), 
      se = dplyr::if_else(is.na(se), 0, se),
      zero_se = factor(se == 0),
      log_se = log(0.0125)) %>%
    dplyr::filter(start_date > 1975)

## Make those 'units'...
  unit_data = define_units(data = list(countries = country_data))

## Keep a map between the various indexing options for countries:
  division_unit_map = unit_data %>%
    dplyr::select(numeric_unit_code, division_numeric_code) %>% 
    dplyr::mutate(internal_unit_code = as.numeric(factor(division_numeric_code)))

## Include a matching index column in the 'obs_data' object so we can plot 
## more easily later:
  obs_data = dplyr::left_join(obs_data, division_unit_map, by = 'division_numeric_code')


############## This is the settings bit, don't really need all the features
  ############ for the one-country run but it works and lets us use the
  ############ same code

  ## Use the global model numbers and country-specific numbers:
  process_settings = fpemmodeling::fpem_process(
    models = list(
      P_tilde ~ 1 + (1 | division_numeric_code),
      omega ~ 1 +  (1 | division_numeric_code),
      Omega ~ 1 +  (1 | division_numeric_code),
      R_tilde ~ 1 + (1 | division_numeric_code),
      phi ~ 1 + (1 | division_numeric_code),
      Phi ~ 1 + (1 | division_numeric_code),
      Z_star ~ 1 + state(P) + state(P2) + (1 | division_numeric_code)
    ), 
    data = unit_data
  )
  
  observation_settings = fpemmodeling::fpem_observation(
    models = list(
      proportion ~ 0 + constant(measurement),
      sigma ~ 0 + constant(log_se)
    ),
    data = obs_data
  )

## Smae old stuff here for indexing and final Stan-related formatting.
 time_frame = fpemmodeling::seq_factory(obs_data[['start_date']], c(1975, 2019)) 

  process_data = do.call(c, c(process_settings$inputs, use.names=FALSE))
  observation_data = do.call(c, c(observation_settings$inputs, use.names=FALSE))

  auxilliary_data = list(
    N = nrow(obs_data),
    C = nrow(unit_data),
    T = max(time_frame$index()),
    M = 4,
    t_star = which(time_frame$sequence() == 1990),
    get_o_i = factor(obs_data$type),
    get_c_i = factor(obs_data$division_numeric_code),
    get_t_i = match(x = round(time_frame$.items), time_frame$sequence()),
    n_fixed_ar_hyper = 3,
    data_rho = rep(0.5, 3),
    data_tau = rep(0.08,3),
    model_type = array(data = c(1L, 1L))
  )

  simple_auxilliary_data = lapply(auxilliary_data, function(x) {
    if (is.factor(x))
      return(as.numeric(x))
    if (is.character(x))
      stop("Character data must be pre-converted to factors.")
    if (is.numeric(x))
      return(x)
    stop("Something is wrong.")
  })


  stan_model_data = c(simple_auxilliary_data, process_data, observation_data)


samples_fixed_AR = fpemmodeling::run(
  data = stan_model_data, 
  model =  fpemmodeling:::default_models()[2],
  cache = '.my_cache',
  control = list(iter = 1200, warmup = 1000, 
		 init='random', init_r=1,
		 control = list(adapt_delta = 0.9, max_treedepth=16)),
  cores = 4)

stan_model_data$n_fixed_ar_hyper = 0
stan_model_data$data_rho = array(vector('numeric', length=0))
stan_model_data$data_tau = array(vector('numeric', length=0))

samples_estimate_AR = fpemmodeling::run(
  data = stan_model_data, 
  model =  fpemmodeling:::default_models()[2],
  cache = '.my_cache',
  control = list(iter = 1200, warmup = 1000, 
		 init='random', init_r=1,
		 control = list(adapt_delta = 0.9, max_treedepth=16)),
  cores = 4)

# Unfinished post proc
# PRZ = rstan::extract(samples_estimate_AR, pars = c("P", "R", "Z"), 
#                      inc_warmup=FALSE, permuted=TRUE)
# 
# 
# 
# # The format of one-country-run output is different from all-country, will make code below into a function eventually
# all_vec <- fpemreporting::calculate_3proportions_vec(PRZ$P %>% unlist() %>% as.vector(),
#                                                      PRZ$R %>% unlist() %>% as.vector(),
#                                                      PRZ$Z %>% unlist() %>% as.vector())
# 
# 
# nyears <- time_frame$`.->.index` %>% length()
# years <- time_frame$.sequence
# ndivs <- unit_data$division_numeric_code %>% length()
# divmap <- unit_data$division_numeric_code
# 
# # makes an array n chains x n iterations x n years x proportions.
# # saves individual country arrays 
# 
# props = c("P", "R", "Z")
# dir.create(file.path(getwd(), "data_processed"), showWarnings = FALSE)
# for( i in 1:ndivs){
#   for(j in 1:props){
#     PRZ[[j]] <-
#   }
#   all_vec <- calculate_3proportions_vec(unlist(df[,P_cols]), unlist(df[,R_cols]), unlist(df[,Z_cols]))
#   country_array <- array(data = all_vec, dim = c(1, nrow(df), nyears, length(props)), dimnames = list(chain = 1, iteration = 1:nrow(df), unit_time = 1:nyears, props = props)) # added a dimension for chain
#   saveRDS(country_array, paste0("data_processed/output-", stringr::str_pad(divmap[i], 3, pad = "0"), ".rds"))
# }
# 
# 
# 
# 
# data_set_path <- fpemdata:::default_data_set_path()
# contraceptive_use <- fpemdata::get_contraceptive_use(data_set_path = data_set_path)
# population_counts <- fpemdata::get_population_counts(data_set_path = data_set_path)
# divisions <- fpemdata:::get_divisions(data_set_path = data_set_path)
# 
# divisions_country <- divisions %>%
#   subset(is_country =="Y")
# country_div_codes <- divisions_country %>%
#   dplyr::select(division_numeric_code) %>%
#   unlist()
# country_names <- divisions_country %>%
#   dplyr::select(name) %>%
#   unlist()
# 
# 
# results <- fpemreporting::fpem_calculate_results(
#   posterior_samples = posterior_samples,
#   country_population_counts = population_counts,
#   first_year = min(years))
# 
# indicator = "contraceptive_use_all"
# fpemreporting::fpem_plot_country_results(
#   results$total_cpr_proportions,
#   contraceptive_use = contraceptive_use %>% dplyr::filter(division_numeric_code == (!! code)),
#   indicator = indicator,
#   title = paste(name, indicator),
#   y_label = "Proportion",
#   breaks = seq(min(years), 2020, by = 5)
# ) %>% print()
# 
# 
# indicator = "contraceptive_use_modern"
# fpemreporting::fpem_plot_country_results(
#   results$modern_cpr_proportions,
#   contraceptive_use = contraceptive_use %>% dplyr::filter(division_numeric_code == (!! code)),
#   indicator = indicator,
#   title = paste(name, indicator),
#   y_label = "Proportion",
#   breaks = seq(min(years), 2020, by = 5)
# ) %>% print()
# 
# indicator = "contraceptive_use_traditional"
# fpemreporting::fpem_plot_country_results(
#   results$traditional_cpr_proportions,
#   contraceptive_use = contraceptive_use %>% dplyr::filter(division_numeric_code == (!! code)),
#   indicator = indicator,
#   title = paste(name, indicator),
#   y_label = "Proportion",
#   breaks = seq(min(years), 2020, by = 5)
# ) %>% print()
# 
# indicator = "unmet_need_for_any_method"
# fpemreporting::fpem_plot_country_results(
#   results$unmet_proportions,
#   contraceptive_use = contraceptive_use %>% dplyr::filter(division_numeric_code == (!! code)),
#   indicator = indicator,
#   title = paste(name, indicator),
#   y_label = "Proportion",
#   breaks = seq(min(years), 2020, by = 5)
# ) %>% print()
# 
# 
# 
FPRgroup/FPEM documentation built on March 3, 2020, 8:19 a.m.