old_code/sampler.R

## sample one cohort in no-region model


## inputs
## counts_births - vector of length 'n_step' with numbers of births
## counts_deaths - vector of length 'n_step' with numbers of deaths
## rates_births - vector of length 'n_step' with birth rates
## rates_deaths - vector of length 'n_step' with death rates
## rates_immigration1 - vector of length 'n_step' with expected number of immigrants1
## rates_immigration2 - vector of length 'n_step' with expected number of immigrants2
## rates_emigration1 - vector of length 'n_step' with emigration1 rates
## rates_emigration2 - vector of length 'n_step' with emigration2 rates
## n_part - number of particles
## val_stock_init - prior value for initial stock
## priormod_stock_init - prior model for initial stock (which is
##         some function of 'val_stock_init')
## datamods - list of data models for population and migration. Length
##    equal to total number of data sources.
## data_stock - optional vector of length 'n_step' giving observed
##    values for stock at end of interval
## dispersion_stock - vector of length 'n_step' giving values for
##    dispersion parameter to be used with 'data_stock'.
##    Supplied iff 'data_stock' supplied.
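
## illustrative example of input shapes (made-up values, for orientation only):
##   a cohort followed across 5 Lexis triangles with 1000 particles
##   counts_deaths <- c(120, 95, 80, 60, 45)
##   rates_deaths  <- c(0.012, 0.010, 0.009, 0.008, 0.007)
##   n_part        <- 1000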



## outputs
## counts_immigration1 - n_part x n_step matrix of counts of immigrants1
## counts_immigration2 - n_part x n_step matrix of counts of immigrants2
## counts_emigration1 - n_part x n_step matrix of counts of emigrants1
## counts_emigration2 - n_part x n_step matrix of counts of emigrants2
## ess - n_step vector of effective sample sizes
## likelihood - scalar, p(data | parameters)


## internal objects
## stock - matrix with size n_part x (n_step + 1)
## n_step - number of age-time steps, i.e. number of Lexis triangles traversed
## 
## datamods_init - a list of data models to be used for initial values
## y_init - vector of observed initial values, one element per data model
##    in 'datamods_init'
## 'logwt_unnorm' - unnormalised log weights
## wt - normalised weights - n_part x (n_step + 1)


## functions
## 'calc_loglik_init' - given scalar value 'x', calculate the log likelihood of
##     vector 'y', given data models 'datamods_init'
## 'calc_loglik' - given scalar value 'x', calculate the log likelihood
##     of scalar value 'y', given data model 'datamod'
## 'calc_wt_norm' - given unnormalised log weights, calculate normalised
##    weights (not logged)

## sample one cohort



n_step <- length(counts_deaths)
stock <- matrix(nrow = n_part, ncol = n_step + 1L)
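
## allocate remaining working objects and outputs
## (a sketch, not in the original draft; sizes follow the 'internal objects'
## and 'outputs' notes above, and 'has_stock_data' is assumed to flag whether
## the optional 'data_stock' was supplied)
wt <- matrix(nrow = n_part, ncol = n_step + 1L)
logwt_unnorm <- numeric(n_part)
indices_parents <- matrix(nrow = n_part, ncol = n_step)
counts_immigration1 <- matrix(nrow = n_part, ncol = n_step)
counts_immigration2 <- matrix(nrow = n_part, ncol = n_step)
counts_emigration1 <- matrix(nrow = n_part, ncol = n_step)
counts_emigration2 <- matrix(nrow = n_part, ncol = n_step)
has_stock_data <- !is.null(data_stock)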

## create cohort log-likelihood calculators (sketch: the 'series' argument is
## hypothetical - the exact 'cloglik' interface is not shown in this draft)
cohort_loglik_stock <- cloglik(datamods, series = "stock")
cohort_loglik_im1 <- cloglik(datamods, series = "immigration1")
cohort_loglik_im2 <- cloglik(datamods, series = "immigration2")
cohort_loglik_em1 <- cloglik(datamods, series = "emigration1")
cohort_loglik_em2 <- cloglik(datamods, series = "emigration2")


## initialise
for (i_part in seq_len(n_part)) {
    ## draw from importance distribution
    stock[i_part, 1L] <- draw_stock_init(mod = priormod_stock_init,
                                         val = val_stock_init)
    logprior <- calc_logprior_stock_init(x = stock[i_part, 1L],
                                         mod = priormod_stock_init)
    loglik <- calc_loglik_init(y = y_init,
                               x = stock[i_part, 1L],
                               datamods = datamods_init)
    logwt_unnorm[i_part] <- logprior + loglik
}
wt[ , 1L] <- calc_wt_norm(logwt_unnorm)


## subsequent
for (i_step in seq_len(n_step)) {
    indices_parents[, i_step] <- draw_indices_parents(wt[, i_step]) ## resampling
    for (i_part in seq_len(n_part)) {
        ## DRAW FROM IMPORTANCE DISTRIBUTION ----------------------------------
        ## extract values needed for calculations
        i_parent <- indices_parents[i_part, i_step]
        stock_start <- stock[i_parent, i_step]
        count_dth <- counts_deaths[i_step]
        count_bth <- counts_births[i_step]
        rate_bth <- rates_births[i_step]
        rate_dth <- rates_deaths[i_step]
        rate_im1 <- rates_immigration1[i_step]
        rate_im2 <- rates_immigration2[i_step]
        rate_em1 <- rates_emigration1[i_step]
        rate_em2 <- rates_emigration2[i_step]
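        ## exposure approximates person-years lived in the Lexis triangle
        ## as half the starting stock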
        exposure <- 0.5 * stock_start
        ## calculate combined migration parameters
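        ## (note the asymmetry in the inputs: the immigration "rates" are
        ## already expected counts, while the emigration rates are converted
        ## to expected counts by multiplying by exposure)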
        expected_im  <- rate_im1 + rate_im2
        expected_em <- (rate_em1 + rate_em2) * exposure
        ## generate stock at end of interval
        if (has_stock_data) {
            ## propose end-of-interval stock directly from the stock data model,
            ## then derive net migration from the accounting identity
            ## stock_end = stock_start - deaths + netmig
            stock_end <- rdbpois1(lambda = data_stock[i_step],
                                  psi = dispersion_stock[i_step])
            netmig <- stock_end - stock_start + count_dth
        }
        else {
            ## no stock data: propose net migration from a truncated Skellam
            ## ('rskeltr1'), bounded below so that stock_end is non-negative
            lower_netmig <- count_dth - stock_start
            netmig <- rskeltr1(mu1 = expected_im,
                               mu2 = expected_em,
                               lower = lower_netmig)
            stock_end <- stock_start - count_dth + netmig
        }
        ## split combined net migration into combined
        ## immigration and combined emigration
        lambda_grossmig <- expected_im + expected_em
        lower_grossmig <- abs(netmig)
        grossmig <- rpoistr(lambda = lambda_grossmig,
                            lower = lower_grossmig)
        ## netmig = im - em and grossmig = im + em, so
        ## im = (grossmig + netmig) / 2 and em = (grossmig - netmig) / 2
        count_im <- (grossmig + netmig) %/% 2L
        count_em <- (grossmig - netmig) %/% 2L
        ## split combined immigration and emigration
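        ## (each migrant is allocated to type 1 with probability proportional
        ## to the type-specific rate, i.e. binomial thinning)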
        prob_im1 <- rate_im1 / (rate_im1 + rate_im2)
        prob_em1 <- rate_em1 / (rate_em1 + rate_em2)
        count_im1 <- rbinom(n = 1L,
                            size = count_im,
                            prob = prob_im1)
        count_em1 <- rbinom(n = 1L,
                            size = count_em,
                            prob = prob_em1)
        count_im2 <- count_im - count_im1
        count_em2 <- count_em - count_em1
        ## collect results
        stock[i_part, i_step + 1L] <- stock_end
        counts_immigration1[i_part, i_step] <- count_im1
        counts_immigration2[i_part, i_step] <- count_im2
        counts_emigration1[i_part, i_step] <- count_em1
        counts_emigration2[i_part, i_step] <- count_em2
        ## CALCULATE WEIGHTS ----------------------------------
        loglik <- (calc_loglik(cohort_loglik_stock,
                               count_true = stock_end,
                               i_step = i_step)
            + calc_loglik(cohort_loglik_im1,
                          count_true = count_im1,
                          i_step = i_step)
            + calc_loglik(cohort_loglik_im2,
                          count_true = count_im2,
                          i_step = i_step)
            + calc_loglik(cohort_loglik_em1,
                          count_true = count_em1,
                          i_step = i_step)
            + calc_loglik(cohort_loglik_em2,
                          count_true = count_em2,
                          i_step = i_step))
        logratio <- (dpois(count_bth, rate_bth * exposure, log = TRUE)
            + dpois(count_dth,
                    lambda = rate_dth * exposure,
                    log = TRUE)
            + ppois(abs(netmig),
                    lambda = expected_im + expected_em,
                    lower.tail = FALSE,
                    log.p = TRUE)
            + dbinom(count_im,
                     size = grossmig,
                     prob = expected_im / (expected_im + expected_em),
                     log = TRUE)
            - pskel(count_dth - stock_start,
                    mu1 = expected_im,
                    mu2 = expected_em,
                    lower.tail = FALSE,
                    log.p = TRUE))
        if (has_stock_data)
            final_term <- ddbpois(stock_end,
                                  lambda = data_stock[i_step],
                                  psi = dispersion_stock[i_step],
                                  log = TRUE)
        else
            final_term <- dskeltr(netmig,
                                  mu1 = expected_im,
                                  mu2 = expected_em,
                                  lower = lower_netmig,
                                  log = TRUE)
        logratio <- logratio - final_term
        logwt_unnorm[i_part] <- loglik + logratio
    }
    wt[ , i_step + 1L] <- calc_wt_norm(logwt_unnorm)
}
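
## sketch (not part of the original draft) of the documented 'ess' output:
## effective sample size at each step, computed from the normalised weights;
## the documented 'likelihood' output would similarly be accumulated from the
## per-step unnormalised weights, which this draft does not yet store
ess <- apply(wt[, -1L, drop = FALSE], 2L, function(w) 1 / sum(w^2))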




calc_loglik_init <- function(x, y, datamods) {
    n <- length(y)
    ans <- 0
    for (i in seq_len(n)) {
        ans <- ans + calc_loglik(x = x,
                                 y = y[i],
                                 datamod = datamods[[i]])
    }
    ans
}


## based on https://stats.stackexchange.com/questions/66616/converting-normalizing-very-small-likelihood-values-to-probability
calc_wt_norm <- function(x) {
    cutoff <- log(2.225074e-308) ## log(.Machine$double.xmin)
    x <- x - max(x)
    x[x < cutoff] <- -Inf ## these values would underflow to 0 when exponentiated
    x <- exp(x)
    x / sum(x)
}
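
## e.g. calc_wt_norm(c(-1000, -1001, -1002)) gives roughly
## c(0.665, 0.245, 0.090), even though exp(-1000) underflows to zero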


## simplest resampling scheme (multinomial) - maybe replace in future
draw_indices_parents <- function(x) {
    n <- length(x)
    times <- rmultinom(n = 1L,
                       size = n,
                       prob = x)[, 1L]
    s <- seq_len(n)
    rep(s, times = times)
}
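
## e.g. with x = c(0.7, 0.2, 0.1) (so 3 particles), one draw might be
## c(1, 1, 2): particle 1 duplicated, particle 3 dropped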