R/cdm-constructors.R

Defines functions new_CdmWithregNbinom new_CdmNoregNbinom new_CdmWithregPoibin new_CdmNoregPoibin

Documented in new_CdmNoregNbinom new_CdmNoregPoibin new_CdmWithregNbinom new_CdmWithregPoibin

## Poisson binomial -----------------------------------------------------------

## HAS_TESTS
#' Create an object of class Rcpp_CdmNoregPoibin
#'
#' Create an object of (C++) class \code{Rcpp_CdmNoregPoibin}.
#' Objects of class \code{Rcpp_CdmNoregPoibin} are cohort-level
#' data models, plus cohort-level data, for a single dataset. The data
#' does not have a region dimension. The data model
#' is based on a Poisson-binomial mixture.
#'
#' \code{new_CdmNoregPoibin} is an internal function, and should
#' never be needed by end users.
#' 
#' @param counts_data  A numeric vector of non-negative whole numbers.
#'   \code{NA}s are allowed.
#' @param prob A numeric scalar between 0 and 1 (exclusive).
#' 
#' @return An S4 object with class "Rcpp_CdmNoregPoibin"
#'
#' @keywords internal
#'
#' @export
new_CdmNoregPoibin <- function(counts_data, prob) {
    check_counts_data(counts_data)
    check_prob(prob)
    methods::new(CdmNoregPoibin,
                 counts_data = counts_data,
                 prob = prob)
}

## HAS_TESTS
#' Create an object of class Rcpp_CdmWithregPoibin
#'
#' Create an object of (C++) class \code{Rcpp_CdmWithregPoibin}.
#' Objects of class \code{Rcpp_CdmWithregPoibin} are cohort-level
#' data models, plus cohort-level data, for a single dataset. The data
#' has a region dimension. The data model
#' is based on a Poisson-binomial mixture.
#'
#' \code{new_CdmWithregPoibin} is an internal function, and should
#' never be needed by end users.
#'
#' @inheritParams new_CdmNoregPoibin
#' @param counts_data  A numeric matrix of non-negative whole numbers.
#'   \code{NA}s are allowed.
#' 
#' @return An S4 object with class "Rcpp_CdmWithregPoibin"
#'
#' @keywords internal
#'
#' @export
new_CdmWithregPoibin <- function(counts_data, prob) {
    check_counts_data(counts_data)
    if (!is.matrix(counts_data))
        stop("'counts_data' has class '", class(counts_data), "'")
    check_prob(prob)
    methods::new(CdmWithregPoibin,
                 counts_data = counts_data,
                 prob = prob)
}



## Negative binomial ----------------------------------------------------------

## HAS_TESTS
#' Create an object of class Rcpp_CdmNoregNbinom
#'
#' Create an object of (C++) class \code{Rcpp_CdmNoregNbinom}.
#' Objects of class \code{Rcpp_CdmNoregNbinom} are cohort-level
#' data models, plus cohort-level data, for a single dataset. The data
#' does not have a region dimension. The data model
#' is based on the negative binomial distribution.
#'
#' \code{new_CdmNoregNbinom} is an internal function, and should
#' never be needed by end users.
#' 
#' @param counts_data  A numeric vector of non-negative whole numbers.
#'   \code{NA}s are allowed.
#' @param ratio A numeric vector of non-negative numbers, the
#'   same length as \code{counts_data}. \code{NA}s are allowed
#'   at positions where \code{counts_data} is \code{NA}.
#' @param disp A numeric vector of non-negative numbers, the
#'   same length as \code{counts_data}. \code{NA}s are allowed
#'   at positions where \code{counts_data} is \code{NA}.
#' 
#' @return An S4 object with class "Rcpp_CdmNoregNbinom"
#'
#' @keywords internal
#'
#' @export
new_CdmNoregNbinom <- function(counts_data, ratio, disp) {
    check_counts_data(counts_data)
    check_ratio_cdm(ratio = ratio,
                    counts_data = counts_data)
    check_disp_cdm(disp = disp,
                   counts_data = counts_data)
    methods::new(CdmNoregNbinom,
                 counts_data = counts_data,
                 ratio = ratio,
                 disp = disp)
}

## HAS_TESTS
#' Create an object of class Rcpp_CdmWithregNbinom
#'
#' Create an object of (C++) class \code{Rcpp_CdmWithregNbinom}.
#' Objects of class \code{Rcpp_CdmWithregNbinom} are cohort-level
#' data models, plus cohort-level data, for a single dataset. The data
#' has a region dimension. The data model
#' is based on a negative binomial distribution.
#'
#' \code{new_CdmWithregNbinom} is an internal function, and should
#' never be needed by end users.
#'
#' @param counts_data  A numeric matrix of non-negative whole numbers.
#'   \code{NA}s are allowed.
#' @param ratio A numeric matrix of non-negative numbers, with the
#'   same dimensions as \code{counts_data}. \code{NA}s are allowed
#'   at positions where \code{counts_data} is \code{NA}.
#' @param disp A numeric matrix of non-negative numbers, with the
#'   same dimensions as \code{counts_data}. \code{NA}s are allowed
#'   at positions where code{counts_data} is \code{NA}.
#' 
#' @return An S4 object with class "Rcpp_CdmWithregNbinom"
#'
#' @keywords internal
#'
#' @export
new_CdmWithregNbinom <- function(counts_data, ratio, disp) {
    check_counts_data(counts_data)
    check_ratio_cdm(ratio = ratio,
                    counts_data = counts_data)
    check_disp_cdm(disp = disp,
                   counts_data = counts_data)
    for (name in c("counts_data", "ratio", "disp")) {
        val <- get(name)
        if (!is.matrix(val))
            stop("'", name, "' has class '", class(val), "'")
    }
    methods::new(CdmWithregNbinom,
                 counts_data = counts_data,
                 ratio = ratio,
                 disp = disp)
}


## R version of classes:

## base classes

## READY_TO_TRANSLATE
cdm_noreg <-
    R6::R6Class("cdm_noreg",
                private = list(counts_data = numeric()))

## READY_TO_TRANSLATE
cdm_withreg <-
    R6::R6Class("cdm_withreg",
                private = list(counts_data = matrix())) ## matrix should be numeric


## actual nbinom classes

## READY_TO_TRANSLATE
cdm_noreg_nbinom <-
    R6::R6Class("cdm_noreg_nbinom",
                public = list(
                    initialize = function(counts_data, ratio, disp) {
                        private$counts_data <- counts_data
                        private$ratio <- ratio
                        private$disp <- disp
                    },
                    calc_loglik = function(counts_true, i_interval) {
                        counts_data <- private$counts_data
                        ratio <- private$ratio
                        disp <- private$disp
                        n_particle <- length(counts_true)
                        val_data <- counts_data[[i_interval]]
                        val_ratio <- ratio[[i_interval]]
                        val_size <- 1 / disp[[i_interval]]
                        ans <- rep(0, times = n_particle)
                        if (!is.na(val_data)) {
                            for (i_particle in seq_len(n_particle)) {
                                val_true <- counts_true[[i_particle]]
                                mu <- val_ratio * val_true
                                ans[[i_particle]] <- stats::dnbinom(x = val_data,
                                                                    size = val_size,
                                                                    mu = mu,
                                                                    log = TRUE)
                            }
                        }
                        ans
                    },
                    ## C++ version should modify 'counts_true' in place
                    fill_counts_true = function(counts_true, i_interval) {
                        counts_data <- private$counts_data
                        val_data <- counts_data[[i_interval]]
                        have_data <- !is.na(val_data)
                        if (!have_data)
                            return(counts_true)
                        ratio <- private$ratio
                        disp <- private$disp
                        val_ratio <- ratio[[i_interval]]
                        val_size <- 1 / disp[[i_interval]]
                        n_particle <- length(counts_true)
                        for (i_particle in seq_len(n_particle)) {
                            mu <- val_data / val_ratio
                            counts_true[[i_particle]] <- stats::rnbinom(n = 1L,
                                                                        size = val_size,
                                                                        mu = mu)
                        }
                        counts_true
                    },
                    ## C++ version should modify 'logimp' in place
                    fill_logimp = function(logimp, counts_true, i_interval) {
                        counts_data <- private$counts_data
                        val_data <- counts_data[[i_interval]]
                        have_data <- !is.na(val_data)
                        if (!have_data)
                            return(logimp)
                        ratio <- private$ratio
                        disp <- private$disp
                        val_ratio <- ratio[[i_interval]]
                        mu <- val_data / val_ratio
                        val_size <- 1 / disp[[i_interval]]
                        n_particle <- length(logimp)
                        for (i in seq_len(n_particle)) {
                            val_true <- counts_true[[i]]
                            logimp[[i]] <- stats::dnbinom(x = val_true,
                                                          size = val_size,
                                                          mu = mu,
                                                          log = TRUE)
                        }
                        logimp
                    }
                ),
                private = list(
                    ratio = matrix(), ## numeric
                    disp = matrix()   ## numeric
                ),
                inherit = cdm_noreg)

## READY_TO_TRANSLATE
cdm_withreg_nbinom <-
    R6::R6Class("cdm_withreg_nbinom",
                public = list(
                    initialize = function(counts_data, ratio, disp) {
                        private$counts_data <- counts_data
                        private$ratio <- ratio
                        private$disp <- disp
                    },
                    calc_loglik = function(counts_true, i_interval) {
                        counts_data <- private$counts_data
                        ratio <- private$ratio
                        disp <- private$disp
                        n_particle <- nrow(counts_true)
                        n_region <- ncol(counts_true)
                        ans <- numeric(length = n_particle)
                        for (i_region in seq_len(n_region)) {
                            val_data <- counts_data[i_region, i_interval]
                            has_data <- !is.na(val_data)
                            if (has_data) {
                                val_ratio <- ratio[i_region, i_interval]
                                val_size <- 1 / disp[i_region, i_interval]
                                for (i_particle in seq_len(n_particle)) {
                                    val_true <- counts_true[i_particle, i_region]
                                    mu <- val_ratio * val_true
                                    ans[[i_particle]] <- (ans[[i_particle]]
                                        + dnbinom(x = val_data,
                                                  size = val_size,
                                                  mu = mu,
                                                  log = TRUE))
                                }
                            }
                        }
                        ans
                    },
                    ## C++ version should modify 'counts_true' in place
                    fill_counts_true = function(counts_true, i_interval) {
                        counts_data <- private$counts_data
                        ratio <- private$ratio
                        disp <- private$disp
                        n_particle <- nrow(counts_true)
                        n_region <- ncol(counts_true)
                        ## some columns of 'counts_true' can be filled
                        ## while others are empty, because previous datasets
                        ## had observations for some regions but not others
                        for (i_region in seq_len(n_region)) {
                            val_true <- counts_true[1L, i_region]
                            region_has_been_filled <- !is.na(val_true)
                            if (region_has_been_filled)
                                next
                            val_data <- counts_data[i_region, i_interval]
                            have_data_for_region <- !is.na(val_data)
                            if (!have_data_for_region)
                                next
                            val_ratio <- ratio[i_region, i_interval]
                            val_size <- 1 / disp[i_region, i_interval]
                            for (i_particle in seq_len(n_particle)) {
                                mu <- val_data / val_ratio
                                counts_true[i_particle, i_region] <-
                                    stats::rnbinom(n = 1L,
                                                   size = val_size,
                                                   mu = mu)
                            }
                        }
                        counts_true
                    },
                    ## C++ version should modify 'logimp' in place
                    fill_logimp = function(logimp, counts_true, i_interval) {
                        counts_data <- private$counts_data
                        ratio <- private$ratio
                        disp <- private$disp
                        n_particle <- nrow(logimp)
                        n_region <- ncol(logimp)
                        for (i_region in seq_len(n_region)) {
                            val_logimp <- logimp[1L, i_region]
                            region_has_been_filled <- !is.na(val_logimp)
                            if (region_has_been_filled)
                                next
                            val_data <- counts_data[i_region, i_interval]
                            have_data_for_region <- !is.na(val_data)
                            if (!have_data_for_region)
                                next
                            val_ratio <- ratio[i_region, i_interval]
                            mu <- val_data / val_ratio
                            val_size <- 1 / disp[i_region, i_interval]
                            for (i_particle in seq_len(n_particle)) {
                                val_true <- counts_true[i_particle, i_region]
                                logimp[i_particle, i_region] <- stats::dnbinom(x = val_true,
                                                                               size = val_size,
                                                                               mu = mu,
                                                                               log = TRUE)
                            }
                        }
                        logimp
                    }),
                private = list(
                    ratio = matrix(), ## numeric
                    disp = matrix()   ## numeric
                ),
                inherit = cdm_withreg)
                  
ONSdigital/Bayesian-demographic-accounts documentation built on Jan. 10, 2022, 12:34 a.m.