## Poisson-binomial mixture ---------------------------------------------------
## HAS_TESTS
#' Create a data model based on a Poisson-binomial mixture
#'
#' Create a data model where the distribution of the
#' reported value is a Poisson-binomial mixture.
#' If \code{Y ~ binom(n, p)} and \code{Z ~ pois(n(1-p))} then
#' \code{X = Y + Z} is a Poisson-binomial mixture.
#'
#' @param data A data frame, described in \code{\link{data-arg}}.
#' @param prob A number between 0 and 1.
#' @param nm_series The name of the demographic series
#' that \code{data} describes.
#' @param nm_data The name of the dataset. If no value
#' supplied, then \code{nm_data} is assumed to
#' equal the name of the object supplied as the \code{data}
#' argument.
#'
#' @return An object of class \code{"dm_poibin"}.
#'
#' @examples
#' reg_immig <- account::gl_reg_immig
#' reg_immig_dm <- dm_poibin(data = reg_immig,
#' prob = 0.95,
#' nm_series = "immigration")
#' reg_immig_dm
#' @export
dm_poibin <- function(data, prob, nm_series, nm_data = NULL) {
nm_data_arg <- deparse(substitute(data))
check_nm_series(nm_series = nm_series,
names_data = names(data))
is_popn <- identical(nm_series, "population")
check_data_dm(data = data,
is_popn = is_popn)
check_prob(prob)
data <- tidy_data_dm(data)
if (is.null(nm_data))
nm_data <- nm_data_arg
else
check_nm_data(nm_data)
new_dm_poibin(data = data,
prob = prob,
nm_series = nm_series,
nm_data = nm_data)
}
## HAS_TESTS
new_dm_poibin <- function(data, prob, nm_series, nm_data) {
ans <- list(data = data,
prob = prob,
nm_series = nm_series,
nm_data = nm_data)
class(ans) <- c("dm_poibin", "datamodel")
ans
}
## HAS_TESTS
validate_dm_poibin <- function(x) {
data <- x$data
prob <- x$prob
nm_series <- x$nm_series
nm_data <- x$nm_data
check_nm_series(nm_series = nm_series,
names_data = names(data))
is_popn <- identical(nm_series, "population")
check_data_dm(data = data,
is_popn = is_popn)
check_prob(prob)
check_nm_data(nm_data)
check_is_not_births_deaths(x)
x
}
## Exact ----------------------------------------------------------------------
## HAS_TESTS
#' Create a data model for a series treated as error-free
#'
#' Create a data model in which the observed data are
#' treated as error-free and are copied straight into
#' the account and not subsequently updated.
#'
#' @section Warning:
#'
#' \code{dm_exact} is the only valid data model for births
#' and deaths. \code{dm_exact} cannot be used for
#' any series other than births and deaths.
#'
#' @inheritParams dm_poibin
#'
#' @return An object of class \code{"dm_exact"}.
#'
#' @examples
#' reg_deaths <- dm_exact(data = account::gl_reg_deaths,
#' nm_series = "deaths")
#' reg_deaths
#' @export
dm_exact <- function(data,
nm_series = c("births", "deaths"),
nm_data = NULL) {
nm_data_arg <- deparse(substitute(data))
check_data_dm(data = data,
is_popn = FALSE)
nm_series <- match.arg(nm_series)
is_births <- nm_series == "births"
check_data_exact(data = data,
is_births = is_births)
data <- tidy_data_dm(data)
if (is.null(nm_data))
nm_data <- nm_data_arg
else
check_nm_data(nm_data)
new_dm_exact(data = data,
nm_series = nm_series,
nm_data = nm_data)
}
## HAS_TESTS
new_dm_exact <- function(data, nm_series, nm_data) {
ans <- list(data = data,
nm_series = nm_series,
nm_data = nm_data)
class(ans) <- c("dm_exact", "datamodel")
ans
}
## HAS_TESTS
validate_dm_exact <- function(x) {
data <- x$data
nm_series <- x$nm_series
nm_data <- x$nm_data
check_data_dm(data = data,
is_popn = FALSE)
check_nm_series(nm_series = nm_series,
names_data = names(data))
check_is_births_deaths(x)
is_births <- nm_series == "births"
check_data_exact(data = data,
is_births = is_births)
check_nm_data(nm_data)
x
}
## Negative binomial ----------------------------------------------------------
## NO_TESTS
#' Create a data model based on a negative binomial distribution
#'
#' TODO - EDIT THIS
#'
#' Create a data model where the
#' reported value has a negative binomial distribution.
#' The negative binomial distribution has a mean
#' mean-dispersion parameterisation.
#'
#' \code{ratio} and \code{disp} can both be data frames
#' or single numbers
#'
#' \code{ratio} can be zero, but \code{disp} cannot. Neither
#' can be negative.
#'
#' The \code{"ratio"} column in data frame \code{ratio}
#' gives expected coverage ratios,
#' that is, the number of people or events that the dataset
#' is expected to report for each actual person or event.
#' If \code{ratio$ratio[i]} is the coverage ratio,
#' and \code{true$count[i]} is the true number of people or events,
#' then the expected value for \code{data$count[i]} is
#' \code{ratio$ratio[i] * true$count[i]}.
#'
#' All elements ratio$ratio must be non-negative,
#' and can only be \code{NA} if the corresponding
#' value of \code{data$data} is.
#'
#' The \code{disp} argument measures the amount of dispersion
#' beyond what would be expected for a Poisson distribution.
#' It equals the reciprocal of the \code{size} argument
#' in \code{\link[stats]{NegBinomial}} Setting \code{disp}
#' to \code{0} is equivalent to having Poisson variance,
#' and setting \code{disp} to a higher number induces
#' greater variable. In general, the less reliable the data source,
#' the higher \code{disp} should be.
#'
#' \code{disp} can be a single number, in which case all
#' values of \code{data} have the same dispersion, or it
#' can be a data frame with a column called \code{"disp"}.
#'
#' If \code{disp} is a single number, it must be non-negative,
#' and cannot be \code{NA}. If \code{disp} is a data frame,
#' all elements disp$disp must be non-negative,
#' and can only be \code{NA} if the corresponding
#' value of \code{data$data} is.d
#'
#'
#' If \code{ratio} or \code{disp} are data frames,
#' then they do not need to have all the variables that
#' are in \code{data}. Values for \code{ratio}
#' or \code{disp} are assumed to be constant across
#' the missing variables. For instance, if \code{disp}
#' does not have a \code{time} variable, then
#' values for \code{dism} are assumed to be constant
#' across time.
#'
#' If \code{ratio} and \code{disp} are data frames,
#' then every row in \code{data} must map on to
#' them. However, not every row in
#' \code{ratio} and \code{disp}
#' needs to map on to a row in \code{data}: any
#' rows that do not map on to \code{data}
#' are silently dropped.
#'
#' @inheritParams dm_poibin
#'
#' @param ratio A data frame, identical to \code{data}, except that
#' the \code{"count"} variable is replaced by a \code{"ratio"} variable
#' giving expected coverage ratios.
#' @param disp A single number or a data frame. If a data frame,
#' it is identical to \code{data}, except that
#' the \code{"count"} variable is replaced by a \code{"disp"} variable
#' giving values for dispersion.
#'
#' @return An object of class \code{"dm_nbinom"}.
#'
#' @examples
#' ## Use a constant ratio across all categories
#' ## but use higher dispersion for males than for females,
#' ## and higher dispersion for ages 20-29 than for
#' ## other age groups.
#' reg_popn <- account::gl_reg_popn
#' ratio <- 1
#' disp <- within(reg_popn, {
#' rm(count)
#' disp <- ifelse(gender == "Female", 1.1, 1.2)
#' disp <- ifelse(age %in% 20:29, disp * 1.3, disp)
#' })
#' reg_popn_dm <- dm_nbinom(data = reg_popn,
#' ratio = ratio,
#' disp = disp,
#' nm_series = "population")
#' reg_popn_dm
#' @export
dm_nbinom <- function(data, ratio, disp, nm_series, nm_data = NULL) {
nm_data_arg <- deparse(substitute(data))
check_nm_series(nm_series = nm_series,
names_data = names(data))
is_popn <- identical(nm_series, "population")
check_data_dm(data = data,
is_popn = is_popn)
data <- tidy_data_dm(data)
check_ratio_dm(ratio = ratio,
data = data)
check_disp_dm(disp = disp,
data = data)
if (is.numeric(disp)) {
classif_vars <- data[-match("count", names(data))]
disp <- rep(disp, times = nrow(classif_vars))
disp <- data.frame(classif_vars, disp)
}
if (is.null(nm_data))
nm_data <- nm_data_arg
else
check_nm_data(nm_data)
new_dm_nbinom(data = data,
ratio = ratio,
disp = disp,
nm_series = nm_series,
nm_data = nm_data)
}
## NO_TESTS
new_dm_nbinom <- function(data, ratio, disp, nm_series, nm_data) {
stopifnot(is.data.frame(disp))
ans <- list(data = data,
ratio = ratio,
disp = disp,
nm_series = nm_series,
nm_data = nm_data)
class(ans) <- c("dm_nbinom", "datamodel")
ans
}
## NO_TESTS
validate_dm_nbinom <- function(x) {
data <- x$data
ratio <- x$ratio
disp <- x$disp
nm_series <- x$nm_series
nm_data <- x$nm_data
check_nm_series(nm_series = nm_series,
names_data = names(data))
is_popn <- identical(nm_series, "population")
check_data_dm(data = data,
is_popn = is_popn)
check_ratio_dm(ratio = ratio,
data = data)
stopifnot(is.data.frame(disp))
check_disp_dm(disp = disp,
data = data)
check_nm_data(nm_data)
check_is_not_births_deaths(x)
x
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.