## HAS_TESTS
## Check that an account is valid.
## 'account' - a named list of data frames
## return value - TRUE or an error
check_account <- function(account) {
## 'account' is list
if (!is.list(account))
stop("'account' is not a list")
## elements of 'account' all data frames
if (!all(vapply(account, is.data.frame, TRUE)))
stop("'account' has elements that are not data frames")
## names for 'account' are valid demographic series
nms_account <- names(account)
check_names_series_valid(names = nms_account,
nm_obj = "account",
incl_popn = TRUE,
incl_stock = FALSE,
subset_ok = FALSE)
## has 'region' variable iff has 'internal_in'
check_region_iff_internal(nms_vars = names(account[[1L]]),
nms_series = nms_account)
## internal consistency of each data frame
for (i in seq_along(account)) {
df <- account[[i]]
nm_obj <- nms_account[[i]]
nms_vars <- names(df)
nm_measure_var <- "count"
measure_var <- df[[nm_measure_var]]
nms_classif_vars <- setdiff(nms_vars, nm_measure_var)
classif_vars <- df[nms_classif_vars]
incl_cohort <- nm_obj != "population"
incl_sexgender <- TRUE
incl_region <- "internal_in" %in% nms_account
no_min_age <- nm_obj == "births"
na_ok <- FALSE
frac_ok <- FALSE
tryCatch({account
check_names_df_valid(nms_vars = nms_vars,
nm_obj = nm_obj,
incl_cohort = incl_cohort,
incl_sexgender = incl_sexgender,
incl_region = incl_region,
nm_measure_var = nm_measure_var)
check_classif_vars_valid(classif_vars = classif_vars,
nm_obj = nm_obj)
check_classif_vars_complete(classif_vars = classif_vars,
nm_obj = nm_obj,
no_min_age = no_min_age)
check_measure_var_valid(var = measure_var,
nm_var = nm_measure_var,
nm_obj = nm_obj,
na_ok = na_ok,
frac_ok = frac_ok)
},
error = function(e) {
msg <- paste("problem with 'account' :", e$message)
stop(msg, call. = FALSE)
})
}
## consistency across data frames
nms_classif_all <- lapply(account, function(x) setdiff(names(x), nm_measure_var))
classif_vars_all <- lapply(seq_along(account), function(i) account[[i]][nms_classif_all[[i]]])
i_deaths <- match("deaths", nms_account)
nms_classif_template <- nms_classif_all[[i_deaths]]
classif_vars_template <- account[[i_deaths]][nms_classif_template]
no_cohort <- "population"
no_sexgender <- character()
absent_ok <- FALSE
ignore_age_cohort <- "births"
omit_first_time <- "population"
check_classif_vars_consistent_names(nms_classif_all = nms_classif_all,
nms_obj_all = nms_account,
nms_classif_template = nms_classif_template,
no_cohort = no_cohort,
no_sexgender = no_sexgender)
check_classif_vars_consistent_levels(classif_vars_all = classif_vars_all,
nms_obj_all = nms_account,
classif_vars_template = classif_vars_template,
absent_ok = absent_ok,
ignore_age_cohort = ignore_age_cohort,
omit_first_time = omit_first_time)
check_identities(account)
## return TRUE if all tests pass
TRUE
}
## HAS_TESTS
## Check a data frame or numeric argument (other than 'data')
## used for constructing a data model in a 'dm' function
## is valid
## 'arg' - the argument being checked, a data frame or numeric scalar
## 'nm_arg' - the name of the argument being checked
## 'neg_ok' - whether 'arg' can have negative values
## 'zero_ok' - whether 'arg' can have zeros
## return value - TRUE or an error
check_arg_dm <- function(arg, nm_arg, data, neg_ok, zero_ok) {
if (is.data.frame(arg))
check_arg_dm_df(arg = arg,
nm_arg = nm_arg,
data = data,
neg_ok = neg_ok,
zero_ok = zero_ok)
else if (is.numeric(arg))
check_arg_dm_num(arg = arg,
nm_arg = nm_arg,
neg_ok = neg_ok,
zero_ok = zero_ok)
else
stop("'", nm_arg, "' has class '", class(arg), "'")
}
## HAS_TESTS
## Check a data frame argument to a data model constructor.
## Assume that 'is.data.frame(arg)' is TRUE.
## 'arg' - the argument being checked, a data frame
## 'nm_arg' - the name of the argument being checked
## 'neg_ok' - whether the 'nm_arg' column of 'arg' can have negative values
## 'zero_ok' - whether the 'nm_arg' column of 'arg' can have zeros
## return value - TRUE or an error
check_arg_dm_df <- function(arg, nm_arg, data, neg_ok, zero_ok) {
nms_arg <- names(arg)
nms_data <- names(data)
nms_classif_arg <- setdiff(nms_arg, nm_arg)
nms_classif_data <- setdiff(nms_data, "count")
## 'nm_arg' in 'nms_arg'
if (!(nm_arg %in% nms_arg))
stop("'", nm_arg, "' does not have a variable called '", nm_arg, "'")
## 'nm_arg' not in 'nms_data'
if (nm_arg %in% nms_data)
stop("argument '", nm_arg, "' has the same name as a variable in 'data'")
## all elements of 'nms_arg', except for 'nm_arg', in 'nms_data'
is_in_data <- nms_classif_arg %in% nms_data
i_not_in_data <- match(FALSE, is_in_data, nomatch = 0L)
if (i_not_in_data > 0L)
stop("'", nm_arg, "' has a variable [", nms_classif_arg[[i_not_in_data]],
"] not found in 'data'")
## all rows of 'data' map to a row of 'arg'
arg_check_map <- arg
arg_check_map[[nm_arg]] <- TRUE
merged_check_map <- merge(data, arg_check_map,
by = nms_classif_arg,
all.x = TRUE)
is_not_in_arg <- is.na(merged_check_map[[nm_arg]])
i_not_in_arg <- match(TRUE, is_not_in_arg, nomatch = 0L)
if (i_not_in_arg > 0L) {
row_not_in_arg <- merged_check_map[i_not_in_arg, nms_classif_data]
str_not_in_arg <- paste(row_not_in_arg, collapse = ", ")
stop("row of 'data' with classifying variables ", str_not_in_arg,
" does not have corresponding row in '", nm_arg, "'")
}
## measure variable for 'arg' only has NAs in places where 'data' does
merged_check_na <- merge(data, arg,
by = nms_classif_arg,
all.x = TRUE)
is_na_clash <- !is.na(merged_check_na$count) & is.na(merged_check_na[[nm_arg]])
i_na_clash <- match(TRUE, is_na_clash, nomatch = 0L)
if (i_na_clash > 0L) {
row_na_clash <- merged_check_map[i_na_clash, nms_classif_data]
str_na_clash <- paste(row_na_clash, collapse = ", ")
stop("row of 'data' with classifying variables ", str_na_clash,
" has non-NA value for 'count' but corresponding row in '", nm_arg,
"' has NA value for '", nm_arg, "'")
}
## if 'neg_ok' is FALSE, no elements of measure variable in 'arg' are negative
if (!neg_ok && any(arg[[nm_arg]] < 0, na.rm = TRUE))
stop("'", nm_arg, "' variable in '", nm_arg, "' has negative values")
## if 'zero_ok' is FALSE, no elements of measure variable in 'arg' are zero
if (!zero_ok && any(arg[[nm_arg]] == 0, na.rm = TRUE))
stop("'", nm_arg, "' variable in '", nm_arg, "' has zeros")
## if all tests pass, return TRUE
TRUE
}
## HAS_TESTS
## Check a numeric scalar argument to a data model constructor.
## Assume that 'is.numeric(arg)' is TRUE.
## 'arg' - the argument being checked, a numeric scalar
## 'nm_arg' - the name of the argument being checked
## 'neg_ok' - whether 'arg' can be negative
## 'zero_ok' - whether 'arg' can be zero
## return value - TRUE or an error
check_arg_dm_num <- function(arg, nm_arg, neg_ok, zero_ok) {
if (!identical(length(arg), 1L))
stop("'", nm_arg, "' does not have length 1")
if (is.na(arg))
stop("'", nm_arg, "' is NA")
if (!neg_ok && (arg < 0))
stop("'", nm_arg, "' is negative")
if (!zero_ok && (arg == 0))
stop("'", nm_arg, "' equals 0")
TRUE
}
## HAS_TESTS
## Check that 'classif_vars' contains all intermediate categories
## of numeric variables, and all possible combinations
## of all classifying variables. Assumes that 'classif_vars'
## has already been checked for duplicates and invalid
## combiniations of age, time, and cohort. If 'no_min_age'
## is TRUE, then age can start from any non-negative number;
## otherwise age must start from 0.
## 'classif_vars' - a data frame containing the classification
## variables
## 'nm_obj' - the name of the object that the classification
## variables were taken from
## 'no_min_age' - whether the minimum value for 'age' can be
## higher than 0
## return value - TRUE or an error message
check_classif_vars_complete <- function(classif_vars, nm_obj, no_min_age) {
nms <- names(classif_vars)
nms_numeric <- intersect(nms, c("cohort", "time", "age"))
## except when 'classif_vars' is birth-related, check that the minimum age is 0
if (!no_min_age) {
age <- classif_vars$age
min_age <- min(age)
if (min_age > 0L)
stop("minimum value for 'age' [", min_age, "] in '", nm_obj,
"' is not zero")
}
## check that numeric classification variables contain all intermediate values
for (nm in nms_numeric) {
vals <- classif_vars[[nm]]
range_vals <- seq.int(from = min(vals), to = max(vals))
is_included <- range_vals %in% vals
i_not_included <- match(FALSE, is_included, nomatch = 0L)
if (i_not_included > 0L)
stop("'", nm, "' variable in '", nm_obj, "' missing intermediate value [",
range_vals[[i_not_included]], "]")
}
## all combinations of classification variables are included
levels_classif <- lapply(classif_vars, unique)
has_cohort <- "cohort" %in% nms
if (has_cohort) {
classif_vars_implied <- expand.grid(levels_classif)
is_valid_lexis <- with(classif_vars_implied,
(time - cohort - age) %in% c(0L, 1L))
nrow_expected <- sum(is_valid_lexis)
}
else {
n_levels_classif <- vapply(levels_classif, length, 1L)
nrow_expected <- prod(n_levels_classif)
}
nrow_obtained <- nrow(classif_vars)
if (nrow_obtained < nrow_expected)
stop("'", nm_obj, "' does not include all possible combinations",
" of classification variables")
TRUE
}
## HAS_TESTS
## Check that classification variables have expected levels.
## The classifications are not assumed to all have the same
## variables - the completeness of variables needs to be
## checked separately, eg via 'check_classif_vars_complete'.
## 'check_classif_vars_consistent_levels' also does not check
## that all combinations of the levels are present - this
## needs to be checked separately, eg via 'check_classif_vars_complete'.
## 'classif_vars_all' - a list of data frames with classification variables
## 'nms_obj_all' - the names of the data frames
## 'classif_vars_template' - a data frame with the target classification
## 'absent_ok' - whether a data frame is allowed to omit some levels
## contained in 'classif_vars_template'
## 'ignore_age_cohort' - names of data frames where age and cohort
## are omitted from the comparison
## return value - TRUE or an error message
check_classif_vars_consistent_levels <- function(classif_vars_all,
nms_obj_all,
classif_vars_template,
absent_ok,
ignore_age_cohort = character(),
omit_first_time = character()) {
levels_expected <- lapply(classif_vars_template, unique)
nms_levels_expected <- names(classif_vars_template)
for (i in seq_along(classif_vars_all)) {
classif_vars <- classif_vars_all[[i]]
nm_obj <- nms_obj_all[[i]]
if (nm_obj %in% ignore_age_cohort) {
non_age_cohort <- setdiff(names(classif_vars), c("age", "cohort"))
classif_vars <- classif_vars[non_age_cohort]
}
levels_obtained <- lapply(classif_vars, unique)
nms_levels_obtained <- names(classif_vars)
for (nm in nms_levels_expected) {
if (nm %in% nms_levels_obtained) {
levels_expected_var <- levels_expected[[nm]]
levels_obtained_var <- levels_obtained[[nm]]
if ((nm == "time") && (nm_obj %in% omit_first_time))
levels_obtained_var <- levels_obtained_var[-1L]
is_valid <- levels_obtained_var %in% levels_expected_var
i_invalid <- match(FALSE, is_valid, nomatch = 0L)
if (i_invalid > 0L)
stop("variable '", nm, "' in '", nm_obj, "' has unexpected value [",
levels_obtained_var[[i_invalid]], "]")
if (!absent_ok) {
is_present <- levels_expected_var %in% levels_obtained_var
i_absent <- match(FALSE, is_present, nomatch = 0L)
if (i_absent > 0L)
stop("variable '", nm, "' in '", nm_obj, "' does not have value '",
levels_expected_var[[i_absent]], "'")
}
}
}
}
TRUE
}
## HAS_TESTS
## Check that a set of data frames has 'consistent' classification variables.
## Consistent does not mean identical - some data frames may be required
## to omit a cohort or sex/gender variable found in other data frames.
## 'nms_classif_all' - a list of names of classification variables
## 'nms_obj_all' - the names of the data frames
## 'nms_classif_template' - a set of target names for classification variables
## 'no_cohort' - names of data frames that should not have a cohort variable
## 'no_sexgender' - names of data frames that should not have a sex or
## gender variable
## return value - TRUE or an error message
check_classif_vars_consistent_names <- function(nms_classif_all,
nms_obj_all,
nms_classif_template,
no_cohort = character(),
no_sexgender = character()) {
for (i in seq_along(nms_classif_all)) {
nms_obtained <- nms_classif_all[[i]]
nm_obj <- nms_obj_all[[i]]
nms_expected <- nms_classif_template
if (nm_obj %in% no_cohort)
nms_expected <- setdiff(nms_expected, "cohort")
if (nm_obj %in% no_sexgender)
nms_expected <- setdiff(nms_expected, c("sex", "gender"))
if (!setequal(nms_obtained, nms_expected))
stop("'", nm_obj, "' has classification variables ",
paste(nms_obtained, collapse = ", "), " : expected variables ",
paste(nms_expected, collapse = ", "))
}
TRUE
}
## HAS_TESTS
## Check that classification variables are valid
## (though not necessarily complete.) Assumes that
## 'classif_vars' object has correct columns.
## 'classif_vars' - a data frame containing the classification
## variables
## 'nm_obj' - the name of the object that the classification
## variables were taken from
## return value - TRUE or an error message
check_classif_vars_valid <- function(classif_vars, nm_obj) {
nms <- names(classif_vars)
nms_numeric <- intersect(nms, c("cohort", "time", "age"))
## check that classification variables have no NAs
for (nm in nms) {
if (anyNA(classif_vars[[nm]]))
stop("'", nm, "' variable in '", nm_obj, "' has NAs")
}
## check that classification variables have no duplicated rows
is_duplicated <- duplicated(classif_vars)
i_duplicated <- match(TRUE, is_duplicated, nomatch = 0L)
if (i_duplicated > 0L) {
str_dup <- paste(classif_vars[i_duplicated, ], collapse = ", ")
stop("'", nm_obj, "' has more than one row with values : ", str_dup)
}
## check numeric columns are all non-negative whole numbers
for (nm in nms_numeric) {
vals <- classif_vars[[nm]]
if (!is.numeric(vals))
stop("'", nm, "' variable in '", nm_obj, "' is non-numeric")
if (any(vals != round(vals)))
stop("'", nm, "' variable in '", nm_obj, "' has non-integer values")
if (any(vals < 0L))
stop("'", nm, "' variable in '", nm_obj, "' has negative values")
}
## check combinations of age, time, and cohort are valid
has_cohort <- "cohort" %in% nms
if (has_cohort)
valid_age_time_cohort(age = classif_vars$age,
time = classif_vars$time,
cohort = classif_vars$cohort)
## return TRUE if all tests passed
TRUE
}
## HAS_TESTS
## Check that the names of demographic series, and the
## classification variables, of 'account' and 'rates'
## are consistent with each other. Note that 'consistent'
## does not mean identical.
## 'account' - a list of data frames
## 'rates' - a list of data frames
## return value - TRUE or an error
check_consistent_account_rates <- function(account, rates) {
nms_series_account <- names(account)
nms_series_rates <- names(rates)
## same series, apart from population
nms_nopop <- setdiff(nms_series_account, "population")
if (!setequal(nms_nopop, nms_series_rates))
stop("series in 'account' [", paste(nms_series_account, collapse = ", "),
"] inconsistent with series in 'rates' [",
paste(nms_series_rates, collapse = ", "), "]")
## same classification variables, apart from cohort
## and (for births) sex/gender
for (nm_series in nms_series_rates) {
nms_vars_account <- names(account[[nm_series]])
nms_vars_rates <- names(rates[[nm_series]])
nms_classif_vars_account <- setdiff(nms_vars_account, "count")
nms_classif_vars_rates <- setdiff(nms_vars_rates, "rate")
nms_classif_vars_account_trim <- setdiff(nms_classif_vars_account, "cohort")
if (nm_series == "births")
nms_classif_vars_account_trim <- setdiff(nms_classif_vars_account_trim,
c("sex", "gender"))
if (!setequal(nms_classif_vars_account_trim, nms_classif_vars_rates)) {
str_account <- paste(nms_classif_vars_account, collapse = ", ")
str_rates <- paste(nms_classif_vars_rates, collapse = ", ")
stop("names of classification variables [", str_account, "] for series '",
nm_series, "' in 'account' not consistent with names of",
" classification variables [", str_rates, "] for series '",
nm_series, "' in 'rates'")
}
}
## levels of classification variables for account
## identical to levels of of classification
## variables for rates, apart from time
for (nm_series in nms_series_rates) {
data_account <- account[[nm_series]]
data_rates <- rates[[nm_series]]
nms_rates <- names(data_rates)
nms_check <- setdiff(nms_rates, c("time", "rate"))
for (nm in nms_check) {
var_account <- data_account[[nm]]
var_rates <- data_rates[[nm]]
levels_account <- unique(var_account)
levels_rates <- unique(var_rates)
is_in_rates <- levels_account %in% levels_rates
i_not_in_rates <- match(FALSE, is_in_rates, nomatch = 0L)
if (i_not_in_rates > 0L)
stop("'", nm, "' variable for series '", nm_series,
"' in 'account' has value [", levels_account[[i_not_in_rates]],
"] not found in '", nm, "' variable for series '",
nm_series, "' in 'rates'")
is_in_account <- levels_rates %in% levels_account
i_not_in_account <- match(FALSE, is_in_account, nomatch = 0L)
if (i_not_in_account > 0L)
stop("'", nm, "' variable for series '", nm_series,
"' in 'rates' has value [", levels_rates[[i_not_in_account]],
"] not found in '", nm, "' variable for series '",
nm_series, "' in 'account'")
}
}
TRUE
}
## HAS_TESTS
## Check that the classification variables of 'rates' and
## 'data_models' are consistent.
## 'rates' - a list of data frames
## 'data_models - a list of data models
## return value - TRUE or an error
check_consistent_rates_data_models <- function(rates, data_models) {
nms_series_rates <- names(rates)
nms_series_dm <- vapply(data_models, function(x) x$nm_series, "nm")
nms_data_dm <- vapply(data_models, function(x) x$nm_data, "nm")
## 'data_models' series, apart from "population",
## are subset of 'rates' series
is_in_rates <- nms_series_dm %in% nms_series_rates
is_popn <- nms_series_dm == "population"
i_not_in <- match(FALSE, is_in_rates | is_popn, nomatch = 0L)
if (i_not_in > 0L) {
nm_not_in <- nms_series_dm[[i_not_in]]
stop("'data_models' has model for series [", nm_not_in,
"] not found in 'rates'")
}
## same classification variables, apart from cohort,
## and (for births) sex/gender
for (i_dm in seq_along(data_models)) {
nm_series <- nms_series_dm[[i_dm]]
if (nm_series != "population") {
i_rates <- match(nm_series, nms_series_rates)
nms_classif_vars_dm <- setdiff(names(data_models[[i_dm]]$data), "count")
nms_classif_vars_rates <- setdiff(names(rates[[i_rates]]), "rate")
nms_classif_vars_dm_trim <- setdiff(nms_classif_vars_dm, "cohort")
if (nm_series == "births")
nms_classif_vars_dm_trim <- setdiff(nms_classif_vars_dm_trim, c("sex", "gender"))
if (!setequal(nms_classif_vars_dm_trim, nms_classif_vars_rates)) {
str_dm <- paste(nms_classif_vars_dm, collapse = ", ")
str_rates <- paste(nms_classif_vars_rates, collapse = ", ")
stop("names of classification variables [", str_dm, "] for data model ",
"for dataset '", nms_data_dm[[i_dm]], "' and series '",
nm_series, "' in 'data_models' not consistent with names of ",
"classification variables [", str_rates,
"] for series '", nm_series, "' in 'rates'")
}
}
}
## levels of classification variables for data models
## are subset of levels of classification
## variables for rates
for (i_dm in seq_along(data_models)) {
nm_series <- nms_series_dm[[i_dm]]
if (nm_series != "population") {
i_rates <- match(nm_series, nms_series_rates)
data_dm <- data_models[[i_dm]]$data
data_rates <- rates[[i_rates]]
nms_vars_rates <- names(data_rates)
nms_classif_vars_rates <- setdiff(nms_vars_rates, "rate")
for (nm in nms_classif_vars_rates) {
var_dm <- data_dm[[nm]]
var_rates <- data_rates[[nm]]
levels_dm <- unique(var_dm)
levels_rates <- unique(var_rates)
is_included <- levels_dm %in% levels_rates
i_not_included <- match(FALSE, is_included, nomatch = 0L)
if (i_not_included > 0L)
stop("'", nm, "' variable from data model for dataset '",
nms_data_dm[[i_dm]], "' and series '", nm_series,
"' in 'data_models' has value [", levels_dm[[i_not_included]],
"] not found in '", nm, "' variable from series '",
nm_series, "' in 'rates'")
}
}
}
TRUE
}
## HAS_TESTS
## Check that 'data' meets the requirements for an
## object of class "datamodel"
## 'data' - a data frame
## 'is_popn' - whether the data refers to population
## return value - TRUE or an error
check_data_dm <- function(data, is_popn) {
if (!is.data.frame(data))
stop("'data' has class \"", class(data), "\"")
nms <- names(data)
check_names_df_valid(nms_vars = nms,
nm_obj = "data",
incl_cohort = !is_popn,
incl_sexgender = TRUE,
incl_region = NULL,
nm_measure_var = "count")
nms_classif_vars <- setdiff(nms, "count")
classif_vars <- data[nms_classif_vars]
check_classif_vars_valid(classif_vars = classif_vars,
nm_obj = "data")
check_measure_var_valid(var = data[["count"]],
nm_var = "count",
nm_obj = "data",
na_ok = TRUE,
frac_ok = FALSE)
TRUE
}
## HAS_TESTS
## Check that a to be used in an 'exact' data model
## has all levels of classification variables
## (which is not required in other data model classes.)
## If 'data' is births, then lower and upper
## age groups can be missing.
## 'data' - a data frame
## 'is_births' - whether the data refers to births
check_data_exact <- function(data, is_births) {
nms_classif_vars <- setdiff(names(data), "count")
classif_vars <- data[nms_classif_vars]
check_classif_vars_complete(classif_vars = classif_vars,
nm_obj = "data",
no_min_age = is_births)
TRUE
}
## HAS_TESTS
## Check that 'data_models' argument is valid.
## Do not need to check internal consistency
## of individual data models, since this is done
## by checking functions for data models.
## 'data_models' - a list of object of class "data_model"
## return value - TRUE or an error
check_data_models <- function(data_models) {
## 'data_models' is list
if (!is.list(data_models))
stop("'data_models' is not a list")
## return if length 0
n <- length(data_models)
if (identical(n, 0L))
return(TRUE)
## all elements are data models
is_data_model <- vapply(data_models,
FUN = inherits,
FUN.VALUE = TRUE,
what = "datamodel")
i_not_data_model <- match(FALSE, is_data_model, nomatch = 0L)
if (i_not_data_model > 0L)
stop("'data_models' has element with class \"",
class(data_models[[i_not_data_model]]),
"\"")
## datasets unique
nms_data <- vapply(data_models, function(x) x$nm_data, "nm")
is_duplicated_data <- duplicated(nms_data)
i_duplicated_data <- match(TRUE, is_duplicated_data, nomatch = 0L)
if (i_duplicated_data > 0L)
stop("two models in 'data_models' refer to the same dataset : '",
nms_data[[i_duplicated_data]], "'")
## exactly one 'births' series and 'deaths' series
nms_series <- vapply(data_models, function(x) x$nm_series, "nm")
for (nm_series in c("births", "deaths")) {
is_series <- nm_series == nms_series
if (sum(is_series) == 0L)
stop("'data_models' does not have a model for ", nm_series)
if (sum(is_series) >= 2L)
stop("'data_models' has two or more models for ", nm_series)
}
## consistency across data models
nm_measure_var <- "count"
nms_classif_all <- lapply(data_models,
function(x) setdiff(names(x$data), nm_measure_var))
classif_vars_all <- lapply(seq_along(data_models),
function(i) data_models[[i]]$data[nms_classif_all[[i]]])
i_deaths <- match("deaths", nms_series)
nms_classif_template <- nms_classif_all[[i_deaths]]
classif_vars_template <- data_models[[i_deaths]]$data[nms_classif_template]
nms_popn <- nms_data[nms_series == "population"]
no_cohort <- nms_popn
no_sexgender <- character()
absent_ok <- TRUE
ignore_age_cohort <- "births"
omit_first_time <- nms_popn
check_classif_vars_consistent_names(nms_classif_all = nms_classif_all,
nms_obj_all = nms_data,
nms_classif_template = nms_classif_template,
no_cohort = no_cohort,
no_sexgender = no_sexgender)
check_classif_vars_consistent_levels(classif_vars_all = classif_vars_all,
nms_obj_all = nms_data,
classif_vars_template = classif_vars_template,
absent_ok = absent_ok,
ignore_age_cohort = ignore_age_cohort,
omit_first_time = omit_first_time)
## return TRUE if all tests pass
TRUE
}
## HAS_TESTS
## Check that the 'disp' argument in a data model is valid
## 'disp' - a data frame or numeric scalar
## 'data' - a data frame
## return value - TRUE or an error
check_disp_dm <- function(disp, data) {
check_arg_dm(arg = disp,
nm_arg = "disp",
data = data,
neg_ok = FALSE,
zero_ok = FALSE)
}
## HAS_TESTS
## Check that the 'dominant' argument is
## a valid string.
## 'dominant' - a character vector of length 1
## return value - TRUE or an error
check_dominant <- function(dominant) {
## is character
if (!is.character(dominant))
stop("'dominant' is non-character")
## has length 1
if (!identical(length(dominant), 1L))
stop("'dominant' does not have length 1")
## not NA
if (is.na(dominant))
stop("'dominant' is NA")
## not blank
if (!nzchar(dominant))
stop("'dominant' is blank")
TRUE
}
## HAS_TESTS
## Check that accounting identities are satisfied.
## 'account' - a demographic account
## return value - TRUE or a warning (not an error)
check_identities <- function(account) {
increments <- to_increments(account)
if (any(increments$discrepancy != 0))
warning("accounting identities not satisfied")
TRUE
}
## HAS_TESTS
## Check that a data model refers to births or deaths
## 'x' - a data model
## TRUE or an error
check_is_births_deaths <- function(x) {
nm_series <- x$nm_series
if (!(nm_series %in% c("births", "deaths")))
stop("data model has class '", class(x)[[1L]],
"' but series is '", nm_series, "'")
TRUE
}
## HAS_TESTS
## Check that a data model does not refer to births or deaths
## 'x' - a data model
## TRUE or an error
check_is_not_births_deaths <- function(x) {
nm_series <- x$nm_series
if (nm_series %in% c("births", "deaths"))
stop("data model has class '", class(x)[[1L]],
"' but series is '", nm_series, "'")
TRUE
}
## HAS_TESTS
## Check that a measure variable is valid. The measure variable
## can be numeric or a list column: it is up to the calling
## function to check whether either form is appropriate.
## 'var' - the measure variable
## 'nm_var' - the name of the measure variable
## 'nm_obj' - the name of the data frame that 'var' was taken from
## 'na_ok' - whether 'var' can contain NAs
## 'frac_ok' - whether 'var' can contain non-integer values
## return value - TRUE or an error message
check_measure_var_valid <- function(var, nm_var, nm_obj, na_ok, frac_ok) {
if (is.numeric(var))
check_measure_var_valid_num(var = var,
nm_var = nm_var,
nm_obj = nm_obj,
na_ok = na_ok,
frac_ok = frac_ok)
else if (is.list(var))
check_measure_var_valid_list(var = var,
nm_var = nm_var,
nm_obj = nm_obj,
na_ok = na_ok,
frac_ok = frac_ok)
else
stop("'", nm_var, "' variable in '", nm_obj, "' is not numeric or list")
}
## HAS_TESTS
## Check that a measure variable is valid, given that
## the measure variable is a list of numeric vectors
## 'var' - the measure variable
## 'nm_var' - the name of the measure variable
## 'nm_obj' - the name of the data frame that 'var' was taken from
## 'na_ok' - whether 'var' can contain NAs
## 'frac_ok' - whether 'var' can contain non-integer values
## return value - TRUE or an error message
check_measure_var_valid_list <- function(var, nm_var, nm_obj, na_ok, frac_ok) {
## elements numeric
is_numeric <- vapply(var, is.numeric, TRUE)
if (!all(is_numeric))
stop("'", nm_var, "' variable in '", nm_obj, "' has non-numeric elements")
## all elements have the same length
if (length(var) > 1L) {
lengths <- vapply(var, length, 1L)
if (any(lengths[-1L] != lengths[[1L]]))
stop("elements of '", nm_var, "' variable in '", nm_obj, "' have different lengths")
}
## non-NA
if (!na_ok) {
has_na <- vapply(var, anyNA, TRUE)
if (any(has_na))
stop("'", nm_var, "' variable in '", nm_obj, "' has elements with NAs")
}
## non-negative
x_has_neg <- function(x) any(x[!is.na(x)] < 0)
has_neg <- vapply(var, x_has_neg, TRUE)
if (any(has_neg))
stop("'", nm_var, "' variable in '", nm_obj, "' has elements with negative values")
## whole number
if (!frac_ok) {
x_has_frac <- function(x) any(x[!is.na(x)] != round(x[!is.na(x)]))
has_frac <- vapply(var, x_has_frac, TRUE)
if (any(has_frac))
stop("'", nm_var, "' variable in '", nm_obj, "' has elements with non-integer values")
}
## return
TRUE
}
## HAS_TESTS
## Check that a measure variable is valid, given that
## the measure variable is a numeric vector
## 'var' - the measure variable
## 'nm_var' - the name of the measure variable
## 'nm_obj' - the name of the data frame that 'var' was taken from
## 'na_ok' - whether 'var' can contain NAs
## 'frac_ok' - whether 'var' can contain non-integer values
## return value - TRUE or an error message
check_measure_var_valid_num <- function(var, nm_var, nm_obj, na_ok, frac_ok) {
## non-NA
if (!na_ok) {
if (anyNA(var))
stop("'", nm_var, "' variable in '", nm_obj, "' has NAs")
}
var <- var[!is.na(var)]
## non-negative
if (any(var < 0))
stop("'", nm_var, "' variable in '", nm_obj, "' has negative values")
## whole number
if (!frac_ok) {
if (any(var != round(var)))
stop("'", nm_var, "' variable in '", nm_obj, "' has non-integer values")
}
## return
TRUE
}
## HAS_TESTS
## Check that 'n_particle' is a positive whole number
## 'n_particle' - a numeric vector of length 1
## return value - TRUE or an error
check_n_particle <- function(n_particle) {
## is numeric
if (!is.numeric(n_particle))
stop("'n_particle' is non-numeric")
## has length 1
if (!identical(length(n_particle), 1L))
stop("'n_particle' does not have length 1")
## not NA
if (is.na(n_particle))
stop("'n_particle' is NA")
## is an integer
if (n_particle != round(n_particle))
stop("'n_particle' is not an integer")
## is at least 1
if (n_particle < 1)
stop("'n_particle' is less than 1")
TRUE
}
## HAS_TESTS
## Check that 'n_thin' is a positive whole number
## 'n_thin' - a numeric vector of length 1
## return value - TRUE or an error
check_n_thin <- function(n_thin) {
## is numeric
if (!is.numeric(n_thin))
stop("'n_thin' is non-numeric")
## has length 1
if (!identical(length(n_thin), 1L))
stop("'n_thin' does not have length 1")
## not NA
if (is.na(n_thin))
stop("'n_thin' is NA")
## is an integer
if (n_thin != round(n_thin))
stop("'n_thin' is not an integer")
## is at least 1
if (n_thin < 1)
stop("'n_thin' is less than 1")
TRUE
}
## HAS_TESTS
## Check that 'n_thread' is NULL
## Will revise as part of introducing
## parallel processing.
check_n_thread <- function(n_thread) {
if (!is.null(n_thread))
warning("'n_thread' is currently ignored")
TRUE
}
## HAS_TESTS
## Check that the column names of a data frame holding
## demographic rates or counts are valid.
## If an 'incl' argument is TRUE, then 'nms_vars' must include
## the associated name. If an 'incl' argument is FALSE, then
## 'nms_vars' must not include the associated name. If an
## 'incl' argument is NULL, then 'nms_vars' can either include or
## not include the associated name.
## If 'incl_sexgender' is TRUE, then the associated
## column can be called "sex" or "gender".
## 'nms_vars' - the column nms_vars of the data frame
## 'nm_obj' - the name of the data frame
## 'incl_cohort' - whether the nms_vars should include "cohort"
## 'incl_sexgender' - whether the nms_vars should include "sex" or "gender"
## 'incl_region' - whether the nms_vars should include "region"
check_names_df_valid <- function(nms_vars,
nm_obj,
incl_cohort,
incl_sexgender,
incl_region,
nm_measure_var) {
check_names_valid(names = nms_vars,
nm_obj = nm_obj)
sets_names_classif_vars <- get_sets_names_classif_vars(incl_cohort = incl_cohort,
incl_sexgender = incl_sexgender,
incl_region = incl_region)
sets_names_vars <- lapply(sets_names_classif_vars,
append,
values = nm_measure_var)
found <- FALSE
for (set_nms_vars in sets_names_vars) {
found <- setequal(nms_vars, set_nms_vars)
if (found)
break
}
if (!found)
stop("'", nm_obj, "' variables [", paste(nms_vars, collapse = ", "),
"] incomplete or invalid")
TRUE
}
## HAS_TESTS
## Check that a combination of series names is valid.
## Under all settings, "births" and "deaths" are required.
## 'names' - the series names to be checked
## 'nm_obj' - the name of the object that the names
## were taken from
## 'incl_popn' - whether then names should include "population"
## 'incl_stock' - whether then names should include "stock"
## 'subset_ok' - whether 'names' can be a subset of the target names
## or has to have every member
## return value - TRUE or an error
check_names_series_valid <- function(names,
nm_obj,
incl_popn,
incl_stock,
subset_ok) {
check_names_valid(names = names,
nm_obj = nm_obj)
sets_names_series <- get_sets_names_series(incl_popn = incl_popn,
incl_stock = incl_stock)
found <- FALSE
for (set_names in sets_names_series) {
if (subset_ok)
found <- (all(names %in% set_names)
&& ("births" %in% names)
&& ("deaths" %in% names))
else
found <- setequal(names, set_names)
if (found)
break
}
if (!found)
stop("'", nm_obj, "' series names [", paste(names, collapse = ", "),
"] incomplete or invalid")
TRUE
}
## HAS_TESTS
## Check that 'names' is non-NULL,
## and has no NAs, blanks, or duplicates
## 'names' - a vector of possible names, or NULL
## 'nm' - the name of the object that the names
## were taken from
## return value - TRUE or an error
check_names_valid <- function(names, nm_obj) {
if (is.null(names))
stop("'", nm_obj, "' does not have names")
## names have no NAs
if (anyNA(names))
stop("names for '", nm_obj, "' have NAs")
## names have no blanks
if (any(names == ""))
stop("names for '", nm_obj, "' have blanks")
## names have no duplicates
if (any(duplicated(names)))
stop("names for '", nm_obj, "' have duplicates")
TRUE
}
## HAS_TESTS
## Check that 'nm_data' is a valid dataset name
check_nm_data <- function(nm_data) {
## is character
if (!is.character(nm_data))
stop("'nm_data' is non-character")
## has length 1
if (!identical(length(nm_data), 1L))
stop("'nm_data' does not have length 1")
## is not NA
if (is.na(nm_data))
stop("'nm_data' is NA")
## not blank
if (!nzchar(nm_data))
stop("'nm_data' is blank")
TRUE
}
## HAS_TESTS
## Check that 'nm_series' is a valid series name, which
## depends on whether 'names_data' includes "region"
check_nm_series <- function(nm_series, names_data) {
## is character
if (!is.character(nm_series))
stop("'nm_series' is non-character")
## has length 1
if (!identical(length(nm_series), 1L))
stop("'nm_series' does not have length 1")
## is not NA
if (is.na(nm_series))
stop("'nm_series' is NA")
## is valid series
incl_internal <- "region" %in% names_data
valid_nms_series <- get_names_series(incl_popn = TRUE,
incl_stock = FALSE,
one_imem = NULL,
incl_internal = incl_internal)
if (!(nm_series %in% valid_nms_series))
stop("'nm_series' has invalid value [\"", nm_series, "\"]")
TRUE
}
## HAS_TESTS
## Check that 'prob' is a number between 0 and 1 (exclusive)
## 'prob' - A numeric scalar
## return value - TRUE or an error
check_prob <- function(prob) {
if (!is.numeric(prob))
stop("'prob' is non-numeric")
if (!identical(length(prob), 1L))
stop("'prob' does not have length 1")
if (is.na(prob))
stop("'prob' is NA")
if (prob <= 0)
stop("'prob' is less than or equal to 0")
if (prob >= 1)
stop("'prob' is greater than or equal to 1")
TRUE
}
## HAS_TESTS
## Check 'rates' argument.
## 'rates' - a list of data frames
## return value - TRUE or an error
check_rates <- function(rates) {
## 'rates' is list
if (!is.list(rates))
stop("'rates' is not a list")
## elements of 'rates' all data frames
if (!all(vapply(rates, is.data.frame, TRUE)))
stop("'rates' has elements that are not data frames")
## names for 'rates' are valid demographic series
nms_rates <- names(rates)
check_names_series_valid(names = nms_rates,
nm_obj = "rates",
incl_popn = FALSE,
incl_stock = FALSE,
subset_ok = FALSE)
## Has 'region' variable iff has 'internal_in'. Only need to
## check first data frame - if data frames inconsistent,
## this will be picked up later.
check_region_iff_internal(nms_vars = names(rates[[1L]]),
nms_series = nms_rates)
## internal consistency of each data frame
for (i in seq_along(rates)) {
df <- rates[[i]]
nm_obj <- nms_rates[[i]]
nms_vars <- names(df)
nm_measure_var <- "rate"
measure_var <- df[[nm_measure_var]]
nms_classif_vars <- setdiff(nms_vars, nm_measure_var)
classif_vars <- df[nms_classif_vars]
incl_cohort <- FALSE
incl_sexgender <- nm_obj != "births"
incl_region <- "internal_in" %in% nms_rates
no_min_age <- nm_obj == "births"
na_ok <- FALSE
frac_ok <- TRUE
tryCatch({
check_names_df_valid(nms_vars = nms_vars,
nm_obj = nm_obj,
incl_cohort = incl_cohort,
incl_sexgender = incl_sexgender,
incl_region = incl_region,
nm_measure_var = nm_measure_var)
check_classif_vars_valid(classif_vars = classif_vars,
nm_obj = nm_obj)
check_classif_vars_complete(classif_vars = classif_vars,
nm_obj = nm_obj,
no_min_age = no_min_age)
check_measure_var_valid(var = measure_var,
nm_var = nm_measure_var,
nm_obj = nm_obj,
na_ok = na_ok,
frac_ok = frac_ok)
},
error = function(e) {
msg <- paste("problem with 'rates' :", e$message)
stop(msg, call. = FALSE)
})
}
## consistency across data frames
nms_classif_all <- lapply(rates, function(x) setdiff(names(x), nm_measure_var))
classif_vars_all <- lapply(seq_along(rates), function(i) rates[[i]][nms_classif_all[[i]]])
i_deaths <- match("deaths", nms_rates)
nms_classif_template <- nms_classif_all[[i_deaths]]
classif_vars_template <- rates[[i_deaths]][nms_classif_template]
no_cohort <- character()
no_sexgender <- "births"
absent_ok <- FALSE
ignore_age_cohort <- "births"
check_classif_vars_consistent_names(nms_classif_all = nms_classif_all,
nms_obj_all = nms_rates,
nms_classif_template = nms_classif_template,
no_cohort = no_cohort,
no_sexgender = no_sexgender)
check_classif_vars_consistent_levels(classif_vars_all = classif_vars_all,
nms_obj_all = nms_rates,
classif_vars_template = classif_vars_template,
absent_ok = absent_ok,
ignore_age_cohort = ignore_age_cohort)
## return TRUE if all tests pass
TRUE
}
## HAS_TESTS
## Check that the 'ratio' argument in a data model is valid
## 'ratio' - a data frame or numeric scalar
## 'data' - a data frame
## return value - TRUE or an error
check_ratio_dm <- function(ratio, data) {
check_arg_dm(arg = ratio,
nm_arg = "ratio",
data = data,
neg_ok = FALSE,
zero_ok = TRUE)
}
## HAS_TESTS
## Check that classification variables contain 'region' iff
## series include 'internal_in'.
## 'nms_vars' - names of variables
## 'nms_series' - names of demographic series
## return value - TRUE or an error message
check_region_iff_internal <- function(nms_vars, nms_series) {
has_region <- "region" %in% nms_vars
has_internal <- "internal_in" %in% nms_series
## if has 'internal_in', then all elements have "region" variable
if (has_region && !has_internal)
stop("have 'region' variable but do not have series",
" 'internal_in' and 'internal_out'")
if (!has_region & has_internal)
stop("have series 'internal_in' and 'internal_out' but do not",
" have 'region' variable")
TRUE
}
## HAS_TESTS
## Check that 'threshold' is a number between 0 and 1 (inclusive)
## 'threshold' - a number
## return value - TRUE or an error
check_threshold <- function(threshold) {
if (!is.numeric(threshold))
stop("'threshold' is non-numeric")
if (!identical(length(threshold), 1L))
stop("'threshold' does not have length 1")
if (is.na(threshold))
stop("'threshold' is NA")
if (threshold < 0)
stop("'threshold' is negative")
if (threshold > 1)
stop("'threshold' is greater than 1")
TRUE
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.