## HAS_TESTS
## Add an extra combinations to classification
## variables, to represent initial values.
## For existing cohorts, this means the population
## at the end the period before the main estimation period;
## for new cohorts, this means accession to age 0, ie birth.
add_initial_to_classif <- function(classif_vars) {
time_min <- min(classif_vars$time)
is_time_min <- with(classif_vars, time == min(time))
is_age_min <- with(classif_vars, age == 0L)
is_upper <- with(classif_vars, cohort == time - age - 1L)
## existing cohorts
initial_existing <- classif_vars[is_time_min & is_upper, ]
initial_existing$time <- initial_existing$time - 1L
## new_cohorts
initial_new <- classif_vars[is_age_min & !is_upper, ]
initial_new$age <- -1L
## result
classif_vars <- rbind(initial_existing,
initial_new,
classif_vars)
classif_vars
}
## HAS_TESTS
fake_classif_vars <- function(n_age = 3L,
n_time = 3L,
sex = TRUE,
n_region = 0L) {
args <- list(age = seq.int(from = 0L, to = n_age - 1L),
time = seq.int(from = 2000L, length.out = n_time),
is_lower = c(TRUE, FALSE))
if (sex)
sexgender <- list(sex = c("Female", "Male"))
else
sexgender <- list(gender = c("Female", "Male", "Diverse"))
args <- c(args, sexgender)
if (n_region > 0L)
args <- c(args, list(region = seq_len(n_region)))
ans <- expand.grid(args,
stringsAsFactors = FALSE,
KEEP.OUT.ATTRS = FALSE)
if (n_region > 0L)
ans$region <- factor(ans$region, levels = seq_len(n_region))
ans$cohort <- ans$time - ans$age - 1L + ans$is_lower
ans <- ans[-match("is_lower", names(ans))]
ans <- sort_classif_cols(ans)
ans <- sort_df(ans)
ans
}
## HAS_TESTS
fake_classif_vars_popn <- function(n_age = 3L,
n_time = 3L,
sex = TRUE,
n_region = 0L) {
args <- list(age = seq.int(from = 0L, to = n_age - 1L),
time = seq.int(from = 1999L, length.out = n_time + 1L))
if (sex)
sexgender <- list(sex = c("Female", "Male"))
else
sexgender <- list(gender = c("Female", "Male", "Diverse"))
args <- c(args, sexgender)
if (n_region > 0L)
args <- c(args, list(region = seq_len(n_region)))
ans <- expand.grid(args,
stringsAsFactors = FALSE,
KEEP.OUT.ATTRS = FALSE)
if (n_region > 0L)
ans$region <- factor(ans$region, levels = seq_len(n_region))
ans <- sort_classif_cols(ans)
ans <- sort_df(ans)
ans
}
## HAS_TESTS
is_lower_triangle <- function(age, time, cohort) {
(time - cohort) == age
}
## HAS_TESTS
## Make list of vectors or matrices of values from a
## data model that align to 'classif_vars'.
## 'df' - a data frame extracted from an object of class "data_model"
## 'classif_vars' - a data frame
## 'is_popn' - whether the data model refers to population
## return value - a list of vectors or matrices
make_list_dm_df <- function(df, classif_vars, is_popn) {
if (is_popn) {
df$cohort <- df$time - df$age
classif_vars <- add_initial_to_classif(classif_vars)
}
nms_classif_vars <- names(classif_vars)
nm_measure_var <- setdiff(names(df), nms_classif_vars)
merged <- merge(classif_vars,
df,
by = nms_classif_vars,
all.x = TRUE,
sort = FALSE)
merged <- sort_classif_cols(merged, ignore = nm_measure_var)
merged <- sort_df(merged, ignore = nm_measure_var)
nm_f <- intersect(c("cohort", "sex", "gender"), names(merged))
ans_df <- to_list_col(df = merged,
nm_f = nm_f,
nm_x = nm_measure_var)
has_region <- "region" %in% names(merged)
if (has_region) {
n_region <- nlevels(classif_vars$region)
ans_df[[nm_measure_var]] <- lapply(ans_df[[nm_measure_var]], matrix, nrow = n_region)
}
ans_df[[nm_measure_var]]
}
## HAS_TESTS
sort_classif_cols <- function(x, ignore = NULL) {
cols_order <- c("cohort", "sex", "gender", "time", "age", "region")
has_ignore <- !is.null(ignore)
if (has_ignore) {
i_ignore <- match(ignore, names(x), nomatch = 0L)
i_invalid <- match(0L, i_ignore, nomatch = 0L)
if (i_invalid > 0L)
stop("\"", ignore[[i_invalid]], "\" is not a valid column name")
cols_ignore <- x[i_ignore]
x <- x[-i_ignore]
}
cols_classif <- names(x)
is_valid <- cols_classif %in% cols_order
i_invalid <- match(FALSE, is_valid, nomatch = 0L)
if (i_invalid > 0L)
stop("invalid name for classification variable : \"",
cols_classif[[i_invalid]], "\"")
cols <- intersect(cols_order, cols_classif)
ans <- x[cols]
if (has_ignore)
ans <- data.frame(ans, cols_ignore)
ans
}
## HAS_TESTS
## Sort the rows of a data froame, with the left-most columns
## varying slowest and right-most fastest.
sort_df <- function(x, ignore = NULL) {
if (is.null(ignore))
args <- x
else {
i_ignore <- match(ignore, names(x), nomatch = 0L)
i_invalid <- match(0L, i_ignore, nomatch = 0L)
if (i_invalid > 0L)
stop("\"", ignore[[i_invalid]], "\" is not a valid column name")
args <- x[-i_ignore]
}
ord <- do.call(order, args = args)
ans <- x[ord, ]
rownames(ans) <- NULL
ans
}
## NO_TESTS
## 'df' is a data frame
split_into_rows <- function(df) {
f <- seq_len(nrow(df))
split(x = df,
f = f)
}
## HAS_TESTS
## Create data frame from 'df' with classifying variables
## specified by 'nm_f' and measure variable(s) 'nm_x',
## where the measure variables are put into list columns,
## each entry of which can have any length (not necessarily
## the same). This is an internal function,
## and we assume that all arguments are valid.
## df - a data frame
## nm_f - the names of 1 or more columns of 'df'
## nm_x - the names of 1 or more columns of 'df'
to_list_col <- function(df, nm_f, nm_x) {
f <- df[nm_f]
f_unique <- unique(f)
vals_f <- do.call(paste, f)
vals_f_unique <- do.call(paste, f_unique)
indices <- lapply(vals_f_unique, grep, x = vals_f)
ans <- f_unique
rownames(ans) <- NULL
for (nm in nm_x) {
vals <- df[[nm]]
get_indexed_vals <- function(index) vals[index]
ans[[nm]] <- lapply(indices, get_indexed_vals)
}
ans
}
## HAS_TESTS
valid_age_time_cohort <- function(age, time, cohort) {
diff <- (time - cohort) - age
is_valid <- diff %in% c(0L, 1L)
i_invalid <- match(FALSE, is_valid, nomatch = 0L)
if (i_invalid > 0L)
stop("invalid combination of age [", age[[i_invalid]],
"], time [", time[[i_invalid]], "], and cohort [",
cohort[[i_invalid]], "]")
TRUE
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.