## HAS_TESTS
## Create a data frame with classification vartiables 'cohort'
## and 'sex'/'gender', and with measure variables 'is_new_cohort',
## and 'births_of'. 'is_new_cohort' is a logical vector.
## 'births_of' is a list of scalars for national data,
## and a list of vectors for regional data,
## with NAs where 'is_new_cohort' is FALSE.
## 'births_of' is births of members of the cohort,
## ans is the initial population for a cohort born during the period.
make_births_of <- function(data_births, classif_vars) {
nms_classif <- names(classif_vars)
has_region <- "region" %in% nms_classif
nms_classif_ans <- intersect(c("cohort", "sex", "gender"), nms_classif)
nms_keep <- intersect(c("time", "sex", "gender", "region"), nms_classif)
data_ag <- stats::aggregate(data_births["count"],
data_births[nms_keep],
sum)
names(data_ag)[match("time", names(data_ag))] <- "cohort"
if (has_region)
data_ag <- sort_df(data_ag, ignore = "count") ## to sort regions
data_ag <- to_list_col(df = data_ag,
nm_f = nms_classif_ans,
nm_x = "count")
names(data_ag)[match("count", names(data_ag))] <- "counts_births_of"
ans <- unique(classif_vars[nms_classif_ans])
ans <- sort_df(ans)
ans$is_new_cohort <- ans$cohort %in% data_ag$cohort
ans <- merge(ans, data_ag,
by = nms_classif_ans,
all.x = TRUE,
sort = FALSE)
ans <- sort_df(ans, ignore = c("counts_births_of", "is_new_cohort"))
ans
}
## HAS_TESTS
## Create a data frame with classification variables "cohort",
## and "sex"/"gender", and with measure variable "births_to",
## which is a list of scalars for national data, and a list of vectors for
## regional data. The measurement is the number of births occurring
## to members of a cohort. The value is 0 where
## the sex/gender is not dominant.
make_births_to <- function(data_births, classif_vars, dominant) {
nms_classif <- names(classif_vars)
nms_classif_nosexgender <- setdiff(nms_classif, c("sex", "gender"))
nm_sexgender <- intersect(nms_classif, c("sex", "gender"))
nms_classif_ans <- c("cohort", nm_sexgender)
has_region <- "region" %in% nms_classif
data_ag <- stats::aggregate(data_births["count"],
data_births[nms_classif_nosexgender],
sum)
data_ag[[nm_sexgender]] <- dominant
data_ag <- sort_classif_cols(data_ag, ignore = "count")
data_ag <- sort_df(data_ag, ignore = "count")
data_ag <- to_list_col(df = data_ag,
nm_f = nms_classif_ans,
nm_x = "count")
names(data_ag)[match("count", names(data_ag))] <- "counts_births_to"
if (has_region) {
n_region <- nlevels(classif_vars$region)
data_ag$counts_births_to <- lapply(data_ag$counts_births_to, matrix, nrow = n_region)
}
ans <- unique(classif_vars[nms_classif_ans])
is_dominant <- ans[[nm_sexgender]] == dominant
is_in_repr_ages <- ans$cohort %in% data_ag$cohort
ans$has_births_to <- is_dominant & is_in_repr_ages
ans <- merge(ans, data_ag,
by = nms_classif_ans,
all.x = TRUE,
sort = FALSE)
ans <- sort_df(ans,
ignore = c("has_births_to", "counts_births_to"))
ans
}
## HAS_TESTS
## rates is a list of data frames that
## has been checked via 'check_rates_list',
## 'check_rates_single' and 'check_rates_consistent'
make_classif_vars <- function(rates) {
deaths <- rates$deaths
cols_base <- intersect(c("age", "sex", "gender", "time", "region"),
names(deaths))
ans <- deaths[cols_base]
ans <- rbind(ans, ans)
is_upper <- rep(c(FALSE, TRUE), each = nrow(ans) / 2L)
ans$cohort <- ans$time - ans$age - is_upper
ans <- sort_classif_cols(ans)
has_region <- "region" %in% names(ans)
if (has_region)
if (!is.factor(ans$region))
ans$region <- factor(ans$region, levels = unique(ans$region))
ans <- sort_df(ans)
ans
}
## HAS_TESTS
## Create a data frame of deaths aligned exactly to 'classif_vars'.
## Assume that 'data_deaths' has been checked - including
## checking that it has all the combinations of variables
## in 'classif_vars'. The measurement variable, "deaths",
## in the return value is a list of matrices if
## the 'data_deaths' has a 'region' column, and
## otherwise a list of vectors.
make_deaths <- function(data_deaths, classif_vars) {
nms_classif <- names(classif_vars)
nms_classif_ans <- intersect(c("cohort", "sex", "gender"), nms_classif)
has_region <- "region" %in% nms_classif
ans <- sort_classif_cols(data_deaths, ignore = "count")
ans <- sort_df(ans, ignore = "count") # 'region' varies fastest if present
ans <- to_list_col(df = ans,
nm_f = nms_classif_ans,
nm_x = "count")
names(ans)[match("count", names(ans))] <- "counts_deaths"
if (has_region) {
n_region <- nlevels(classif_vars$region)
ans$counts_deaths <- lapply(ans$counts_deaths, matrix, nrow = n_region)
}
ans
}
## HAS_TESTS
## make counts of births and deaths
make_df_birthdeath <- function(data_models, classif_vars, dominant) {
nm_series <- vapply(data_models, function(x) x$nm_series, "")
data_births <- data_models[[match("births", nm_series)]]$data
data_deaths <- data_models[[match("deaths", nm_series)]]$data
nms_classif <- names(classif_vars)
nms_classif_ans <- intersect(c("cohort", "sex", "gender"), nms_classif)
births_of <- make_births_of(data_births = data_births,
classif_vars = classif_vars)
births_to <- make_births_to(data_births = data_births,
classif_vars = classif_vars,
dominant = dominant)
deaths <- make_deaths(data_deaths = data_deaths,
classif_vars = classif_vars)
ans <- unique(classif_vars[nms_classif_ans])
left_join <- function(x, y)
merge(x, y, by = nms_classif_ans, sort = FALSE)
ans <- left_join(ans, births_of)
ans <- left_join(ans, births_to)
ans <- left_join(ans, deaths)
ans
}
## HAS_TESTS
## 'data-models' is a list of object of class "datamodel"
## 'classif_vars' is a data frame
make_df_cdms <- function(data_models, classif_vars) {
nms_classif <- names(classif_vars)
has_region <- "region" %in% nms_classif
nms_classif_ans <- intersect(c("cohort", "sex", "gender"), nms_classif)
nms_series_ans <- get_nms_series_cdms(nms_classif)
nms_series_dm <- vapply(data_models, function(x) x$nm_series, "")
is_birth_death <- nms_series_dm %in% c("births", "deaths")
data_models <- data_models[!is_birth_death]
nms_series_dm <- nms_series_dm[!is_birth_death]
ans <- unique(classif_vars[nms_classif_ans])
for (i in seq_along(data_models)) {
nm <- nms_series_dm[[i]]
if (identical(nm, "population")) {
nms_series_dm[[i]] <- "stock"
cohort <- with(data_models[[i]]$data, time - age)
data_models[[i]]$data$cohort <- cohort
}
}
nms_series_dm[grep("^immigration$", nms_series_dm)] <- "immigration1"
nms_series_dm[grep("^emigration$", nms_series_dm)] <- "emigration1"
l <- lapply(data_models,
make_list_cdm,
classif_vars = classif_vars)
constructor <- if (has_region) new_CdmsWithreg else new_CdmsNoreg
for (nm in nms_series_ans) {
i_series <- grep(nm, nms_series_dm)
n_series <- length(i_series)
if (n_series > 0L) {
l_series <- l[i_series]
l_series <- do.call(function(...) mapply(list, ..., SIMPLIFY = FALSE),
args = l_series)
l_series <- lapply(l_series, constructor)
}
else
l_series <- rep(list(constructor()), times = nrow(ans))
cdms_nm <- paste("cdms", nm, sep = "_")
ans[[cdms_nm]] <- l_series
}
ans
}
make_df_estimate <- function(rates,
data_models,
dominant) {
## 'classif_vars' is a data frame with cols "cohort", "sex"/"gender",
## "time", "age", and, optionally, "region"
classif_vars <- make_classif_vars(rates)
nms_classif <- names(classif_vars)
nms_classif_ans <- intersect(c("cohort", "sex", "gender"), nms_classif)
## 'df_birthdeath' is a data frame with cols "cohort", "sex"/"gender",
## "is_new_cohort", "counts_births_of", "has_births_to", "counts_births_to",
## "counts_deaths"
df_birthdeath <- make_df_birthdeath(data_models = data_models,
classif_vars = classif_vars,
dominant = dominant)
## 'df_rates' is a data frame with cols "cohort", "sex"/"gender",
## "rates_births", "rates_deaths", "rates_immigration1",
## "rates_emigration1", "rates_immigration2", "rates_emigration2",
## and possibly "rates_internal_in", "rates_internal_out"
df_rates <- make_df_rates(rates = rates,
classif_vars = classif_vars,
dominant = dominant)
## 'df_cdms' is a data frame with cols "cohort", "sex"/"gender",
## "cdms_stock", "cdms_immigration1", "cdms_emigration1",
## "cdms_immigration2", "cdms_emigration2", and possibly
## "cdms_internal_in", "cdms_internal_out"
df_cdms <- make_df_cdms(data_models = data_models,
classif_vars = classif_vars)
## store metadata
df_meta <- make_df_meta(classif_vars = classif_vars)
## combine 'df_birthdeath', 'df_rates', 'df_cdms' to create
## single data frame with all inputs
df <- merge(df_birthdeath, df_rates,
by = nms_classif_ans,
sort = FALSE)
df <- merge(df, df_cdms,
by = nms_classif_ans,
sort = FALSE)
## final processing
i_sexgender <- grep("^sex$|^gender$", names(df))
df$sexgender <- df[[i_sexgender]]
df$initial_stock_fixed <- df$is_new_cohort
df$n_interval <- vapply(df$counts_deaths, length, 1L)
## return
df
}
make_df_extend <- function(account,
rates,
data_models,
dominant) {
stop("not written yet")
}
make_df_forecast <- function(account,
rates,
dominant) {
stop("not written yet")
}
## HAS_TESTS
make_df_meta <- function(classif_vars) {
classif_vars <- sort_classif_cols(classif_vars)
classif_vars <- sort_df(classif_vars)
nms_classif <- names(classif_vars)
nms_classif_ans <- intersect(c("cohort", "sex", "gender"), nms_classif)
nms_meta <- setdiff(nms_classif, nms_classif_ans)
ans <- unique(classif_vars[nms_classif_ans])
rownames(ans) <- NULL
meta <- split(x = classif_vars[nms_meta],
f = classif_vars[nms_classif_ans],
lex.order = TRUE)
names(meta) <- NULL
meta <- lapply(meta, function(x) {rownames(x) <- NULL; x})
ans$meta <- meta
ans
}
## HAS_TESTS
make_df_rates <- function(rates, classif_vars, dominant) {
ans <- classif_vars
nms_classif <- names(classif_vars)
nms_classif_by <- setdiff(nms_classif, "cohort") # rates do not have cohort variable
nms_classif_ans <- intersect(c("cohort", "sex", "gender"), nms_classif)
has_region <- "region" %in% nms_classif
left_join <- function(x, y)
merge(x, y, by = nms_classif_by, sort = FALSE, all.x = TRUE)
nms_measure <- c("births", "deaths",
"immigration1", "emigration1",
"immigration2", "emigration2")
if (has_region)
nms_measure <- c(nms_measure, "internal_in", "internal_out")
nms_measure <- paste("rates", nms_measure, sep = "_")
## rename 'rates' columns to names of series
for (i in seq_along(rates))
names(rates[[i]])[match("rate", names(rates[[i]]))] <-
paste("rates", names(rates)[[i]], sep = "_")
## births - unlike other series, birth rates do not
## have a sex/gender dimension.
births <- rates[["births"]]
sexgender <- intersect(c("sex", "gender"), nms_classif)
births[[sexgender]] <- dominant
ans <- left_join(ans, births)
ans$rates_births[is.na(ans$rates_births)] <- 0
## deaths
deaths <- rates[["deaths"]]
ans <- left_join(ans, deaths)
## internal migration (if present)
if ("internal_in" %in% names(rates)) {
for (name in c("internal_in", "internal_out")) {
val <- rates[[name]]
ans <- left_join(ans, val)
}
}
## add immigration/emigration
if ("immigration" %in% names(rates)) {
for (name in c("immigration", "emigration")) {
val <- rates[[name]]
name_measure <- paste("rates", name, sep = "_")
names(val)[match(name_measure, names(val))] <- paste0(name_measure, 1)
ans <- left_join(ans, val)
}
ans$rates_immigration2 <- 0
ans$rates_emigration2 <- 0
}
else {
for (name in c("immigration1", "emigration1", "immigration2", "emigration2")) {
val <- rates[[name]]
ans <- left_join(ans, val)
}
}
## sort columns and rows
ans <- sort_classif_cols(ans, ignore = nms_measure)
ans <- sort_df(ans, ignore = nms_measure)
## nest rates within 'cohort' and 'sex'/'gender' and return
ans <- to_list_col(ans,
nm_f = nms_classif_ans,
nm_x = nms_measure)
if (has_region) {
n_region <- nlevels(classif_vars$region)
for (nm in nms_measure)
ans[[nm]] <- lapply(ans[[nm]], matrix, nrow = n_region)
}
ans
}
## HAS_TESTS
tidy_data_dm <- function(data) {
integer_colnames <- c("age", "cohort", "time")
numeric_colnames <- "count"
cols_int <- intersect(colnames(data), integer_colnames)
cols_num <- intersect(colnames(data), numeric_colnames)
data[cols_int] <- lapply(data[cols_int], as.integer)
data[cols_num] <- lapply(data[cols_num], as.numeric)
data
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.