knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 6, fig.height = 4 )
This is an reanalysis of an example provided in Theodore R. Holford, 1983, The Estimation of Age, Period and Cohort Effects for Vital Rates, Biometrics 39(2): 311-324. The data are prostate cancer deaths for nonwhites in the U.S. Age and period are grouped into five-year intervals.
For details on these results, see this working paper.
library("dplyr") library("ggplot2") library("texreg") library("weightedcontrasts") data("prostate") # provided with weightedcontrasts package d <- prostate %>% mutate(rate = 100000 * deaths / population, log_rate = log(rate)) %>% mutate(across(c(age, period, cohort), as.factor))
This is simply a reconstruction of Figure 2 in Holford (1983).
m_ac <- glm(deaths ~ age + cohort + offset(log(d$population)), data = d, family = poisson()) d_pred <- mutate(d, pred = log(predict(m_ac, type = "response") / population * 100000)) p <- d %>% ggplot(aes(x = cohort, y = log_rate, color = age, group = age)) + geom_point() + geom_line() + geom_point(data = d_pred, aes(x = cohort, y = pred), shape = 4, size = 2) + theme_bw() p
The first helper function extracts the non-linear effects from the model, while the second function computes a total (linear and non-linear) effect. The last function is used to override the linear contrast below; this is done to obtain exactly the same point estimates as Holford.
get_deviations <- function(model, set, contrasts) { extract_coefs <- coef(model)[grepl(set, names(coef(model)))] extract_coefs <- extract_coefs[2:length(extract_coefs)] deviations <- contrasts[, 2:(1 + length(extract_coefs)), drop = FALSE] %*% extract_coefs deviations[, 1] } get_total_effect <- function(contrasts, linear_coef, nonlinear_coefs) { linear_contrast <- contrasts[, 1] nonlinear_contrasts <- contrasts[, 2:ncol(contrasts)] linear_contrast * linear_coef + nonlinear_contrasts %*% nonlinear_coefs } get_linear_contrast <- function(vector) { 1:n_distinct(vector) - 1/2 * n_distinct(vector) - 1/2 }
We first replicate Holford's original model. The built-in R function contr.poly
uses
unweighted orthogonal polynomials. We then use one of the helper functions defined above to extract
the identified non-linear effects.
data_holford <- as_tibble(d) # use contr.poly but adjust for the linear term contrasts(data_holford$age) <- contr.poly contrasts(data_holford$age)[, 1] <- get_linear_contrast(data_holford$age) contrasts(data_holford$period) <- contr.poly contrasts(data_holford$period)[, 1] <- get_linear_contrast(data_holford$period) contrasts(data_holford$cohort) <- contr.poly contrasts(data_holford$cohort)[, 1] <- get_linear_contrast(data_holford$cohort) holford <- glm(deaths ~ age + cohort + period + offset(log(data_holford$population)), data = data_holford, family = poisson()) # get deviations from linear trend and age/cohort linear effects # (these are the true linear effects when period linear effect = 0) holford_age <- get_deviations(holford, "^age", contrasts(data_holford$age)) holford_period <- get_deviations(holford, "^period", contrasts(data_holford$period)) holford_cohort <- get_deviations(holford, "^cohort", contrasts(data_holford$cohort)) holford_age_linear <- coef(holford)["age.L"] holford_cohort_linear <- coef(holford)["cohort.L"]
This code is very similar, but uses the function contr.poly.weighted
from the weightedcontrasts
package.
We also specify the width of the age groups (5-year intervals in this case), to obtain
an interpretable linear term.
data_corrected <- as_tibble(d) contrasts(data_corrected$age) <- contr.poly.weighted(data_corrected$age, width = 5) contrasts(data_corrected$period) <- contr.poly.weighted(data_corrected$period, width = 5) contrasts(data_corrected$cohort) <- contr.poly.weighted(data_corrected$cohort, width = 5) # zero out the period linear effect contrasts(data_corrected$period)[, 1] <- 0 corrected <- glm(deaths ~ age + cohort + period + offset(log(data_corrected$population)), data = data_corrected, family = poisson()) # get deviations from linear trend and age/cohort linear effects # (these are the true linear effects when period linear effect = 0) corrected_age <- get_deviations(corrected, "^age", contrasts(data_corrected$age)) corrected_period <- get_deviations(corrected, "^period", contrasts(data_corrected$period)) corrected_cohort <- get_deviations(corrected, "^cohort", contrasts(data_corrected$cohort)) corrected_age_linear <- coef(corrected)["age.L"] corrected_cohort_linear <- coef(corrected)["cohort.L"]
By dropping higher-order polynomial terms, we can fit a much more parsimonious model. The model that contains only linear and quadratic effects fits almost equally well.
designmatrix <- model.matrix(deaths ~ age + cohort + period + offset(log(data_corrected$population)), data = data_corrected) d_reduced <- designmatrix[, c("age.L", "cohort.L", "period.L", "age.Q", "cohort.Q", "period.Q")] %>% as_tibble() %>% mutate(deaths = d$deaths, population = d$population) corrected_p <- glm(deaths ~ age.L + cohort.L + period.L + age.Q + cohort.Q + period.Q + offset(log(population)), data = d_reduced, family = poisson())
htmlreg(list(holford, corrected, corrected_p), single.row = TRUE)
# does not affect age and period terms all.equal(holford_age, corrected_age) all.equal(holford_period, corrected_period) # comparison of cohort non-linear effects p <- tibble(effect = c(corrected_cohort, holford_cohort), cohort = rep(names(corrected_cohort), 2), which = c(rep("This paper", length(corrected_cohort)), rep("Holford 1983", length(holford_cohort)))) %>% mutate(which = factor(which, levels = c("This paper", "Holford 1983"))) %>% ggplot(aes(x = cohort, y = effect, group = which, linetype = which)) + geom_point() + geom_line() + geom_hline(yintercept = 0) + labs(y = "Non-linear effect", x = "Cohort", group = "", linetype = "") + theme_bw() p
# ggsave("holford1983.pdf", p, height = 4)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.