knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 6,
  fig.height = 4
)

This is an reanalysis of an example provided in Theodore R. Holford, 1983, The Estimation of Age, Period and Cohort Effects for Vital Rates, Biometrics 39(2): 311-324. The data are prostate cancer deaths for nonwhites in the U.S. Age and period are grouped into five-year intervals.

For details on these results, see this working paper.

Load packages and data

library("dplyr")
library("ggplot2")
library("texreg")
library("weightedcontrasts")

data("prostate") # provided with weightedcontrasts package
d <- prostate %>%
    mutate(rate = 100000 * deaths / population,
        log_rate = log(rate)) %>%
    mutate(across(c(age, period, cohort), as.factor))

Replication: Figure 2

This is simply a reconstruction of Figure 2 in Holford (1983).

m_ac <- glm(deaths ~ age + cohort + offset(log(d$population)), data = d, family = poisson())
d_pred <- mutate(d, pred = log(predict(m_ac, type = "response") / population * 100000))

p <- d %>%
    ggplot(aes(x = cohort, y = log_rate, color = age, group = age)) +
        geom_point() + geom_line() +
        geom_point(data = d_pred, aes(x = cohort, y = pred), shape = 4, size = 2) +
        theme_bw()
p

Helper functions

The first helper function extracts the non-linear effects from the model, while the second function computes a total (linear and non-linear) effect. The last function is used to override the linear contrast below; this is done to obtain exactly the same point estimates as Holford.

get_deviations <- function(model, set, contrasts) {
    extract_coefs <- coef(model)[grepl(set, names(coef(model)))]
    extract_coefs <- extract_coefs[2:length(extract_coefs)]
    deviations <- contrasts[, 2:(1 + length(extract_coefs)), drop = FALSE] %*% extract_coefs
    deviations[, 1]
}

get_total_effect <- function(contrasts, linear_coef, nonlinear_coefs) {
    linear_contrast <- contrasts[, 1]
    nonlinear_contrasts <- contrasts[, 2:ncol(contrasts)]
    linear_contrast * linear_coef + nonlinear_contrasts %*% nonlinear_coefs
}

get_linear_contrast <- function(vector) {
    1:n_distinct(vector) - 1/2 * n_distinct(vector) - 1/2
}

Holford's original model

We first replicate Holford's original model. The built-in R function contr.poly uses unweighted orthogonal polynomials. We then use one of the helper functions defined above to extract the identified non-linear effects.

data_holford <- as_tibble(d)

# use contr.poly but adjust for the linear term
contrasts(data_holford$age) <- contr.poly
contrasts(data_holford$age)[, 1] <- get_linear_contrast(data_holford$age)
contrasts(data_holford$period) <- contr.poly
contrasts(data_holford$period)[, 1] <- get_linear_contrast(data_holford$period)
contrasts(data_holford$cohort) <- contr.poly
contrasts(data_holford$cohort)[, 1] <- get_linear_contrast(data_holford$cohort)

holford <- glm(deaths ~ age + cohort + period + offset(log(data_holford$population)),
    data = data_holford, family = poisson())

# get deviations from linear trend and age/cohort linear effects
# (these are the true linear effects when period linear effect = 0)
holford_age <- get_deviations(holford, "^age", contrasts(data_holford$age))
holford_period <- get_deviations(holford, "^period", contrasts(data_holford$period))
holford_cohort <- get_deviations(holford, "^cohort", contrasts(data_holford$cohort))
holford_age_linear <- coef(holford)["age.L"]
holford_cohort_linear <- coef(holford)["cohort.L"]

Corrected model

This code is very similar, but uses the function contr.poly.weighted from the weightedcontrasts package. We also specify the width of the age groups (5-year intervals in this case), to obtain an interpretable linear term.

data_corrected <- as_tibble(d)

contrasts(data_corrected$age) <- contr.poly.weighted(data_corrected$age, width = 5)
contrasts(data_corrected$period) <- contr.poly.weighted(data_corrected$period, width = 5)
contrasts(data_corrected$cohort) <- contr.poly.weighted(data_corrected$cohort, width = 5)
# zero out the period linear effect
contrasts(data_corrected$period)[, 1] <- 0

corrected <- glm(deaths ~ age + cohort + period + offset(log(data_corrected$population)),
    data = data_corrected, family = poisson())

# get deviations from linear trend and age/cohort linear effects
# (these are the true linear effects when period linear effect = 0)
corrected_age <- get_deviations(corrected, "^age", contrasts(data_corrected$age))
corrected_period <- get_deviations(corrected, "^period", contrasts(data_corrected$period))
corrected_cohort <- get_deviations(corrected, "^cohort", contrasts(data_corrected$cohort))
corrected_age_linear <- coef(corrected)["age.L"]
corrected_cohort_linear <- coef(corrected)["cohort.L"]

More parsimonious model

By dropping higher-order polynomial terms, we can fit a much more parsimonious model. The model that contains only linear and quadratic effects fits almost equally well.

designmatrix <- model.matrix(deaths ~ age + cohort + period + offset(log(data_corrected$population)),
    data = data_corrected)
d_reduced <- designmatrix[, c("age.L", "cohort.L", "period.L", "age.Q", "cohort.Q", "period.Q")] %>%
    as_tibble() %>%
    mutate(deaths = d$deaths, population = d$population)
corrected_p <- glm(deaths ~ age.L + cohort.L + period.L + age.Q + cohort.Q + period.Q +
    offset(log(population)), data = d_reduced, family = poisson())

Comparison

htmlreg(list(holford, corrected, corrected_p), single.row = TRUE)
# does not affect age and period terms
all.equal(holford_age, corrected_age)
all.equal(holford_period, corrected_period)

# comparison of cohort non-linear effects
p <- tibble(effect = c(corrected_cohort, holford_cohort),
    cohort = rep(names(corrected_cohort), 2),
    which = c(rep("This paper", length(corrected_cohort)),
        rep("Holford 1983", length(holford_cohort)))) %>%
    mutate(which = factor(which, levels = c("This paper", "Holford 1983"))) %>%
    ggplot(aes(x = cohort, y = effect, group = which, linetype = which)) +
    geom_point() + geom_line() +
    geom_hline(yintercept = 0) +
    labs(y = "Non-linear effect", x = "Cohort", group = "", linetype = "") +
    theme_bw()
p
# ggsave("holford1983.pdf", p, height = 4)


elbersb/weightedcontrasts documentation built on Dec. 20, 2021, 4:15 a.m.