knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
library(conmat) library(deSolve) library(tidyr) library(ggplot2) library(dplyr) library(purrr)
SIR (Susceptible, Infected, Recovered) models, sometimes known as compartmental models, are a mathematical modelling technique used to understand facets of an epidemic. They can help answer questions like:
The SIR refers to the number of:
people at a given time point. We can model how these numbers change at each time step, based on initial population numbers, and other parameters like how infection spreads (is it more likely to infect younger or older people?),
We start with a complicated version of a relatively simple model: an age-stratified SIR Model, but with all age groups acting exactly the same.
We will use 17 age groups, each in 5 year age bands, and turn these into a conmat_population
object. This is an object that knows which columns represent age and population, which is used by other functions within conmat
.
#| label: create-population homogeneous_population <- data.frame( age = seq(0, 80, by = 10), population = rep(100, times = 9) ) |> as_conmat_population( age = age, population = population ) homogeneous_population
Then, we extrapolate these into a set of contact matrices, which we can construct using setting_prediction_matrix
. We set these as matrices of 1 - the contact rate is homogenous and exactly the same.
#| label: homogenous-contact age_breaks_0_80_plus <- c(seq(0, 80, by = 10), Inf) mat_ones <- matrix(1, nrow = 9, ncol = 9) # Relative number of contacts between individuals in 2 age categories # Think of as P(contact) homogeneous_contact <- setting_prediction_matrix( home = mat_ones, work = mat_ones, school = mat_ones, other = mat_ones, age_breaks = age_breaks_0_80_plus ) homogeneous_contact
Similarly, we construct a set of transmission matrices, which provide the probability of transmission for each age group, using transmission_probability_matrix
. These all have the same transmission probability - 0.05 (1 in 20).
#| label: homogenous-transmission mat_05 <- matrix(0.05, nrow = 9, ncol = 9) transmission_matrix <- transmission_probability_matrix( home = mat_05, work = mat_05, school = mat_05, other = mat_05, age_breaks = age_breaks_0_80_plus ) transmission_matrix
We also need to set up our population structures. We'll have all the S states, then I, then R. Since we're using deSolve
to solve this system, we need to make sure this order stays the same throughout!
#| label: initial-condition S0 <- rep(999, times = 9) I0 <- rep(1, times = 9) R0 <- rep(0, times = 9) initial_condition <- c(S0, I0, R0) names(initial_condition) <- paste( rep(c("S0", "I0", "R0"), each = 9), age_breaks_0_80_plus[1:9], sep = "_" ) initial_condition
For an SIR model, we need to compute the force of infection, which is $$\lambda(t) = \beta I(t).$$ The $\beta$ term is the product of the probability of infection given contact, and the probability of contact, for which we can use the matrices we have just defined:
#| label: parameters parameters <- list( "transmission_matrix" = transmission_matrix, "homogeneous_contact" = homogeneous_contact, "gamma" = 1, "s_indexes" = 1:9, "i_indexes" = 10:18, "r_indexes" = 19:27 ) parameters
Now we construct a function for the age structured SIR model, to pass to deSolve. This calculates the force of infection for each setting.
age_structured_sir <- function(time, state, parameters) { # Calculate the force of infection for each setting: # unstructured SIR beta is age_group_n / pop_n N_by_age <- map_dbl( .x = parameters$s_indexes, .f = function(i) { current_indexes_to_sum <- c( parameters$s_indexes[i], parameters$i_indexes[i], parameters$r_indexes[i] ) sum(state[current_indexes_to_sum]) } ) # normalise by the age population N_infected_by_age <- state[parameters$i_indexes] / N_by_age # functional method for takign the product of two matrices product <- function(transmission, contact) { map2(transmission, contact, `*`) } age_normalise <- function(beta) { # matrix multiply by infected and normalise by age population map(beta, function(beta) { beta %*% N_infected_by_age }) } lambdas <- tibble( setting = names(parameters$transmission_matrix), transmission_matrix = parameters$transmission_matrix, homogeneous_contact = parameters$homogeneous_contact[1:4] ) %>% mutate( beta = product(transmission_matrix, homogeneous_contact), lambda = age_normalise(beta) ) # Combine them all into one term for ease of computation lambda_total <- Reduce("+", lambdas$lambda) # Don't forget to normalise your infection rate by the population! dSdt <- -lambda_total * state[parameters$s_indexes] dIdt <- lambda_total * state[parameters$s_indexes] - parameters$gamma * state[parameters$i_indexes] dRdt <- parameters$gamma * state[parameters$i_indexes] return( list( c( dSdt, dIdt, dRdt ) ) ) }
Then we solve the ODE like so:
#| label: solve-ode-homogeneous times <- seq(0, 20, by = 0.1) homogeneous_soln <- ode( y = initial_condition, times = times, func = age_structured_sir, parms = parameters ) # Have to convert ode output to a data frame to do any plotting homogeneous_soln <- as.data.frame(homogeneous_soln) %>% as_tibble()
Now, let's compare this to an SIR model with no stratification - as in, no age groups:
#| label: standard-sir parameters_sir <- c("beta" = 1.8, "gamma" = 1) initial_condition_sir <- c("S" = 8991, "I" = 9, "R" = 0) sir <- function(time, state, parameters) { N <- sum(state) lambda_total <- parameters["beta"] * state["I"] dSdt <- -lambda_total / N * state["S"] dIdt <- parameters["beta"] / N * state["S"] * state["I"] - parameters["gamma"] * state["I"] dRdt <- parameters["gamma"] * state["I"] return(list(c(dSdt, dIdt, dRdt))) } sir_soln <- ode( y = initial_condition_sir, times = times, func = sir, parms = parameters_sir ) sir_soln <- as_tibble(as.data.frame(sir_soln))
#| label: plot-standard-sir ungrouped_structure <- sir_soln %>% pivot_longer(cols = -time) # we are going to tidy up ODE output a few times, so wrap it into a function: tidy_ode <- function(ode_soln) { ode_soln %>% pivot_longer(cols = -time) %>% mutate(parent_state = substr(name, 1, 1)) %>% group_by(time, parent_state) %>% summarise(value = sum(value)) %>% ungroup() %>% rename(name = parent_state) } # For the stratified model, we have to add up all the age categories together for a fair comparison. grouped_structure <- tidy_ode(homogeneous_soln) combined_solutions <- bind_rows( "non_structured" = ungrouped_structure, "age_structured" = grouped_structure, .id = "type" ) %>% mutate( name = factor(name, levels = c("S", "I", "R")) ) combined_solutions
Now let's plot these two models approaches - the age structure and the non age structured:
#| label: plot-both-models gg_combined_solutions <- ggplot( combined_solutions, aes(x = time, y = value, colour = name, linetype = type) ) + geom_line() + labs(x = "Time", y = "Value", colour = "State", linetype = "Model") gg_combined_solutions
Voila! These lines are the same! We can double check this by plotting them as facets:
#| label: facetted-sir gg_combined_solutions + facet_wrap(~type)
So, we have successfully collapsed our stratified model down to the non-stratified model, which is a great sense check for every time you write out a complicated model.
Now that we've established an age-structured SIR model, we can repeat the process with conmat
matrices. This process is the same as in the vignette, "Data Sources".
world_data <- socialmixr::wpp_age() %>% mutate( new_lower_age = if_else(lower.age.limit >= 75, 75L, lower.age.limit) ) %>% group_by(new_lower_age, country, year) %>% summarise( population = sum(population) ) germany_2015 <- age_population( data = world_data, location_col = country, location = "Germany", age_col = new_lower_age, year_col = year, year = 2015 ) germany_2015
Now let's construct a non-homogenous contact matrix, and transmission probability matrix from the data we have on Germany.
age_breaks_socialmixr <- c(seq(0, 75, by = 5), Inf) germany_contacts <- extrapolate_polymod( population = germany_2015, age_breaks = age_breaks_socialmixr ) n_finite_states <- length(age_breaks_socialmixr) - 1 socialmixr_matrix <- matrix(0.1761765, nrow = n_finite_states, ncol = n_finite_states ) transmission_matrix <- transmission_probability_matrix( home = socialmixr_matrix, work = socialmixr_matrix, school = socialmixr_matrix, other = socialmixr_matrix, age_breaks = age_breaks_socialmixr ) parameters <- list( "transmission_matrix" = transmission_matrix, "homogeneous_contact" = germany_contacts, "gamma" = 1, "s_indexes" = 1:n_finite_states, "i_indexes" = (n_finite_states + 1):(2 * n_finite_states), "r_indexes" = (2 * n_finite_states + 1):(3 * n_finite_states) ) S0 <- germany_2015$population I0 <- rep(1, times = n_finite_states) R0 <- rep(0, times = n_finite_states) initial_condition <- c(S0, I0, R0) names(initial_condition) <- paste( rep(c("S0", "I0", "R0"), each = n_finite_states), age_breaks_socialmixr[1:n_finite_states], sep = "_" )
Then, similar to above, we solve the ODE
times <- seq(0, 100, by = 0.1) germany_soln <- ode( y = initial_condition, times = times, func = age_structured_sir, parms = parameters ) # Have to convert ode output to a data frame to do any plotting germany_soln <- as_tibble(as.data.frame(germany_soln)) head(germany_soln) tail(germany_soln) germany_soln_long <- germany_soln %>% tidy_ode() %>% mutate(type = "age_structured") germany_soln_long
gg_germany_sir <- ggplot( germany_soln_long, aes(x = time, y = value / sum(initial_condition), colour = name) ) + geom_line() + labs(x = "Time", y = "Proportion") gg_germany_sir
Let's compare to the Prem matrices. Prem only has 16 age classes so we do need to re-do our population.
# NOTE - consider ways to present this data nicer # str(prem_germany_contact_matrices) as_setting_prediction_matrix( prem_germany_contact_matrices, age_breaks = seq(0, 80, by = 5) )
So we go through a similar process, setting up parameters, and solving the ODE
parameters_prem <- list( "transmission_matrix" = transmission_matrix, "homogeneous_contact" = prem_germany_contact_matrices, "gamma" = 1, "s_indexes" = 1:n_finite_states, "i_indexes" = (n_finite_states + 1):(2 * n_finite_states), "r_indexes" = (2 * n_finite_states + 1):(3 * n_finite_states) ) prem_soln <- ode( y = initial_condition, times = times, func = age_structured_sir, parms = parameters_prem ) # Have to convert ode output to a data frame to do any plotting prem_soln <- as_tibble(as.data.frame(prem_soln)) tail(prem_soln)
germany_aggregated <- tidy_ode(germany_soln) # For the stratified model, we have to add up all the age categories together for a fair comparison. prem_aggregated <- tidy_ode(prem_soln) conmat_prem_soln <- bind_rows( conmat = germany_aggregated, prem = prem_aggregated, .id = "type" ) %>% mutate(name = factor(name, levels = c("S", "I", "R"))) head(conmat_prem_soln) tail(conmat_prem_soln)
ggplot(conmat_prem_soln, aes(x = time, y = value, colour = type)) + geom_line() + labs(x = "Time", y = "Value", colour = "Model") + facet_wrap(~name, nrow = 1)
These are really different, but we have to be careful about why. The contact matrices might refer to the same quantity, but if we dive a little deeper, we find out that might not be the case...
To fairly compare a dynamic disease model that differs only by it's contact matrices, it's important to remember that the $(i,j)$th element of one of these matrices is the relative number of contacts between individuals of age $i$ and age $j$. But, what the number is relative to might be different, and this will lead to different basic reproduction numbers, which will give misleading model conclusions.
At this point, it is important to point out the two definitions of a next generation matrix.
conmat
calculates the first of these in it's functions (such as generate_ngm
), hence why the arguments to these functions have no concept of an infectious period (which is analogous to 'death' in a branching process).
Following the approach of Diekmann, Heesterbrook and Roberts (2009), one can think of the NGM generated by conmat
as only the transmissions term of Equation 2.9. So, to ensure both models have the same value of $R_0$, we can multiply each matrix by a scaling factor to give a target $R_0$.
To target $R_0=1.5$ for example,
calculate_R0 <- function(multiplier, transmission_matrices, contact_matrices) { total_matrix <- transmission_matrices$home * contact_matrices$home + transmission_matrices$work * contact_matrices$work + transmission_matrices$school * contact_matrices$school + transmission_matrices$other * contact_matrices$other abs(Re(eigen(total_matrix * multiplier)$values[1]) - 1.5) } scaling_factor <- function(contact_matrix) { optimize( f = calculate_R0, interval = c(0.001, 5), transmission_matrices = transmission_matrix, contact_matrices = contact_matrix ) } scaling_factor_prem <- scaling_factor(prem_germany_contact_matrices) scaling_factor_socialmixr <- scaling_factor(germany_contacts) scaling_factor_prem$minimum scaling_factor_socialmixr$minimum
We can adjust our contact matrices with these factors, and then our R0s will be the same, meaning that the only difference between the two models should be differences in the contact matrices.
prem_germany_contact_matrices <- lapply(prem_germany_contact_matrices, `*`, scaling_factor_prem$minimum) germany_contacts <- lapply(germany_contacts, `*`, scaling_factor_socialmixr$minimum) parameters$homogeneous_contact <- germany_contacts germany_soln <- ode( y = initial_condition, times = times, func = age_structured_sir, parms = parameters ) parameters$homogeneous_contact <- prem_germany_contact_matrices prem_soln <- ode( y = initial_condition, times = times, func = age_structured_sir, parms = parameters ) germany_aggregated <- tidy_ode(as_tibble(as.data.frame(germany_soln))) prem_aggregated <- tidy_ode(as_tibble(as.data.frame(prem_soln))) conmat_prem_soln <- bind_rows( conmat = germany_aggregated, prem = prem_aggregated, .id = "type" ) %>% mutate(name = factor(name, levels = c("S", "I", "R")))
library(scales) conmat_prem_soln %>% filter(time <= 50) %>% ggplot(aes(x = time, y = value, colour = type)) + geom_line() + facet_wrap(~name, ncol = 1, scales = "free_y", labeller = labeller( name = c( S = "Susceptible", I = "Infected", R = "Recovered" ) ) ) + scale_y_continuous( labels = label_number(scale_cut = cut_si("")), n.breaks = 3 ) + scale_colour_brewer(palette = "Dark2") + labs(x = "Time", y = "Population", colour = "Model") + theme_minimal()
So now we have as fair of a comparison of the two matrices as we will get, and yet, there are significant differences in the dynamics of the two models.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.