# Report-wide chunk defaults: hide the code, warnings and messages, keep chunk
# output in the document, and render figures at 12 x 9.
# NOTE(review): knitr documents `root.dir` as a package option
# (`knitr::opts_knit`) rather than a chunk option, so it may have no effect
# when set here -- confirm before relying on it.
knitr::opts_chunk$set(
  echo = FALSE,
  include = TRUE,
  warning = FALSE,
  message = FALSE,
  root.dir = here::here(),
  fig.width = 12,
  fig.height = 9
)

# Ask R to print numeric values with a single significant digit.
options(digits = 1)
library(here)
library(forecast.vocs)
library(dplyr)
library(loo)
library(scoringutils)
library(knitr)
library(data.table)
# load packages
library(ggplot2)
library(scales)
library(dplyr)
library(here)
library(readr)
library(data.table)

# load functions
source(here("R", "load-local-data.R"))
source(here("R", "plot-summary.R"))
source(here("R", "plot-daily-cases.R"))
source(here("R", "plot-omicron-95.R"))
source(here("R", "plot-cumulative-percent.R"))

# Date of the most recent data/results, as reported by the project helper.
date_latest <- get_latest_date()

# Local data for that date, plus the population data used further below.
daily <- load_local_data(date_latest)
pop <- load_population()

# Model results: the main SGTF fits and the fits of type "bias".
sgtf <- load_results(date_latest)
bias <- load_results(date_latest, type = "bias")

# Sentence-case the variant relationship labels. `:=` updates `sgtf$posterior`
# by reference, so every later use of `sgtf$posterior` sees the new labels and
# no second binding of the table is needed. `invisible()` keeps the updated
# table from being auto-printed into the rendered report.
invisible(
  sgtf$posterior[,
    variant_relationship := stringr::str_to_sentence(variant_relationship)
  ]
)

# Keep only the correlated model. The SGTF labels were sentence-cased just
# above; the bias labels are left as loaded, hence the lower-case match.
sgtf_posterior <- sgtf$posterior[variant_relationship == "Correlated"]
bias_posterior <- bias$posterior[variant_relationship == "correlated"]
# Posterior summaries taken from the correlated model: reported cases and the
# fraction of cases attributed to the variant of concern.
cases <- summary(sgtf_posterior, type = "cases")
voc_frac <- summary(sgtf_posterior, type = "voc_frac")

# Cumulative percentage by variant, restricted to the Omicron rows.
cum_per <- filter(cumulative_percentage(cases, pop), type %in% "Omicron")

# Key dates: the first date with a non-missing sequenced total, the last date
# covered by the case summary, and the point two weeks before that last date.
date_data_start <- min(
  as.data.table(sgtf$data)[!is.na(seq_total)]$date
)
date_forecast_end <- max(cases$date)
date_forecast_start <- date_forecast_end - 2 * 7
  1. Centre for the Mathematical Modelling of Infectious Diseases, London School of Hygiene & Tropical Medicine, London WC1E 7HT, United Kingdom

Estimates are updated daily as new data is released but the summary of the situation is updated less frequently and so may not match current estimates. All data (both raw data and estimates) are available here and the report should be fully reproducible. Reports and data as available at the time of release are available from the release page. See our news file for details of what updates were made when.

Introduction

Since being highlighted by scientists in South Africa, Omicron has spread rapidly globally. Using initial South African data, it has been estimated that Omicron may be both more transmissible and have greater immune escape than the previously dominant Delta variant [@pearson-omicron].

In this work, we use S-gene target failure (SGTF) as a proxy of variant status combined with reported case counts to explore the evidence for changes in transmission advantage over time for the Omicron variant. If present this could indicate the impact of immune escape, sampling bias in SGTF data or differences in the populations within which the variants are circulating. We also report estimates for growth rates by variant and overall, case counts overall and by variant for a 14 day forecast window assuming constant future growth, the date at which Omicron will become dominant in England and in each UKHSA region, and the estimated cumulative percentage of the population with a reported Omicron case. We also explore the potential for bias in S-gene target sampling by comparing cases that have a reported SGTF status to cases with an unknown SGTF status.

Methods

Data

We use S-gene status by specimen date sourced from UKHSA as a direct proxy for the Omicron variant, with a target failure indicating that a case has the Omicron variant. We augment this data with reported case counts by date of specimen, truncated by two days to account for delayed reporting. Data was available for England and for each UKHSA region from both sources.

Models

We consider two autoregressive models with different assumptions about the relationship between growth rates for Omicron and non-Omicron cases. Both models estimate expected cases for each variant as a combination of expected cases from the previous day and the exponential of the log growth rate. The growth rate is then itself modelled as a differenced AR(1) process.

For the first model variants are assumed to have growth rates related by a fixed scaling (described from now on as the scaled model). In this model, variant growth rates then share a single differenced AR(1) process meaning that they vary over time in the same way. This model assumes that variants differ only due to a transmission advantage and that there are no time-varying biases in the reported data.

In the second model (described from now on as the correlated model), we relax the assumption that variants co-vary using a vector autoregression structure which assumes that the first differences of the variant growth rates are drawn from a multivariate normal distribution. This model formulation can account for variant differences other than a transmission advantage and can also better handle time-varying biases in data sources. Crucially, in the absence of evidence that variants do not co-vary, it reduces to the co-varying (scaled) model.

For both models, we fit jointly to reported cases and SGTF data assuming a negative binomial and beta-binomial observation model respectively. Day of the week reporting periodicity for case counts is captured using a random effect for the day of the week. We initialise both models by fitting to a week of case only data where the Omicron variant is assumed to not be present (from the 17th of November to the 22nd).

A full description of the models described here can be found in the documentation for the forecast.vocs R package [@forecast.vocs].

Statistical Inference

We first visualised our combined data sources (cases by specimen date and SGTF status by specimen date). We then fit both models separately to data for England and to each UKHSA region. Using these model fits we report posterior estimates from the best fitting model for the following summary statistics of epidemiological interest: growth rates by variant and overall, the time-varying transmission advantage for the Omicron variant, case counts overall and by variant for a 14 day forecast window assuming constant future growth, the date at which Omicron will become dominant in England and in each UKHSA region, and the estimated cumulative percentage of the population with a reported Omicron case.

We explored the potential for bias in S-gene target sampling by comparing cases that have a reported SGTF status to cases with an unknown SGTF status. We fit the correlated model to case counts and S-gene tested status and reported the apparent transmission advantage for those with an S-gene status over time. Any variation in this metric from 100% may be interpreted as indicating biased sampling of those with known S-gene status, which may bias our main results.

Implementation

All models were implemented using the forecast.vocs R package [@R; @forecast.vocs] and fit using stan [@stan] and cmdstanr [@cmdstanr]. Each model was fit using 2 chains with each chain having 1000 warmup steps and 2000 sampling steps. Convergence was assessed using the Rhat diagnostic [@stan]. Models were compared using approximate leave-one-out (LOO) cross-validation [@loo; @loo-paper] where negative values indicate an improved fit for the correlated model.

Limitations

Results

Summary

Last updated: 2021-12-31

Latest available estimates using data up to: `r date_forecast_start`

# Summary figure built from the correlated SGTF posterior, the Omicron
# cumulative percentages and the correlated bias posterior.
plot_summary(
  sgtf_posterior = sgtf_posterior,
  cum_per = cum_per,
  bias_posterior = bias_posterior,
  date_forecast_start = date_forecast_start
)

Data description

# Daily data from the first date with sequence data onwards, using the
# forecast start as the truncation date, a smoothed total and no caption.
plot_daily_cases(
  daily,
  truncate_date = date_forecast_start,
  caption = "",
  start_date = date_data_start,
  smooth_total = TRUE
)

Model comparison

# Model comparison table: drop the forecast date column and give the
# remaining columns sentence-case names before rendering.
scores <- sgtf$loo %>%
  select(-forecast_date) %>%
  janitor::clean_names(case = "sentence")

kable(
  scores,
  caption = "Estimated differences in the ELPD metric (with
 standard errors) between the scaled and correlated models for England and UKHSA
 regions. Negative values indicate that the correlated model is estimated to be
 a better fit to the data. "
)

Growth rate of reported cases overall, with the Omicron variant, and not with the Omicron variant

# Growth rates from the correlated model, one panel per region.
plot_growth(
  sgtf_posterior
) + facet_wrap(~ region)

Proportion of cases with the Omicron variant

Natural scale

# Omicron share of cases with `log = FALSE` (the "Natural scale" view),
# filled by variant relationship, one panel per region.
plot(
  sgtf$posterior,
  as.data.table(sgtf$data),
  type = "voc_frac",
  fill = variant_relationship,
  voc_label = "Omicron variant",
  log = FALSE
) +
  labs(fill = "Variant relationship") +
  facet_wrap(~ region)

Logit scale

# Omicron share of cases with `log` left at the plot method's default (the
# section heading labels this the logit-scale view), filled by variant
# relationship, one panel per region.
plot(
  sgtf$posterior,
  as.data.table(sgtf$data),
  type = "voc_frac",
  fill = variant_relationship,
  voc_label = "Omicron variant"
) +
  labs(fill = "Variant relationship") +
  facet_wrap(~ region)

Date at which Omicron estimated to account for 95% of reported cases

# Omicron 95% plot from the VOC fraction summary, bounded by the forecast
# start and end dates.
plot_omicron_95(
  voc_frac       = voc_frac,
  forecast_start = date_forecast_start,
  forecast_end   = date_forecast_end
)

Time-varying transmission advantage of the Omicron variant

# Transmission advantage over time, coloured and grouped by variant
# relationship, one panel per region.
plot(
  sgtf$posterior,
  type = "voc_advantage",
  fill = variant_relationship,
  group = variant_relationship,
  voc_label = "Omicron variant"
) +
  labs(
    fill = "Variant relationship",
    y = "Transmission advantage for the Omicron variant"
  ) +
  facet_wrap(~ region)

Posterior predictions and forecasts of reported cases

Natural

# Cases from the correlated model plotted against the data with `log = FALSE`;
# each regional panel gets its own y axis.
plot(
  sgtf_posterior,
  as.data.table(sgtf$data),
  type = "cases",
  log = FALSE
) +
  facet_wrap(~region, scales = "free_y")

Log

# Same cases plot with `log` left at the plot method's default (the section
# heading labels this the log view); each regional panel gets its own y axis.
plot(
  sgtf_posterior,
  as.data.table(sgtf$data),
  type = "cases"
) +
  facet_wrap(~region, scales = "free_y")

Cumulative percentage of the population with a reported Omicron case

# Cumulative Omicron percentages, passing the forecast start date and the
# first date with sequence data to the project plotting helper.
plot_cumulative_percent(
  cum_per,
  forecast_start = date_forecast_start,
  data_start = date_data_start
)

Evidence of sampling bias in S-gene target results among Covid-19 test-positive cases

# Advantage plot for the bias results, with a dotted reference line at 1,
# one panel per region.
plot(
  bias$posterior,
  type = "voc_advantage"
) +
  geom_hline(
    yintercept = 1,
    linetype = 3,
    alpha = 0.8
  ) +
  labs(y = "Transmission advantage of cases with a reported S gene
            status versus those without") +
  facet_wrap(~ region)

References



epiforecasts/omicron-sgtf-forecast documentation built on Jan. 21, 2022, 9:19 p.m.