inst/doc/estimate_from_individual_data.R

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  dpi = 300,
  out.width = "100%"
)

## ----message = FALSE, warning=FALSE, eval = TRUE------------------------------
# load {cfr}
library(cfr)

# packages to wrangle and plot data
library(dplyr)
library(magrittr)
library(ggplot2)
library(tidyr)
library(incidence2)

## -----------------------------------------------------------------------------
# Simulate data
nn <- 1e3 # Number of cases to simulate
pp <- 0.1 # Assumed CFR
set.seed(10) # Set seed for reproducibility

# Generate random case onset timings in Jan & Feb 2024
case_onsets <- as.Date("2024-01-01") + sample.int(60, nn, replace = TRUE)

# Define current date of data availability (i.e. max follow up)
max_obs <- as.Date("2024-01-20")

# Sample delays from onset to outcome

# 1. Deaths: assume mean delay = 5 days, sd = 5 days
log_param <- c(meanlog = 1.262864, sdlog = 0.8325546)
delay_death <- function(x) {
  dlnorm(x,
    meanlog = log_param[["meanlog"]],
    sdlog = log_param[["sdlog"]]
  )
}
outcome_death <- round(rlnorm(round(nn * pp),
  meanlog = log_param[["meanlog"]],
  sdlog = log_param[["sdlog"]]
))

# 2. Recoveries: assume mean delay = 15 days, sd = 5 days
log_param <- c(meanlog = 2.65537, sdlog = 0.3245928)
outcome_recovery <- round(rlnorm(round(nn * (1 - pp)),
  meanlog = log_param[["meanlog"]],
  sdlog = log_param[["sdlog"]]
))

# Create vector of outcome dates
all_outcomes <- case_onsets + c(outcome_death, outcome_recovery)

# Create vector of outcome types
type_outcome <- c(rep("D", length(outcome_death)),
                  rep("R", length(outcome_recovery)))

# Create vector of known outcomes as of max_obs
known_outcomes <- type_outcome
known_outcomes[(all_outcomes > max_obs)] <- ""

## ----fig.cap = "Individual level timings of onsets and outcomes, for individuals with a known outcome as of 20th Jan 2024.", class.source = 'fold-hide'----
# Create a data frame with onset and outcome dates, and outcome type
data <- data.frame(
  id = 1:nn,
  case_onsets = case_onsets,
  outcome_dates = all_outcomes,
  outcome_type = type_outcome,
  known_outcome = known_outcomes
)

# Filter out unknown outcomes (after the max_obs date)
data <- data %>% filter(nzchar(known_outcome))

# Arrange data by onset date
data <- data %>%
  arrange(case_onsets) %>%
  mutate(id_ordered = row_number()) # Assign a new 'id_ordered'

# Prepare data for plotting (onset and outcome events in same column)
plot_data <- data %>%
  tidyr::pivot_longer(
    cols = c(case_onsets, outcome_dates),
    names_to = "event_type",
    values_to = "date"
  ) %>%
  mutate(
    event_label = ifelse(event_type == "case_onsets", "Onset", "Outcome")
  )

# Create plot with lines linking onset and outcome
ggplot() +
  geom_segment(
    data = data, aes(x = case_onsets,
                     xend = outcome_dates,
                     y = id_ordered,
                     yend = id_ordered),
    color = ifelse(data$outcome_type == "D", "red", "green"),
    size = 0.5
  ) +
  geom_point(data = plot_data, aes(x = date,
                                   y = id_ordered,
                                   color = outcome_type,
                                   shape = event_label),
             size = 2) +
  geom_vline(xintercept = as.numeric(max_obs),
             linetype = "dashed",
             color = "black",
             size = 0.5) +
  scale_color_manual(values = c(D = "darkred", R = "darkgreen")) +
  scale_shape_manual(values = c(Onset = 16, Outcome = 17)) +
  labs(
    x = "Date",
    y = "Individual (Ordered by onset date)",
    color = "Outcome type",
    shape = "Event type"
  ) +
  theme_minimal()

## -----------------------------------------------------------------------------
# Filter on known outcomes
total_deaths <- sum(known_outcomes == "D")
total_outcomes <- (total_deaths + sum(known_outcomes == "R"))

# Calculate CFR with 95% CI
cfr_filter <- binom.test(total_deaths, total_outcomes)
cfr_estimate <- (signif(as.numeric(c(cfr_filter$estimate,
                                     cfr_filter$conf.int)), 3))
cfr_estimate

## -----------------------------------------------------------------------------
# Get times of death for fatal outcomes
death_times <- (case_onsets)[type_outcome == "D"] + outcome_death

# Create a single data.frame with event types
events <- data.frame(
  dates = c(case_onsets, death_times),
  event = c(rep("cases", length(case_onsets)),
            rep("deaths", length(death_times)))
)

# Use incidence2 to calculate counts of cases and deaths by day
counts <- incidence2::incidence(events,
                                date_index = "dates",
                                groups = "event",
                                complete_dates = TRUE)

# Pivot incidence object to get data.frame with counts for cases and deaths
df <- counts %>%
  tidyr::pivot_wider(names_from = event,
                     values_from = count,
                     values_fill = 0) %>%
  dplyr::rename(date = date_index)

cfr_static(df, delay_death)

## -----------------------------------------------------------------------------
# Calculate total deaths and total cases
total_deaths <- sum(df$deaths)
total_cases <- sum(df$cases)

# Create data.frame with cases over time only
df_case <- df
df_case$deaths <- 0

# Calculate the expected number of known fatal outcomes over time
e_outcomes <- estimate_outcomes(df_case, delay_death)

# Calculate the CFR
total_deaths / (total_cases * tail(e_outcomes$u_t, 1))

## -----------------------------------------------------------------------------
# Create data.frame with cases over time only
df_cases <- df
df_cases$deaths <- 0

# Add total deaths to the final row in the 'deaths' column
df_cases$deaths[nrow(df_cases)] <- sum(df$deaths)

# Calculate CFR
cfr_static(df_cases, delay_death)

Try the cfr package in your browser

Any scripts or data that you put into this service are public.

cfr documentation built on April 3, 2025, 9:38 p.m.