Nothing
## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
dpi = 300,
out.width = "100%"
)
## ----message = FALSE, warning=FALSE, eval = TRUE------------------------------
# load {cfr}
library(cfr)
# packages to wrangle and plot data
library(dplyr)
library(magrittr)
library(ggplot2)
library(tidyr)
library(incidence2)
## -----------------------------------------------------------------------------
# Simulate data
nn <- 1e3 # Number of cases to simulate
pp <- 0.1 # Assumed CFR
set.seed(10) # Set seed for reproducibility
# Generate random case onset timings in Jan & Feb 2024
case_onsets <- as.Date("2024-01-01") + sample.int(60, nn, replace = TRUE)
# Define current date of data availability (i.e. max follow up)
max_obs <- as.Date("2024-01-20")
# Sample delays from onset to outcome
# 1. Deaths: assume mean delay = 5 days, sd = 5 days
log_param <- c(meanlog = 1.262864, sdlog = 0.8325546)
delay_death <- function(x) {
dlnorm(x,
meanlog = log_param[["meanlog"]],
sdlog = log_param[["sdlog"]]
)
}
outcome_death <- round(rlnorm(round(nn * pp),
meanlog = log_param[["meanlog"]],
sdlog = log_param[["sdlog"]]
))
# 2. Recoveries: assume mean delay = 15 days, sd = 5 days
log_param <- c(meanlog = 2.65537, sdlog = 0.3245928)
outcome_recovery <- round(rlnorm(round(nn * (1 - pp)),
meanlog = log_param[["meanlog"]],
sdlog = log_param[["sdlog"]]
))
# Create vector of outcome dates
all_outcomes <- case_onsets + c(outcome_death, outcome_recovery)
# Create vector of outcome types
type_outcome <- c(rep("D", length(outcome_death)),
rep("R", length(outcome_recovery)))
# Create vector of known outcomes as of max_obs
known_outcomes <- type_outcome
known_outcomes[(all_outcomes > max_obs)] <- ""
## ----fig.cap = "Individual level timings of onsets and outcomes, for individuals with a known outcome as of 20th Jan 2024.", class.source = 'fold-hide'----
# Create a data frame with onset and outcome dates, and outcome type
data <- data.frame(
id = 1:nn,
case_onsets = case_onsets,
outcome_dates = all_outcomes,
outcome_type = type_outcome,
known_outcome = known_outcomes
)
# Filter out unknown outcomes (after the max_obs date)
data <- data %>% filter(nzchar(known_outcome))
# Arrange data by onset date
data <- data %>%
arrange(case_onsets) %>%
mutate(id_ordered = row_number()) # Assign a new 'id_ordered'
# Prepare data for plotting (onset and outcome events in same column)
plot_data <- data %>%
tidyr::pivot_longer(
cols = c(case_onsets, outcome_dates),
names_to = "event_type",
values_to = "date"
) %>%
mutate(
event_label = ifelse(event_type == "case_onsets", "Onset", "Outcome")
)
# Create plot with lines linking onset and outcome
ggplot() +
geom_segment(
data = data, aes(x = case_onsets,
xend = outcome_dates,
y = id_ordered,
yend = id_ordered),
color = ifelse(data$outcome_type == "D", "red", "green"),
size = 0.5
) +
geom_point(data = plot_data, aes(x = date,
y = id_ordered,
color = outcome_type,
shape = event_label),
size = 2) +
geom_vline(xintercept = as.numeric(max_obs),
linetype = "dashed",
color = "black",
size = 0.5) +
scale_color_manual(values = c(D = "darkred", R = "darkgreen")) +
scale_shape_manual(values = c(Onset = 16, Outcome = 17)) +
labs(
x = "Date",
y = "Individual (Ordered by onset date)",
color = "Outcome type",
shape = "Event type"
) +
theme_minimal()
## -----------------------------------------------------------------------------
# Filter on known outcomes
total_deaths <- sum(known_outcomes == "D")
total_outcomes <- (total_deaths + sum(known_outcomes == "R"))
# Calculate CFR with 95% CI
cfr_filter <- binom.test(total_deaths, total_outcomes)
cfr_estimate <- (signif(as.numeric(c(cfr_filter$estimate,
cfr_filter$conf.int)), 3))
cfr_estimate
## -----------------------------------------------------------------------------
# Get times of death for fatal outcomes
death_times <- (case_onsets)[type_outcome == "D"] + outcome_death
# Create a single data.frame with event types
events <- data.frame(
dates = c(case_onsets, death_times),
event = c(rep("cases", length(case_onsets)),
rep("deaths", length(death_times)))
)
# Use incidence2 to calculate counts of cases and deaths by day
counts <- incidence2::incidence(events,
date_index = "dates",
groups = "event",
complete_dates = TRUE)
# Pivot incidence object to get data.frame with counts for cases and deaths
df <- counts %>%
tidyr::pivot_wider(names_from = event,
values_from = count,
values_fill = 0) %>%
dplyr::rename(date = date_index)
cfr_static(df, delay_death)
## -----------------------------------------------------------------------------
# Calculate total deaths and total cases
total_deaths <- sum(df$deaths)
total_cases <- sum(df$cases)
# Create data.frame with cases over time only
df_case <- df
df_case$deaths <- 0
# Calculate the expected number of known fatal outcomes over time
e_outcomes <- estimate_outcomes(df_case, delay_death)
# Calculate the CFR
total_deaths / (total_cases * tail(e_outcomes$u_t, 1))
## -----------------------------------------------------------------------------
# Create data.frame with cases over time only
df_cases <- df
df_cases$deaths <- 0
# Add total deaths to the final row in the 'deaths' column
df_cases$deaths[nrow(df_cases)] <- sum(df$deaths)
# Calculate CFR
cfr_static(df_cases, delay_death)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.