Nothing
## ----setup, message = FALSE, warning = FALSE, collapse=TRUE-------------------
library(rtestim)
library(ggplot2)
library(dplyr)
library(nnet)
library(forcats)
library(tidyr)
theme_set(theme_bw())
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
dpi = 300,
collapse = FALSE,
comment = "#>",
fig.asp = 0.618,
fig.width = 6,
out.width = "80%",
fig.align = "center"
)
## ----gamma-pdf, echo = FALSE--------------------------------------------------
ggplot(data.frame(x = seq(.001, 20, length.out = 100)), aes(x)) +
stat_function(fun = dgamma, args = list(shape = 2.5, scale = 2.5), color = "orange") +
stat_function(fun = dgamma, args = list(shape = 1, scale = 2), color = "cornflowerblue") +
stat_function(fun = dgamma, args = list(shape = 2, scale = 1), color = "forestgreen") +
scale_y_continuous(expand = expansion(c(0, 0.05)))
## ----can-default--------------------------------------------------------------
can_default <- estimate_rt(cancovid$incident_cases, x = cancovid$date, nsol = 20L)
plot(can_default) + coord_cartesian(ylim = c(0.5, 2))
## ----cancov-nonpar------------------------------------------------------------
# Data from Backer et al.
delay <- read.csv("backer.csv") |>
filter(delay > 0) |>
select(-delay)
delay <- rowSums(delay)
delay <- delay / sum(delay)
## ----cancov-nonpar-plot, echo=FALSE-------------------------------------------
ggplot(
data.frame(delay = 1:length(delay), probability = delay),
aes(delay, probability)
) +
geom_point(colour = "cornflowerblue") +
geom_segment(yend = 0, colour = "cornflowerblue") +
scale_y_continuous(expand = expansion(c(0, 0.05)))
## ----can-nonpar---------------------------------------------------------------
can_nonpar <- estimate_rt(
cancovid$incident_cases,
x = cancovid$date,
delay_distn = delay,
nsol = 20L)
plot(can_nonpar) + coord_cartesian(ylim = c(0.5, 2))
## ----cdfs, echo = FALSE-------------------------------------------------------
cdfs <- data.frame(
x = 0:20,
default = pgamma(0:20, shape = 2.5, scale = 2.5),
backer = cumsum(c(0, delay, rep(0, 20 - length(delay))))
)
ggplot(cdfs |> pivot_longer(-x), aes(x, value, color = name)) +
geom_step() +
scale_y_continuous(expand = expansion(0)) +
scale_color_manual(values = c("orange", "cornflowerblue"), name = "")
## ----backer-matrix, eval=FALSE------------------------------------------------
# # library(Matrix)
# n <- nrow(cancovid)
# backer_delay <- c(0, delay, rep(0, n - length(delay) - 1))
# delay_mat <- matrix(0, n, n)
# delay_mat[1,1] <- 1
# for (iter in 2:n) delay_mat[iter, 1:iter] <- rev(backer_delay[1:iter])
# delay_mat <- drop0(as(delay_mat, "CsparseMatrix")) # make it sparse, not necessary
# delay_mat <- delay_mat / rowSums(delay_mat) # renormalize
## ----duotang-processing, eval=FALSE, echo=TRUE--------------------------------
# # Run on 19 April 2024
# duotang <- read_tsv("https://github.com/CoVaRR-NET/duotang/raw/main/data_needed/virusseq.metadata.csv.gz")
# columnlist <- c(
# "fasta_header_name", "province", "host_gender", "host_age_bin",
# "sample_collection_date", "sample_collected_by",
# "purpose_of_sampling", "purpose_of_sequencing", "lineage",
# "raw_lineage", "gisaid_accession", "isolate"
# )
# unknown.str <- c(
# "Undeclared", "Not Provided", "Restricted Access", "Missing",
# "Not Applicable", "", "NA", "unknow"
# )
# duotang <- duotang |>
# rename(province = geo_loc_name_state_province_territory) |>
# select(all_of(columnlist))
#
# meta <- duotang |>
# mutate(
# week = cut(sample_collection_date, "week"),
# month = gsub("-..$", "", as.character(cut(sample_collection_date, "month")))
# )
# source("https://github.com/CoVaRR-NET/duotang/raw/main/scripts/scanlineages.R")
# meta <- meta |>
# mutate(gisaid_accession = str_replace(gisaid_accession, "EPI_ISL_", "")) |>
# rename(GID = gisaid_accession) |>
# rowwise() |>
# mutate(raw_lineage = ifelse(
# grepl("^X", raw_lineage),
# str_replace_all(paste0(
# realtorawlineage(substr(
# raw_lineage, 1, str_locate(raw_lineage, "\\.") - 1
# )),
# ".",
# substr(raw_lineage, str_locate(raw_lineage, "\\.") + 1, nchar(raw_lineage))
# ), "[\r\n]", ""),
# raw_lineage
# )) |>
# ungroup()
# dico <- makepangolindico() # rebuild the lineage dictionary so the correct names gets assigned for XBB descedants not named XBB
#
# VOCVOI <- read_csv("https://raw.githubusercontent.com/CoVaRR-NET/duotang/main/resources/vocvoi.csv")
# meta$pango_group <- create.pango.group(VOCVOI, meta)
# meta <- select(meta, province, week, pango_group) |>
# mutate(week = as.Date(week))
#
# counts <- group_by(meta, province, week, pango_group) |>
# count() |>
# ungroup() |>
# arrange(province, week, pango_group)
# can_counts <- group_by(meta, week, pango_group) |>
# count() |>
# ungroup() |>
# arrange(week, pango_group) |>
# mutate(province = "Canada")
# counts <- bind_rows(can_counts, counts)
# saveRDS(counts, "duotang-counts.rds")
## ----duotang-counts, echo=FALSE, message=FALSE--------------------------------
props <- readRDS("duotang-counts.rds") |>
pivot_wider(names_from = pango_group, values_from = n, values_fill = 0) |>
mutate(total = rowSums(across(-c(week, province)))) |>
mutate(across(-c(week, province, total), ~ .x / total)) |>
select(-total)
smooth_it <- function(props_group) {
z <- props_group |> select(-week, -province)
n <- names(z)
nn <- gsub(" ", "_", n)
names(z) <- nn
form_resp <- paste0("cbind(", paste0(names(z), collapse = ",") ,") ~ ")
z$time <- as.numeric(props_group$week)
form <- as.formula(paste0(form_resp, "poly(time, degree = 3)"))
fits <- multinom(form, z, trace = FALSE)
rng <- range(props_group$week)
alltime <- as.numeric(seq(rng[1], rng[2], by = 1))
z <- as_tibble(predict(fits, data.frame(time = alltime), type = "probs")) |>
mutate(Date = as.Date(alltime))
z
}
can_props_smoothed <- smooth_it(props |> filter(province == "Canada"))
can_props_smoothed |>
pivot_longer(-Date) |>
ggplot(aes(Date, y = value, fill = name)) +
geom_area(position = "stack") +
ylab("Variants in circulation") +
xlab("") +
theme_bw() +
scale_x_date(name = "", date_breaks = "1 year", date_labels = "%Y", expand = c(0,0)) +
scale_y_continuous(expand = c(0,0)) +
scale_fill_viridis_d(name = "")
can_pred_class <- can_props_smoothed |>
filter(Date <= max(cancovid$date)) |>
pivot_longer(-Date) |>
group_by(Date) |>
summarise(var = name[which.max(value)], .groups = "drop") |>
mutate(var = case_when(
var == "other" ~ "Ancestral lineage",
var == "Alpha" ~ "Alpha",
var == "Beta" ~ "Beta",
var == "Delta" ~ "Delta",
TRUE ~ "Omicron"
))
boot <- data.frame(
Date = seq(min(cancovid$date), min(can_props_smoothed$Date) - 1, by = "day"),
var = "Ancestral lineage"
)
can_pred_class <- bind_rows(boot, can_pred_class)
## ----echo=FALSE, fig.height=1, dev='png'--------------------------------------
ggplot(can_pred_class, aes(x = Date, fill = fct_relevel(var, "Ancestral lineage"))) +
geom_ribbon(aes(ymax = 1, ymin = 0)) +
scale_y_continuous(expand = expansion(0)) +
theme(axis.text.y = element_blank(), axis.ticks.y = element_blank()) +
scale_fill_brewer(palette = "Dark2", name = "")
## ----meta-analysis, eval=FALSE------------------------------------------------
# data_raw <- readxl::read_excel("xu-etal-DATA_RAW.xlsx") |>
# select(type, para, n = Sample_size, mean, sd, se, median) |>
# filter(!is.na(type)) |>
# mutate(across(-c(type, para), as.numeric))
#
# bonehead_meta <- data_raw |>
# group_by(type, para) |>
# mutate(
# no_n = all(is.na(n)),
# n = case_when(!is.na(n) ~ n, no_n ~ 1, TRUE ~ median(n, na.rm = TRUE))
# ) |>
# ungroup() |>
# mutate(
# mean = case_when(!is.na(mean) ~ mean, TRUE ~ median),
# sd = case_when(!is.na(sd) ~ sd, TRUE ~ se * sqrt(n))
# ) |>
# group_by(type, para) |>
# summarise(
# mean = median(mean, na.rm = TRUE),
# sd = median(sd, na.rm = TRUE),
# .groups = "drop"
# )
# ## There's only one Beta and only IP.
# ## We use the corresponding sd for Alpha IP,
# ## and duplicate Alpha for GT / ST
#
# Beta_IP <- bonehead_meta |> filter(type == "Beta")
# Beta_IP$sd = bonehead_meta |>
# filter(type == "Alpha", para == "IP") |> pull(sd)
# Beta <- bind_rows(
# Beta_IP,
# bonehead_meta |>
# filter(type == "Alpha", para != "IP") |>
# mutate(type = "Beta")
# )
#
# delay_dstns_byvar <- bonehead_meta |>
# filter(type != "Beta") |>
# bind_rows(Beta) |>
# arrange(type, para) |>
# mutate(shape = mean^2 / sd^2, scale = mean / shape)
# saveRDS(delay_dstns_byvar, "delay-distns-byvar.rds")
## ----variant-si-plot, echo=FALSE----------------------------------------------
delay_dstns_can <- readRDS("delay-distns-byvar.rds") |>
filter(para == "SI", type %in% unique(can_pred_class$var))
delay_dstns_can |>
rowwise() |>
mutate(probability = list(discretize_gamma(0:20, shape, scale))) |>
ungroup() |>
select(type, probability) |>
unnest(probability) |>
group_by(type) |>
mutate(delay = row_number() - 1) |>
ggplot(aes(delay, probability, colour = type)) +
geom_line() +
scale_color_brewer(palette = "Dark2", name = "") +
scale_y_continuous(expand = expansion(c(0, 0.05)))
## ----tvar-matrix, message=FALSE-----------------------------------------------
library(Matrix)
n <- nrow(cancovid)
delay_mat <- matrix(0, n, n)
delay_mat[1,1] <- 1
for (iter in 2:n) {
current_var <- can_pred_class$var[iter]
current_pars <- delay_dstns_can |> filter(type == current_var)
delay <- discretize_gamma(0:(iter - 1), current_pars$shape, current_pars$scale)
delay_mat[iter, 1:iter] <- rev(delay)
}
delay_mat <- drop0(as(delay_mat, "CsparseMatrix")) # make it sparse, not necessary
delay_mat <- delay_mat / rowSums(delay_mat) # renormalize
## ----tvar-Rt------------------------------------------------------------------
can_tvar <- estimate_rt(
cancovid$incident_cases,
x = cancovid$date,
delay_distn = delay_mat,
nsol = 20L)
plot(can_tvar) + coord_cartesian(ylim = c(0.5, 2))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.