suppressPackageStartupMessages({
  library(covmuller)
  library(tidyverse)
})
theme_set(CovmullerTheme())

Load preprocessed data

We load a qs object created from the metadata tar from GISAID for faster loading. See the Introduction vignette to see how the object was created.

date <- "2022_06_11"
gisaid_metadata <- qs::qread(file = paste0("~/data/epicov/metadata_tsv_", date, ".qs"))
# filter our sequences from USA
gisaid_usa <- gisaid_metadata %>%
  filter(Country == "USA") %>%
  filter(Host == "Human")
# format metadata
gisaid_usa <- FormatGISAIDMetadata(gisaid_usa)
gisaid_usa <- gisaid_usa %>% arrange(State, MonthYearCollected)
gisaid_usa$State <- CleanAmericanStates(gisaid_usa$State)
gisaid_usa <- gisaid_usa %>% filter(State %in% datasets::state.name)

Plot total sequenced cases

country_seq_stats <- TotalSequencesPerMonthCountrywise(gisaid_usa, rename_country_as_state = TRUE)
p0 <- BarPlot(country_seq_stats, ylabel = "Sequenced per month", color = "slateblue1", label_si = TRUE, xangle = 90, title = "USA")
p0

Get VOCs

vocs <- GetVOCs()
omicron <- vocs[["omicron"]]
vocs[["omicron"]] <- NULL
custom_voc_mapping <- list(
  `BA.1.1` = "BA.1.1",
  `BA.1` = "BA.1",
  `BA.2` = "BA.2",
  `BA.2.1` = "BA.2.1",
  `BA.2.10` = "BA.2.10",
  `BA.2.10.1` = "BA.2.10.1",
  `BA.2.12` = "BA.2.12",
  `BA.2.12.1` = "BA.2.12.1",
  `BA.3` = "BA.3",
  `BA.4` = "BA.4",
  `BA.5` = "BA.5"
)
gisaid_usa_collapsed <- CollapseLineageToVOCs(
  variant_df = gisaid_usa,
  vocs = vocs,
  custom_voc_mapping = custom_voc_mapping,
  summarize = FALSE
)

gisaid_usa_collapsed_sel <- gisaid_usa_collapsed %>%
  filter(MonthYearCollected >= "Oct 2022") %>%
  filter(lineage_collapsed != "Unassigned")

vocs_to_keep <- table(gisaid_usa_collapsed_sel$lineage_collapsed)
vocs_to_keep <- vocs_to_keep[vocs_to_keep > 20]

gisaid_usa_collapsed_sel <- gisaid_usa_collapsed_sel %>% filter(lineage_collapsed %in% names(vocs_to_keep))
gisaid_usa_shared_dateweek <- SummarizeVariantsDatewise(gisaid_usa_collapsed_sel, by_state = TRUE)
head(gisaid_usa_shared_dateweek)
fit_usa_multi_predsbystate <- FitMultinomStatewiseDaily(gisaid_usa_shared_dateweek)
head(fit_usa_multi_predsbystate)

Plot Smooth Muller Plots

muller_usabystate_mfit <- PlotMullerDailyPrevalence(fit_usa_multi_predsbystate, ncol = 5)
muller_usabystate_mfit


saketkc/covmuller documentation built on April 19, 2024, 10:14 a.m.