suppressPackageStartupMessages({
  library(covmuller)
  library(tidyverse)
})
theme_set(CovmullerTheme())

Get variants data for India

gisaid_metadata <- qs::qread("~/data/epicov/metadata_tsv_2024_04_11.qs")
gisaid_india <- FilterGISAIDIndia(gisaid_metadata_all = gisaid_metadata)

vocs <- GetVOCs()
omicron <- vocs[["omicron"]]
vocs[["omicron"]] <- NULL
custom_voc_mapping <- list(
  `BA.1` = "BA.1",
  `BA.1.*` = "BA.1",
  `BA.2` = "BA.2",
  `BA.2.*` = "BA.2",
  `BA.3` = "BA.3",
  `BA.3.*` = "BA.3",
  `BA.4` = "BA.4",
  `BA.4.*` = "BA.4",
  `BA.5` = "BA.5",
  `BA.5.*` = "BA.5",
  `XBB` = "XBB",
  `XBB.1` = "XBB.1+",
  `XBB.1.*` = "XBB.1+",
  `XBB.1.5` = "XBB.1.5",
  `BQ.1` = "BQ.1",
  `BQ.1.*` = "BQ.1"
)
gisaid_india <- gisaid_india %>%
  filter(pangolin_lineage != "None") %>%
  filter(pangolin_lineage != "Unassigned")

gisaid_india <- CollapseLineageToVOCs(
  variant_df = gisaid_india,
  vocs = vocs,
  custom_voc_mapping = custom_voc_mapping,
  summarize = FALSE
)

Top 10 cities with most number of sequenced samples

state_counts <- as.data.frame(table(gisaid_india$State)) %>% rename(State = Var1, `Total sequences` = Freq)

DT::datatable(state_counts %>% arrange(desc(`Total sequences`)))

Get monthly cases for Tamilnadu

Tamilnadu_monthly_cases <- GetIndiaConfirmedCasesMonthlyLong() %>% filter(State == "Tamil Nadu")
head(Tamilnadu_monthly_cases)

Plot monthly cases for Tamilnadu

p1 <- BarPlot(Tamilnadu_monthly_cases, ylabel = "Cases per month", label_si = TRUE, title = "Total cases per month - Tamilnadu (India)", caption = "**Source: covid19bharat.org**<br>")
p1

Get weekly cases for Tamilnadu

Tamilnadu_seq_stats <- TotalSequencesPerMonthCountrywise(gisaid_india %>% filter(State == "Tamil Nadu"), rename_country_as_state = TRUE)


p2 <- BarPlot(Tamilnadu_seq_stats, ylabel = "Sequenced per month", color = "slateblue1", label_si = TRUE, title = "Total sequences deposited to GISAID from Tamilnadu (India)", caption = "**Source: gisaid.org **<br>")
p2

Overall, how much has Tamilnadu sequenced over months?

While the absolute numbers are informative, a more useful metric is the proportion of cases (cases sequenced over total cases) that are getting sequenced. Here we look at the proportion of cases that have been sequenced in India over the course of the pandemic:

Tamilnadu_monthly_cases_tmp <- Tamilnadu_monthly_cases %>% select(MonthYear, value)
Tamilnadu_monthly_cases_tmp$State <- "India"
Tamilnadu_monthly_cases_tmp$type <- "Confirmed"

Tamilnadu_monthly_cases_tmp <- Tamilnadu_monthly_cases_tmp %>% select(MonthYear, State, value, type)
sequencing_proportion <- CombineSequencedCases(
  cases_sequenced = Tamilnadu_seq_stats,
  confirmed_long = Tamilnadu_monthly_cases_tmp
)
p3 <- BarPlot(sequencing_proportion, yaxis = "percent_sequenced_collected", ylabel = "%  deposited to GISAID", color = "yellowgreen", title = "Proportion of cases deposited to GISAID from Tamilnadu (India)", caption = "**Source: gisaid.org and covid19bharat.org**<br>")
p3
p1 / p2 / p3

Distribution of variants

state_month_counts <- SummarizeVariantsMonthwise(gisaid_india %>% filter(State == "Tamil Nadu"))
state_month_counts$State <- "Tamil Nadu"
state_month_prevalence <- CountsToPrevalence(state_month_counts)
vocs <- GetVOCs()
omicron <- vocs[["omicron"]]

vocs[["omicron"]] <- NULL
state_month_prevalence <- CollapseLineageToVOCs(
  variant_df = state_month_prevalence,
  vocs = vocs,
  custom_voc_mapping = custom_voc_mapping, summarize = FALSE
)

p5 <- StackedBarPlotPrevalence(state_month_prevalence)
p5

Project weekly cases to variant prevalence data from GISAID

confirmed_subset_dateweekwise_long <- GetIndiaConfirmedCasesWeeklyLong()



gisaid_dist_weekwise <- SummarizeVariantsWeekwise(gisaid_india %>% filter(State == "Tamil Nadu") %>% arrange(WeekYearCollected))
confirmed_subset_dateweekwise_long_dist <- confirmed_subset_dateweekwise_long %>%
  filter(State %in% c("Tamil Nadu")) %>%
  rename(n = value) %>%
  rename(WeekYearCollected = WeekYear) %>%
  dplyr::select(-contains("type")) %>%
  filter(WeekYearCollected >= min(gisaid_dist_weekwise$WeekYearCollected))
confirmed_subset_dateweekwise_long_dist$State <- NULL

voc_to_keep <- gisaid_dist_weekwise %>%
  group_by(lineage_collapsed) %>%
  summarise(n_sum = sum(n)) %>%
  filter(n_sum > 1) %>%
  pull(lineage_collapsed) %>%
  unique()
gisaid_dist_weekwise <- gisaid_dist_weekwise %>% filter(lineage_collapsed %in% voc_to_keep)

india_cases_pred_prob_sel_long <- FitMultinomWeekly(gisaid_dist_weekwise, confirmed_subset_dateweekwise_long_dist)
the_anim <- PlotVariantPrevalenceAnimated(india_cases_pred_prob_sel_long, title = "Estimated cases (weekly average) in Tamil Nadu (India) by variant", caption = "**Source: gisaid.org and covid19bharat.org**", date_breaks = "28 days")
gganimate::anim_save(filename = here::here("docs/articles/Tamilnadu_animated.gif"), animation = the_anim)

Look at cases after January, 2022 only:

confirmed_subset_dateweekwise_long <- GetIndiaConfirmedCasesWeeklyLong()

confirmed_subset_dateweekwise_long <- confirmed_subset_dateweekwise_long %>%
  filter(WeekYear >= tsibble::yearweek("2021 W35")) %>%
  filter(State %in% c("Tamil Nadu")) %>%
  group_by(WeekYear) %>%
  summarise(n = sum(value)) %>%
  arrange(WeekYear) %>%
  rename(WeekYearCollected = WeekYear)

gisaid_dist <- gisaid_india %>%
  filter(MonthYearCollected > "Dec 2021") %>%
  filter(State == "Tamil Nadu") %>%
  arrange(WeekYearCollected)

gisaid_weekwise <- SummarizeVariantsWeekwise(gisaid_dist)

voc_to_keep <- gisaid_weekwise %>%
  group_by(lineage_collapsed) %>%
  summarise(n_sum = sum(n)) %>%
  filter(n_sum > 1) %>%
  pull(lineage_collapsed) %>%
  unique()
gisaid_weekwise <- gisaid_weekwise %>% filter(lineage_collapsed %in% voc_to_keep)

cases_pred_prob_sel_long <- FitMultinomWeekly(gisaid_weekwise, confirmed_subset_dateweekwise_long)
the_anim <- PlotVariantPrevalenceAnimated(cases_pred_prob_sel_long, title = "Estimated cases (weekly average) in Tamil Nadu (India) by variant", caption = "**Source: gisaid.org and covid19bharat.org**<br>")
gganimate::anim_save(filename = here::here("docs/articles/Tamilnadu_animated_2021.gif"), animation = the_anim)

Look at cases in the last few weeks:

confirmed_subset_dateweekwise_long <- GetIndiaConfirmedCasesWeeklyLong()

confirmed_subset_dateweekwise_long <- confirmed_subset_dateweekwise_long %>%
  filter(WeekYear >= tsibble::yearweek("2022 W12")) %>%
  filter(State %in% c("Tamil Nadu")) %>%
  group_by(WeekYear) %>%
  summarise(n = sum(value)) %>%
  arrange(WeekYear) %>%
  rename(WeekYearCollected = WeekYear)

gisaid_dist <- gisaid_india %>%
  filter(MonthYearCollected > "Dec 2021") %>%
  filter(State == "Tamil Nadu") %>%
  arrange(WeekYearCollected)

gisaid_weekwise <- SummarizeVariantsWeekwise(gisaid_dist)

voc_to_keep <- gisaid_weekwise %>%
  group_by(lineage_collapsed) %>%
  summarise(n_sum = sum(n)) %>%
  filter(n_sum > 1) %>%
  pull(lineage_collapsed) %>%
  unique()
gisaid_weekwise <- gisaid_weekwise %>% filter(lineage_collapsed %in% voc_to_keep)

cases_pred_prob_sel_long <- FitMultinomWeekly(gisaid_weekwise, confirmed_subset_dateweekwise_long)
the_anim <- PlotVariantPrevalenceAnimated(cases_pred_prob_sel_long, title = "Estimated cases (weekly average) in Tamil Nadu (India) by variant", caption = "**Source: gisaid.org and covid19bharat.org**<br>")
gganimate::anim_save(filename = here::here("docs/articles/Tamilnadu_animated_2022.gif"), animation = the_anim)



saketkc/covmuller documentation built on April 19, 2024, 10:14 a.m.