suppressPackageStartupMessages({
  library(covmuller)
  library(tidyverse)
})
theme_set(CovmullerTheme())

Get variants data for India

gisaid_metadata <- qs::qread("~/data/epicov/metadata_tsv_2024_04_11.qs")
gisaid_india <- FilterGISAIDIndia(gisaid_metadata_all = gisaid_metadata)

vocs <- GetVOCs()
omicron <- vocs[["omicron"]]
vocs[["omicron"]] <- NULL
custom_voc_mapping <- list(
  `BA.1.1` = "BA.1",
  `BA.1` = "BA.1",
  `BA.2` = "BA.2",
  `BA.2.10` = "BA.2.X",
  `BA.2.10.1` = "BA.2.X",
  `BA.2.12` = "BA.2.X",
  `BA.2.12.1` = "BA.2.X",
  `BA.3` = "BA.3",
  `BA.4` = "BA.4",
  `BA.5` = "BA.5",
  `BA.2.74` = "BA.2.X",
  `BA.2.75` = "BA.2.75",
  `BA.2.76` = "BA.2.X",
  `XBB.*` = "XBB",
  `BQ.1` = "BQ.1+",
  `BQ.1.*` = "BQ.1+"
)
gisaid_india <- gisaid_india %>%
  filter(pangolin_lineage != "None") %>%
  filter(pangolin_lineage != "Unassigned")

gisaid_india$District <- stringr::str_to_title(gisaid_india$District)
gisaid_india$City <- stringr::str_to_title(gisaid_india$City)

gisaid_india$custom_city <- gisaid_india$City
gisaid_india$custom_city[gisaid_india$custom_city == ""] <- gisaid_india$District[gisaid_india$custom_city == ""]

gisaid_india$custom_city <- stringr::str_to_title(gisaid_india$custom_city)

gisaid_india <- CollapseLineageToVOCs(
  variant_df = gisaid_india,
  vocs = vocs,
  custom_voc_mapping = custom_voc_mapping,
  summarize = FALSE
)

Top 10 cities with most number of sequenced samples

city_counts <- as.data.frame(table(gisaid_india$custom_city)) %>% rename(City = Var1, `Total sequences` = Freq)

DT::datatable(city_counts %>% arrange(desc(`Total sequences`)))

Get monthly cases for Kolkata

chennai_monthly_cases <- GetIndiaConfirmedCasesMonthlyLong(level = "district") %>% filter(District == "Kolkata")
head(chennai_monthly_cases)

Plot monthly cases for Kolkata

p1 <- BarPlot(chennai_monthly_cases, ylabel = "Cases per month", label_si = TRUE, title = "Total cases per month - Kolkata (India)", caption = "**Source: covid19bharat.org**<br>")
p1

Get weekly cases for Kolkata

chennai_seq_stats <- TotalSequencesPerMonthCountrywise(gisaid_india %>% filter(custom_city == "Kolkata"), rename_country_as_state = TRUE)


p2 <- BarPlot(chennai_seq_stats, ylabel = "Sequenced per month", color = "slateblue1", label_si = TRUE, title = "Total sequences deposited to GISAID from Kolkata (India)", caption = "**Source: gisaid.org **<br>")
p2

Overall, how much has Kolkata sequenced over months?

While the absolute numbers are informative, a more useful metric is the proportion of cases (cases sequenced over total cases) that are getting sequenced. Here we look at the proportion of cases that have been sequenced in India over the course of the pandemic:

chennai_monthly_cases_tmp <- chennai_monthly_cases %>% select(MonthYear, value)
chennai_monthly_cases_tmp$State <- "India"
chennai_monthly_cases_tmp$type <- "Confirmed"

chennai_monthly_cases_tmp <- chennai_monthly_cases_tmp %>% select(MonthYear, State, value, type)
sequencing_proportion <- CombineSequencedCases(
  cases_sequenced = chennai_seq_stats,
  confirmed_long = chennai_monthly_cases_tmp
)
p3 <- BarPlot(sequencing_proportion, yaxis = "percent_sequenced_collected", ylabel = "%  deposited to GISAID", color = "yellowgreen", title = "Proportion of cases deposited to GISAID from Kolkata (India)", caption = "**Source: gisaid.org and covid19bharat.org**<br>")
p3
p1 / p2 / p3

Project weekly cases to variant prevalence data from GISAID

confirmed_subset_dateweekwise_long <- GetIndiaConfirmedCasesWeeklyLong(level = "district")



gisaid_dist_weekwise <- SummarizeVariantsWeekwise(gisaid_india %>% filter(custom_city == "Kolkata") %>% arrange(WeekYearCollected))
confirmed_subset_dateweekwise_long_dist <- confirmed_subset_dateweekwise_long %>%
  filter(District %in% c("Kolkata")) %>%
  rename(n = value) %>%
  rename(WeekYearCollected = WeekYear) %>%
  dplyr::select(-contains("type")) %>%
  filter(WeekYearCollected >= min(gisaid_dist_weekwise$WeekYearCollected))
confirmed_subset_dateweekwise_long_dist$State <- NULL

voc_to_keep <- gisaid_dist_weekwise %>%
  group_by(lineage_collapsed) %>%
  summarise(n_sum = sum(n)) %>%
  filter(n_sum > 1) %>%
  pull(lineage_collapsed) %>%
  unique()
gisaid_dist_weekwise <- gisaid_dist_weekwise %>% filter(lineage_collapsed %in% voc_to_keep)

india_cases_pred_prob_sel_long <- FitMultinomWeekly(gisaid_dist_weekwise, confirmed_subset_dateweekwise_long_dist)
the_anim <- PlotVariantPrevalenceAnimated(india_cases_pred_prob_sel_long, title = "Estimated cases (weekly average) in Kolkata (India) by variant", caption = "**Source: gisaid.org and covid19bharat.org**", date_breaks = "28 days")
gganimate::anim_save(filename = here::here("docs/articles/Kolkata_animated.gif"), animation = the_anim)

Look at cases after January, 2022 only:

confirmed_subset_dateweekwise_long <- GetIndiaConfirmedCasesWeeklyLong(level = "district")

confirmed_subset_dateweekwise_long <- confirmed_subset_dateweekwise_long %>%
  filter(WeekYear >= tsibble::yearweek("2021 W35")) %>%
  filter(District %in% c("Kolkata")) %>%
  group_by(WeekYear) %>%
  summarise(n = sum(value)) %>%
  arrange(WeekYear) %>%
  rename(WeekYearCollected = WeekYear)

gisaid_dist <- gisaid_india %>%
  filter(MonthYearCollected > "Dec 2021") %>%
  filter(custom_city == "Kolkata") %>%
  arrange(WeekYearCollected)

gisaid_weekwise <- SummarizeVariantsWeekwise(gisaid_dist)

voc_to_keep <- gisaid_weekwise %>%
  group_by(lineage_collapsed) %>%
  summarise(n_sum = sum(n)) %>%
  filter(n_sum > 1) %>%
  pull(lineage_collapsed) %>%
  unique()
gisaid_weekwise <- gisaid_weekwise %>% filter(lineage_collapsed %in% voc_to_keep)

cases_pred_prob_sel_long <- FitMultinomWeekly(gisaid_weekwise, confirmed_subset_dateweekwise_long)
the_anim <- PlotVariantPrevalenceAnimated(cases_pred_prob_sel_long, title = "Estimated cases (weekly average) in Kolkata (India) by variant", caption = "**Source: gisaid.org and covid19bharat.org**<br>")
gganimate::anim_save(filename = here::here("docs/articles/Kolkata_animated_2021.gif"), animation = the_anim)

Look at cases in the last few weeks:

confirmed_subset_dateweekwise_long <- GetIndiaConfirmedCasesWeeklyLong(level = "district")

confirmed_subset_dateweekwise_long <- confirmed_subset_dateweekwise_long %>%
  filter(WeekYear >= tsibble::yearweek("2022 W12")) %>%
  filter(District %in% c("Kolkata")) %>%
  group_by(WeekYear) %>%
  summarise(n = sum(value)) %>%
  arrange(WeekYear) %>%
  rename(WeekYearCollected = WeekYear)

gisaid_dist <- gisaid_india %>%
  filter(MonthYearCollected > "Dec 2021") %>%
  filter(custom_city == "Kolkata") %>%
  arrange(WeekYearCollected)

gisaid_weekwise <- SummarizeVariantsWeekwise(gisaid_dist)

voc_to_keep <- gisaid_weekwise %>%
  group_by(lineage_collapsed) %>%
  summarise(n_sum = sum(n)) %>%
  filter(n_sum > 1) %>%
  pull(lineage_collapsed) %>%
  unique()
gisaid_weekwise <- gisaid_weekwise %>% filter(lineage_collapsed %in% voc_to_keep)

cases_pred_prob_sel_long <- FitMultinomWeekly(gisaid_weekwise, confirmed_subset_dateweekwise_long)
the_anim <- PlotVariantPrevalenceAnimated(cases_pred_prob_sel_long, title = "Estimated cases (weekly average) in Kolkata (India) by variant", caption = "**Source: gisaid.org and covid19bharat.org**<br>")
gganimate::anim_save(filename = here::here("docs/articles/Kolkata_animated_2022.gif"), animation = the_anim)



saketkc/covmuller documentation built on April 19, 2024, 10:14 a.m.