# Document-wide knitr chunk defaults, set in a single call.
knitr::opts_chunk$set(
  # Output formatting
  collapse = TRUE,
  comment = "#>",
  # Passed through to the gganimate chunk engine/options
  gganimate = list(nframes = 0),
  # Figure sizing
  out.width = "100%",
  fig.dim = c(12, 5),
  # Show the code, hide warnings and messages
  echo = TRUE,
  warning = FALSE,
  message = FALSE
)
suppressPackageStartupMessages({ library(covmuller) library(tidyverse) })
We use covid19bharat.org to get a tally of daily confirmed cases and then summarize it at the monthly level.
# Monthly confirmed-case counts as returned by
# GetIndiaConfirmedCasesMonthlyLong().
indian_state_cases <- GetIndiaConfirmedCasesMonthlyLong()

# Keep only the rows labelled "India" whose monthly value exceeds 1.
india_cases <- filter(indian_state_cases, State == "India", value > 1)

head(india_cases)
It is easy to visualize the monthly case counts on a bar plot:
# Bar plot of the monthly case counts held in `india_cases`.
p1_caption <- "**Source: covid19bharat.org**<br>"
p1 <- BarPlot(
  india_cases,
  ylabel = "Cases per month",
  label_si = TRUE,
  title = "Total cases per month - India",
  caption = p1_caption
)
p1
We utilize GISAID data to look at the prevalence of variants. To access this data, GISAID requires registration.
# Date stamp embedded in the local metadata file names.
current_date <- "2024_04_11"
fpath.tar <- paste0("~/data/epicov/metadata_tsv_", current_date, ".tar.xz")
fpath.qs <- paste0("~/data/epicov/metadata_tsv_", current_date, ".qs")

# The .qs file acts as a cache: when it is missing, read the tarball and
# write the cache; otherwise load the cached copy directly.
if (!file.exists(fpath.qs)) {
  gisaid_metadata <- ReadGISAIDMetada(path = fpath.tar)
  qs::qsave(gisaid_metadata, fpath.qs)
} else {
  gisaid_metadata <- qs::qread(file = fpath.qs)
}
We can look at the absolute number of cases that have been sequenced from a country by filtering the metadata made available by GISAID (which includes all countries) down to that country. Here, we visualize the total sequenced cases coming from India:
# Restrict the full GISAID metadata to India.
gisaid_india <- FilterGISAIDIndia(gisaid_metadata_all = gisaid_metadata)

# Per-month sequence totals for the country; `rename_country_as_state = TRUE`
# is passed so the result lines up with the State-keyed case tables used
# later (presumably it relabels the country column as State -- confirm
# against the covmuller documentation).
country_seq_stats <- TotalSequencesPerMonthCountrywise(
  gisaid_india,
  rename_country_as_state = TRUE
)

# Bar plot of sequences deposited per month.
# The closing `**` must directly follow the text: with a space before it the
# Markdown bold marker is not recognised and the asterisks are shown verbatim.
p2 <- BarPlot(
  country_seq_stats,
  ylabel = "Sequenced per month",
  color = "slateblue1",
  label_si = TRUE,
  title = "Total sequences deposited to GISAID from India",
  caption = "**Source: gisaid.org**<br>"
)
p2
While the absolute numbers are informative, a more useful metric is the proportion of cases (cases sequenced over total cases) that are getting sequenced. Here we look at the proportion of cases that have been sequenced in India over the course of the pandemic:
# Alternative source (covid19bharat.org), kept for reference:
# india_cases_long <- GetIndiaConfirmedCasesMonthlyLong() %>% filter(State == "India")

# Monthly confirmed-case totals for India from the Our World in Data
# new-cases CSV.
#
# Takes no arguments. Downloads the CSV, keeps the `date` and `India`
# columns, labels every row with GetMonthYear(date), and sums the cases
# within each MonthYear (missing values are ignored).
#
# Returns a data frame with one row per MonthYear and the columns
# `MonthYear` and `value`, sorted by MonthYear. The result is returned
# visibly (the pipeline is the last expression, not an assignment).
GetIndiaCases <- function() {
  data <- read.csv(
    "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/cases_deaths/new_cases.csv"
  )

  # Select the two needed columns, renaming `India` to `cases` on the way.
  confirmed <- data %>% select(date, cases = India)
  confirmed$MonthYear <- GetMonthYear(confirmed$date)

  confirmed %>%
    group_by(MonthYear) %>%
    summarise(value = sum(cases, na.rm = TRUE)) %>%
    arrange(MonthYear)
}

# Add the State/type columns that accompany the monthly totals passed to
# CombineSequencedCases() below.
india_cases_long <- GetIndiaCases()
india_cases_long$State <- "India"
india_cases_long$type <- "Confirmed"

# Combine sequenced counts with confirmed counts.
india_sequencing_proportion <- CombineSequencedCases(
  cases_sequenced = country_seq_stats,
  confirmed_long = india_cases_long
)

# Bar plot of the `percent_sequenced_collected` column per month.
p3 <- BarPlot(
  india_sequencing_proportion,
  yaxis = "percent_sequenced_collected",
  ylabel = "% deposited to GISAID",
  color = "yellowgreen",
  title = "Proportion of cases deposited to GISAID from India",
  caption = "**Source: gisaid.org and ourworldindata.org/coronavirus**<br>"
)
p3
We can further break down the proportion of sequenced cases at the state level:
# State-level sequence totals, stacked underneath the country-level totals.
state_seq_stats <- TotalSequencesPerMonthStatewise(
  gisaid_india,
  drop_country = TRUE
)
seq_stats <- rbind(country_seq_stats, state_seq_stats)

# Confirmed cases for every state, combined with the sequenced counts over
# the Jan 2022 - Feb 2023 window, with max.percent set to 5.
state_cases_long <- GetIndiaConfirmedCasesMonthlyLong()
india_sequencing_proportion <- CombineSequencedCases(
  cases_sequenced = seq_stats,
  confirmed_long = state_cases_long,
  month.min = "Jan 2022",
  month.max = "Feb 2023",
  max.percent = 5
)

# Turn State into a factor whose levels follow GetIndianStates().
india_sequencing_proportion[["State"]] <- factor(
  x = india_sequencing_proportion[["State"]],
  levels = as.character(GetIndianStates())
)

p4 <- PlotSequencedPropHeatmap(india_sequencing_proportion)
# p4
# In terms of absolute numbers sequenced:
# Work on a copy so `seq_stats` itself is left untouched.
seq_stats2 <- seq_stats
# Optional restriction to recent months (disabled):
# seq_stats2 <- seq_stats2 %>% filter(MonthYear >= "July 2022")

# Derive the factor from the copy itself, so the column stays the right
# length if the optional filter above is enabled.
seq_stats2$MonthYear <- factor(seq_stats2$MonthYear)

# Turn State into a factor whose levels follow GetIndianStates().
seq_stats2$State <- factor(
  x = seq_stats2$State,
  levels = as.character(GetIndianStates())
)

p4.total <- PlotTotalHeatmap(df = seq_stats2)
p4.total
Finally, we look at the prevalence of variants and variants of concern (VOCs):
# Month-wise variant counts for India, converted to prevalence.
india_month_counts <- SummarizeVariantsMonthwise(gisaid_india)
india_month_counts$State <- "India"
india_month_prevalence <- CountsToPrevalence(india_month_counts)

vocs <- GetVOCs()

# Extra lineage -> label mappings passed alongside `vocs`.
# Every lineage pattern listed here is mapped to "B.1".
b1_sublineages <- c(
  "B.1", "B.1.1.306", "B.1.1.306.*", "B.1.1.326", "B.1.36.29", "B.1.560",
  "B.1.1", "B.1.210", "B.1.36.8", "B.1.36", "B.1.36.*"
)
custom_voc_mapping <- c(
  list(
    `JN.1` = "JN.1",
    `JN.1.*` = "JN.1",
    `HV.1` = "HV.1",
    `HV.1.*` = "HV.1"
  ),
  stats::setNames(
    as.list(rep("B.1", length(b1_sublineages))),
    b1_sublineages
  )
)

# Collapse lineages using both mappings, without summarising.
india_month_prevalence <- CollapseLineageToVOCs(
  variant_df = india_month_prevalence,
  vocs = vocs,
  custom_voc_mapping = custom_voc_mapping,
  summarize = FALSE
)

p5 <- StackedBarPlotPrevalence(india_month_prevalence)
p5
For an animated version of the prevalence plot, check out VariantAnimation.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.