library(data.table) library(jsonlite) library(gh) library(purrr) library(ggplot2) library(here) library(covidregionaldata)
cases <- fread("https://raw.githubusercontent.com/epiforecasts/covid19-forecast-hub-europe/main/data-truth/JHU/truth_JHU-Incident%20Cases.csv") # nolint # Format date cases[, date := as.Date(date)] # Order data by date and location setkey(cases, location_name, date) # Summarise to weekly cases starting on Saturday to Sync with the forecast hubs cases[, cases := frollsum(value, n = 7), by = c("location_name")] # Filter from the 1st of January and keep only Saturdays cases <- cases[date >= as.Date("2021-01-01")] cases <- cases[weekdays(date) %in% "Saturday"] # Only most recent case data is available cases[, cases_available := date] # Drop unnecessary columns set(cases, j = c("value"), value = NULL) # Summary summary(cases)
download_covariants_sequences <- function(sha, path = "cluster_tables/21A.Delta_data.json") { # nolint if (missing(sha)) { url <- paste0( "https://raw.githubusercontent.com/hodcroftlab/covariants/master/", path ) } else { url <- paste( "https://raw.githubusercontent.com/hodcroftlab/covariants", sha, path, sep = "/" ) } sequences <- jsonlite::fromJSON(url) sequences <- purrr::map(sequences, as.data.table) sequences <- data.table::rbindlist(sequences, idcol = "location_name") return(sequences[]) }
latest_sequences <- download_covariants_sequences() latest_sequences
covariants_file_commits <- function(path = "cluster_tables/21A.Delta_data.json") { # nolint commits <- gh::gh( "/repos/hodcroftlab/covariants/commits?path={path}", owner = "hodcroftlab", repo = "covariants", path = path, .limit = 1000 ) commits <- purrr::map( commits, ~ data.table( date = as.Date(as.character(.$commit$committer$date)), datetime = lubridate::as_datetime( as.character(.$commit$committer$date) ), author = .$commit$committer$name, message = .$commit$message, sha = .$sha ) ) commits <- data.table::rbindlist(commits) return(commits[]) } delta_sequence_commits <- covariants_file_commits() delta_sequence_commits
sequences <- delta_sequence_commits[order(date)][, .SD[datetime == max(datetime)], by = date ] setnames(sequences, "date", "seq_available") sequences[, data := purrr::map(sha, download_covariants_sequences)] sequences <- sequences[, rbindlist(data), by = seq_available] sequences
sequences <- sequences[ , .( seq_available = seq_available, location_name, week_starting = as.Date(week), week_ending = as.Date(week) + 6, seq_voc = unsmoothed_cluster_sequences, seq_total = unsmoothed_total_sequences ) ][, share_voc := seq_voc / seq_total][] sequences
first_seq <- sequences[, .SD[seq_available == max(seq_available)][ seq_voc >= 2 & shift(seq_voc, type = "lead") >= 2 ][ share_voc >= 0.001 & shift(share_voc, type = "lead") >= 0.001 ][ order(week_ending) ][1, ], by = "location_name" ][!is.na(seq_voc)][] first_seq
filt_sequences <- merge( sequences, first_seq[, .(location_name, intro_date = week_ending)], by = "location_name" ) filt_sequences <- filt_sequences[week_ending >= intro_date][ , intro_date := NULL ][]
last_seq <- sequences[seq_available == max(seq_available)][, .SD[seq_total > 10][ share_voc >= 0.99 & shift(share_voc, type = "lead") >= 0.99 ][ order(week_ending) ][1, ], by = "location_name" ][!is.na(seq_voc)][] last_seq
filt_sequences <- merge( filt_sequences, last_seq[, .(location_name, end_date = week_ending)], by = "location_name" ) filt_sequences[is.na(end_date), end_date := max(week_ending), by = "location_name" ] filt_sequences <- filt_sequences[week_ending <= end_date][ , end_date := NULL ][]
filt_cases <- Reduce( function(x, y) { merge(x, y, by = "location_name", all.x = TRUE) }, list( cases, first_seq[, .(location_name, intro_date = week_ending - 1)], last_seq[, .(location_name, end_date = week_ending - 1)] ) ) filt_cases <- filt_cases[date >= (intro_date - 7 * 4)] filt_cases <- filt_cases[date <= (end_date + 7 * 4)] filt_cases[, c("intro_date", "end_date") := NULL]
adjusted_seq <- copy(filt_sequences)[ , date := week_ending - 1 ][, c("week_starting", "week_ending") := NULL] notifications <- merge(filt_cases, adjusted_seq, by = c("date", "location_name"), all.x = TRUE ) setorder(notifications, seq_available) setorder(notifications, location_name, date) setorderv(notifications, c("location_name", "date", "seq_available"))
problem_countries <- unique( notifications[cases < 0 | seq_total < 0]$location_name ) problem_countries notifications <- notifications[!(location_name %in% problem_countries)]
# save to observations folder fwrite(notifications, file = here("data/observations/covariants.csv")) # Summary summary(notifications)
not_cases <- unique(notifications[, .(date, cases, location_name)]) # plot cases ggplot(not_cases) + aes(x = date, y = cases, col = location_name) + geom_line(alpha = 0.6) + theme_bw() + theme(legend.position = "bottom")
ggplot(copy(not_cases)[, cases := cases / max(cases), by = "location_name"]) + aes(x = date, y = cases, col = location_name) + geom_line(alpha = 0.6) + theme_bw() + theme(legend.position = "bottom")
# get comparison data sources rki <- fread(here("data", "observations", "rki.csv")) who <- setDT( covidregionaldata::get_national_data( "Germany", source = "WHO", verbose = FALSE ) ) # make the who data source weekly who[, cases := frollsum(cases_new, n = 7)] who <- who[weekdays(date) %in% "Saturday"] germany <- rbind( unique(rki[, .(date = as.Date(date), cases, source = "RKI")]), not_cases[location_name == "Germany"][, .(date, cases, source = "JHU")], who[date >= min(rki$date) & date <= max(rki$date)][, .(date, cases = cases, source = "WHO")] ) ggplot(germany) + aes(x = date, y = cases, col = source) + geom_point(size = 1.4, alpha = 0.8) + geom_line(size = 1.1, alpha = 0.6) + scale_colour_brewer(palette = "Dark2") + theme_bw() + theme(legend.position = "bottom") + labs(x = "Date", y = "COVID-19 notifications", col = "Source")
ggplot(notifications[seq_available == max(seq_available, na.rm = TRUE)]) + aes(x = date, y = share_voc, col = location_name) + geom_line(alpha = 0.6) + theme_bw() + theme(legend.position = "bottom")
ggplot(notifications[seq_available == min(seq_available, na.rm = TRUE)]) + aes(x = date, y = share_voc, col = location_name) + geom_line(alpha = 0.6) + theme_bw() + theme(legend.position = "bottom")
ggplot( notifications[!is.na(seq_available)][ , seq_available := as.factor(seq_available) ] ) + aes(x = date, y = share_voc, col = seq_available) + geom_line(alpha = 0.6) + theme_bw() + theme(legend.position = "bottom") + facet_wrap(vars(location_name))
latest_seq <- notifications[, n := .N, by = c("date", "location_name") ][, .SD[seq_available == max(seq_available)], by = c("date", "location_name") ][n > 1] seq_change <- merge( notifications[!is.na(seq_available)][ , seq_available := as.factor(seq_available) ][ , .(location_name, date, share_voc, seq_available) ], latest_seq[, .(location_name, date, latest_voc = share_voc)], by = c("location_name", "date") ) seq_change[, per_latest := share_voc / latest_voc] seq_change <- seq_change[date <= as.Date("2021-09-01")] ggplot(seq_change) + aes(x = date, y = per_latest, col = seq_available, group = seq_available) + geom_point(size = 1.1, alpha = 0.8) + geom_line(alpha = 0.6) + theme_bw() + theme(legend.position = "bottom") + facet_wrap(vars(location_name), scales = "free")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.