#######################################################
### read flu df from Jacob and clean
### df scrape from: https://apps.who.int/flumart/Default?ReportNo=12
### https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6086842/
### https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0193263
#######################################################
library(dplyr)
library(janitor)
library(stringr)
library(readr)
library(countrycode)
library(tidyr)
library(lubridate)
strains <- c(
"a_total",
"a_h1",
"a_h3",
"b_total",
"b_victoria_lineage",
"b_yamagata_lineage"
)
hemi <- read.csv("./data-raw/hemi.csv")
usethis::use_data(strains, overwrite = TRUE)
make_bins <- function(tmp) {
tmp %>% mutate(
pct = (count / total_number_of_influenza_positive_viruses) * 100
) -> tmp
tmp %>%
subset(
!is.na(pct) & pct > 0
) -> tmp_nonzero
tmp_nonzero %>%
.$pct %>%
classInt::classIntervals(n = 3, style = "quantile") -> bins
tmp_nonzero %>%
mutate(cat = cut(pct, breaks = bins$brks, include.lowest = TRUE, dig.lab = 10)) -> tmp_nonzero
levels(tmp_nonzero$cat) <- c("Low", "Moderate", "High")
tmp %>%
subset(
!(!is.na(pct) & !!pct > 0)
) %>%
mutate(cat = ifelse(pct == 0, "Zero", NA)) -> tmp_zero
combined <- rbind(tmp_nonzero, tmp_zero) %>%
mutate(cat = relevel(cat, "Zero")) %>%
as.data.frame()
combined
}
add_moyr <- function(data, date_format, intervalz) {
data %>%
mutate(
moyr = as.character(format(end_date, date_format)),
interval = intervalz
)
}
tidyiddr::who.influenza() %>%
janitor::clean_names() %>%
subset(end_date >= "2009-10-01" & !is.na(end_date)) %>%
mutate(
country_area_or_territory = recode(country_area_or_territory, "Venezuela (Bolivarian Republic of)" = "Venezuela"),
country_area_or_territory = stringr::str_replace(country_area_or_territory, " \\s*\\([^\\)]+\\)", ""),
country_area_or_territory = gsub(")", "", country_area_or_territory),
iso_a3 = countrycode(
sourcevar = country_area_or_territory,
origin = "country.name",
destination = "iso3c",
warn = TRUE,
nomatch = NA,
custom_dict = NULL,
custom_match = NULL,
origin_regex = T
),
iso_a3 = factor(case_when(
country_area_or_territory == "Kosovo" ~ "KOS",
T ~ iso_a3
)),
season = factor(ifelse(lubridate::month(end_date) %in% c(11, 12, 1, 2, 3, 4), 0, 1), levels = c(0, 1), labels = c("N", "S")),
season = factor(ifelse(season == "N",
paste0(season, lubridate::year(end_date), "/", lubridate::year(end_date) + 1),
paste0(season, lubridate::year(end_date) - 1, "/", lubridate::year(end_date))
))
) %>%
merge(hemi, by = "iso_a3", all.x = T) -> tmp
seasons <- levels(tmp$season)
a <- seasons[grepl("N", seasons)]
b <- seasons[grepl("S", seasons)]
idx <- order(c(seq_along(b), seq_along(a)))
map_seasons <- (c(b, a))[idx]
usethis::use_data(map_seasons, overwrite = TRUE)
rbind(add_moyr(tmp, "%Y-%m", "year"), add_moyr(tmp, "%Y", "month"), tmp %>% mutate(moyr = season, interval = "season")) %>%
select(c(strains, "total_number_of_influenza_positive_viruses", moyr, iso_a3, interval, hemi)) -> tmp2
process <- function(data, hemi = F) {
grouping <- c("moyr", "interval", "hemi")
if (hemi == F) {
grouping <- c("iso_a3", grouping)
}
data %>%
group_by_at(c(grouping)) %>%
summarise_at(
c(strains, "total_number_of_influenza_positive_viruses"), sum,
na.rm = T,
) %>%
ungroup() %>%
pivot_longer(strains, names_to = "strain", values_to = "count") %>%
mutate(interval = factor(interval), strainmoyr = factor(paste0(moyr, strain)), across(c(count, total_number_of_influenza_positive_viruses), as.integer)) %>%
split("strainmoyr") %>%
lapply(make_bins) %>%
do.call("rbind", .) %>%
select(-strainmoyr) %>%
pivot_wider(names_from = "strain", values_from = c("count", "cat", "pct"))
}
map <- merge(process(tmp2), process(tmp2, hemi = T), by = c("moyr", "hemi", "interval"), suffixes = c("", "_hemi"), all.x = T)
usethis::use_data(map, overwrite = TRUE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.