data-raw/map.R

#######################################################
### read flu df from Jacob and clean
### df scrape from:  https://apps.who.int/flumart/Default?ReportNo=12
### https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6086842/
### https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0193263
#######################################################
library(dplyr)
library(janitor)
library(stringr)
library(readr)
library(countrycode)
library(tidyr)
library(lubridate)

strains <- c(
    "a_total",
    "a_h1",
    "a_h3",
    "b_total",
    "b_victoria_lineage",
    "b_yamagata_lineage"
)

hemi <- read.csv("./data-raw/hemi.csv")
usethis::use_data(strains, overwrite = TRUE)


make_bins <- function(tmp) {
    tmp %>% mutate(
        pct = (count / total_number_of_influenza_positive_viruses) * 100
    ) -> tmp
    tmp %>%
        subset(
            !is.na(pct) & pct > 0
        ) -> tmp_nonzero

    tmp_nonzero %>%
        .$pct %>%
        classInt::classIntervals(n = 3, style = "quantile") -> bins


    tmp_nonzero %>%
        mutate(cat = cut(pct, breaks = bins$brks, include.lowest = TRUE, dig.lab = 10)) -> tmp_nonzero

    levels(tmp_nonzero$cat) <- c("Low", "Moderate", "High")



    tmp %>%
        subset(
            !(!is.na(pct) & !!pct > 0)
        ) %>%
        mutate(cat = ifelse(pct == 0, "Zero", NA)) -> tmp_zero

    combined <- rbind(tmp_nonzero, tmp_zero) %>%
        mutate(cat = relevel(cat, "Zero")) %>%
        as.data.frame()

    combined
}

add_moyr <- function(data, date_format, intervalz) {
    data %>%
        mutate(
            moyr = as.character(format(end_date, date_format)),
            interval = intervalz
        )
}

tidyiddr::who.influenza() %>%
    janitor::clean_names() %>%
    subset(end_date >= "2009-10-01" & !is.na(end_date)) %>%
    mutate(
        country_area_or_territory = recode(country_area_or_territory, "Venezuela (Bolivarian Republic of)" = "Venezuela"),
        country_area_or_territory = stringr::str_replace(country_area_or_territory, " \\s*\\([^\\)]+\\)", ""),
        country_area_or_territory = gsub(")", "", country_area_or_territory),
        iso_a3 = countrycode(
            sourcevar = country_area_or_territory,
            origin = "country.name",
            destination = "iso3c",
            warn = TRUE,
            nomatch = NA,
            custom_dict = NULL,
            custom_match = NULL,
            origin_regex = T
        ),
        iso_a3 = factor(case_when(
            country_area_or_territory == "Kosovo" ~ "KOS",
            T ~ iso_a3
        )),
        season = factor(ifelse(lubridate::month(end_date) %in% c(11, 12, 1, 2, 3, 4), 0, 1), levels = c(0, 1), labels = c("N", "S")),
        season = factor(ifelse(season == "N",
            paste0(season, lubridate::year(end_date), "/", lubridate::year(end_date) + 1),
            paste0(season, lubridate::year(end_date) - 1, "/", lubridate::year(end_date))
        ))
    ) %>%
    merge(hemi, by = "iso_a3", all.x = T) -> tmp

seasons <- levels(tmp$season)
a <- seasons[grepl("N", seasons)]
b <- seasons[grepl("S", seasons)]
idx <- order(c(seq_along(b), seq_along(a)))

map_seasons <- (c(b, a))[idx]

usethis::use_data(map_seasons, overwrite = TRUE)


rbind(add_moyr(tmp, "%Y-%m", "year"), add_moyr(tmp, "%Y", "month"), tmp %>% mutate(moyr = season, interval = "season")) %>%
    select(c(strains, "total_number_of_influenza_positive_viruses", moyr, iso_a3, interval, hemi)) -> tmp2
    
process <- function(data, hemi = F) {
    grouping <- c("moyr", "interval", "hemi")
    if (hemi == F) {
        grouping <- c("iso_a3", grouping)
    }
    data %>%
        group_by_at(c(grouping)) %>%
        summarise_at(
            c(strains, "total_number_of_influenza_positive_viruses"), sum,
            na.rm = T,
        ) %>%
        ungroup() %>%
        pivot_longer(strains, names_to = "strain", values_to = "count") %>%
        mutate(interval = factor(interval), strainmoyr = factor(paste0(moyr, strain)), across(c(count, total_number_of_influenza_positive_viruses), as.integer)) %>%
        split("strainmoyr") %>%
        lapply(make_bins) %>%
        do.call("rbind", .) %>%
        select(-strainmoyr) %>%
        pivot_wider(names_from = "strain", values_from = c("count", "cat", "pct"))
}

map <- merge(process(tmp2), process(tmp2, hemi = T), by = c("moyr", "hemi", "interval"), suffixes = c("", "_hemi"), all.x = T)


usethis::use_data(map, overwrite = TRUE)
QuartzSoftwareLLC/shiny.fluToolKit documentation built on April 28, 2022, 6:25 a.m.