#' Get MonthYear (zoo) from a string-formatted date column
#' @param datecol A vector of string formatted dates
#' @param datefmt Format of the dates (default is "%Y-%m-%d")
#'
#' @returns A vector with dates converted to MonthYear format (zoo::as.yearmon)
#' @importFrom zoo as.yearmon
#' @export
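#' @examples
#' # Illustrative sketch with toy inputs (not package data):
#' GetMonthYear(c("2021-01-15", "2021-02-03"))
#' GetMonthYear("15-01-2021", datefmt = "%d-%m-%Y")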
GetMonthYear <- function(datecol, datefmt = "%Y-%m-%d") {
Date <- as.Date(datecol, format = datefmt)
Month <- strftime(Date, "%m")
Year <- strftime(Date, "%Y")
MonthYear <- paste0(Month, "/", Year)
MonthYear <- as.yearmon(MonthYear, format = "%m/%Y")
return(MonthYear)
}
#' Fix date format by padding incomplete dates
#'
#' Dates containing only a year and month (a single dash) are padded with the
#' first day of the month so they can be parsed with the supplied format;
#' lone dashes are replaced with empty strings.
#' @param datecol A vector of string formatted dates
#' @param datefmt Format of the date (default is "%Y-%m-%d")
#' @returns A character vector with incomplete dates padded to day precision
#' @importFrom stringr str_count
#' @export
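#' @examples
#' # Illustrative sketch: month-only dates are padded to the first of the month
#' FixDate(c("2021-05", "2021-05-17", "-"))
#' FixDate("05-2021", datefmt = "%d-%m-%Y")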
FixDate <- function(datecol, datefmt = "%Y-%m-%d") {
datecol <- as.character(x = datecol)
datecol[datecol == "-"] <- ""
number_dashes <- str_count(string = datecol, pattern = "-")
if (datefmt == "%Y-%m-%d") {
datecol[number_dashes == 1] <- paste0(datecol[number_dashes == 1], "-01")
} else if (datefmt == "%d-%m-%Y") {
datecol[number_dashes == 1] <- paste0("01-", datecol[number_dashes == 1])
}
return(datecol)
}
#' Determine filetype from the connection class
#' @param path Path to file
#' @returns The connection class reported by summary() for the file
#' (for example "gzfile" for gzip-compressed files and "file" for plain files)
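#' @examples
#' \dontrun{
#' # Small sketch (GetFiletype is internal to this package):
#' tf <- tempfile(fileext = ".csv")
#' writeLines("a,b,c", tf)
#' GetFiletype(tf) # "file" for an uncompressed file
#' }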
GetFiletype <- function(path) {
fopen <- file(path)
extension <- summary(fopen)$class
close.connection(fopen)
return(extension)
}
#' Get variants of concern
#' @returns A named list of variants of concern, each containing its pangolin lineages
#' @export
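#' @examples
#' # The returned list is keyed by VOC name:
#' vocs <- GetVOCs()
#' names(vocs)
#' head(vocs$delta)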
GetVOCs <- function() {
alpha_voc <- c("B.1.1.7", "Q.1", "Q.2", "Q.3", "Q.4", "Q.5", "Q.6", "Q.7", "Q.8")
beta_voc <- c("B.1.351", "B.1.351.1", "B.1.351.2", "B.1.351.3", "B.1.351.4", "B.1.351.5")
gamma_voc <- c(
"P.1", "P.1.2", "P.1.3", "P.1.4", "P.1.5", "P.1.6", "P.1.7", "P.1.7.1", "P.1.8", "P.1.9", "P.1.10", "P.1.10.1", "P.1.10.2", "P.1.11", "P.1.12",
"P.1.12.1", "P.1.13", "P.1.14", "P.1.15", "P.1.16", "P.1.17", "P.1.17.1"
)
delta_voc <- c(
"AY.1", "AY.10", "AY.100", "AY.101", "AY.102", "AY.102.1", "AY.102.2", "AY.103",
"AY.103.1", "AY.103.2", "AY.104", "AY.105", "AY.106", "AY.107", "AY.108", "AY.109",
"AY.11", "AY.110", "AY.111", "AY.112", "AY.112.1", "AY.112.2", "AY.113", "AY.114",
"AY.115", "AY.116", "AY.116.1", "AY.117", "AY.118", "AY.119", "AY.119.1", "AY.119.2",
"AY.12", "AY.120", "AY.120.1", "AY.120.2", "AY.120.2.1", "AY.121", "AY.121.1", "AY.122",
"AY.122.1", "AY.122.2", "AY.122.3", "AY.122.4", "AY.122.5", "AY.123", "AY.123.1", "AY.124",
"AY.124.1", "AY.124.1.1", "AY.125", "AY.125.1", "AY.126", "AY.127", "AY.127.1", "AY.127.2",
"AY.128", "AY.129", "AY.13", "AY.130", "AY.131", "AY.132", "AY.133", "AY.14", "AY.15", "AY.16",
"AY.16.1", "AY.17", "AY.18", "AY.19", "AY.2", "AY.20", "AY.20.1", "AY.21", "AY.22", "AY.23",
"AY.23.1", "AY.23.2", "AY.24", "AY.24.1", "AY.25", "AY.25.1", "AY.25.1.1", "AY.25.1.2", "AY.25.2",
"AY.25.3", "AY.26", "AY.26.1", "AY.27", "AY.28", "AY.29", "AY.29.1", "AY.3", "AY.3.1", "AY.3.2",
"AY.3.3", "AY.3.4", "AY.30", "AY.31", "AY.32", "AY.33", "AY.33.1", "AY.34", "AY.34.1", "AY.34.1.1",
"AY.34.2", "AY.35", "AY.36", "AY.37", "AY.38", "AY.39", "AY.39.1", "AY.39.1.1", "AY.39.1.2", "AY.39.1.3",
"AY.39.2", "AY.39.3", "AY.4", "AY.4.1", "AY.4.10", "AY.4.11", "AY.4.12", "AY.4.13", "AY.4.14", "AY.4.15",
"AY.4.16", "AY.4.17", "AY.4.2", "AY.4.2.1", "AY.4.2.2", "AY.4.2.3", "AY.4.2.4", "AY.4.3", "AY.4.4", "AY.4.5",
"AY.4.6", "AY.4.7", "AY.4.8", "AY.4.9", "AY.40", "AY.41", "AY.42", "AY.42.1", "AY.43", "AY.43.1", "AY.43.2",
"AY.43.3", "AY.43.4", "AY.43.5", "AY.43.6", "AY.43.7", "AY.43.8", "AY.43.9", "AY.44", "AY.45", "AY.46",
"AY.46.1", "AY.46.2", "AY.46.3", "AY.46.4", "AY.46.5", "AY.46.6", "AY.46.6.1", "AY.47", "AY.48", "AY.49",
"AY.5", "AY.5.1", "AY.5.2", "AY.5.3", "AY.5.4", "AY.5.5", "AY.5.6", "AY.50", "AY.51", "AY.52", "AY.53",
"AY.54", "AY.55", "AY.56", "AY.57", "AY.58", "AY.59", "AY.6", "AY.60", "AY.61", "AY.62", "AY.63", "AY.64",
"AY.65", "AY.66", "AY.67", "AY.68", "AY.69", "AY.7", "AY.7.1", "AY.7.2", "AY.70", "AY.71", "AY.72", "AY.73",
"AY.74", "AY.75", "AY.75.1", "AY.75.2", "AY.75.3", "AY.76", "AY.77", "AY.78", "AY.79", "AY.8", "AY.80", "AY.81",
"AY.82", "AY.83", "AY.84", "AY.85", "AY.86", "AY.87", "AY.88", "AY.89", "AY.9", "AY.9.1", "AY.9.2", "AY.9.2.1",
"AY.9.2.2", "AY.90", "AY.91", "AY.91.1", "AY.92", "AY.93", "AY.94", "AY.95", "AY.96", "AY.97", "AY.98", "AY.98.1",
"AY.99", "AY.99.1", "AY.99.2", "B.1.617.2"
)
omicron_voc <- c(
"B.1.1.529", "BA.1", "BA.1.1", "BA.1.1.1", "BA.1.1.10", "BA.1.1.11", "BA.1.1.12", "BA.1.1.13",
"BA.1.1.14", "BA.1.1.15", "BA.1.1.16", "BA.1.1.17", "BA.1.1.2", "BA.1.1.3", "BA.1.1.4", "BA.1.1.5",
"BA.1.1.6", "BA.1.1.7", "BA.1.1.8", "BA.1.1.9", "BA.1.10", "BA.1.11", "BA.1.12", "BA.1.13", "BA.1.13.1",
"BA.1.14", "BA.1.14.1", "BA.1.14.2", "BA.1.15", "BA.1.15.1", "BA.1.15.2", "BA.1.16", "BA.1.16.1", "BA.1.16.2",
"BA.1.17", "BA.1.17.1", "BA.1.17.2", "BA.1.18", "BA.1.19", "BA.1.2", "BA.1.20", "BA.1.21", "BA.1.21.1",
"BA.1.3", "BA.1.4", "BA.1.5", "BA.1.6", "BA.1.7", "BA.1.8", "BA.1.9", "BA.2", "BA.2.1", "BA.2.10", "BA.2.10.1",
"BA.2.11", "BA.2.12", "BA.2.12.1", "BA.2.13", "BA.2.14", "BA.2.15", "BA.2.16", "BA.2.2", "BA.2.3", "BA.2.3.1",
"BA.2.3.2", "BA.2.4", "BA.2.5", "BA.2.6", "BA.2.7", "BA.2.75", "BA.2.75.2", "BA.2.8", "BA.2.9", "BA.2.9.1",
"BA.3", "BA.4", "BA.4", "BA.4.*", "BA.4.6", "BA.5", "BA.5", "BA.5.*", "BA.5.2.1", "BF.7", "BF.7",
"BQ.1", "BQ.1.*", "CH.1.1.*", "EG.5", "EG.5.*",
"XBB", "XBB.1", "XBB.1.16", "XBB.1.16.*", "XBB.1.5", "XBB.1.5.*", "XBB.1.9.1", "XBB.1.9.1.*", "XBB.1.9.2",
"XBB.1.9.2.*"
)
lambda_voc <- c("C.37", "C.37.1")
mu_voc <- c("B.1.621", "B.1.621.1", "B.1.621.2", "BB.1", "BB.2")
epsilon_voc <- c("B.1.429", "B.1.427", "B.1.429.1")
eta_voc <- c("B.1.525")
iota_voc <- c("B.1.526")
theta_voc <- c("P.3")
zeta_voc <- c("P.2")
vocs <- list(
alpha = alpha_voc,
beta = beta_voc,
gamma = gamma_voc,
delta = delta_voc,
omicron = omicron_voc
# B.1 = c("B.1")
# lambda = lambda_voc,
# mu = mu_voc,
# epsilon = epsilon_voc,
# eta = eta_voc,
# iota = iota_voc
)
return(vocs)
}
#' Sanitize a vector to remove special characters and convert it to title case
#'
#' This function replaces:
#' 1. Commas with a space
#' 2. Hyphens with a space
#' 3. Dots with a space (runs collapse to a single space)
#' 4. Multiple spaces with a single space
#' 5. NA and empty strings with "UNKNOWN"
#' @param col String vector
#' @returns A string vector with sanitized entries
#' @importFrom stringr str_squish str_to_title
#' @export
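#' @examples
#' # Illustrative sketch with toy strings:
#' SanitizeColumn(c("NEW-DELHI", "mumbai..city", NA))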
SanitizeColumn <- function(col) {
col <- gsub(pattern = ",", replacement = " ", x = col)
col <- gsub(pattern = "-", replacement = " ", x = col)
col <- gsub(pattern = "\\.", replacement = " ", x = col)
col <- gsub(pattern = "\\s+", replacement = " ", x = col)
col <- str_squish(string = col)
col <- str_to_title(string = col)
col[is.na(col)] <- "UNKNOWN"
col[col == " "] <- "UNKNOWN"
return(col)
}
#' Clean Indian States
#' @param states A vector of state names
#' @returns A vector of cleaned state names
#' @importFrom stringr str_squish str_to_title
#' @importFrom dplyr recode
#' @export
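#' @examples
#' # Illustrative sketch with common misspellings:
#' CleanIndianStates(c("maharastra", "Tamilnadu", "New Delhi", ""))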
CleanIndianStates <- function(states) {
states <- str_squish(states)
states <- str_to_title(states)
states <- recode(
.x = states,
"Maharastra" = "Maharashtra",
"Andhra pradesh" = "Andhra Pradesh", Pondicherry = "Puducherry",
"Jammu and Kashmīr" = "Jammu and Kashmir", "Andaman & Nicobar" = "Andaman & Nicobar",
Chhatisgarh = "Chhattisgarh", Jaipur = "Rajasthan", "Dadra and Nagar Haveli" = "Dadra and Nagar Haveli and Daman and Diu",
Harayana = "Haryana", Jammu = "Jammu and Kashmir", "Jammu and kashmir" = "Jammu and Kashmir",
"Jammu & Kashmir" = "Jammu and Kashmir", Maharasthra = "Maharashtra",
"Andaman and Nicobar Islands" = "Andaman & Nicobar",
"Andaman And Nicobar Islands" = "Andaman & Nicobar",
"Dadra And Nagar Haveli" = "Dadra and Nagar Haveli and Daman and Diu",
"Dadra and Nagar Haveli and Daman and Diu" = "Dadra and Nagar Haveli and Daman and Diu",
"Dadra And Nagar Haveli And Daman And Diu" = "Dadra and Nagar Haveli and Daman and Diu",
"Dadra and Nagar Haveli and Daman and Diui" = "Dadra and Nagar Haveli and Daman and Diu",
Tamilnadu = "Tamil Nadu",
Maharshtra = "Maharashtra",
"Jammu & kashmīr" = "Jammu and Kashmir",
"Gujrat" = "Gujarat",
"Gujart" = "Gujarat",
"Chandighar" = "Chandigarh",
"Jammu And Kashmir" = "Jammu and Kashmir",
"Jammu And Kashmīr" = "Jammu and Kashmir",
"New Delhi" = "Delhi"
)
states[states == "Undefined"] <- "Unknown"
states[is.na(states)] <- "Unknown"
states[states == ""] <- "Unknown"
return(states)
}
#' Clean states from South Africa
#' @param states A vector of state names
#' @returns A vector of cleaned state names
#' @importFrom stringr str_squish str_to_title
#' @importFrom dplyr recode
#' @export
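#' @examples
#' # Illustrative sketch; the " Province" suffix and abbreviations are normalised:
#' CleanSouthAfricanStates(c("Gauteng Province", "KZN", "KwaZulu Natal"))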
CleanSouthAfricanStates <- function(states) {
states <- str_squish(states)
states <- str_to_title(states)
states <- gsub(pattern = " Province", replacement = "", x = states)
states <- recode(
.x = states,
"Kwazulu Natal" = "Kwazulu-Natal",
"Kzn" = "Kwazulu-Natal"
)
states[states == "Undefined"] <- "Unknown"
states[is.na(states)] <- "Unknown"
states[states == ""] <- "Unknown"
return(states)
}
#' Clean states from the USA
#' @param states A vector of state names
#' @returns A vector of cleaned state names
#' @importFrom stringr str_squish str_to_title str_split_fixed
#' @importFrom dplyr recode
#' @export
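#' @examples
#' # Illustrative sketch; anything after a comma is dropped:
#' CleanAmericanStates(c("California, USA", "US Virgin Islands", ""))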
CleanAmericanStates <- function(states) {
states <- str_squish(states)
states <- str_to_title(states)
states <- str_split_fixed(string = states, pattern = ",", n = 2)[, 1]
states <- recode(
.x = states,
"Virgin Islands Of The U.s." = "US Virgin Islands",
"Us Virgin Islands" = "US Virgin Islands",
)
states[states == "Undefined"] <- "Unknown"
states[is.na(states)] <- "Unknown"
states[states == ""] <- "Unknown"
states[states == "Southwest"] <- "Unknown"
return(states)
}
#' Clean states from Canada
#' @param states A vector of state names
#' @returns A vector of cleaned state names
#' @importFrom stringr str_squish
#' @importFrom dplyr recode
#' @export
CleanCanadaStates <- function(states) {
states <- str_squish(states)
states <- recode(
.x = states,
"Newfoundland" = "Newfoundland and Labrador"
)
states[states == "Undefined"] <- "Unknown"
states[is.na(states)] <- "Unknown"
states[states == ""] <- "Unknown"
return(states)
}
#' Create a combined dataframe of sequenced cases and confirmed cases
#' @param cases_sequenced A long dataframe of per state sequenced cases
#' @param confirmed_long A long dataframe of total monthly confirmed cases
#' @param prune_oversequenced Whether to cap percentages above 100 at 100. Default is TRUE
#' @param month.min Optional lower bound (exclusive) on MonthYear; earlier months are dropped
#' @param month.max Optional upper bound (inclusive) on MonthYear
#' @param max.percent Upper limit applied to the plotting column percent_sequenced_toplot (default 100)
#'
#' @returns a combined dataframe with case load and sequenced
#' @importFrom zoo as.yearmon
#' @importFrom magrittr %>%
#' @importFrom dplyr bind_rows group_by summarise_all filter arrange
#' @importFrom tidyr spread
#' @export
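#' @examples
#' \dontrun{
#' # Sketch only: `sequenced_long` and `confirmed_long` are assumed long
#' # dataframes with MonthYear, State, value (and type) columns as produced
#' # elsewhere in this package.
#' combined <- CombineSequencedCases(
#'   cases_sequenced = sequenced_long,
#'   confirmed_long = confirmed_long
#' )
#' }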
CombineSequencedCases <- function(cases_sequenced, confirmed_long,
prune_oversequenced = TRUE,
month.min = NULL, month.max = NULL,
max.percent = 100) {
empty_df <- expand.grid(as.yearmon(x = unique(x = as.character(confirmed_long$MonthYear))),
sort(unique(confirmed_long$State)),
0,
stringsAsFactors = FALSE
)
colnames(empty_df) <- c("MonthYear", "State", "value") # , 'type')
empty_df <- empty_df %>% arrange(State, MonthYear)
total_seq <- bind_rows(empty_df, cases_sequenced) %>%
group_by(State, MonthYear) %>%
summarise_all(list(value = sum))
total_seq$type <- "Sequenced"
cases_and_shared <- bind_rows(confirmed_long, total_seq) %>% arrange(State, MonthYear)
cases_and_shared <- spread(data = cases_and_shared, key = type, value = value)
cases_and_shared[is.na(cases_and_shared)] <- 0
cases_and_shared["percent_sequenced_collected"] <- 100 * cases_and_shared$Sequenced / cases_and_shared$Confirmed
cases_and_shared$percent_sequenced_collected[is.infinite(cases_and_shared$percent_sequenced_collected)] <- NA
if (prune_oversequenced) {
cases_and_shared$percent_sequenced_collected[(cases_and_shared$percent_sequenced_collected) > 100] <- 100
}
if (!is.null(month.min)) {
cases_and_shared <- cases_and_shared %>% filter(MonthYear > month.min)
}
if (!is.null(month.max)) {
cases_and_shared <- cases_and_shared %>% filter(MonthYear <= month.max)
}
cases_and_shared$percent_sequenced_toplot <- cases_and_shared$percent_sequenced_collected
cases_and_shared$percent_sequenced_toplot[cases_and_shared$percent_sequenced_toplot > max.percent] <- max.percent
cases_and_shared$MonthYear <- factor(x = cases_and_shared$MonthYear)
return(cases_and_shared)
}
#' Filter GISAID metadata for India
#' @param gisaid_metadata_all A dataframe with all GISAID metadata
#' @returns A dataframe restricted to human-host entries from India with recognised state names
#' @export
#' @importFrom magrittr %>%
#' @importFrom dplyr filter arrange
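#' @examples
#' \dontrun{
#' # Sketch only: `gisaid_metadata_all` is an assumed dataframe with
#' # Country, Host, State and MonthYearCollected columns.
#' gisaid_india <- FilterGISAIDIndia(gisaid_metadata_all)
#' }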
FilterGISAIDIndia <- function(gisaid_metadata_all) {
gisaid_india_all <- gisaid_metadata_all %>%
filter(Country == "India") %>%
filter(Host == "Human")
india_states <- GetIndianStates()
gisaid_india_all$State <- CleanIndianStates(gisaid_india_all$State)
gisaid_india_all <- gisaid_india_all %>% filter(State %in% india_states)
gisaid_india_all <- gisaid_india_all %>%
arrange(State, MonthYearCollected)
return(gisaid_india_all)
}
#' Collapse pangolin lineages to a list of VOCs
#' @param variant_df A dataframe with a "pangolin_lineage" column
#' @param vocs A named list with the VOC (variant of concern) name as key and a vector of lineages under that VOC.
#' The method will replace all lineages under a VOC with its name.
#' @param custom_voc_mapping A named vector with a custom mapping for naming some lineages. See example.
#' @param summarize Whether to summarise numeric columns per MonthYearCollected and collapsed lineage. Default is TRUE
#' @returns A dataframe with a new column "lineage_collapsed".
#'
#' @importFrom magrittr %>%
#' @importFrom dplyr case_when group_by mutate select_if summarise_all ungroup
#' @importFrom stringr str_to_title
#' @export
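#' @examples
#' # Minimal sketch with toy data; the columns MonthYearCollected,
#' # pangolin_lineage and n mirror the summaries produced by this package.
#' toy <- data.frame(
#'   MonthYearCollected = zoo::as.yearmon(as.Date(c("2022-01-15", "2022-01-20", "2022-02-05"))),
#'   pangolin_lineage = c("BA.1", "AY.4", "B.1.1.7"),
#'   n = c(10, 5, 2)
#' )
#' CollapseLineageToVOCs(toy,
#'   vocs = GetVOCs(),
#'   custom_voc_mapping = c("XBB.*" = "XBB"),
#'   summarize = TRUE
#' )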
CollapseLineageToVOCs <- function(variant_df, vocs = GetVOCs(), custom_voc_mapping = NULL, summarize = TRUE) {
if (!"pangolin_lineage" %in% colnames(variant_df)) {
stop("pangolin_lineage column is not present in the input")
}
variant_df$lineage_collapsed <- "Others"
for (name in names(vocs)) {
variant_df <- variant_df %>% mutate(lineage_collapsed = case_when(
# pangolin_lineage %in% vocs[[!!name]] ~ str_to_title(!!name),
grepl(pattern = paste(vocs[[!!name]], collapse = "|"), x = pangolin_lineage) ~ str_to_title(!!name),
TRUE ~ lineage_collapsed
))
}
if (!is.null(custom_voc_mapping)) {
for (name in names(custom_voc_mapping)) {
if (grepl(pattern = "\\*", x = name)) {
variant_df <- variant_df %>% mutate(lineage_collapsed = case_when(
grepl(pattern = !!name, x = pangolin_lineage) ~ custom_voc_mapping[[!!name]],
TRUE ~ lineage_collapsed
))
} else {
variant_df <- variant_df %>% mutate(lineage_collapsed = case_when(
pangolin_lineage %in% c(name) ~ custom_voc_mapping[[!!name]],
TRUE ~ lineage_collapsed
))
}
}
}
if (summarize) {
variant_df$pangolin_lineage <- NULL
variant_df <- variant_df %>%
group_by(MonthYearCollected, lineage_collapsed) %>%
select_if(is.numeric) %>%
summarise_all(list(prevalence = sum)) %>%
ungroup()
}
variant_df$MonthYearCollectedFactor <- factor(as.character(variant_df$MonthYearCollected),
levels = as.character(sort(unique(variant_df$MonthYearCollected)))
)
return(variant_df)
}
#' Summarize the total number of variants per month
#' @param variant_df A dataframe
#' @param by_state Whether to count per state as well as per month. Default is FALSE
#' @returns A dataframe with monthwise counts of each variant sequenced
#' @importFrom magrittr %>%
#' @importFrom dplyr count group_by ungroup
#' @importFrom tidyr drop_na
#' @export
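#' @examples
#' # Illustrative sketch with toy metadata:
#' toy <- data.frame(
#'   MonthYearCollected = zoo::as.yearmon(as.Date(c("2022-01-15", "2022-01-20", "2022-02-05"))),
#'   pangolin_lineage = c("BA.1", "BA.1", "AY.4")
#' )
#' SummarizeVariantsMonthwise(toy)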
SummarizeVariantsMonthwise <- function(variant_df, by_state = FALSE) {
if (by_state) {
variant_df <- variant_df %>%
group_by(State, MonthYearCollected, pangolin_lineage) %>%
count() %>%
ungroup() %>%
drop_na()
return(variant_df)
} else {
variant_df <- variant_df %>%
group_by(MonthYearCollected, pangolin_lineage) %>%
count() %>%
ungroup() %>%
drop_na()
}
return(variant_df)
}
#' Summarize the total number of variants per week
#' @param variant_df A dataframe
#' @param by_state Whether to count per state as well as per week. Default is FALSE
#' @returns A dataframe with weekwise counts of each variant sequenced
#' @importFrom magrittr %>%
#' @importFrom dplyr count group_by ungroup
#' @importFrom tidyr drop_na
#' @export
SummarizeVariantsWeekwise <- function(variant_df, by_state = FALSE) {
if (by_state) {
variant_df <- variant_df %>%
group_by(State, WeekYearCollected, lineage_collapsed) %>%
count() %>%
ungroup() %>%
drop_na()
return(variant_df)
} else {
variant_df <- variant_df %>%
group_by(WeekYearCollected, lineage_collapsed) %>%
count() %>%
ungroup() %>%
drop_na()
}
return(variant_df)
}
#' Summarize the total number of variants per collection date
#' @param variant_df A dataframe
#' @param by_state Whether to count per state as well as per date. Default is FALSE
#' @returns A dataframe with datewise counts of each variant sequenced
#' @importFrom magrittr %>%
#' @importFrom dplyr count group_by ungroup
#' @importFrom tidyr drop_na
#' @export
SummarizeVariantsDatewise <- function(variant_df, by_state = FALSE) {
if (by_state) {
variant_df <- variant_df %>%
group_by(State, DateCollectedNumeric, lineage_collapsed) %>%
count() %>%
ungroup() %>%
drop_na()
return(variant_df)
} else {
variant_df <- variant_df %>%
group_by(DateCollectedNumeric, lineage_collapsed) %>%
count() %>%
ungroup() %>%
drop_na()
}
return(variant_df)
}
#' Convert monthwise counts to prevalence
#' @param variant_df A dataframe
#' @returns A dataframe with monthwise prevalence of variants
#' @importFrom magrittr %>%
#' @importFrom stringr str_to_title
#' @importFrom dplyr group_by mutate summarise ungroup filter
#' @export
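#' @examples
#' # Illustrative sketch using toy monthwise counts (column n):
#' counts <- data.frame(
#'   MonthYearCollected = zoo::as.yearmon(as.Date(c("2022-01-15", "2022-01-20"))),
#'   pangolin_lineage = c("BA.1", "AY.4"),
#'   n = c(8, 2)
#' )
#' CountsToPrevalence(counts)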
CountsToPrevalence <- function(variant_df) {
variant_df <- variant_df %>%
group_by(MonthYearCollected) %>%
mutate(n_sum = sum(n)) %>%
ungroup() %>%
group_by(MonthYearCollected, pangolin_lineage) %>%
summarise(prevalence = 100 * n / n_sum) %>%
ungroup() %>%
filter(!is.na(MonthYearCollected))
variant_df$prevalence <- as.numeric(variant_df$prevalence)
return(variant_df)
}
#' Get total number of sequenced samples per month per state in a country
#' @param variant_df A dataframe with Country, State and MonthYearCollected columns
#' @param drop_country Whether to drop the Country column from the output. Default is FALSE
#' @returns A dataframe with sequencing statistics per state per country
#' @importFrom magrittr %>%
#' @importFrom dplyr arrange count group_by rename
#' @importFrom tidyr drop_na
#' @export
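#' @examples
#' # Illustrative sketch with toy metadata:
#' toy <- data.frame(
#'   Country = "India",
#'   State = c("Kerala", "Kerala", "Delhi"),
#'   MonthYearCollected = zoo::as.yearmon(as.Date(c("2022-01-15", "2022-01-20", "2022-02-05")))
#' )
#' TotalSequencesPerMonthStatewise(toy)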
TotalSequencesPerMonthStatewise <- function(variant_df, drop_country = FALSE) {
df <- variant_df %>%
group_by(Country, State, MonthYearCollected) %>%
count() %>%
arrange(Country, State, MonthYearCollected) %>%
drop_na() %>%
rename(MonthYear = MonthYearCollected, value = n)
if (drop_country) {
df$Country <- NULL
}
return(df)
}
#' Get total sequenced samples per month per country
#' @param variant_df A dataframe with Country and MonthYearCollected columns
#' @param rename_country_as_state Whether to report the country under the State column and drop the Country column. Default is TRUE
#' @returns A dataframe with sequencing statistics per country
#' @importFrom magrittr %>%
#' @importFrom dplyr arrange count group_by rename
#' @importFrom tidyr drop_na
#' @export
TotalSequencesPerMonthCountrywise <- function(variant_df, rename_country_as_state = TRUE) {
df <- variant_df %>%
group_by(Country, MonthYearCollected) %>%
count() %>%
arrange(Country, MonthYearCollected) %>%
drop_na() %>%
rename(MonthYear = MonthYearCollected, value = n)
if (rename_country_as_state) {
# report the country under the State column and drop the Country column
df$State <- df$Country
df$Country <- NULL
}
return(df)
}
#' Check whether colors are not yellow-like
#' @param hex_color A vector of hex color codes
#' @returns A logical vector that is TRUE for colors that are not yellow-like
#' @importFrom grDevices col2rgb
#' @noRd
KeepNonYellow <- function(hex_color) {
# Convert hex color codes to RGB values
rgb.array <- col2rgb(hex_color)
r <- rgb.array[1, ]
g <- rgb.array[2, ]
b <- rgb.array[3, ]
# Define thresholds for yellow-like colors (adjust as needed)
r_threshold <- 200
g_threshold <- 190
b_threshold <- 150
# Check if the color is yellow-like
is_yellow_like <- (r > r_threshold) & (g > g_threshold) & (b < b_threshold)
return(!is_yellow_like)
}