R/conversion_functions.R

Defines functions convert_gem_file_to_standard_format convert_idd_file_to_standard_format

Documented in convert_gem_file_to_standard_format convert_idd_file_to_standard_format

#########################################################################################################
# Function to convert IDD aggregated file to standard (i.e. APL aggregated format) for plotting
# Input - IDD aggregate file name <fname>, should have the following columns:
# -- "time", "quantile", "hosp_curr", "cum_death", "death", "hosp",
# -- "confirmed", "cum_confirmed" (when useCases=TRUE, the default) or
# -- "infections", "cum_infections" (when useCases=FALSE),
# -- and OPTIONALLY, "USPS"
#
# Output - standard formatted data frame; if writefile flag is TRUE, will write file to
#     csv format, output filename is "<fname>_apl_format.csv"
#########################################################################################################

#' Convert from IDD formatted files to standard APL Output aggregation format
#'
#' This function will convert an IDD-formatted file to standard APL model
#' output, and optionally, save as a new file in same path.
#'
#' The input file must contain the columns `time`, `quantile`, `hosp_curr`,
#' `cum_death`, `death` and `hosp`, plus the case columns selected by
#' `useCases`. A `USPS` column is optional; when absent it is created and
#' filled with empty strings. The output holds the columns `USPS`,
#' `date_inds` (days elapsed since 2019-12-31), `inf`, `hosps`, `hosp_curr`,
#' `deaths`, `cuminf`, `cumdeaths`, `q` and `date`. Quantile labels are recoded
#' from percent strings to the "pNNN" form (e.g. "2.5%" becomes "p025"); any
#' label outside the recoding table becomes `NA`.
#'
#' @param fname string path to filename to convert
#' @param writefile default is FALSE, use TRUE to write to file (in same path)
#' @param useCases default is TRUE. This will look specifically at "confirmed"
#'   and "cum_confirmed" as the columns to be used for confirmed cases
#'   (previously, the package looked by default at infections and
#'   cum_infections, which were then manipulated to estimate confirmed cases,
#'   for example, using the hospscalar parameter in plotting functions). Use
#'   FALSE to read "infections" and "cum_infections" instead.
#' @return Either returns the converted data frame if writefile=FALSE,
#'   otherwise, returns the name of the newly written file
#'   (`fname` with a trailing ".csv" replaced by "_apl_format.csv")
#' @export
#' @examples
#' \dontrun{
#' convert_idd_file_to_standard_format("iddfile.csv")
#' convert_idd_file_to_standard_format("iddfile.csv", writefile = TRUE)
#' }
convert_idd_file_to_standard_format <- function(fname,
                                                writefile = FALSE,
                                                useCases = TRUE) {
  idd <- readr::read_csv(fname, col_types = readr::cols())

  # Case counts come either from the confirmed-case columns or from the
  # infection columns, depending on useCases.
  if (useCases) {
    idd <- idd %>%
      dplyr::rename(inf = confirmed, cuminf = cum_confirmed)
  } else {
    idd <- idd %>%
      dplyr::rename(inf = infections, cuminf = cum_infections)
  }

  idd <- idd %>%
    dplyr::rename(
      date = time,
      q = quantile,
      deaths = death,
      cumdeaths = cum_death,
      hosps = hosp
    ) %>%
    # date index: number of days since 2019-12-31
    dplyr::mutate(date_inds = as.double(date - as.Date("2019-12-31")))

  # USPS is optional in the input; supply an empty placeholder when missing
  if (!"USPS" %in% colnames(idd)) {
    idd <- idd %>%
      dplyr::mutate(USPS = "")
  }

  idd <- idd %>%
    dplyr::select(
      USPS, date_inds, inf, hosps, hosp_curr,
      deaths, cuminf, cumdeaths, q, date
    ) %>%
    # recode percent labels to "pNNN"; unlisted labels become NA
    dplyr::mutate(q = dplyr::case_when(
      q == "1%" ~ "p010",
      q == "2.5%" ~ "p025",
      q == "5%" ~ "p050",
      q == "10%" ~ "p100",
      q == "15%" ~ "p150",
      q == "20%" ~ "p200",
      q == "25%" ~ "p250",
      q == "30%" ~ "p300",
      q == "35%" ~ "p350",
      q == "40%" ~ "p400",
      q == "45%" ~ "p450",
      q == "50%" ~ "p500",
      q == "55%" ~ "p550",
      q == "60%" ~ "p600",
      q == "65%" ~ "p650",
      q == "70%" ~ "p700",
      q == "75%" ~ "p750",
      q == "80%" ~ "p800",
      q == "85%" ~ "p850",
      q == "90%" ~ "p900",
      q == "95%" ~ "p950",
      q == "97.5%" ~ "p975",
      q == "99%" ~ "p990"
    ))

  if (!writefile) {
    return(idd)
  }

  # Strip only a literal, trailing ".csv". An unanchored ".csv" pattern is a
  # regex whose "." matches any character, so it could remove text from the
  # middle of the path (e.g. the "ycsv" in "mycsvfile.csv").
  newfilename <- paste0(sub("\\.csv$", "", fname), "_apl_format.csv")
  readr::write_csv(idd, newfilename)
  cat("\nConverted File written to: ", newfilename, "\n", sep = "")
  newfilename
}

#########################################################################################################
# Function to convert GEM [non]aggregated file to standard (i.e. APL [non]aggregated format) for plotting
# Input - GEM [non]aggregate file name <fname>, should have the following columns:
#
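# -- first column "state_name" or "country", then "date", "q", "daily_cases",
# -- "cumulative_cases", "daily_deaths", "cumulative_deaths"
# -- (any other columns are not carried over to the output)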
#
#
# Output - standard formatted data frame; if writefile flag is TRUE, will write file to
#     csv format, output filename is "<fname>_apl_format.csv"
#########################################################################################################
#' Convert from GEM formatted files to standard APL Output aggregation format
#'
#' This function will convert a GEM-formatted file to standard APL model
#' output, and optionally, save as a new file in same path.
#'
#' The first column of the input must be named `state_name` or `country`;
#' anything else raises an error. When it is `state_name`, the names are
#' replaced by the `state_abbreviation` values from
#' `iddplotting::statenames()` (after mapping "US Virgin Islands" to
#' "Virgin Islands"), and rows whose name has no match in that table are
#' dropped by the inner join. The input must also contain the columns `date`,
#' `q`, `daily_cases`, `cumulative_cases`, `daily_deaths` and
#' `cumulative_deaths`.
#'
#' Only the quantiles 0.025, 0.25, 0.5, 0.75 and 0.975 are kept, relabelled
#' "p025", "p250", "p500", "p750" and "p975". If any of these labels is absent
#' after filtering, rows for it are created by copying the values of the rows
#' that are present, so that all five labels occur in the output.
#'
#' @param fname string path to filename to convert
#' @param writefile default is FALSE, use TRUE to write to file (in same path)
#' @return Either returns the converted data frame if writefile=FALSE,
#'   otherwise, returns the name of the newly written file
#'   (`fname` with a trailing ".csv" replaced by "_apl_format.csv")
#' @export
#' @examples
#' \dontrun{
#' convert_gem_file_to_standard_format("gemfile.csv")
#' convert_gem_file_to_standard_format("gemfile.csv", writefile = TRUE)
#' }
convert_gem_file_to_standard_format <- function(fname, writefile = FALSE) {
  # read in the file
  gem <- readr::read_csv(fname, col_types = readr::cols())

  # The first column identifies the location. identical() is used so that a
  # file with no columns (first name is NA) reaches the informative error
  # below instead of failing on an NA condition.
  first_col <- colnames(gem)[1]
  if (identical(first_col, "state_name")) {
    # swap full state names for their abbreviations
    gem <- gem %>%
      dplyr::mutate(
        state_name = dplyr::if_else(
          state_name == "US Virgin Islands", "Virgin Islands", state_name
        )
      ) %>%
      dplyr::inner_join(iddplotting::statenames(), by = "state_name") %>%
      dplyr::mutate(state_name = state_abbreviation) %>%
      dplyr::select(-state_abbreviation)
  } else if (!identical(first_col, "country")) {
    stop(
      "Possible Error - first column expected as 'country' or 'state_name'",
      call. = FALSE
    )
  }

  gem <- gem %>%
    # rename first column
    dplyr::rename(USPS = 1) %>%
    # rename other key columns
    dplyr::rename(
      inf = daily_cases,
      cuminf = cumulative_cases,
      deaths = daily_deaths,
      cumdeaths = cumulative_deaths
    ) %>%
    # create date_inds col: number of days since 2019-12-31
    dplyr::mutate(date_inds = as.double(date - as.Date("2019-12-31"))) %>%
    # select key cols
    dplyr::select(USPS, date_inds, inf, deaths, cuminf, cumdeaths, q, date) %>%
    # fix quantiles; anything outside the five supported values is dropped
    dplyr::mutate(q = dplyr::case_when(
      q == 0.025 ~ "p025",
      q == 0.25 ~ "p250",
      q == 0.5 ~ "p500",
      q == 0.75 ~ "p750",
      q == 0.975 ~ "p975",
      TRUE ~ NA_character_
    )) %>%
    dplyr::filter(!is.na(q))

  # if required q rows are missing, add them: every existing row is copied
  # once per missing label, with q set to that label
  required_q <- c("p025", "p250", "p500", "p750", "p975")
  missingp <- setdiff(required_q, unique(gem$q))
  if (length(missingp) > 0) {
    location_dates <- gem %>%
      dplyr::select(USPS, date) %>%
      dplyr::distinct()
    row_values <- gem %>%
      dplyr::select(-q)
    mdfs <- lapply(missingp, function(x) {
      dplyr::inner_join(
        location_dates %>% dplyr::mutate(q = x),
        row_values,
        by = c("USPS", "date")
      )
    })
    gem <- dplyr::bind_rows(mdfs, gem)
  }

  if (!writefile) {
    return(gem)
  }

  # Strip only a literal, trailing ".csv". An unanchored ".csv" pattern is a
  # regex whose "." matches any character, so it could remove text from the
  # middle of the path (e.g. the "ycsv" in "mycsvfile.csv").
  newfilename <- paste0(sub("\\.csv$", "", fname), "_apl_format.csv")
  readr::write_csv(gem, newfilename)
  cat("\nConverted File written to: ", newfilename, "\n", sep = "")
  newfilename
}
lmullany/iddplotting documentation built on July 26, 2020, 8:05 p.m.