scripts/generate_idd_plots.R

#! /usr/bin/env
options(warn=-1)
suppressMessages({
  library("optparse",quietly = T)
  library("iddplotting",quietly = T)
})

option_list <- list(
  make_option(c("-r", "--rundate"), default="", help="Run date for the model"),
  make_option(c("-g", "--geolevel"), default="", help="Geo level for model (us, state, county)"),
  make_option(c("-s", "--scenario"), default="", help="Description for Plot Caption"),
  make_option(c("-m", "--modelfile"), default="", help = "File with model data"),
  make_option("--convertIDD", action="store_true",default=FALSE, help="Should source file be converted from IDD format?"),
  make_option("--modelstart", default="2020-03-01", help="Start date for displaying model data, default is 2020-03-01"),
  make_option("--modelend", default="2020-09-30", help="End date for displaying model data, default is 2020-07-31"),
  make_option(c("-e", "--empirical"), default="usafacts", help = "Source for empirical data ('usafacts' [default], 'csse' or a file path to custom empirical data source)"),
  make_option(c("-c","--cssepath"), default="", help="Path to CSSE git repo"),
  make_option("--savepath",default=".", help="Path to save plots [default is current path]"),
  make_option("--hospscaler",default="0", help="Scaler for conversion of Infections to Cases using Hosp (defaults to 0 [No Scaling])"),
  make_option("--infscaler",default="1", help="Scaler for conversion of Infections to Cases using Inf (default to 1 [No Scaling])"),
  make_option("--caption2", default="", help="Optional 2nd line caption, defaults to none"),
  make_option("--ytransform", default="sqrt", help="Transformation of the y-axis for plots (default is sqrt)"),
  make_option("--smooth", action="store_true", default=TRUE, help="Add smoothing curve to empirical data? (uses cubic spline regression) (default is TRUE)"),
  make_option("--smooth_ci", action="store_true", default=FALSE, help="Add CI to smoothing curve for empirical data (default is FALSE)"),
  make_option("--legend_location",default="belowplot", help = "Legend location; either belowplot (default), topleft, or bottomright"),
  make_option("--labsize", default="8", help="Size of axis label and legend text (default = 8"),
  make_option("--figscale", default="1.6", help="Overall figure scaling; default is 1.6, higher numbers will reduce text size overall")
)


opt <- parse_args(OptionParser(option_list=option_list))

#some checks // required arguments
if(opt$empirical %in% c("usafacts","csse") & opt$cssepath=="") {stop("CSSE path must be provided, unless --empirical is set to a custom file")}
if(opt$rundate=="") {stop("rundate must be provided")}
if(opt$scenario=="") {stop("scenario description must be provided")}
if(opt$modelfile=="") {stop("modelfile must be provided")}
if(opt$geolevel=="") {stop("geolevel (one of us, state, county) must be provided")}

#some checks // existence of paths?
if(opt$empirical %in% c("usafacts","csse") & !dir.exists(opt$cssepath)) {stop("cssepath does not exist")}
if(!dir.exists(opt$savepath)) {stop("savepath does not exist")}
if(!file.exists(opt$modelfile)) {stop(paste0("modelfile does not exist",opt$modelfile))}

base_caption = opt$scenario
if(opt$caption2!="") {
  base_caption = paste(base_caption, opt$caption2, sep="\n")
}

#if conversion required, do so
if(opt$convertIDD) {
  opt$modelfile <- convert_idd_file_to_standard_format(opt$modelfile, useCases = T, writefile = T)
}

#prepare the plot data
basenames <- list("us"=NULL, "state"=NULL, "county"=NULL)
basenames[[opt$geolevel]] <- opt$modelfile

#hosp scaling?
opt$hospscaler <- as.double(opt$hospscaler)
if(opt$hospscaler>=1) {
  #we have hospital scaling
  cat("\n","Scaling all infections by hospitalization x ", opt$hospscaler,"\n")
  hospscaler = opt$hospscaler
} else {
  hospscaler = NULL
}

opt$labsize=as.numeric(opt$labsize)
opt$figscale=as.numeric(opt$figscale)

pd <- prepare_plot_datasets(us_model_file_name = basenames$us,
                            state_model_file_name = basenames$state,
                            county_model_file_name = basenames$county,
                            model_filter_dates = c(opt$modelstart, opt$modelend),
                            empirical_data_type = opt$empirical,
                            csse_repo_path = opt$cssepath,
                            hospscaler = hospscaler)

###################################################
#function to scale infections, using infscaler
###################################################
scale_infections <- function(df, factor) {
  df <- df %>% dplyr::mutate_at(.vars=dplyr::vars(dplyr::starts_with("infections"), dplyr::starts_with("cuminfections")), ~.x*factor)
  return(df)
}


opt$infscaler = as.double(opt$infscaler)
if(opt$infscaler!=1) {
  cat("\n","Scaling all infections by 1/", opt$infscaler,"\n")
  pd <- lapply(pd, scale_infections, factor=opt$infscaler)
}


#set custom dates
#customdates = as.Date(c("2020-03-01","2020-03-15","2020-04-01","2020-04-15","2020-05-01","2020-05-15",
#                        "2020-06-01","2020-06-15", "2020-07-01","2020-07-15","2020-08-01"))

if(opt$geolevel == "us") {
  invisible(lapply(c("Cases", "Deaths"), function(outcome_type) {
    if(!is.null(hospscaler) & outcome_type=="Cases") {
      cumultypes = c(FALSE)
    } else {
      cumultypes = c(TRUE,FALSE)
    }
    invisible(lapply(cumultypes, function(cumul_type) {
      #get plot data
      plt_df <- get_plot_data(pd$us, outcome=outcome_type, geo_level = "us", cumulative=cumul_type)
      #second, get plot
      plt <- invisible(get_base_plot(plt_df, ytransform = opt$ytransform, caption_below_plot = base_caption,
                                     smooth = opt$smooth, smooth_ci = opt$smooth_ci, legend_location = opt$legend_location, labsize = opt$labsize))
      #get name for plot
      fname = paste0(opt$savepath, "/us_", outcome_type, "_", dplyr::if_else(cumul_type,"cumulative_", ""), opt$rundate, ".png")
      print(fname)
      #save plot
      invisible(ggplot2::ggsave(fname, plt, height=4,width=7, unit="in", scale=opt$figscale))
    }))
  }))
}

if(opt$geolevel == "state") {
  invisible(lapply(c("Cases", "Deaths"), function(outcome_type) {
    if(!is.null(hospscaler) & outcome_type=="Cases") {
      cumultypes = c(FALSE)
    } else {
      cumultypes = c(TRUE,FALSE)
    }
    invisible(lapply(cumultypes, function(cumul_type) {
      invisible(lapply(pd$state %>% dplyr::distinct(USPS) %>% dplyr::pull(), function(s) {
        #first get the plot sub_dataframe
        plt_df <- get_plot_data(pd$state,outcome=outcome_type, geo_level="state", state = s, cumulative=cumul_type)
        #second, get plot
        plt <- get_base_plot(plt_df, ytransform = opt$ytransform, caption_below_plot = base_caption,
                             smooth = opt$smooth, smooth_ci = opt$smooth_ci,legend_location = opt$legend_location,labsize = opt$labsize)
        #get name for plot
        fname = paste0(opt$savepath, s,"_",outcome_type, "_", dplyr::if_else(cumul_type,"cumulative_", ""), opt$rundate, ".png")
        print(fname)
        #save plot
        ggplot2::ggsave(fname, plt, height=4,width=7, unit="in", scale=opt$figscale)
    }))
  }))
}))
}
lmullany/iddplotting documentation built on July 26, 2020, 8:05 p.m.