R/plot.R

Defines functions plot.incidence_fit_list plot.incidence_fit add_incidence_fit plot.incidence

Documented in add_incidence_fit plot.incidence plot.incidence_fit plot.incidence_fit_list

#' Plot function for incidence objects
#'
#' This function is used to visualise the output of the [incidence()]
#' function using the package `ggplot2`. #'
#'
#' @export
#'
#' @importFrom graphics plot
#'
#' @author Thibaut Jombart \email{thibautjombart@@gmail.com}
#'   Zhian N. Kamvar \email{zkamvar@@gmail.com}
#'
#' @seealso The [incidence()] function to generate the 'incidence'
#' objects.
#'
#' @param x An incidence object, generated by the function
#' [incidence()].
#'
#' @param fit An 'incidence_fit' object as returned by [fit()].
#'
#' @param stack A logical indicating if bars of multiple groups should be
#' stacked, or displayed side-by-side.
#'
#' @param color The color to be used for the filling of the bars; NA for
#' invisible bars; defaults to "black".
#'
#' @param border The color to be used for the borders of the bars; NA for
#' invisible borders; defaults to NA.
#'
#' @param col_pal The color palette to be used for the groups; defaults to
#' `incidence_pal1`. See [incidence_pal1()] for other palettes implemented in
#' incidence.
#'
#' @param alpha The alpha level for color transparency, with 1 being fully
#' opaque and 0 fully transparent; defaults to 0.7.
#'
#' @param xlab The label to be used for the x-axis; empty by default.
#'
#' @param ylab The label to be used for the y-axis; by default, a label will be
#' generated automatically according to the time interval used in incidence
#' computation.
#'
#' @param labels_week a logical value indicating whether labels x axis tick
#' marks are in week format YYYY-Www when plotting weekly incidence; defaults to
#' TRUE.
#'
#' @param labels_iso (deprecated) This has been superceded by `labels_iso`.
#' Previously:a logical value indicating whether labels x axis tick marks are
#' in ISO 8601 week format yyyy-Www when plotting ISO week-based weekly
#' incidence; defaults to be TRUE.
#'
#' @param show_cases if `TRUE` (default: `FALSE`), then each observation will be
#' colored by a border. The border defaults to a white border unless specified
#' otherwise. This is normally used outbreaks with a small number of cases.
#' Note: this can only be used if `stack = TRUE`
#'
#' @param n_breaks the ideal number of breaks to be used for the x-axis
#'   labeling
#' 
#' @return
#'  - `plot()` a [ggplot2::ggplot()] object.
#'  - `make_breaks()` a two-element list. The "breaks" element will contain the
#'    evenly-spaced breaks as either dates or numbers and the "labels" element
#'    will contain either a vector of weeks OR a [ggplot2::waiver()] object.
#'  - `scale_x_incidence()` a \pkg{ggplot2} "ScaleContinuous" object. 
#'
#' @details 
#'  - `plot()` will visualise an incidence object using `ggplot2`
#'  - `make_breaks()` calculates breaks from an incidence object that always
#'    align with the bins and start on the first observed incidence. 
#'  - `scale_x_incidence()` produces and appropriate `ggplot2` scale based on
#'    an incidence object. 
#' 
#' @examples
#'
#' if(require(outbreaks) && require(ggplot2)) { withAutoprint({
#'   onset <- outbreaks::ebola_sim$linelist$date_of_onset
#'
#'   ## daily incidence
#'   inc <- incidence(onset)
#'   inc
#'   plot(inc)
#'
#'   ## weekly incidence
#'   inc.week <- incidence(onset, interval = 7)
#'   inc.week
#'   plot(inc.week) # default to label x axis tick marks with isoweeks
#'   plot(inc.week, labels_week = FALSE) # label x axis tick marks with dates
#'   plot(inc.week, border = "white") # with visible border
#'
#'   ## use group information
#'   sex <- outbreaks::ebola_sim$linelist$gender
#'   inc.week.gender <- incidence(onset, interval = "1 epiweek", groups = sex)
#'   plot(inc.week.gender)
#'   plot(inc.week.gender, labels_week = FALSE)
#'
#'   ## show individual cases at the beginning of the epidemic
#'   inc.week.8 <- subset(inc.week.gender, to = "2014-06-01")
#'   p <- plot(inc.week.8, show_cases = TRUE, border = "black")
#'   p
#'
#'   ## update the range of the scale
#'   lim <- c(min(get_dates(inc.week.8)) - 7*5, 
#'            aweek::week2date("2014-W50", "Sunday"))
#'   lim
#'   p + scale_x_incidence(inc.week.gender, limits = lim)
#'
#'   ## customize plot with ggplot2
#'   plot(inc.week.8, show_cases = TRUE, border = "black") +
#'     theme_classic(base_size = 16) +
#'     theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) 
#'
#'   ## adding fit
#'   fit <- fit_optim_split(inc.week.gender)$fit
#'   plot(inc.week.gender, fit = fit)
#'   plot(inc.week.gender, fit = fit, labels_week = FALSE)
#'
#' })}
#'
plot.incidence <- function(x, ..., fit = NULL, stack = is.null(fit),
                           color = "black", border = NA, col_pal = incidence_pal1,
                           alpha = .7, xlab = "", ylab = NULL,
                           labels_week = !is.null(x$weeks),
                           labels_iso = !is.null(x$isoweeks),
                           show_cases = FALSE,
                           n_breaks = 6) {
  stopifnot(is.logical(labels_iso), is.logical(labels_week))

  the_call <- match.call()
  if (any(names(the_call) == "labels_iso"))  {
    if (any(names(the_call) == "labels_week")) {
      warning("labels_iso is deprecated. The value of `labels_week` will be used.")
    } else {
      warning("labels_iso is deprecated. Use `labels_week` instead.")
      labels_week <- labels_iso
    }
  }
  ## extract data in suitable format for ggplot2
  df <- as.data.frame(x, long = TRUE, stringsAsFactors = TRUE)
  n.groups <- ncol(x$counts)
  gnames   <- group_names(x)

  
  ## Use custom labels for usual time intervals
  if (is.null(ylab)) {
    if (is.numeric(x$interval)) {
      if (x$interval == 1) {
        ylab <- "daily incidence"
      } else if (x$interval == 7) {
        ylab <- "weekly incidence"
      } else if (x$interval == 14) {
        ylab <- "semi-weekly incidence"
      } else {
        ylab <- sprintf("incidence by period of %d days",
                        x$interval)
      }
    } else if (is.character(x$interval)) {
      
      # capturing the number and type
      p     <- "(\\d*)\\s?([a-z]+?)s?$"
      num   <- gsub(p, "\\1", tolower(x$interval))
      itype <- gsub(p, "\\2", tolower(x$interval))
      if (num == "" || num == "1") {
        ylab <- sprintf("%sly incidence", itype)
      } else {
        ylab <- sprintf("incidence by a period of %s %ss", num, itype)
      }
    }
    if (!is.null(x$weeks)) {
      type_of_week <- get_type_of_week(x)
      ylab <- gsub("(weekl?y?)", sprintf("%s \\1", type_of_week), ylab)
    }
    if (isTRUE(x$cumulative)) {
      ylab <- sub("incidence", "cumulative incidence", ylab)
    }
    first_letter          <- substring(ylab, 1, 1)
    substring(ylab, 1, 1) <- toupper(first_letter)
  }

  ## Handle stacking
  stack.txt <- if (stack) "stack" else "dodge"

  ## By default, the annotation of bars in geom_bar puts the label in the
  ## middle of the bar. This is wrong in our case as the annotation of a time
  ## interval is the lower (left) bound, and should therefore be left-aligned
  ## with the bar. Note that we cannot use position_nudge to create the
  ## x-offset as we need the 'position' argument for stacking. This can be
  ## addressed by adding interval/2 to the x-axis, but this only works until we
  ## have an interval such as "month", "quarter", or "year" where the number of
  ## days for each can vary. To alleviate this, we can create a new column that
  ## counts the number of days within each interval.

  ## Adding a variable for width in ggplot
  df$interval_days <- get_interval(x, integer = TRUE)
  ## if the date type is POSIXct, then the interval is actually interval seconds
  ## and needs to be converted to days
  if (inherits(df$dates, "POSIXct")) {
    df$interval_days <- df$interval_days * 86400 # 24h * 60m * 60s
  }
  ## Important note: it seems safest to specify the aes() as part of the geom,
  ## not in ggplot(), as it interacts badly with some other geoms like
  ## geom_ribbon - used e.g. in projections::add_projections().


  ## add mid-interval positions for x-axis

  ## THIS BREAKS THE PLOT WITH ggplot2_3.3.0
  ## See bug reports at:
  ## https://github.com/reconhub/incidence/issues/119
  ## https://github.com/tidyverse/ggplot2/issues/3873
  ## Temporary fix: changing placement to default of ggplot2::scale_x_date
  
  x_axis <- "dates + (interval_days/2)"
  y_axis <- "counts"
  out <- ggplot2::ggplot(df) +
    ggplot2::geom_col(ggplot2::aes_string(
                        x = x_axis,
                        y = y_axis
                        ),
                      width = df$interval_days,
                      position = stack.txt,
                      color = border,
                      alpha = alpha) +
    ggplot2::labs(x = xlab, y = ylab)

  ## Handle show_cases here

  if (show_cases && stack) {
    squaredf <- df[rep(seq.int(nrow(df)), df$counts), ]
    squaredf$counts <- 1
    squares <- ggplot2::geom_col(ggplot2::aes_string(
                                   x = x_axis,
                                   y = y_axis
                                   ),
                                 color = if (is.na(border)) "white" else border,
                                 fill  = NA,
                                 position = "stack",
                                 data = squaredf,
                                 width = squaredf$interval_days
                                 )
    out <- out + squares
  }
  if (show_cases && !stack) {
    message("the argument `show_cases` requires the argument `stack = TRUE`")
  }
  ## Handle fit objects here; 'fit' can be either an 'incidence_fit' object,
  ## or a list of these. In the case of a list, we add geoms one after the
  ## other.

  if (!is.null(fit)) {
    if (inherits(fit, "incidence_fit")) {
      out <- add_incidence_fit(out, fit)
    } else if (is.list(fit)) {
      for (i in seq_along(fit)) {
        fit.i <- fit[[i]]
        if (!inherits(fit.i, c("incidence_fit", "incidence_fit_list"))) {
          stop(sprintf(
            "The %d-th item in 'fit' is not an 'incidence_fit' object, but a %s",
            i, class(fit.i)))
        }
        out <- add_incidence_fit(out, fit.i)
      }
    } else {
      stop("Fit must be a 'incidence_fit' object, or a list of these")
    }
  }


  ## Handle colors

  ## Note 1: because of the way 'fill' works, we need to specify it through
  ## 'aes' if not directly in the geom. This causes the kludge below, where we
  ## make a fake constant group to specify the color and remove the legend.

  ## Note 2: when there are groups, and the 'color' argument does not have one
  ## value per group, we generate colors from a color palette. This means that
  ## by default, the palette is used, but the user can manually specify the
  ## colors.

  if (n.groups < 2 && is.null(gnames)) {
    out <- out + ggplot2::aes(fill = 'a') +
      ggplot2::scale_fill_manual(values = color, guide = FALSE)
  } else {
    if (!is.null(names(color))) {
      tmp     <- color[gnames] 
      matched <- names(color) %in% names(tmp)
      if (!all(matched)) {
        removed <- paste(names(color)[!matched], 
                         color[!matched],
                         sep = '" = "',
                         collapse = '", "')
        message(sprintf("%d colors were not used: \"%s\"", sum(!matched), removed))
      }
      color <- tmp
    }
                                 
    ## find group colors
    if (length(color) != n.groups) {
      msg <- "The number of colors (%d) did not match the number of groups (%d)"
      msg <- paste0(msg, ".\nUsing `col_pal` instead.")
      default_color <- length(color) == 1L && color == "black"
      if (!default_color) {
        message(sprintf(msg, length(color), n.groups))
      }
      group.colors <- col_pal(n.groups)
    } else {
      group.colors <- color
    }

    ## add colors to the plot
    out <- out + ggplot2::aes_string(fill = "groups") +
      ggplot2::scale_fill_manual(values = group.colors)
    if (!is.null(fit)) {
      out <- out + ggplot2::aes_string(color = "groups") +
        ggplot2::scale_color_manual(values = group.colors)
    }
  }

  out <- out + scale_x_incidence(x, n_breaks, labels_week, ...)
  out
}


## This function will take an existing 'incidence' plot object ('p') and add lines from an
## 'incidence_fit' object ('x')

#' @export
#' @rdname plot.incidence
#'
#' @param p An existing incidence plot.
add_incidence_fit <- function(p, x, col_pal = incidence_pal1){
  if (inherits(x, "incidence_fit_list")) {
    x <- get_fit(x)
  }
  ## 'x' could be a list of fit, in which case all fits are added to the plot
  if (is.list(x) && !inherits(x, "incidence_fit")) {
    out <- p
    for (e in x) {
      if (inherits(e, "incidence_fit")) {
        out <- add_incidence_fit(out, e, col_pal)
      }
    }
    return(out)
  }
  df <- get_info(x, "pred")

  # In the case that the incidence object is of the type POSIXt, the data from
  # the fit object must be presented as POSIXt or ggplot2 will throw a fit.

  if (inherits(p$data$dates, "POSIXt")) {
    # I add half a day here because any fractional days (0.5) will be thrown out
    # on conversion and I'm not quite sure why that is
    df$dates <- as.POSIXlt(df$dates) + 43200 # adding half a day
    if (inherits(p$data$dates, "POSIXct")) {
      df$dates <- as.POSIXct(df$dates)
    } 
  }

  out <- suppressMessages(
    p + ggplot2::geom_line(
      data = df,
      ggplot2::aes_string(x = "dates", y = "fit"), linetype = 1) +
      ggplot2::geom_line(
        data = df,
        ggplot2::aes_string(x = "dates", y = "lwr"), linetype = 2) +
      ggplot2::geom_line(
        data = df,
        ggplot2::aes_string(x = "dates", y = "upr"), linetype = 2)
  )


  if ("groups" %in% names(df)) {
    n.groups <- length(levels(df$groups))
    out <- out + ggplot2::aes_string(color = "groups") +
      ggplot2::scale_color_manual(values = col_pal(n.groups))
  }

  out
}





#' @export
#' @rdname plot.incidence

plot.incidence_fit <- function(x, ...){
  base <- ggplot2::ggplot()
  out <- add_incidence_fit(base, x, ...) +
    ggplot2::labs(x = "", y = "Predicted incidence")
  out
}

#' @export
#' @rdname plot.incidence

plot.incidence_fit_list <- function(x, ...){
  base <- ggplot2::ggplot()
  fits <- get_fit(x)
  out  <- add_incidence_fit(base, fits, ...) +
    ggplot2::labs(x = "", y = "Predicted incidence")
  out
}

Try the incidence package in your browser

Any scripts or data that you put into this service are public.

incidence documentation built on Nov. 8, 2020, 4:30 p.m.