Nothing
#' Create an epidemic curve plot or bin/count observations by date periods
#'
#' Creates a epicurve plot for visualizing epidemic case counts in outbreaks (epidemiological curves).
#' An epicurve is a bar plot, where every case is outlined. \code{geom_epicurve} additionally provides
#' date-based aggregation of cases (e.g. per week or month and many more).
#' - For week aggregation both isoweek (World + ECDC) and epiweek (US CDC) are supported.
#' - `stat_bin_date` and its alias `stat_date_count` provide date based binning only. After binning the by date, these
#' stats behave like [ggplot2::stat_count].
#'
#' @param mapping Set of aesthetic mappings created by \code{\link[ggplot2]{aes}}. Commonly used mappings:
#' * **x or y**: date or datetime. Numeric is technically supported.
#' * **fill**: for colouring groups
#' * **weight**: if data is already aggregated (e.g. case counts)
#' @param data The data frame containing the variables for the plot
#' @param stat either "`epicurve`" for outlines around cases or "`bin_date`" for outlines around (fill) groups.
#' For large numbers of cases please use "`bin_date`" to reduce the number of drawn rectangles.
#' @param position Position adjustment. Currently supports "`stack`" for `geom_epicurve()`.
#' @param date_resolution Character string specifying the time unit for date aggregation.
#' Set to \code{NULL} or `NA` for no date aggregation \cr
#' Possible values are: `"day"`, `"week"`, `"month"`, `"bimonth"`, `"season"`, `"quarter"`, `"halfyear"`, `"year"`.
#' To special values enforce ISO or US week standard:
#' - `isoweek` will force `date_resolution = week` and `week_start = 1` (ISO and ECDC Standard)
#' - `epiweek` will force `date_resolution = week` and `week_start = 7` (US CDC Standard)
#' @param week_start Integer specifying the start of the week (1 = Monday, 7 = Sunday). \cr
#' Only used when date_resolution includes weeks. Defaults to 1 (Monday). \cr
#' For isoweek use \code{week_start = 1} and for epiweek use \code{week_start = 7}.
#' @param width Numeric value specifying the width of the bars. If \code{NULL}, calculated
#' based on resolution and relative.width
#' @param relative.width Numeric value between 0 and 1 adjusting the relative width
#' of bars. Defaults to 1
#' @param geom The geometric object to use to display the data for this layer.
#' When using a `stat_*()` function to construct a layer, the `geom` argument
#' can be used to override the default coupling between stats and geoms.
#' @param ... Other arguments passed to \code{\link[ggplot2]{layer}}. For example:
#' * \code{colour} Colour of the outlines around cases. Disable with colour = NA. Defaults to "white".
#' * \code{linewidth} Width of the case outlines.
#' @inheritParams ggplot2::geom_bar
#'
#' @details
#' Epi Curves are a public health tool for outbreak investigation. For more details see the references.
#'
#' @return A `ggplot2` geom layer that can be added to a plot
#' @seealso [scale_y_cases_5er()], [geom_vline_year()]
#' @export
#'
#' @references
#' - Centers for Disease Control and Prevention. Quick-Learn Lesson:
#' Using an Epi Curve to Determine Mode of Spread. USA. \url{https://www.cdc.gov/training/quicklearns/epimode/}
#' - Dicker, Richard C., Fátima Coronado, Denise Koo, and R. Gibson Parrish. 2006.
#' Principles of Epidemiology in Public Health Practice; an Introduction to Applied Epidemiology and Biostatistics.
#' 3rd ed. USA. \url{https://stacks.cdc.gov/view/cdc/6914}
#'
#' @examples
#' # Basic epicurve with dates
#' library(ggplot2)
#' set.seed(1)
#'
#' plot_data_epicurve_imp <- data.frame(
#' date = rep(as.Date("2023-12-01") + ((0:300) * 1), times = rpois(301, 0.5))
#' )
#'
#' ggplot(plot_data_epicurve_imp, aes(x = date, weight = 2)) +
#' geom_vline_year(break_type = "week") +
#' geom_epicurve(date_resolution = "week") +
#' labs(title = "Epicurve Example") +
#' scale_y_cases_5er() +
#' scale_x_date(date_breaks = "4 weeks", date_labels = "W%V'%g") + # Correct ISOWeek labels week'year
#' coord_equal(ratio = 7) + # Use coord_equal for square boxes. 'ratio' are the days per week.
#' theme_bw()
#'
#' # Categorical epicurve
#' library(tidyr)
#' library(outbreaks)
#'
#' sars_canada_2003 |> # SARS dataset from outbreaks
#' pivot_longer(starts_with("cases"), names_prefix = "cases_", names_to = "origin") |>
#' ggplot(aes(x = date, weight = value, fill = origin)) +
#' geom_epicurve(date_resolution = "week") +
#' scale_x_date(date_labels = "W%V'%g", date_breaks = "2 weeks") +
#' scale_y_cases_5er() +
#' theme_classic()
geom_epicurve <- function(mapping = NULL, data = NULL,
stat = "epicurve", position = "stack",
date_resolution = NULL,
week_start = getOption("lubridate.week.start", 1),
width = NULL, relative.width = 1,
..., na.rm = FALSE, show.legend = NA, inherit.aes = TRUE) {
ggplot2::layer(
geom = GeomEpicurve,
mapping = mapping,
data = data,
stat = stat,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
width = width,
relative.width = relative.width,
date_resolution = date_resolution,
week_start = week_start,
na.rm = na.rm,
...
)
)
}
#' @rdname geom_epicurve
#' @export
stat_bin_date <- function(mapping = NULL, data = NULL,
geom = "line", position = "identity",
date_resolution = NULL,
week_start = getOption("lubridate.week.start", 1),
...,
na.rm = FALSE,
show.legend = NA,
inherit.aes = TRUE) {
ggplot2::layer(
data = data,
mapping = mapping,
stat = StatBinDate,
geom = geom,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
date_resolution = date_resolution,
week_start = week_start,
na.rm = na.rm,
...
)
)
}
#' @rdname geom_epicurve
#' @export
stat_date_count <- stat_bin_date
#' @import ggplot2
#' @import dplyr
#' @import rlang
#' @importFrom cli cli_abort
#' @rdname geom_epicurve
#' @format NULL
#' @usage NULL
#' @export
StatEpicurve <- ggproto("StatEpicurve", Stat,
required_aes = "x|y",
default_aes = aes(x = after_stat(count), y = after_stat(count), group = row_number, weight = 1),
extra_params = c("na.rm", "date_resolution", "week_start"),
setup_params = function(self, data, params) {
params$flipped_aes <- ggplot2::has_flipped_aes(data, params, main_is_orthogonal = FALSE)
has_x <- !(is.null(data$x) && is.null(params$x))
has_y <- !(is.null(data$y) && is.null(params$y))
if (!has_x && !has_y) {
cli::cli_abort("stat_epicurve requires an x or y aesthetic.")
}
if (has_x && has_y) {
cli::cli_abort("stat_epicurve must only have an x or y aesthetic.")
}
params
},
compute_layer = function(self, data, params, scales, ...) {
date_resolution <- params$date_resolution %||% NA
week_start <- params$week_start %||% 1
flipped_aes <- params$flipped_aes %||% any(data$flipped_aes) %||% FALSE
if (!is.na(date_resolution) & date_resolution == "isoweek") {
date_resolution <- "week"
week_start <- 1
} # ISO
if (!is.na(date_resolution) & date_resolution == "epiweek") {
date_resolution <- "week"
week_start <- 7
} # US
data <- ggplot2::flip_data(data, params$flipped_aes)
# Check for CoordFlip since it flips some thing and not others
if (!flipped_aes) {
sel_scale <- scales$panel_scales_x[[1]]
sel_axis <- "x-axis"
} else {
sel_scale <- scales$panel_scales_y[[1]]
sel_axis <- "y-axis"
}
# Check scale class to detect date or datetime
if (inherits(sel_scale, "ScaleContinuousDate")) {
is_date <- TRUE
} else if (inherits(sel_scale, "ScaleContinuousDatetime")) {
is_date <- FALSE
} else {
is_date <- TRUE
cli::cli_warn("{sel_axis} is not date or datetime. Assuming date scale.")
}
# Drop missing x
complete <- stats::complete.cases(data$x)
data <- data |> dplyr::filter(complete)
if (!all(complete) && !params$na.rm) {
cli::cli_warn(paste0(
"Removed {sum(!complete)} row{?s} containing missing values (geom_epicurve)."
))
}
if (!is.na(date_resolution)) {
if (is_date) data$x <- lubridate::as_date(data$x) else data$x <- lubridate::as_datetime(data$x)
data$x_ll <- as.numeric(lubridate::floor_date(data$x,
unit = date_resolution,
week_start = week_start
))
# Use ceiling to be able to infer resolution in days
data$x_ul <- as.numeric(lubridate::ceiling_date(data$x,
unit = date_resolution,
week_start = week_start,
change_on_boundary = TRUE
))
} else {
data$x_ll <- data$x
data$x_ul <- data$x
}
if (is.na(date_resolution) & !is_date) {
cli::cli_warn("It seems you provided a datetime format. Column used as specified.
Please use date_resolution = 'day' to round to day (stat_epicurve).")
}
weight <- data$weight %||% rep(1, length(data$x))
data <- data |> expand_counts(weight)
bars <- data |>
dplyr::mutate(
x = x_ll,
x_ul = x_ul,
width = if (!is.na(date_resolution)) x_ul - x_ll else NULL,
row_number = dplyr::row_number(data),
count = 1,
flipped_aes = params$flipped_aes
)
ggplot2::flip_data(bars, params$flipped_aes)
},
dropped_aes = "weight"
)
#' @import ggplot2
#' @import dplyr
#' @import rlang
#' @importFrom cli cli_abort cli_warn
#' @rdname geom_epicurve
#' @format NULL
#' @usage NULL
#' @export
StatBinDate <- ggproto("StatBinDate", Stat,
required_aes = "x|y",
default_aes = aes(!!!StatCount$default_aes),
extra_params = c("na.rm", "date_resolution", "week_start"),
setup_params = function(self, data, params) {
params$flipped_aes <- ggplot2::has_flipped_aes(data, params, main_is_orthogonal = FALSE)
has_x <- !(is.null(data$x) && is.null(params$x))
has_y <- !(is.null(data$y) && is.null(params$y))
if (!has_x && !has_y) {
cli::cli_abort("stat_epicurve requires an x or y aesthetic.")
}
if (has_x && has_y) {
cli::cli_abort("stat_epicurve must only have an x or y aesthetic.")
}
params
},
compute_group = function(self, data, scales, flipped_aes = FALSE, date_resolution = NA, week_start = 1) {
date_resolution <- date_resolution %||% NA
week_start <- week_start %||% 1
flipped_aes <- flipped_aes %||% any(data$flipped_aes) %||% FALSE
if (is.na(date_resolution)) {
cli::cli_warn("It seems you provided no date_resolution. Column used as specified.
Please use date_resolution = 'week' to round to week (stat_bin_date/date_count).")
}
if (!is.na(date_resolution) & date_resolution == "isoweek") {
date_resolution <- "week"
week_start <- 1
} # ISO
if (!is.na(date_resolution) & date_resolution == "epiweek") {
date_resolution <- "week"
week_start <- 7
} # US
data <- ggplot2::flip_data(data, flipped_aes)
# Check for CoordFlip since it flips some thing and not others
if (!flipped_aes) {
sel_scale <- scales$x
sel_axis <- "x-axis"
} else {
sel_scale <- scales$y
sel_axis <- "y-axis"
}
# Check scale class to detect date or datetime
if (inherits(sel_scale, "ScaleContinuousDate")) {
is_date <- TRUE
} else if (inherits(sel_scale, "ScaleContinuousDatetime")) {
is_date <- FALSE
} else {
is_date <- TRUE
cli::cli_warn("{sel_axis} is not date or datetime. Assuming date scale.")
}
# Drop missing x
complete <- stats::complete.cases(data$x)
data <- data |> dplyr::filter(complete)
if (!all(complete) && !params$na.rm) {
cli::cli_warn(paste0(
"Removed {sum(!complete)} row{?s} containing missing values (geom_epicurve)."
))
}
if (!is.na(date_resolution)) {
if (is_date) data$x <- lubridate::as_date(data$x) else data$x <- lubridate::as_datetime(data$x)
data$x_ll <- as.numeric(lubridate::floor_date(data$x,
unit = date_resolution,
week_start = week_start
))
# Use ceiling to be able to infer resolution in days
data$x_ul <- as.numeric(lubridate::ceiling_date(data$x,
unit = date_resolution,
week_start = week_start,
change_on_boundary = TRUE
))
} else {
data$x_ll <- data$x
data$x_ul <- data$x
}
data$weight <- data$weight %||% rep(1, length(data$x))
data <- data |>
dplyr::arrange(x_ll) |>
dplyr::group_by(x_ll, x_ul) |>
dplyr::tally(wt = weight)
bars <- data |>
dplyr::transmute(
count = n,
prop = n / sum(abs(n)), # abs? negative weights?
# TODO: incidence
x = x_ll,
x_ul = x_ul,
width = if (!is.na(date_resolution)) x_ul - x_ll else NULL,
.size = length(data$n),
flipped_aes = flipped_aes
)
ggplot2::flip_data(bars, flipped_aes)
},
dropped_aes = "weight"
)
#' @rdname geom_epicurve
#' @format NULL
#' @usage NULL
#' @export
StatDateCount <- ggproto("StatDateCount", StatBinDate)
#' @import ggplot2
#' @import dplyr
#' @import rlang
#' @import lubridate
#' @importFrom cli cli_alert_info cli_alert_warning
GeomEpicurve <- ggproto("GeomEpicurve", GeomBar,
default_aes = ggplot2:::defaults(
# colour = from_theme(paper), linewidth = from_theme(borderwidth)
aes(colour = "white", linewidth = 0.6, linetype = "solid"),
GeomBar$default_aes
),
extra_params = c(GeomBar$extra_params, "date_resolution", "relative.width"),
setup_params = function(data, params) {
params <- GeomBar$setup_params(data, params)
# Disable date binning if not specified
params$date_resolution <- params$date_resolution %||% NA
# Full (100%) width bars
params$relative.width <- params$relative.width %||% 1
if (!is.null(params$colour)) {
data$colour <- params$colour
}
if (!dplyr::between(params$relative.width, 0, 1)) {
cli::cli_warn("relative.width is {params$relative.width}.
relative.width should be between 0 and 1 (geom_epicurve).")
}
params
},
setup_data = function(data, params) {
data$flipped_aes <- params$flipped_aes
data <- flip_data(data, params$flipped_aes)
data$just <- params$just %||% 0.5
if (!is.na(params$date_resolution)) {
# Calculate width of bar in days based on specified rounding
# data$width <- (data$date - data$x) #Here or in stat?
data |>
dplyr::distinct(x, width, just) |>
dplyr::arrange(x) -> data_width
# Adjust Bars to avoid jittering when using months
# TODO: Call this function only when needed?
if (nrow(data_width) > 1 & dplyr::n_distinct(data_width$width) != 1) {
for (i in 2:nrow(data_width)) {
# Check if there is a space between bars
if ((data_width[i, ]$x - data_width[i - 1, ]$x) == data_width[i - 1, ]$width) {
# If there is a previous bar, adjust justification to avoid gaps or overlap
data_width[i, ]$just <- (data_width[i - 1, ]$width * data_width[i - 1, ]$just) / data_width[i, ]$width
}
}
}
# What about data width?
data |>
dplyr::select(-just, -width) |>
dplyr::left_join(data_width, by = "x") |>
dplyr::mutate(width = params$width %||% width * params$relative.width) -> data
} else {
data$width <- params$width %||% (resolution(data$x) * params$relative.width)
}
# Plotting Checks
max_bar_height <- data |>
dplyr::count(x) |>
dplyr::slice_max(n, n = 1, with_ties = FALSE) |>
dplyr::pull(n)
if (max_bar_height[1] > 200) {
cli::cli_alert_warning(
"To many observations per date. If you experience problems, please use color = NA to disable outlines (geom_epicurve)."
)
}
if ((max_bar_height[1] > 10000)) {
cli::cli_alert_warning(
"If you experience performance problems because of high case numbers, consider using stat = 'bin_date' (geom_epicurve)."
)
}
x_width <- diff(range(data$x)) / data[1, ]$width
if (x_width > 300) {
cli::cli_alert_warning(
"To many bars. If you experience problems, please change date_resolution to a lower resolution or use color = NA to disable outlines (geom_epicurve)."
)
}
data <- transform(data,
ymin = pmin(y, 0), ymax = pmax(y, 0),
xmin = x - width * just, xmax = x + width * (1 - just),
width = NULL, just = NULL
)
flip_data(data, params$flipped_aes)
},
rename_size = TRUE
)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.