R/visualisation.R

Defines functions plot_counterfactual plot_station_measurements

Documented in plot_counterfactual plot_station_measurements

#' Descriptive plot of daily time series data
#'
#' This function produces descriptive time-series plots with smoothing
#' for the meteorological and potential target variables that were measured at a station.
#'
#' @param env_data A data table of measurements of one air quality measurement station.
#' The data should contain the following columns:
#' \describe{
#'   \item{Station}{Station identifier where the data was collected.}
#'   \item{Komponente}{The environmental component being measured
#'        (e.g., temperature, NO2).}
#'   \item{Wert}{The measured value of the component.}
#'   \item{date}{The timestamp for the observation,
#'          formatted as a Date-Time object in the format
#'          \code{"YYYY-MM-DD HH:MM:SS"} (e.g., "2010-01-01 07:00:00").}
#'   \item{Komponente_txt}{A textual description or label for the component.}
#' }
#' @param variables list of variables to plot. Must be in `env_data$Komponente`.
#' Meteorological variables can be obtained from params.yaml.
#' @param years Optional. A numeric vector, list, or a range specifying the
#' years to restrict the plotted data.
#'   You can provide:
#'   - A single year: `years = 2020`
#'   - A numeric vector of years: `years = c(2019, 2020, 2021)`
#'   - A range of years: `years = 2019:2021`
#'   If not provided, data for all available years will be used.
#' @param smoothing_factor A number that defines the magnitude of smoothing.
#' Default is 1. Smaller numbers correspond to less smoothing, larger numbers to more.
#' @return A `ggplot` object. This object contains:
#'   \itemize{
#'     \item A time-series line plot for each variable in `variables`.
#'     \item Smoothed lines, with smoothing defined by `smoothing_factor`.
#'   }
#' @examples
#' library(data.table)
#' env_data <- data.table(
#'   Station = "Station_1",
#'   Komponente = rep(c("TMP", "NO2"), length.out = 100),
#'   Wert = rnorm(100, mean = 20, sd = 5),
#'   date = rep(seq.POSIXt(as.POSIXct("2022-01-01"), , "hour", 50), each = 2),
#'   year = 2022,
#'   Komponente_txt = rep(c("Temperature", "NO2"), length.out = 100)
#' )
#' plot <- plot_station_measurements(env_data, variables = c("TMP", "NO2"))
#'
#' @export
#' @importFrom ggplot2 ggplot aes geom_line facet_wrap geom_smooth
plot_station_measurements <- function(env_data, variables, years = NULL, smoothing_factor = 1) {
  stopifnot("No data in the env_data data.table" = nrow(env_data) > 0)
  stopifnot(
    "More than one station in env_data. Use clean_data to specify which one to use" =
      length(unique(env_data$Station)) == 1
  )
  if (is.null(years)) {
    years <- unique(env_data$year)
  }
  if (!"day" %in% colnames(env_data)) {
    env_data <- .aggregate_data(env_data)
  }
  env_data[, Werte_aggregiert := data.table::frollmean(Wert,
    12 * 7 * smoothing_factor,
    na.rm = TRUE,
    align = "center"
  ),
  by = Komponente_txt
  ]
  p <- ggplot(env_data[Komponente %in% variables & year %in% years], aes(day, Wert)) +
    geom_line() +
    facet_wrap(~Komponente_txt, scales = "free_y", ncol = 1) +
    geom_line(aes(day, Werte_aggregiert), color = "blue", size = 1.5)
  p
}

#' Prepare Plot Data and Plot Counterfactuals
#'
#' Smooths the predictions using a rolling mean, prepares the data for plotting,
#' and generates the counterfactual plot for the application window. Data before
#' the red box are reference window, red box is buffer and values after black,
#' dotted line are effect window.
#'
#' The optional grey ribbon is a prediction interval for the hourly values. The
#' interpretation for a 90% prediction interval (to be defined in `alpha` parameter
#' of [ubair::run_counterfactual()]) is that 90% of the true hourly values
#' (not the rolled means) lie within the grey band. This might be helpful for
#' getting an idea of the variance of the data and predictions.
#'
#' @param predictions The data.table containing the predictions (hourly)
#' @param params Parameters for plotting, including the target variable.
#' @param window_size The window size for the rolling mean (default is 14 days).
#' @param date_effect_start A date. Start date of the
#' effect that is to be evaluated. The data from this point onwards is disregarded
#' for calculating model performance
#' @param buffer Integer. An additional, optional buffer window before
#' `date_effect_start` to account for uncertainty in the effect start point.
#' Disregards additional buffer data points for model evaluation.
#' Use `buffer=0` for no buffer.
#' @param plot_pred_interval Boolean. If `TRUE`, shows a grey band of the prediction
#' interval.
#' @return A ggplot object with the counterfactual plot. Can be adjusted further,
#' e.g. set limits for the y-axis for better visualisation.
#' @export
#' @importFrom ggplot2 ggplot aes geom_ribbon geom_line geom_vline theme_bw labs
#' @importFrom data.table melt
#' @importFrom data.table .SD
#' @importFrom data.table :=
plot_counterfactual <- function(predictions, params, window_size = 14,
                                date_effect_start = NULL, buffer = 0,
                                plot_pred_interval = TRUE) {
  stopifnot("No data in predictions" = nrow(predictions) > 0)
  stopifnot(
    "Not all of 'value', 'prediction', 'prediction_lower', 'prediction_upper' are present in predictions data.table" =
      c("value", "prediction", "prediction_lower", "prediction_upper") %in% colnames(predictions)
  )
  # Smooth the data using a rolling mean
  dt_plot <- data.table::copy(predictions)
  dt_plot <- dt_plot[,
    c("value", "prediction", "prediction_lower", "prediction_upper") :=
      lapply(.SD, \(x) data.table::frollmean(x, 24 * window_size, align = "center")),
    .SDcols = c("value", "prediction", "prediction_lower", "prediction_upper")
  ]

  dt_plot <- dt_plot[!is.na(value)]

  # Melt the data for plotting
  dt_plot_melted <- melt(dt_plot,
    id.vars = c("date", "prediction_lower", "prediction_upper"),
    measure.vars = c("value", "prediction"),
    variable.name = "variable",
    value.name = "value"
  )

  # Prepare the ggplot object
  p <- ggplot(dt_plot_melted, aes(x = date))
  if (plot_pred_interval) {
    p <- p +
      geom_ribbon(aes(ymin = prediction_lower, ymax = prediction_upper),
        fill = "grey70", alpha = 0.8
      )
  }
  p <- p + geom_line(aes(y = value, color = variable)) +
    theme_bw() +
    ggplot2::scale_x_datetime(
      date_minor_breaks = "1 month",
      date_breaks = "2 month"
    ) +
    labs(y = paste(params$target, paste("concentration", window_size, "d rolling mean")))

  # Add vertical lines for external effect if provided
  if (!is.null(date_effect_start) && length(date_effect_start) == 1) {
    p <- p + geom_vline(
      xintercept = date_effect_start, linetype = 4,
      colour = "black"
    )
  }
  if (buffer > 0) {
    p <- p + ggplot2::annotate("rect",
      xmin = date_effect_start - as.difftime(buffer, units = "hours"),
      xmax = date_effect_start,
      ymin = -Inf,
      ymax = Inf,
      alpha = 0.1,
      fill = "red"
    )
  }
  p
}

Try the ubair package in your browser

Any scripts or data that you put into this service are public.

ubair documentation built on April 12, 2025, 2:12 a.m.