Nothing
#' Descriptive plot of daily time series data
#'
#' This function produces descriptive time-series plots with smoothing
#' for the meteorological and potential target variables that were measured at a station.
#'
#' @param env_data A data table of measurements of one air quality measurement station.
#' The data should contain the following columns:
#' \describe{
#' \item{Station}{Station identifier where the data was collected.}
#' \item{Komponente}{The environmental component being measured
#' (e.g., temperature, NO2).}
#' \item{Wert}{The measured value of the component.}
#' \item{date}{The timestamp for the observation,
#' formatted as a Date-Time object in the format
#' \code{"YYYY-MM-DD HH:MM:SS"} (e.g., "2010-01-01 07:00:00").}
#' \item{Komponente_txt}{A textual description or label for the component.}
#' }
#' @param variables list of variables to plot. Must be in `env_data$Komponente`.
#' Meteorological variables can be obtained from params.yaml.
#' @param years Optional. A numeric vector, list, or a range specifying the
#' years to restrict the plotted data.
#' You can provide:
#' - A single year: `years = 2020`
#' - A numeric vector of years: `years = c(2019, 2020, 2021)`
#' - A range of years: `years = 2019:2021`
#' If not provided, data for all available years will be used.
#' @param smoothing_factor A number that defines the magnitude of smoothing.
#' Default is 1. Smaller numbers correspond to less smoothing, larger numbers to more.
#' @return A `ggplot` object. This object contains:
#' \itemize{
#' \item A time-series line plot for each variable in `variables`.
#' \item Smoothed lines, with smoothing defined by `smoothing_factor`.
#' }
#' @examples
#' library(data.table)
#' env_data <- data.table(
#' Station = "Station_1",
#' Komponente = rep(c("TMP", "NO2"), length.out = 100),
#' Wert = rnorm(100, mean = 20, sd = 5),
#' date = rep(seq.POSIXt(as.POSIXct("2022-01-01"), , "hour", 50), each = 2),
#' year = 2022,
#' Komponente_txt = rep(c("Temperature", "NO2"), length.out = 100)
#' )
#' plot <- plot_station_measurements(env_data, variables = c("TMP", "NO2"))
#'
#' @export
#' @importFrom ggplot2 ggplot aes geom_line facet_wrap geom_smooth
plot_station_measurements <- function(env_data, variables, years = NULL, smoothing_factor = 1) {
stopifnot("No data in the env_data data.table" = nrow(env_data) > 0)
stopifnot(
"More than one station in env_data. Use clean_data to specify which one to use" =
length(unique(env_data$Station)) == 1
)
if (is.null(years)) {
years <- unique(env_data$year)
}
if (!"day" %in% colnames(env_data)) {
env_data <- .aggregate_data(env_data)
}
env_data[, Werte_aggregiert := data.table::frollmean(Wert,
12 * 7 * smoothing_factor,
na.rm = TRUE,
align = "center"
),
by = Komponente_txt
]
p <- ggplot(env_data[Komponente %in% variables & year %in% years], aes(day, Wert)) +
geom_line() +
facet_wrap(~Komponente_txt, scales = "free_y", ncol = 1) +
geom_line(aes(day, Werte_aggregiert), color = "blue", size = 1.5)
p
}
#' Prepare Plot Data and Plot Counterfactuals
#'
#' Smooths the predictions using a rolling mean, prepares the data for plotting,
#' and generates the counterfactual plot for the application window. Data before
#' the red box are reference window, red box is buffer and values after black,
#' dotted line are effect window.
#'
#' The optional grey ribbon is a prediction interval for the hourly values. The
#' interpretation for a 90% prediction interval (to be defined in `alpha` parameter
#' of [ubair::run_counterfactual()]) is that 90% of the true hourly values
#' (not the rolled means) lie within the grey band. This might be helpful for
#' getting an idea of the variance of the data and predictions.
#'
#' @param predictions The data.table containing the predictions (hourly)
#' @param params Parameters for plotting, including the target variable.
#' @param window_size The window size for the rolling mean (default is 14 days).
#' @param date_effect_start A date. Start date of the
#' effect that is to be evaluated. The data from this point onwards is disregarded
#' for calculating model performance
#' @param buffer Integer. An additional, optional buffer window before
#' `date_effect_start` to account for uncertainty in the effect start point.
#' Disregards additional buffer data points for model evaluation.
#' Use `buffer=0` for no buffer.
#' @param plot_pred_interval Boolean. If `TRUE`, shows a grey band of the prediction
#' interval.
#' @return A ggplot object with the counterfactual plot. Can be adjusted further,
#' e.g. set limits for the y-axis for better visualisation.
#' @export
#' @importFrom ggplot2 ggplot aes geom_ribbon geom_line geom_vline theme_bw labs
#' @importFrom data.table melt
#' @importFrom data.table .SD
#' @importFrom data.table :=
plot_counterfactual <- function(predictions, params, window_size = 14,
date_effect_start = NULL, buffer = 0,
plot_pred_interval = TRUE) {
stopifnot("No data in predictions" = nrow(predictions) > 0)
stopifnot(
"Not all of 'value', 'prediction', 'prediction_lower', 'prediction_upper' are present in predictions data.table" =
c("value", "prediction", "prediction_lower", "prediction_upper") %in% colnames(predictions)
)
# Smooth the data using a rolling mean
dt_plot <- data.table::copy(predictions)
dt_plot <- dt_plot[,
c("value", "prediction", "prediction_lower", "prediction_upper") :=
lapply(.SD, \(x) data.table::frollmean(x, 24 * window_size, align = "center")),
.SDcols = c("value", "prediction", "prediction_lower", "prediction_upper")
]
dt_plot <- dt_plot[!is.na(value)]
# Melt the data for plotting
dt_plot_melted <- melt(dt_plot,
id.vars = c("date", "prediction_lower", "prediction_upper"),
measure.vars = c("value", "prediction"),
variable.name = "variable",
value.name = "value"
)
# Prepare the ggplot object
p <- ggplot(dt_plot_melted, aes(x = date))
if (plot_pred_interval) {
p <- p +
geom_ribbon(aes(ymin = prediction_lower, ymax = prediction_upper),
fill = "grey70", alpha = 0.8
)
}
p <- p + geom_line(aes(y = value, color = variable)) +
theme_bw() +
ggplot2::scale_x_datetime(
date_minor_breaks = "1 month",
date_breaks = "2 month"
) +
labs(y = paste(params$target, paste("concentration", window_size, "d rolling mean")))
# Add vertical lines for external effect if provided
if (!is.null(date_effect_start) && length(date_effect_start) == 1) {
p <- p + geom_vline(
xintercept = date_effect_start, linetype = 4,
colour = "black"
)
}
if (buffer > 0) {
p <- p + ggplot2::annotate("rect",
xmin = date_effect_start - as.difftime(buffer, units = "hours"),
xmax = date_effect_start,
ymin = -Inf,
ymax = Inf,
alpha = 0.1,
fill = "red"
)
}
p
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.