R/visualization.R

Defines functions iso_plot_continuous_flow_data.data.frame iso_plot_continuous_flow_data.iso_file_list iso_plot_continuous_flow_data.iso_file iso_plot_continuous_flow_data.default iso_plot_continuous_flow_data iso_plot_raw_data find_time_column

Documented in iso_plot_continuous_flow_data iso_plot_continuous_flow_data.data.frame iso_plot_continuous_flow_data.iso_file_list iso_plot_raw_data

# helper functions ===

# find the name of the time column and its unit in a raw data frame
find_time_column <- function(df) {
  # time column
  time_pattern <- "^time\\.(.*)$"
  time_column <- stringr::str_subset(names(df), time_pattern)
  if (length(time_column) != 1)
    stop("unclear which column is the time column, consider an explicit 'iso_convert_time' call, found: ",
         str_c(time_column, collapse = ", "), call. = FALSE)
  time_unit <- stringr::str_match(time_column, time_pattern) %>% {.[2] }
  return(list(column = time_column, unit = time_unit))
}

# raw data plots =====

#' Plot raw data from isoreader files
#'
#' Convenience function for making quick standard plots for raw isoreader data.
#' Calls \code{\link{iso_plot_continuous_flow_data}}, \code{\link{iso_plot_dual_inlet_data}} and \code{\link{iso_plot_scan_data}} for data specific plotting (see those functions for parameter details). For customizing plotting calls, it is recommended to use \code{\link{iso_plot_continuous_flow_data}}, \code{\link{iso_plot_dual_inlet_data}} and \code{\link{iso_plot_scan_data}} directly.
#'
#' @param iso_files collection of iso_file objects
#' @param ... parameters for the data specific plotting functions
#' @inheritParams iso_show_default_processor_parameters
#' @family plot functions
#' @export
iso_plot_raw_data <- function(iso_files, ..., quiet = default(quiet)) {
  if(!iso_is_object(iso_files)) stop("can only plot iso files or lists of iso files", call. = FALSE)

  iso_files <- iso_as_file_list(iso_files)

  if (!quiet) sprintf("Info: plotting data from %d data file(s)", length(iso_files)) %>% message()

  if (iso_is_continuous_flow(iso_files))
    iso_plot_continuous_flow_data (iso_files, ...)
  else if (iso_is_dual_inlet(iso_files))
    iso_plot_dual_inlet_data (iso_files, ...)
  else if (iso_is_scan(iso_files))
    iso_plot_scan_data (iso_files, ...)
  else
    stop("plotting of this type of iso_files not yet supported", call. = FALSE)
}

# continuous flow ======

#' Plot chromatogram from continuous flow data
#'
#' This function provides easy plotting for mass and ratio chromatograms from continuous flow IRMS data. It can be called either directly with a set of \code{iso_file} objects, or with a data frame prepared for plotting chromatographic data (see \code{\link{iso_prepare_continuous_flow_plot_data}}).
#'
#' @param ... S3 method placeholder parameters, see class specific functions for details on parameters
#' @family plot functions
#' @export
iso_plot_continuous_flow_data <- function(...) {
  UseMethod("iso_plot_continuous_flow_data")
}

#' @export
iso_plot_continuous_flow_data.default <- function(x, ...) {
  stop("this function is not defined for objects of type '",
       class(x)[1], "'", call. = FALSE)
}

#' @export
iso_plot_continuous_flow_data.iso_file <- function(iso_files, ...) {
  iso_plot_continuous_flow_data(iso_as_file_list(iso_files), ...)
}

#' @inheritParams iso_prepare_continuous_flow_plot_data
#' @param peak_table a data frame that describes the peaks in this chromatogram. By default, the peak table from the \code{iso_files} is used if any peak features are requested in the plot (e.g. \code{peak_marker=TRUE} or \code{peak_bounds=TRUE}). If \code{peak_table} is supplied with non-standard column names, the following parameters must also be set correctly: \code{file_id}, \code{rt}, \code{rt_start}, \code{rt_end}, and potentially \code{rt_unit}.
#' @rdname iso_plot_continuous_flow_data
#' @export
iso_plot_continuous_flow_data.iso_file_list <- function(
  iso_files, data = character(),
  time_interval = c(), time_interval_units = "seconds",
  filter = NULL,
  normalize = FALSE, zoom = NULL,
  panel = data, color = file_id, linetype = NULL, label = file_id,
  peak_table = iso_get_peak_table(iso_files, quiet = TRUE), file_id = default(file_id),
  rt = default(rt), rt_start = default(rt_start), rt_end = default(rt_end),
  rt_unit = NULL,
  peak_marker = FALSE, peak_bounds = FALSE, peak_bgrd = FALSE,
  peak_label = NULL, peak_label_filter = NULL,
  peak_label_options = list(
    size = peak_label_size,
    force = peak_label_repel
  ),
  peak_label_size = 2, peak_label_repel = 1) {

  # safety checks
  if(!iso_is_continuous_flow(iso_files))
    stop("iso_plot_continuous_flow_data can only plot continuous flow iso_files", call. = FALSE)

  # deprecated updates
  if (!missing(peak_label_size) || !missing(peak_label_repel)) {
    warning("'peak_label_size' and 'peak_label_repel' are now part of the more flexible 'peak_label_options' parameter (as 'size' and 'force', respectively) that is passed on to ?geom_label_repel. Please use 'peak_label_options' directly to avoid this warning.", immediate. = TRUE, call. = FALSE)
  }

  # need peak table?
  peak_table_quo <- enquo(peak_table)
  peak_label_quo <- enquo(peak_label)
  if (peak_marker || peak_bounds || peak_bgrd || !rlang::quo_is_null(peak_label_quo)) {
    peak_table <- rlang::eval_tidy(peak_table_quo)
  } else {
    peak_table <- NULL
  }

  # retrieve data (with all info so additional aesthetics are easy to include)
  panel_quo <- enquo(panel)
  plot_data <- iso_prepare_continuous_flow_plot_data(
    iso_files,
    data = data,
    include_file_info = everything(),
    time_interval = time_interval,
    time_interval_units = time_interval_units,
    filter = !!enquo(filter),
    normalize = normalize,
    zoom = zoom,
    zoom_group = !!panel_quo,
    peak_table = peak_table,
    file_id = !!enquo(file_id),
    rt = !!enquo(rt),
    rt_start = !!enquo(rt_start),
    rt_end = !!enquo(rt_end),
    rt_unit = rt_unit
  )

  # plot
  iso_plot_continuous_flow_data(
    plot_data,
    panel = !!enquo(panel),
    color = !!enquo(color),
    linetype = !!enquo(linetype),
    label = !!enquo(label),
    peak_marker = peak_marker,
    peak_bounds = peak_bounds,
    peak_label = !!peak_label_quo,
    peak_label_filter = !!enquo(peak_label_filter),
    peak_label_options = peak_label_options
  )
}

#' @rdname iso_plot_continuous_flow_data
#' @param df a data frame of the chromatographic data prepared for plotting (see \code{\link{iso_prepare_continuous_flow_plot_data}})
#' @param panel whether to panel plot by anything - any column or complex expression is possible (see notes in the \code{filter} parameter for available raw data columns and \code{\link{iso_get_file_info}} for available file info columns) but the most commonly used options are \code{panel = NULL} (overlay all), \code{panel = data} (by mass/ratio data), \code{panel = file_id} (panel by files, alternatively use any appropriate file_info column or expression that's unique for each file). The default is panelling by the \code{data} column.
#' @param color whether to color plot by anything, options are the same as for \code{panel} but the default is \code{file_id}
#' @param linetype whether to differentiate by linetype, options are the same as for \code{panel} but the default is \code{NULL} (i.e. no linetype aesthetic). Note that a limited number of linetypes (6) is defined by default and the plot will fail if a higher number is required unless specified using \code{\link[ggplot2]{scale_linetype}}.
#' @param label this is primarily of use for turning the generated ggplots into interactive plots via \code{\link[plotly]{ggplotly}} as the \code{label} will be rendered as an additional mousover label. Any unique file identifier is a useful choice, the default is \code{file_id}.
#' @param peak_marker whether to mark identified peaks with a vertical line at the peak retention time. Only works if a \code{peak_table} was provided to identify the peaks and will issue a warning if \code{peak_marker = TRUE} but no peaks were identified.
#' @param peak_bounds whether to mark the boundaries of identified peaks with a vertical line at peak start and end retention times. Only works if a \code{peak_table} was provided to identify the peaks and will issue a warning if \code{peak_bounds = TRUE} but no peaks were identified.
#' @param peak_bgrd NOT YET IMPLEMENTED whether to show the background of identified peaks from start to end retention times. Only works if a \code{peak_table} was provided that has \code{bgrdX_start} and \code{bgrdX_end} columns in the same units as the raw data.
#' @param peak_label whether to label identified peaks. Any valid column or complex expression is supported and ALL columns in the provided \code{peak_table} can be used in this expression. The easiest way to generate well constructed peak labels is via the \code{\link{iso_format}} function. To provide more space for peak labels, it is sometimes useful to use a \code{zoom} value smaller than 1 to zoom out a bit, e.g. \code{zoom = 0.9}. If peak labels overlap, consider changing \code{peak_label_size} and/or \code{peak_label_repel}. Note that this only works if a \code{peak_table} was provided to identify the peaks and will issue a warning if \code{peak_label} is set but no peaks were identified. Also note that peaks whose value at the peak retention time is not visible on the panel due to e.g. a high \code{zoom} value will not have a visible label either.
#' @param peak_label_filter a filter for the peak labels (if supplied). Can be useful for highlighting only a subset of peaks with peak labels (e.g. only one data trace, or only those in a certain portion of the chromatogram). Only interpreted if \code{peak_table} is set.
#' @param peak_label_options styling options to be used for the \link[ggrepel]{geom_label_repel} peak labels. All parameters suppored by \link[ggrepel]{geom_label_repel} are allowed. Particularly useful ones are \code{size}, \code{force}, \code{nudge_x}, \code{nudge_y}, \code{segment.color} ("black" is set by default, switch to \code{NULL} to get the same as the color aesthetic), \code{segment.size} and \code{segment.alpha}.
#' @param peak_label_size deprecated - please use \code{peak_label_options(size = 5)} instead.
#' @param peak_label_repel deprecated - please use \code{peak_label_options(force = 2)} instead.
#' @export
iso_plot_continuous_flow_data.data.frame <- function(
  df, panel = data, color = file_id, linetype = NULL, label = file_id,
  peak_marker = FALSE, peak_bounds = FALSE, peak_bgrd = FALSE,
  peak_label = NULL, peak_label_filter = NULL,
  peak_label_options = list(
    size = peak_label_size,
    force = peak_label_repel
  ),
  peak_label_size = 2, peak_label_repel = 1
  ) {

  # check for data
  if (nrow(df) == 0) stop("no data provided", call. = FALSE)

  # deprecated updates
  if (!missing(peak_label_size) || !missing(peak_label_repel)) {
    warning("'peak_label_size' and 'peak_label_repel' are now part of the more flexible 'peak_label_options' parameter (as 'size' and 'force', respectively) that is passed on to ?geom_label_repel. Please use 'peak_label_options' directly to avoid this warning.", immediate. = TRUE, call. = FALSE)
  }

  # check for time column
  time_info <- find_time_column(df)

  # quos and other column checks
  aes_quos <- list(panel = enquo(panel), color = enquo(color), linetype = enquo(linetype), label = enquo(label), peak_label = enquo(peak_label))

  aes_cols <- get_column_names(
    df,
    file_id = quo("file_id"),
    time_min = quo("time_min"), time_max = quo("time_max"),
    is_ratio = quo("is_ratio"), data = quo("data"), value = quo("value"))
  peak_cols <- c("peak_marker", "peak_point", "peak_start", "peak_end") %in% names(df)
  check_expressions(df, aes_quos$color, aes_quos$linetype, aes_quos$label, aes_quos$panel)

  # add panel column to allow expressions
  if (!quo_is_null(aes_quos$panel)) {
    df <- mutate(df, ..panel = !!aes_quos$panel)
  }

  # find overall plot parameters from data frame
  normalize <- col_in_df(df, "normalized") & df$normalized[1]
  zoom <- col_in_df(df, "zoom_cutoff") & !all(is.na(df$zoom_cutoff[1]))
  if (col_in_df(df, "normalize")) stopifnot(all(df$normalized == df$normalized[1])) # should be a single value

  # generate plot
  p <- ggplot(df) +
    aes(!!sym(time_info$column), value, group = paste(file_id, data)) +
    scale_x_continuous(str_c("Time ", time_info$unit), expand = c(0, 0)) +
    scale_y_continuous(if(normalize) "Normalized Signal" else "Signal", expand = c(0, 0)) +
    theme_bw()

  # peak cols safety check
  if (!all(peak_cols) && (peak_marker || peak_bounds || !quo_is_null(aes_quos$peak_label))) {
    peak_marker <- FALSE
    peak_bounds <- FALSE
    aes_quos$peak_label <- quo(NULL)
    glue::glue(
      "peak features requested but peak identifications seem to be missing - ",
      "ignoring all peak feature parameters. Please make sure to provide a peak_table.") %>%
      warning(immediate. = TRUE, call. = FALSE)
  }
  if (!quo_is_null(aes_quos$peak_label)) {
    check_expressions(df, aes_quos$peak_label)
  }

  # peak boundaries - consider making this an area background
  if (peak_bounds && nrow(dplyr::filter(df, peak_point > 0 & (peak_start | peak_end))) > 0) {
    p <- p +
      geom_rect(
        data = function(df) dplyr::filter(df, peak_point > 0 & (peak_start | peak_end)) %>%
          dplyr::group_by(peak_point) %>%
          dplyr::mutate(xmin = ifelse(any(peak_start),
                                      min((!!sym(time_info$column))[peak_start]), -Inf),
                        xmax = ifelse(any(peak_end),
                                      max((!!sym(time_info$column))[peak_end]), Inf)) %>%
          dplyr::ungroup() %>%
          dplyr::filter(!is.infinite(xmin) | !is.infinite(xmax)) %>%
          # collapse double entries without loosing any other potentially important aesthetics info
          dplyr::select(-!!sym(time_info$column), -value, -peak_start, -peak_end, -peak_marker) %>%
          { if ("tp" %in% names(.)) dplyr::select(., -tp) else . } %>%
          unique(),
        mapping = aes(x = NULL, y = NULL, xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf, color = NULL),
        fill = "grey20", color = NA, alpha = 0.1, show.legend = FALSE
      )
  }

  # peak markers
  if (peak_marker) {
    p <- p +
      geom_vline(
        data = function(df) dplyr::filter(df, peak_marker),
        mapping = aes(xintercept = !!sym(time_info$column), color = NULL),
        color = "black", linetype = 2
      )
  }

  # peak backgrounds
  # NOT YET IMPLEMENTED
  # note that this requires some scaling (just like the time units) to do it right
  if (peak_bgrd) {
    warning("sorry, peak bgrds are not yet implemented", call. = FALSE, immediate. = FALSE)
  }

  # draw chromatograms
  p <- p + geom_line()

  # peak labels
  if (!quo_is_null(aes_quos$peak_label)) {
    peak_label_filter_quo <- enquo(peak_label_filter)
    has_any_labels <-
      dplyr::filter(df, peak_marker) %>%
      {
        if(!quo_is_null(peak_label_filter_quo))
          dplyr::filter(., !!peak_label_filter_quo)
        else
          .
      } %>%
      nrow()

    # labels
    if (has_any_labels > 0) {

      # default arguments for geom_label_repel
      args <- list(
        data = function(df)
          dplyr::filter(df, peak_marker) %>%
          {
            if(!quo_is_null(peak_label_filter_quo))
              dplyr::filter(., !!peak_label_filter_quo)
            else
              .
          },
        mapping = aes_(label = aes_quos$peak_label),
        force = 1,
        min.segment.length = 0,
        size = 2,
        segment.color = "black",
        segment.alpha = 0.5,
        segment.size = 0.5,
        direction = "both",
        show.legend = FALSE
      ) %>%
        # modify if any passed in by user
        modifyList(peak_label_options)

      p <- p + do.call(ggrepel::geom_label_repel, args = args)

    }
  }

  # zoom ghost points to make sure the zooming frame remains the same (if zoom is set)
  if (zoom) {
    panel_zoom_group <- if (!rlang::quo_is_null(aes_quos$panel)) quo(..panel) else quo(1)

    get_column_names(df, baseline = quo(baseline)) # check that baseline exists
    p <- p +
      geom_point(data = function(df)
        df %>%
          group_by(!!panel_zoom_group) %>%
          summarize(time = mean(!!sym(time_info$column), na.omit = TRUE), value = min(baseline, na.rm = TRUE)) %>%
          dplyr::filter(!is.na(value)),
        mapping = aes(x = time, y = value), inherit.aes = FALSE,
        size = 0, alpha = 1, show.legend = FALSE) +
      geom_point(data = function(df)
        df %>%
          group_by(!!panel_zoom_group) %>%
          summarize(time = mean(!!sym(time_info$column), na.omit = TRUE), value = max(zoom_cutoff, na.rm = TRUE)) %>%
          dplyr::filter(!is.na(value)),
        mapping = aes(x = time, y = value), inherit.aes = FALSE,
        size = 0, alpha = 1, show.legend = FALSE)
  }

  # display full time scale
  if (!is.infinite(df$time_min[1]))
    p <- p + expand_limits(x = df$time_min[1])
  if (!is.infinite(df$time_max[1]))
    p <- p + expand_limits(x = df$time_max[1])

  # normalize plot y axis
  if (normalize)
    p <- p + theme(axis.ticks.y = element_blank(), axis.text.y = element_blank())

  # paneling
  if (!quo_is_null(aes_quos$panel))
    p <- p + facet_grid(..panel ~ ., scales = "free_y")

  # color
  if (!quo_is_null(aes_quos$color))
    p <- p %+% aes_(color = aes_quos$color)

  # linetype
  if (!quo_is_null(aes_quos$linetype))
    p <- p %+% aes_(linetype = aes_quos$linetype)

  # label
  if (!quo_is_null(aes_quos$label))
    p <- p %+% aes_(label = aes_quos$label)

  # return plot
  return(p)

}

#' Prepare plotting data from continuous flow files
#'
#' This function helps with the preparation of plotting data from continuous flow data files (i.e. the raw chromatogram data). Call either explicity and pass the result to \code{\link{iso_plot_continuous_flow_data}} or let \code{\link{iso_plot_continuous_flow_data}} take care of preparing the plotting data directly from the \code{iso_files}. If a \code{peak_table} is provided for peak annotation purposes, uses \code{\link{iso_combine_raw_data_with_peak_table}} to combine the raw data from the iso_files with the provided peak data table.
#'
#' @param iso_files collection of iso_file objects
#' @param data which masses and ratios to plot (e.g. \code{c("44", "45", "45/44")} - without the units), if omitted, all available masses and ratios are plotted. Note that ratios should be calculated using \code{\link{iso_calculate_ratios}} prior to plotting.
#' @param time_interval which time interval to plot
#' @param time_interval_units which units the time interval is in, default is "seconds"
#' @param filter any filter condition to apply to the data beyond the masses/ratio selection (param \code{data}) and time interval (param \code{time_interval}). For details on the available data columns see \link[isoreader]{iso_get_raw_data} with parameters \code{gather = TRUE} and \code{include_file_info = everything()} (i.e. all file info is available for plotting aesthetics).
#' @param normalize whether to normalize the data (default is FALSE, i.e. no normalization). If TRUE, normalizes each trace across all files (i.e. normalized to the global max/min). This is particularly useful for overlay plotting different mass and/or ratio traces (\code{panel = NULL}). Note that zooming (if \code{zoom} is set) is applied after normalizing.
#' @param zoom if not set, automatically scales to the maximum range in the selected time_interval in each plotting panel. If set, scales by the indicated factor, i.e. values > 1 are zoom in, values < 1 are zoom out, baseline always remains the bottom anchor point. Note that zooming is always relative to the max in each zoom_group (by default \code{zoom_group = data}, i.e. each trace is zoomed separately). The maximum considered may be outside the visible time window. Note that for \code{zoom_group} other than \code{data} (e.g. \code{file_id} or \code{NULL}), zooming is relative to the max signal across all mass traces. Typically it makes most sense to set the \code{zoom_group} to the same variable as the planned \code{panel} parameter to the plotting function. Lastly, note that zooming only affects masses, ratios are never zoomed.
#' @param peak_table a data frame that describes the peaks in this chromatogram. By default, the chromatographic data is prepared WITHOUT peaks information. Supply this parameter to add in the peaks data. Typically via \code{\link{iso_get_peak_table}}. Note that the following parameters must also be set correctly IF \code{peak_table} is supplied and has non-standard column names: \code{file_id}, \code{rt}, \code{rt_start}, \code{rt_end}.
#' @inheritParams iso_combine_raw_data_with_peak_table
#' @family plot functions
#' @export
iso_prepare_continuous_flow_plot_data <- function(
  iso_files, data = character(), include_file_info = NULL,
  time_interval = c(), time_interval_units = "seconds",
  filter = NULL,
  normalize = FALSE, zoom = NULL, zoom_group = data,
  peak_table = NULL, file_id = default(file_id),
  rt = default(rt), rt_start = default(rt_start), rt_end = default(rt_end),
  rt_unit = NULL) {

  # safety checks
  if(!iso_is_continuous_flow(iso_files))
    stop("can only prepare continuous flow iso_files for plotting", call. = FALSE)

  # global vars
  # FIXME for CRAN

  # collect raw data
  raw_data <- iso_get_raw_data(iso_files, gather = TRUE, quiet = TRUE)
  if (nrow(raw_data) == 0) stop("no raw data in supplied iso_files", call. = FALSE)

  # add in file info
  file_info <- iso_get_file_info(iso_files, select = !!enquo(include_file_info), quiet = TRUE)
  raw_data <- dplyr::left_join(raw_data, file_info, by = "file_id")

  # check for zoom_gruop column(s) existence
  aes_quos <- list(zoom_group = enquo(zoom_group))
  if (rlang::quo_is_null(aes_quos$zoom_group)) aes_quos$zoom_group <- quo(1)
  check_expressions(raw_data, aes_quos$zoom_group)

  # only work with desired data (masses and ratios)
  select_data <- if(!is.null(data) && length(data) == 0) unique(raw_data$data) else as.character(data)
  if ( length(missing <- setdiff(select_data, unique(raw_data$data))) > 0 )
    stop("data not available in the provided iso_files (don't include units): ", str_c(missing, collapse = ", "), call. = FALSE)
  raw_data <- dplyr::filter(raw_data, data %in% select_data)

  # time column
  time_info <- find_time_column(raw_data)

  # time interval
  if (length(time_interval) == 2) {
    time_interval <- scale_time(time_interval, to = time_info$unit, from = time_interval_units)
    raw_data <- mutate(raw_data, time_min = time_interval[1], time_max = time_interval[2])
  } else if (length(time_interval) > 0 && length(time_interval) != 2) {
    stop("time interval needs to be a vector with two numeric entries, found: ", str_c(time_interval, collapse = ", "), call. = FALSE)
  } else {
    raw_data <- mutate(raw_data, time_min = -Inf, time_max = Inf)
  }
  raw_data <- select(raw_data, 1:time_info$column, time_min, time_max, everything())

  # general filter
  filter_quo <- enquo(filter)
  if (!quo_is_null(filter_quo)) {
    raw_data <- dplyr::filter(raw_data, !!filter_quo)
  }

  # border extrapolation function
  extrapolate_border <- function(cutoff, border, change, time, value) {
    cutoff <- unique(cutoff)
    if (length(cutoff) != 1) stop("problematic cutoff", call. = FALSE)
    bi <- which(border) # border indices
    bi2 <- bi + 1 - change[bi] # end point of border
    bi1 <- bi - change[bi] # start point of border
    border_time <- (cutoff - value[bi1])/(value[bi2] - value[bi1]) * (time[bi2] - time[bi1]) + time[bi1]
    time[bi] <- border_time
    return(time)
  }

  # normalize data
  normalize_data <- function(df) {
    group_by(df, data) %>%
      mutate(value = (value - min(value, na.rm = TRUE))/
               (max(value, na.rm = TRUE) - min(value, na.rm = TRUE))) %>%
      ungroup()
  }

  # plot data
  plot_data <-
    raw_data %>%
    # make ratio identification simple
    mutate(is_ratio = category == "ratio") %>%
    # add units to data for proper grouping
    mutate(
      data_wo_units = data,
      data = ifelse(!is.na(units), str_c(data, " [", units, "]"), data)
    ) %>%
    select(1:category, is_ratio, data, data_wo_units, everything()) %>%
    arrange(!!sym(time_info$column)) %>%
    # find global zoom cutoffs per group before time filtering (don't consider ratios)
    {
      if (!is.null(zoom)) {
        mutate(., ..zoom_group = !!aes_quos$zoom_group) %>%
          group_by(..zoom_group) %>%
          mutate(
            baseline = value[!is_ratio] %>% { if(length(.) == 0) NA else min(.) },
            max_signal = value[!is_ratio] %>% { if(length(.) == 0) NA else max(.) }) %>%
          ungroup() %>%
          select(-..zoom_group)
      } else .
    } %>%
    # time filtering
    { if (length(time_interval) == 2) dplyr::filter(., dplyr::between(!!sym(time_info$column), time_interval[1], time_interval[2])) else . } %>%
    # info fields
    mutate(zoom_cutoff = NA_real_, normalized = FALSE) %>%
    # normalizing
    {
      if (normalize) {
        normalize_data(.) %>%
          # zooming always based on the full (0 to 1) interval
          mutate(baseline = 0, max_signal = 1, normalized = TRUE)
      } else .
    } %>%
    # zooming
    {
      if (!is.null(zoom)) {
        # extrapolate data (multi-panel requires this instead of coord_cartesian)
        group_by(., file_id, data) %>%
          # note: this does not do perfectly extrapolating the line when only a single
          # data point is missing (extrapolation on the tail is missing) because it
          # would require adding another row to account both for gap and extrapolated
          mutate(
            zoom_cutoff = 1/zoom * max_signal + (1 - 1/zoom) * baseline, # cutoffs
            discard = ifelse(!is_ratio, value > zoom_cutoff, FALSE), # never zoom ratios
            change = c(0, diff(discard)), # check for where the cutoffs
            border = change == 1 | c(change[-1], 0) == -1, # identify borders
            gap = c(0, change[-dplyr::n()]) == 1, # identify gaps next to one border so ggplot knows the line is interrupted
            !!sym(time_info$column) := extrapolate_border(zoom_cutoff, border, change, !!sym(time_info$column), value), # calculate time at border
            value = ifelse(border, zoom_cutoff, ifelse(gap, NA, value)) # assign values
          ) %>%
          ungroup() %>%
          dplyr::filter(!discard | border | gap) %>%
          select(-discard, -change, -border, -gap)
        # #%>%
        # { # re-normalize after zooming if normalizing is turned on
        #   if (normalize) normalize_data(.) %>% mutate(zoom_cutoff = 1.0)
        #   else .
        # }
      } else .
    } %>%
    # switch to factors for proper grouping
    {
      data_levels <- tibble::deframe(select(., data, data_wo_units) %>% unique())
      data_sorting <- sapply(select_data, function(x) which(data_levels == x)) %>% unlist(use.names = F)
      mutate(.,
             data = factor(data, levels = names(data_levels)[data_sorting]),
             data_wo_units = factor(data_wo_units, levels = unique(as.character(data_levels)[data_sorting])))
    } %>%
    dplyr::arrange(tp, data)

  # peaks table

  if (!is.null(peak_table) && nrow(peak_table) > 0) {
    plot_data <- iso_combine_raw_data_with_peak_table(
      plot_data, peak_table,
      file_id = !!enquo(file_id), data_trace = data,
      rt = !!enquo(rt), rt_start = !!enquo(rt_start), rt_end = !!enquo(rt_end),
      rt_unit = rt_unit)
  }

  # return
  return(plot_data)
}

# dual inlet ========

#' Plot data from dual inlet files
#'
#' This function provides easy plotting for mass, ratio and delta traces from IRMS dual inlet data. It can be called either directly with a set of \code{iso_file} objects, or with a data frame prepared for plotting dual inlet data (see \code{\link{iso_prepare_dual_inlet_plot_data}}).
#'
#' @family plot functions
#' @export
iso_plot_dual_inlet_data <- function(...) {
  UseMethod("iso_plot_dual_inlet_data")
}

#' @export
iso_plot_dual_inlet_data.default <- function(x, ...) {
  stop("this function is not defined for objects of type '",
       class(x)[1], "'", call. = FALSE)
}

#' @export
iso_plot_dual_inlet_data.iso_file <- function(iso_files, ...) {
  iso_plot_dual_inlet_data(iso_as_file_list(iso_files), ...)
}

#' @inheritParams iso_prepare_dual_inlet_plot_data
#' @rdname iso_plot_dual_inlet_data
#' @export
iso_plot_dual_inlet_data.iso_file_list <- function(
  iso_files, data = character(), filter = NULL,
  panel = data, color = file_id, linetype = NULL, shape = type, size = 2, label = file_id,
  ...) {

  # safety checks
  if(!iso_is_dual_inlet(iso_files))
    stop("iso_plot_dual_inlet_data can only plot dual_inlet iso_files", call. = FALSE)

  # retrieve data (with all info so additional aesthetics are easy to include)
  plot_data <- iso_prepare_dual_inlet_plot_data(
    iso_files,
    data = data,
    include_file_info = everything(),
    filter = !!enquo(filter)
  )

  # plot scan data
  iso_plot_dual_inlet_data(
    plot_data,
    panel = !!enquo(panel),
    color = !!enquo(color),
    shape = !!enquo(shape),
    linetype = !!enquo(linetype),
    label = !!enquo(label),
    size = !!enquo(size),
    ...
  )

}

#' @rdname iso_plot_dual_inlet_data
#' @param df a data frame of the dual inlet data prepared for plotting (see \code{\link{iso_prepare_dual_inlet_plot_data}})
#' @param ... additional parameters passed on to \link{iso_plot_data}
#' @inheritParams iso_prepare_dual_inlet_plot_data
#' @inheritParams iso_plot_data
#' @export
iso_plot_dual_inlet_data.data.frame <- function(
  df, panel = data, color = file_id, linetype = NULL, shape = type, size = 2, label = file_id, ...) {

  # check for data
  if (nrow(df) == 0) stop("no data provided", call. = FALSE)

  # generate plot
  p <- iso_plot_data(
    df, cycle, value, group = paste(file_id, type, data),
    color = !!enquo(color), linetype = !!enquo(linetype), label = !!enquo(label),
    size = !!enquo(size), panel = !!enquo(panel), shape = !!enquo(shape),
    points = TRUE, lines = TRUE, ...
  ) +
    scale_x_continuous(breaks = c(0:max(df$cycle))) +
    labs(x = "Cycle", y = "Signal")

  # return plot
  return(p)
}

#' Prepare plotting data from dual inlet files
#'
#' This function helps with the preparation of plotting data from dual inlet files. Call either explicity and pass the result to \code{\link{iso_plot_dual_inlet_data}} or let \code{\link{iso_plot_dual_inlet_data}} take care of preparing the plotting data directly from the \code{iso_files}.
#'
#' @param iso_files collection of iso_file objects
#' @param data which masses, ratios and deltas to plot (e.g. \code{c("44", "45", "45/44", "d45/44")} - without the units), if omitted, all available masses, ratios, and delta values are plotted. Note that ratios should be calculated using \code{\link{iso_calculate_ratios}} and delta values should be calculated using \code{\link{iso_calculate_deltas}} prior to plotting.
#' @param include_file_info which file information to include (see \link[isoreader]{iso_get_file_info}). Use c(...) to select multiple, supports all \link[dplyr]{select} syntax including renaming columns.
#' @param filter any filter condition to apply to the data beyond the masses/ratio/delta selection (param \code{data}). For details on the available data columns see \link[isoreader]{iso_get_raw_data} with parameters \code{gather = TRUE} and \code{include_file_info = everything()}.
#'
#' @family plot functions
#' @export
iso_prepare_dual_inlet_plot_data <- function(
  iso_files, data = character(), include_file_info = NULL, filter = NULL) {

  # safety checks
  if(!iso_is_dual_inlet(iso_files))
    stop("can only prepare dual inlet iso_files for plotting", call. = FALSE)

  # collect raw data
  raw_data <- iso_get_raw_data(iso_files, gather = TRUE, quiet = TRUE)
  if (nrow(raw_data) == 0) stop("no raw data in supplied iso_files", call. = FALSE)

  # add in file info
  file_info <- iso_get_file_info(iso_files, select = !!enquo(include_file_info), quiet = TRUE)
  raw_data <- dplyr::left_join(raw_data, file_info, by = "file_id")

  # only work with desired data
  available_data <- unique(raw_data$data)
  select_data <- if(!is.null(data) && length(data) == 0) available_data else as.character(data)
  if ( length(missing <- setdiff(select_data, unique(raw_data$data))) > 0 )
    stop("data not available in the provided iso_files (don't include units): ", str_c(missing, collapse = ", "), call. = FALSE)
  raw_data <- dplyr::filter(raw_data, .data$data %in% select_data)

  # general filter
  filter_quo <- enquo(filter)
  if (!quo_is_null(filter_quo)) {
    raw_data <- dplyr::filter(raw_data, !!filter_quo)
    if (nrow(raw_data) == 0) {
      sprintf("no data left with filter '%s'", rlang::as_label(filter_quo)) %>%
        stop(call. = FALSE)
    }
  }

  # plot data
  plot_data <-
    raw_data %>%
    # add units to data for proper grouping
    dplyr::mutate(
      data_wo_units = .data$data,
      data = ifelse(!is.na(.data$units), paste0(.data$data, " [", .data$units, "]"), .data$data)
    ) %>%
    dplyr::select(1:.data$category, .data$data, .data$data_wo_units, everything())

  # switch to factors for proper grouping
  data_levels <- tibble::deframe(select(plot_data, data, data_wo_units) %>% unique())
  data_sorting <- purrr::map_int(select_data, ~which(data_levels == .x)) %>% unlist(use.names = FALSE)
  plot_data <- plot_data %>%
    dplyr::mutate(
      data = factor(data, levels = names(data_levels)[data_sorting]),
      data_wo_units = factor(data_wo_units, levels = unique(as.character(data_levels)[data_sorting]))
    )

  # return
  return(plot_data)
}

# scan ======

#' Plot data from scan files
#'
#' This function provides easy plotting for mass and ratio traces from IRMSs scan data. It can be called either directly with a set of \code{iso_file} objects, or with a data frame prepared for plotting scan data (see \code{\link{iso_prepare_scan_plot_data}}).
#'
#' @family plot functions
#' @export
iso_plot_scan_data <- function(...) {
  UseMethod("iso_plot_scan_data")
}

#' @export
iso_plot_scan_data.default <- function(x, ...) {
  stop("this function is not defined for objects of type '",
       class(x)[1], "'", call. = FALSE)
}

#' @export
iso_plot_scan_data.iso_file <- function(iso_files, ...) {
  iso_plot_scan_data(iso_as_file_list(iso_files), ...)
}

#' @inheritParams iso_prepare_scan_plot_data
#' @rdname iso_plot_scan_data
#' @export
iso_plot_scan_data.iso_file_list <- function(
  iso_files, data = character(), type, filter = NULL,
  x_interval = c(), y_interval = c(),
  panel = file_id, color = data, linetype = NULL, label = data, ...) {

  # safety checks
  if(!iso_is_scan(iso_files))
    stop("iso_plot_scan_data can only plot scan iso_files", call. = FALSE)

  # retrieve data (with all info so additional aesthetics are easy to include)
  plot_data <- iso_prepare_scan_plot_data(
    iso_files,
    data = data,
    include_file_info = everything(),
    filter = !!enquo(filter)
  )

  # plot scan data
  iso_plot_scan_data(
    plot_data,
    type = !!rlang::enquo(type),
    x_interval = x_interval,
    y_interval = y_interval,
    panel = !!enquo(panel),
    color = !!enquo(color),
    linetype = !!enquo(linetype),
    label = !!enquo(label),
    ...
  )

}

#' @rdname iso_plot_scan_data
#' @param df a data frame of the scan data prepared for plotting (see \code{\link{iso_prepare_scan_plot_data}})
#' @param type which type of scan data to plot. Only required if there are more than one type of scan data.
#' @param x_interval optional constraints on x axis values
#' @param y_interval optional constraints on y axis values
#' @param ... additional parameters passed on to \link{iso_plot_data}
#' @inheritParams iso_prepare_scan_plot_data
#' @inheritParams iso_plot_data
#' @export
iso_plot_scan_data.data.frame <- function(
  df, type, x_interval = c(), y_interval = c(),
  panel = file_id, color = data, linetype = NULL, label = data, ...) {

  # check for data
  if (nrow(df) == 0) stop("no data provided", call. = FALSE)

  # check for type
  type_quo <- rlang::enquo(type)
  types <- unique(df$type)
  if (rlang::quo_is_missing(type_quo) && length(types) > 1) {
    # too many types
    sprintf(
      "found more than 1 type of scan file: '%s'. Please specify which type to plot by setting the 'type' parameter to one of these options: %s",
      paste(types, collapse = "', '"),
      paste(sprintf("'type = \"%s\"'", types), collapse = " or ")
    ) %>% stop(call. = FALSE)
  } else if (!rlang::quo_is_missing(type_quo)) {
    # type defined, let's filter by it
    filter_type <- rlang::eval_tidy(type_quo)
    df <- dplyr::filter(df, .data$type == !!filter_type)
    if (nrow(df) == 0) {
      sprintf("no data for type '%s'. Available type: '%s'",
              filter_type, paste(types, collapse = "', '")) %>%
        stop(call. = FALSE)
    }
  }

  # x interval safety checks
  if (length(x_interval) > 0 && (!is.numeric(x_interval) || length(x_interval) != 2)) {
    stop("x interval needs to be a vector with two numeric entries, found: ", str_c(x_interval, collapse = ", "), call. = FALSE)
  }

  # y interval safety checks
  if (length(y_interval) > 0 && (!is.numeric(y_interval) || length(y_interval) != 2)) {
    stop("y interval needs to be a vector with two numeric entries, found: ", str_c(y_interval, collapse = ", "), call. = FALSE)
  }

  # adjust y scale if x is set but y is not
  if (length(x_interval) == 2 && is.numeric(x_interval) && !(length(y_interval) == 2 && is.numeric(y_interval))) {
    df <- df %>%
      dplyr::arrange(x) %>%
      dplyr::group_by(file_id, data) %>%
      dplyr::mutate(
        ..discard = x < x_interval[1] | x > x_interval[2],
        ..change = c(0, diff(..discard)),
        ..border = ..change == 1 | c(..change[-1], 0) == -1
      ) %>%
      dplyr::ungroup() %>%
      dplyr::filter(!..discard | ..border) %>%
      dplyr::select(-..discard, -..change, -..border)
  }

  # x axis label
  x_lab <- sprintf("%s [%s]", df$type, df$x_units) %>% unique()
  if (length(x_lab) > 1) x_lab <- unique(df$type)

  # generate plot
  p <- iso_plot_data(
    df, x, value, group = paste(file_id, data),
    color = !!enquo(color), linetype = !!enquo(linetype), label = !!enquo(label),
    panel = !!enquo(panel), lines = TRUE, ...
  ) +
    scale_x_continuous(expand = c(0, 0)) +
    labs(x = x_lab, y = "Signal")

  # x and y intervals
  if (length(x_interval) == 2 && length(y_interval) == 2) {
    p <- p + coord_cartesian(xlim = x_interval, ylim = y_interval) +
      scale_y_continuous(expand = c(0, 0))
  } else if (length(x_interval) == 2) {
    p <- p + coord_cartesian(xlim = x_interval)
  } else if (length(y_interval) == 2) {
    p <- p + coord_cartesian(ylim = y_interval) +
      scale_y_continuous(expand = c(0, 0))
  }

  # return plot
  return(p)
}

#' Prepare plotting data from scan files
#'
#' This function helps with the preparation of plotting data from scan files. Call either explicity and pass the result to \code{\link{iso_plot_scan_data}} or let \code{\link{iso_plot_scan_data}} take care of preparing the plotting data directly from the \code{iso_files}.
#'
#' @param iso_files collection of iso_file objects
#' @param data which masses and ratios to plot (e.g. \code{c("44", "45", "45/44")} - without the units), if omitted, all available masses and ratios are plotted. Note that ratios should be calculated using \code{\link{iso_calculate_ratios}} prior to plotting.
#' @param include_file_info which file information to include (see \link[isoreader]{iso_get_file_info}). Use c(...) to select multiple, supports all \link[dplyr]{select} syntax including renaming columns.
#' @param filter any filter condition to apply to the data beyond the masses/ratio selection (param \code{data}). For details on the available data columns see \link[isoreader]{iso_get_raw_data} with parameters \code{gather = TRUE} and \code{include_file_info = everything()} (i.e. all file info is available for plotting aesthetics).
#'
#' @family plot functions
#' @export
iso_prepare_scan_plot_data <- function(
  iso_files, data = character(), include_file_info = type, filter = NULL) {

  # safety checks
  if(!iso_is_scan(iso_files))
    stop("can only prepare scan iso_files for plotting", call. = FALSE)

  # collect raw data
  raw_data <- iso_get_raw_data(iso_files, gather = TRUE, quiet = TRUE)
  if (nrow(raw_data) == 0) stop("no raw data in supplied iso_files", call. = FALSE)

  # add in file info
  file_info <- iso_get_file_info(iso_files, select = !!enquo(include_file_info), quiet = TRUE)
  if (!"type" %in% names(file_info)) {
    stop("'type' must be included in the file information", call. = FALSE)
  }
  raw_data <- dplyr::left_join(raw_data, file_info, by = "file_id")

  # only work with desired data
  available_data <- unique(raw_data$data)
  select_data <- if(!is.null(data) && length(data) == 0) available_data else as.character(data)
  if ( length(missing <- setdiff(select_data, unique(raw_data$data))) > 0 )
    stop("data not available in the provided iso_files (don't include units): ", str_c(missing, collapse = ", "), call. = FALSE)
  raw_data <- dplyr::filter(raw_data, .data$data %in% select_data)

  # general filter
  filter_quo <- enquo(filter)
  if (!quo_is_null(filter_quo)) {
    raw_data <- dplyr::filter(raw_data, !!filter_quo)
    if (nrow(raw_data) == 0) {
      sprintf("no data left with filter '%s'", rlang::as_label(filter_quo)) %>%
        stop(call. = FALSE)
    }
  }

  # plot data
  plot_data <-
    raw_data %>%
    # add units to data for proper grouping
    dplyr::mutate(
      data_wo_units = .data$data,
      data = ifelse(!is.na(.data$units), paste0(.data$data, " [", .data$units, "]"), .data$data)
    ) %>%
    dplyr::select(1:.data$category, .data$data, .data$data_wo_units, everything())

  # switch to factors for proper grouping
  data_levels <- tibble::deframe(select(plot_data, data, data_wo_units) %>% unique())
  data_sorting <- purrr::map_int(select_data, ~which(data_levels == .x)) %>% unlist(use.names = FALSE)
  plot_data <- plot_data %>%
    dplyr::mutate(
      data = factor(data, levels = names(data_levels)[data_sorting]),
      data_wo_units = factor(data_wo_units, levels = unique(as.character(data_levels)[data_sorting]))
    )

  # return
  return(plot_data)
}


# reference peaks =========

#' Plot reference peaks
#'
#' Visualize how consistent the reference peaks are across a serious of samples.
#'
#' @inheritParams iso_prepare_for_calibration
#' @param x which column to use for the x-axis
#' @param ratio which ratio column(s) to compare for the reference peaks (can be multiple)
#' @param group_id group identifier column(s) to clarify across which data groups the reference peak deviation should be calcualted. By default calculates reference peak variations within each analysis.
#' @param is_ref_condition condition to identify which of the peaks are reference peaks (unless the peaks are prefilterd already). Must be a column or expression that evaluates to a logical (TRUE/FALSE).
#' @param within_group deprectated, use \code{group_id} to group accordingly
#' @param is_ref_used deprecated, set an aesthetics directly via ... paramter to \link{iso_plot_data}
#' @param ... additional parameters passed to \link{iso_plot_data}
#' @family plot functions
#' @export
iso_plot_ref_peaks <- function(dt, x, ratio, ..., group_id = file_id, is_ref_condition = TRUE, within_group = TRUE, is_ref_used = NULL) {

  # safety checks
  param_quos <-
    list(dt = enquo(dt), x = enquo(x), ratio = enquo(ratio), group_id = enquo(group_id),
         is_ref_condition = enquo(is_ref_condition))
  check_params <-
    c(
      dt = "no data table supplied",
      x = "no x axis value supplied",
      ratio = "no ratio column to compare reference peaks provided"
    )
  missing <- param_quos[names(check_params)] %>% map_lgl(quo_is_missing)

  if (any(missing)) {
    glue("missing parameter(s) '{collapse(names(check_params)[missing], sep = \"', '\")}':\n",
         " - {collapse(check_params[missing], sep = '\n - ')}") %>%
      stop(call. = FALSE)
  }

  # evaluate dt
  dt <- rlang::eval_tidy(param_quos$dt)

  # warnings
  if(!missing(within_group))
    warning("'within_gorup' parameter is deprecated, use 'group_id' to group accordingly", immediate. = TRUE, call. = FALSE)
  if(!missing(is_ref_used))
    warning("'is_ref_used' parameter is deprecated, please set an aesthetic directly by using the ... passed on to iso_plot_data", immediate. = TRUE, call. = FALSE)

  # filter condition
  refs <- filter(dt, !!param_quos$is_ref_condition)

  if (nrow(refs) == 0)
    glue::glue("no data to visualize, check your data table and is_ref_condition filter ('{rlang::as_label(param_quos$is_ref_condition)}')") %>%
    stop(call. = FALSE)

  # ratios
  dt_cols <- get_column_names(dt, x = param_quos$x, ratio = param_quos$ratio, group_id = param_quos$group_id, n_reqs = list(group_id = "*", ratio = "+"))

  # calculate ratio deltas
  mutate_quos <-
    map(dt_cols$ratio, ~quo( (!!sym(.x) / mean(!!sym(.x), na.rm = TRUE) - 1) * 1000)) %>%
    setNames(names(dt_cols$ratio))
  refs <- refs %>%
    group_by(!!!map(dt_cols$group_id, sym)) %>%
    mutate(!!!mutate_quos) %>%
    ungroup()

  # visualize
  iso_plot_data(
    refs, x = !!sym(dt_cols$x), y = c(!!!map(names(mutate_quos), sym)),
    ...,
    geom_bar(stat = "identity", position = "dodge")
  ) + labs(y = "Deviation from average [\U2030]")
}

# data and calibration plots =========


#' Plot calibration range
#'
#' This function is deprecated, please use \link{iso_plot_data} and \link{iso_mark_calibration_range} instead.
#' @param ... deprecated
#' @export
iso_plot_calibration_range <- function(...) {
  warning("iso_plot_calibration_range was deprecated as part of a major change in treatment of calibration ranges in isoprocessor version 0.3.8. Please use iso_evaluate_calibration_range to calculate calibration ranges for your terms of interest, iso_plot_data to visualize the data, and iso_mark_calibration_range to highlight the calculated calibration ranges visually.", immediate. = TRUE, call. = FALSE)
}

#' Visualize the data
#'
#' General purpose convenience visualization function. Simply add other ggplot components after calling this function to customize more (e.g. with \link[ggplot2]{facet_wrap} or \link[ggplot2]{theme} calls). Make sure to specify \code{lines = TRUE} and/or \code{points = TRUE} to add the lines/points respectively. Accepts multiple y variables in which case they are plotted in a \link[ggplot2]{facet_wrap} with new variables \code{panel} holding the name of the y variable panels, \code{y_value} holding the values and \code{y_error} holding the error values (if \code{y_error} is supplied). Also always generates a new column called \code{variable} that holds the variable names (\code{y}) supplied to this function. All aesthetics parameters expect variables or expressions that are valid in the context of the \code{dt}. For convenience, all aesthetics can also be (re)-named on the fly with \code{c(new_name = expr)} and will include column units in the legend captions by default.
#'
#' @param dt data frame to plot data from
#' @param x the column or expression for the x-axis aesthetic. Can be a numeric, text or datetime column (text and datetime column labels will be automatically rotated by 90 degrees). For clarity of the plot, only one x variable or expression is allowed. Use named vector with \code{c(new_x = x)} to rename the x variable or expression on the fly. By default will show units in the axis names if there are any. If a datetime column is provided for \code{x}, parameters \code{date_breaks} (example: \code{date_breaks = "2 hours"}) and \code{date_labels} can be set to fine-tune the x-axis appearance. See \link[ggplot2]{scale_date} for additional details. Note that \code{x} can also be \code{x = NULL} for single data point plots - in this case the x axis is completely omitted.
#' @param y which columns/expressions to visualize. Combine with \code{c(y1, y2)} or use \link[dplyr]{select} syntax (e.g. \code{starts_with(...)}) to show multiple variables in a \link[ggplot2]{facet_wrap}. Use named vector with \code{c(new_y1 = y1)} to rename variables/expressions on the fly. By default will show units in the axis names if there are any.
#' @param group what to group by, multiple columns allowed (combine with \code{paste(...)}), usually not necessary if groupings are fully defined through other aesthetics
#' @param color variable to use for color aesthetic for the plot or constant value for the point and line color
#' @param fill variable to use for the fill aesthetic of the plot or constant value for the point fill
#' @param shape variable to use for shape aesthetic for the plot or constant value for the point shape
#' @param size variable to use for size aesthetic for the plot or constant value for the points size
#' @param linetype variable to use for linetype aesthetic for the plot or constant value for the line type
#' @param alpha variable to use for the opacity aesthetic for the plot or constant value for the point and line opacity (1 = 100\% opaque, 0 = completely transparent)
#' @param y_error an error column for drawing y error bars - if multiple \code{y} are provided, error needs to point to the same number of columns
#' @param lines whether to plot lines (FALSE by default)
#' @param points whether to plot points (FALSE by default)
#' @param label this is primarily of use for turning the generated ggplots into interactive plots via \code{\link[plotly]{ggplotly}} as the \code{label} will be rendered as an additional mousover label.
#' @param panel whether to panel the data by anything. If using a single parameter (e.g. \code{panel = panel}), will generate a \link[ggplot2]{facet_wrap}. If using a formula (e.g. \code{panel = panel ~ .} or \code{panel = file_id ~ panel}), will generate a \link[ggplot2]{facet_grid}. The default for this parameter is to panel via facet grid by the y variable name but only if multiple \code{y} columns are provided. Otherwise will not generate any facets. If additional facet parameters are desired, please leave use \link[ggplot2]{facet_wrap} and \link[ggplot2]{facet_grid} diretly.
#' @param panel_scales the \code{scales} parameter for the facets (if any are used)
#' @param date_breaks what breaks to use for the x axis if it is a datetime
#' @param date_labels datetime label pattern for x axis if it is a datetime
#' @param ... additional ggplot objects (e.g. \code{geom_smooth()}) that should be added to the plot PRIOR to the automatically generated layers for error bars, points and lines. Notet that to add geoms on top, please use regular \code{iso_plot_data() + geom_smooth() + ...} syntax instead.
#' @family plot functions
#' @export
iso_plot_data <- function(
  # aesthetics
  dt, x, y,
  # additional geom
  ...,
  # optional arguments
  y_error = NULL, group = NULL,
  color = NULL, fill = NULL, shape = NULL, size = 4,
  linetype = NULL, alpha = NULL, label = NULL,
  # panels
  panel = panel ~ ., panel_scales = "free_y",
  # geoms
  lines = FALSE, points = FALSE,
  # styling
  date_breaks = NULL, date_labels = "%d %b %H:%M") {

  # safety checks
  if (missing(dt)) stop("no data table supplied", call. = FALSE)
  if (nrow(dt) == 0) stop("the provided data table has no data to plot (0 rows)", call. = FALSE)
  if (missing(x)) stop("have to provide an x variable or expression to plot", call. = FALSE)
  if (missing(y)) stop("have to provide at least one y variable or expression to plot", call. = FALSE)

  #check additional geoms for issues
  add_geom_quos <- rlang::enquos(...)
  add_geom_quos <- add_geom_quos[!purrr::map_lgl(add_geom_quos, rlang::quo_is_null)]
  add_geoms <- map(add_geom_quos, rlang::eval_tidy)
  add_geom_quos <- add_geom_quos[!purrr::map_lgl(add_geoms, is.null)]
  add_geoms <- add_geoms[!purrr::map_lgl(add_geoms, is.null)]
  if (!all(good <- purrr::map_lgl(add_geoms, ~is(.x, "Layer")))) {
    dot_exprs <- purrr::map_chr(add_geom_quos[!good], rlang::quo_text)
    dot_names <- names(add_geoms[!good])
    if (!is.null(dot_names))
      dot_info <- ifelse(nchar(dot_names) > 0, sprintf("%s='%s'", dot_names, dot_exprs), sprintf("'%s'", dot_exprs))
    else
      dot_info <- sprintf("'%s'", dot_exprs)
    glue::glue(
      "ignoring unrecognized parameter(s) {paste(dot_info, collapse = ', ')}. ",
      "Can only add geoms as aditional parameters.") %>%
      warning(call. = FALSE, immediate. = TRUE)
    add_geoms <- add_geoms[good]
  }
  if (!lines && !points && length(add_geoms) == 0) {
    warning("no automatic geoms (points or lines) included, plot will be blank", immediate. = TRUE, call. = FALSE)
  }

  # info
  panel_missing <- missing(panel)

  # all quos
  all_quos <-
    list(x = enquo(x), y = enquo(y), y_error = enquo(y_error),
         group = enquo(group), panel = enquo(panel),
         color = enquo(color), fill = enquo(fill), shape = enquo(shape),
         linetype = enquo(linetype), alpha = enquo(alpha), size = enquo(size),
         label = enquo(label))
  mutate_quos <- map(all_quos, aesthetics_quo_to_mutate_quos)

  # evaluate x and y quos in context of column selections
  # if they are valid vars_select expressions, use the result
  # exception: assumes that the - operator is intended for its numerical
  # purpose, not as a column position operator (which tends to lead to
  # outcomes not expected by the user)
  if (!"-" %in% get_call_operations(all_quos$x)) {
    x_quo <- purrr::safely(tidyselect::vars_select)(names(dt), !!all_quos$x)
    if (is.null(x_quo$error)) mutate_quos$x <- quos(!!!map(x_quo$result, sym))
  }
  if (!"-" %in% get_call_operations(all_quos$y)) {
    y_quo <- purrr::safely(tidyselect::vars_select)(names(dt), !!all_quos$y)
    if (is.null(y_quo$error)) mutate_quos$y <- quos(!!!map(y_quo$result, sym))
  }

  # parse quos
  quos_lengths <- map(mutate_quos, length)
  quos_cols <- map(mutate_quos, names)
  quos_manual <- map2(mutate_quos, quos_lengths, ~all(quos_are_unnamed_values(.x)) && .y == 1L)
  quos_values <- map2(mutate_quos, quos_manual, ~if(.y) { rlang::eval_tidy(.x[[1]]) } else { NULL })
  x_y_mutate_quos <- unlist(unname(mutate_quos[c("x", "y", "y_error")]))
  other_mutate_quos <- unlist(unname(mutate_quos[!names(mutate_quos) %in% c("x", "y", "y_error", "panel") & !map_lgl(quos_manual, identity)]))

  # check lengths
  multi <- quos_lengths[map_int(quos_lengths, identity) > 1]
  multi <- multi[!names(multi) %in% c("y", "y_error")]
  if (length(multi) > 0) {
    glue::glue(
      "encountered multiple variables or expressions for aesthetic(s) that only ",
      "support a single variable/expression:\n - ",
      sprintf("%s = '%s'", names(multi), map_chr(all_quos[names(multi)], rlang::as_label)) %>%
        paste(collapse = "\n - ")
    ) %>% stop(call. = FALSE)
  }

  # double check same number of ys and errors
  if (quos_lengths$y_error > 0 && quos_lengths$y != quos_lengths$y_error )
    glue(
      "not the same number of y ",
      "('{paste(rlang::as_label(all_quos$y), collapse = \", \")}') ",
      "and y_error columns ",
      "('{paste(rlang::as_label(all_quos$y_error), collapse = \", \")}'}) ",
      "provided") %>%
    stop(call. = FALSE)

  # null x
  no_x <- FALSE
  if (quos_lengths$x == 0) {
    no_x <- TRUE
    mutate_quos$x <- quos(no_x = NA_character_)
  }

  # unique row ids for easier joins
  dt$..row_id.. <- 1:nrow(dt)

  # run mutates for x, y, y_error
  dt <- mutate(dplyr::ungroup(dt), !!!x_y_mutate_quos)
  dt_units <- iso_get_units(dt) %>% {ifelse(is.na(.), names(.), sprintf("%s [%s]", names(.), .))}

  # gather data if multiple ys
  if (quos_lengths$y == 1) {
    # add panel
    dt <- mutate(dt, panel = dt_units[[quos_cols$y]])
  } else if (quos_lengths$y > 1) {

    # safety check
    if ("panel" %in% names(dt)) {
      warning("existing column 'panel' overwritten in plotting data frame for plotting multiple y-values", immediate. = TRUE, call. = FALSE)
      dt$panel <- NULL
    }
    if ("y_value" %in% names(dt)) {
      warning("existing column 'y-value' overwritten in plotting data frame for plotting multiple y-values", immediate. = TRUE, call. = FALSE)
      dt$y_value <- NULL
    }

    # add y values as panels
    y_value_data <- gather(iso_strip_units(dt), panel, y_value, !!!map(quos_cols$y, sym)) %>%
      select(..row_id.., panel, y_value)
    dt <- left_join(dt, y_value_data, by = "..row_id..")

    # y errors
    if (quos_lengths$y_error > 0) {
      y_error_data <- gather(iso_strip_units(dt), panel_y_error, y_error, !!!map(quos_cols$y_error, sym))
      dt <-
        dt %>%
        left_join(
          tibble(panel = quos_cols$y, panel_y_error = quos_cols$y_error),
          by = "panel"
        ) %>%
        left_join(
          select(y_error_data, ..row_id.., panel_y_error, y_error),
          by = c("..row_id..", "panel_y_error")
        ) %>%
        select(-panel_y_error)
    }

    # remove missing y values
    dt <- filter(dt, !is.na(y_value))

    # factor panel in the order the variables were provided
    dt <- mutate(dt, panel = factor(panel, levels = quos_cols$y))

    # recode levels to include units (if the columns had them)
    units_recode <- setNames(names(dt_units[quos_cols$y]), as.character(dt_units[quos_cols$y]))
    dt <- mutate(dt, panel = forcats::fct_recode(panel, !!!units_recode))
  }

  # add variable as a synonym for panel
  dt <- mutate(dt, variable = panel)

  # run all other mutates except for panel and update units
  dt <- mutate(dplyr::ungroup(dt), !!!other_mutate_quos)
  dt_units <- iso_get_units(dt) %>% {ifelse(is.na(.), names(.), sprintf("%s [%s]", names(.), .))}

  # strip units if there are any to avoid ggplot warnings
  dt <- iso_strip_units(dt)

  # generate plot
  p <- ggplot(dt) + aes_q(x = sym(quos_cols$x)) +
    labs(x = dt_units[[quos_cols$x]]) +
    theme_bw() +
    theme(plot.margin = ggplot2::margin(10, 5, 15, 20))

  # y aesthetic
  if(quos_lengths$y > 1)
    p <- p + aes_q(y = sym("y_value")) + labs(y = NULL)
  else
    p <- p + aes_q(y = sym(quos_cols$y)) + labs(y = dt_units[[quos_cols$y]])

  # group aesthetic
  if(quos_lengths$group == 1)
    p <- p + aes_q(group = sym(quos_cols$group))

  # color aesthetic
  if(quos_lengths$color == 1 && !quos_manual$color)
    p <- p + aes_q(color = sym(quos_cols$color)) + labs(color = dt_units[[quos_cols$color]])

  # fill aesthetic
  if(quos_lengths$fill == 1 && !quos_manual$fill)
    p <- p + aes_q(fill = sym(quos_cols$fill)) + labs(fill = dt_units[[quos_cols$fill]])

  # shape aesthetic
  if(quos_lengths$shape == 1 && !quos_manual$shape)
    p <- p + aes_q(shape = sym(quos_cols$shape)) + labs(shape = dt_units[[quos_cols$shape]])

  # linetype aesthetic
  if(quos_lengths$linetype == 1 && !quos_manual$linetype)
    p <- p + aes_q(linetype = sym(quos_cols$linetype)) + labs(linetype = dt_units[[quos_cols$linetype]])

  # label aesthetic (for ggplotly)
  if(quos_lengths$label == 1 && !quos_manual$label)
    p <- p + aes_q(label = sym(quos_cols$label))

  # alpha aesthetic
  if(quos_lengths$alpha == 1 && !quos_manual$alpha)
    p <- p + aes_q(alpha = sym(quos_cols$alpha)) + labs(alpha = dt_units[[quos_cols$alpha]])

  # size aesthetic (not universally applied, only to points below)
  if (quos_lengths$size == 1 && !quos_manual$size)
    p <- p + labs(size = dt_units[[quos_cols$size]])

  # paneling
  if ( (panel_missing && quos_lengths$y == 1) || rlang::quo_is_null(all_quos$panel)) {
    # no panel
    aes_cols <- list()
  } else if (rlang::quo_is_symbol(all_quos$panel)) {
    # single symbol --> facet_wrap
    aes_cols <- list(panel = all_quos$panel %>% rlang::quo_squash())
  } else {
    # formula --> facet_grid
    aes_cols <- list(
      panel_rows = all_quos$panel %>% rlang::quo_squash() %>% rlang::f_lhs(),
      panel_cols = all_quos$panel %>% rlang::quo_squash() %>% rlang::f_rhs()
    )
  }
  if (length(aes_cols) > 0) {
    if ("panel" %in% names(aes_cols))
      # facet wrap
      p <- p + facet_wrap(rlang::new_formula(NULL, aes_cols$panel), scales = panel_scales)
    else
      # facet grid
      p <- p + facet_grid(rlang::new_formula(aes_cols$panel_rows, aes_cols$panel_cols), scales = panel_scales, switch = "y")
  }

  # x axis
  x <- dt[[quos_cols$x]]
  if (no_x) {
    p <- p + theme(panel.grid.major.x = element_blank(),
                   axis.ticks.x = element_blank(),
                   axis.text.x = element_blank()) +
      labs(x = NULL)
  } else if (is(x, "POSIXct")) {
    if (is.null(date_breaks)) date_breaks <- ggplot2::waiver()
    if (is.null(date_labels)) date_labels <- ggplot2::waiver()
    p <- p +
      scale_x_datetime(date_breaks = date_breaks, date_labels = date_labels) +
      theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
      labs(x = "")
  } else if (is.character(x) || is.factor(x)) {
    # just rotate x axis labels
    p <- p + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
  }

  # other layers
  p <- p + add_geoms

  # y error bars
  if (quos_lengths$y_error == 1) {
    # single y value
    p <- p + geom_errorbar(
      data = function(df) filter(df, !is.na(!!sym(quos_cols$y_error))),
      mapping = aes(
        ymin = !!sym(quos_cols$y) - !!sym(quos_cols$y_error),
        ymax = !!sym(quos_cols$y) + !!sym(quos_cols$y_error)),
      width = 0)
  } else if (quos_lengths$y_error > 1) {
    # multiple y values in panel
    p <- p + geom_errorbar(
      data = function(df) filter(df, !is.na(y_error)),
      mapping = aes(ymin = y_value - y_error, ymax = y_value + y_error),
      width = 0)
  }

  # lines and points
  if (lines) {
    p <- p +
      if (quos_manual$alpha) geom_line(alpha = quos_values$alpha) else geom_line()
  }
  if (points) {
    # figure out all the aesthetics
    manual_quos <- c()
    aes_quos <- c()
    if (quos_lengths$size == 1L) {
      if (quos_manual$size)
        manual_quos <- c(manual_quos, quos(size = !!quos_values$size))
      else
        aes_quos <- c(aes_quos, quos(size = !!sym(quos_cols$size)))
    }
    if (quos_manual$alpha)
      manual_quos <- c(manual_quos, quos(alpha = !!quos_values$alpha))
    if (quos_manual$color)
      manual_quos <- c(manual_quos, quos(color = !!quos_values$color))
    if (quos_manual$fill)
      manual_quos <- c(manual_quos, quos(fill = !!quos_values$fill))
    if (quos_manual$shape)
      manual_quos <- c(manual_quos, quos(shape = !!quos_values$shape))
    # create geom_point
    if (length(manual_quos) > 0 && length(aes_quos) > 0) {
      point_quo <- quo(geom_point(mapping = aes(!!!aes_quos), !!!manual_quos))
    } else if (length(aes_quos) > 0)
      point_quo <- quo(geom_point(mapping = aes(!!!aes_quos)))
    else if (length(manual_quos) > 0)
      point_quo <- quo(geom_point(!!!manual_quos))
    else
      point_quo <- quo(geom_point())
    p <- p + rlang::eval_tidy(point_quo)
  }

  return(p)
}

#' Visualize regression residuals
#'
#' Convenience function to visualize the residuals from calibrations generated by \code{\link{iso_generate_calibration}}. Uses \link{iso_plot_data} internally and additional parameters (\code{...}) can be passed to \link{iso_plot_data}.
#'
#' @inheritParams iso_plot_data
#' @inheritParams iso_prepare_for_calibration
#' @param dt data frame to plot data from
#' @param trendlines whether to add trendlines to the residuals plot to highlight patterns more easily. To customize more, set \code{trendlines=FALSE} and provide a manual \code{geom_smooth()} in the \code{...} of this function.
#' @param value_ranges whether to add value ranges (via \link{iso_mark_value_range}) to the plot to highlight residuals variation. To customize more, set \code{value_ranges=FALSE} and add ranges back in using \link{iso_mark_value_range} directly.
#' @param ... additional parameters passed to \link{iso_plot_data}
#' @family plot functions
#' @export
iso_plot_residuals <- function(
  dt, x, y = calibration_residual(),
  calibration = last_calibration(dt),
  color = calibration_model_name(),
  panel = calibration_model_name(),
  points = TRUE,
  trendlines = TRUE,
  value_ranges = TRUE,
  ...) {

  # safety checks
  if (missing(dt)) stop("no data table supplied", call. = FALSE)
  if (missing(x)) stop("no x axis aesthetic specified", call. = FALSE)

  # check for model parameters
  calib_vars <- get_calibration_vars(calibration)
  check_calibration_cols(!!enquo(dt), calib_vars$model_params)

  # get calibration vars symbols
  calibration_model_name <- function() calib_vars$model_name
  calibration_residual <- function() calib_vars$residual
  eval_calibration_var <- function(pquo) {
    if (rlang::quo_is_call(pquo) && rlang::call_name(pquo) == "calibration_model_name")
      sym(calibration_model_name())
    else if (rlang::quo_is_call(pquo) && rlang::call_name(pquo) == "calibration_residual")
      sym(calibration_residual())
    else pquo
  }
  y_quo <- enquo(y) %>% eval_calibration_var()
  color_quo <- enquo(color) %>% eval_calibration_var()
  panel_quo <- enquo(panel) %>% eval_calibration_var()

  p <- dt %>%
    # fetch peak table with the added residuals from the calibration
    iso_get_calibration_data(quiet = TRUE) %>%
    filter(!!sym(calib_vars$in_reg)) %>%
    mutate(!!sym(calib_vars$model_name) :=
             if (is.factor(!!sym(calib_vars$model_name))) !!sym(calib_vars$model_name)
             else forcats::as_factor(!!sym(calib_vars$model_name)) %>% forcats::fct_inorder()
    ) %>%
    # visualize
    iso_plot_data(
      x = !!enquo(x), y = !!y_quo,
      color = !!color_quo,
      points = points,
      panel = !!panel_quo, panel_scales = "fixed",
      ...,
      geom_hline(yintercept = 0, color = "black", size = 1, linetype = 2),
      if (trendlines) geom_smooth(method = "loess", se = FALSE)
    ) +
    theme(legend.position = "bottom", legend.direction = "vertical")

  if (value_ranges)
    p <- iso_mark_value_range(p, mean = FALSE, plus_minus_sd = 1)

  return(p)
}

#' Visualize calibration parameters
#'
#' Convenience function to visualize the residuals from calibrations generated by \code{\link{iso_generate_calibration}}. Uses \link{iso_plot_data} internally and additional parameters (\code{...}) can be passed to \link{iso_plot_data}.
#'
#' @inheritParams iso_plot_data
#' @inheritParams iso_prepare_for_calibration
#' @param select_from_summary which parameters from the fit summary to include, by default includes the adjusted R2 (renamed just \code{R2}) and the residual standard deviation (\code{RSD}), which R often calls \link[stats]{sigma} (sometimes also called residual mean standard deviation, residual standard error, root mean square error, or standard error of the regression).
#' @export
iso_plot_calibration_parameters <- function(
  dt, ..., x = calibration_model_name(), calibration = last_calibration(dt),
  select_from_summary = c(R2 = adj.r.squared, RSD = sigma),
  color = signif,
  panel = term ~ .,
  panel_scales = "free",
  points = TRUE) {

  # safety checks
  if (missing(dt)) stop("no data table supplied", call. = FALSE)

  # check for model parameters
  calib_vars <- get_calibration_vars(calibration)
  check_calibration_cols(!!enquo(dt), calib_vars$model_params)

  # pull out all coefficients (all for now, should always show all?)
  calib_coefs <- dt %>%
    iso_get_calibration_coefficients(calibration = calibration, quiet = TRUE)

  # pull out requested summary
  select_quo <- enquo(select_from_summary)
  calib_summary <- dt %>%
    iso_get_calibration_summary(calibration = calibration, quiet = TRUE)
  cs_cols <- get_column_names(calib_summary, select = select_quo, n_reqs = list(select = "*"))

  # check if any summary should be included
  if (length(cs_cols$select) > 0) {
    calib_summary <- calib_summary %>%
      rename(!!!map(cs_cols$select, sym)) %>%
      gather(term, estimate, !!!map(names(cs_cols$select), sym))
    visualization_data <-
      vctrs::vec_rbind(calib_coefs, calib_summary)

    # make signif available for use
    if ("signif" %in% names(visualization_data))
      visualization_data <- visualization_data %>%
      mutate(signif = ifelse(is.na(signif), "NA", signif))

  } else {
    visualization_data <- calib_coefs
  }

  # clean up visualization data
  visualization_data <- visualization_data %>%
    mutate(term = as_factor(term)) %>%
    filter(!is.na(estimate))

  # quos
  calibration_model_name <- function() calib_vars$model_name
  eval_calibration_var <- function(pquo) {
    if (rlang::quo_is_call(pquo) && rlang::call_name(pquo) == "calibration_model_name")
      sym(calibration_model_name())
    else pquo
  }
  x_quo <- enquo(x) %>% eval_calibration_var()
  panel_quo <- enquo(panel) %>% eval_calibration_var()

  # plot
  iso_plot_data(
    visualization_data,
    x = !!x_quo, y = estimate, y_error = std.error,
    points = points,
    color = !!enquo(color),
    panel = !!panel_quo,
    panel_scales = panel_scales,
    ...
  ) + labs(y = NULL)

}

# plot modifications ======

#' Visualize x range
#'
#' This is a convenience function to visualize x ranges with gray vertical boxes in a plot (typically one generated by \code{\link{iso_plot_data}}) by specifying a simple filter condition. Considers the facet variables (if any are set) to evaluate the condition within each panel.
#'
#' @param p a \link[ggplot2]{ggplot} object, typically generated by \code{\link{iso_plot_data}}
#' @param condition any \link[dplyr]{filter} expression to identify x ranges to mark
#' @param color color of the shading boxes
#' @param fill fill of the shading boxes
#' @param alpha alpha of the shading boxes
#' @family plot functions
#' @export
iso_mark_x_range <- function(p, condition, color = NA, fill = "gray", alpha = 0.25) {

  # safety checks
  if (missing(p)) stop("no base plot provided. If piping to this function make sure to use '%>%' instead of '+'.", call. = FALSE)

  # warnings
  condition_quo <- rlang::enquo(condition)
  if (rlang::quo_is_missing(condition_quo)) {
    stop("the parameter 'condition' is required to determine the x range", call. = FALSE)
  }

  # panel asethetics
  if (!is.null(p$facet$params)) {
    group_quos <- c(p$facet$params$facets, p$facet$params$cols, p$facet$params$rows)
  } else {
    group_quos <- quos()
  }

  # function
  find_x_ranges <- function(df) {
    return(
      dplyr::ungroup(df) %>%
        # sort by x
        dplyr::arrange(!!p$mapping$x) %>%
        # within each panel evaluate what are the range blocks
        dplyr::group_by(!!!unname(group_quos)) %>%
        dplyr::mutate(
          ..in_x_range = !!condition_quo,
          ..change = c(0, diff(.data$..in_x_range)),
          ..group = ifelse(..in_x_range, cumsum(.data$..change == 1), NA_integer_)
        ) %>%
        dplyr::group_by(..group, add = TRUE) %>%
        dplyr::filter(!is.na(..group)) %>%
        dplyr::summarise(
          xmin = min(!!p$mapping$x, na.rm = TRUE),
          xmax = max(!!p$mapping$x, na.rm = TRUE),
          ymin = -Inf,
          ymax = Inf
        )
    )
  }

  # generate plot
  p$layers <-
    c(
      geom_rect(
        data = find_x_ranges,
        mapping = aes(x = NULL, y = NULL,
                      xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax,
                      color = NULL, shape = NULL, fill = NULL, alpha = NULL,
                      size = NULL, linetype = NULL),
        color = color, fill = fill, alpha = alpha,
        show.legend = FALSE
      ),
      p$layers
    )

  return(p)
}

#' Visualize value range
#'
#' This is a convenience function to visualize value ranges (mean +/- std. deviation) with horizontal lines in a plot, typically one generated by \code{\link{iso_plot_data}}. Considers aesthetics color and group as well as the facet variables (if any are set) to calculate averages within each panel, color and group. This leaves the aesthetics fill and shape to be used for other purposes in displaying data points.
#'
#' @param p a \link[ggplot2]{ggplot} object, typically generated by \code{\link{iso_plot_data}}
#' @param mean logical (default TRUE), whether to show a line for the value mean
#' @param plus_minus_value which fixed value intervals to show (mean +/- fixed), by default none are included.
#' @param plus_minus_sd which standard deviation intervals to show (mean +/- sd), by default +/- 1 and +/-2 population std. deviations. To omit these intervals, set \code{sd = c()}.
#' @param sd renamed to the more descriptive \code{plus_minus_sd}
#' @param color use to override inherited color aesthetics with a fixed value and thus create value ranges that average across all data points in a panel (for this to work, make sure the group aesthetic of the parent plot does NOT include the color aesthetic). Has to be a valid color string, e.g. \code{"black"} or \code{"#ffab05"}. If omitted or \code{color = NULL}, the color aesthetic from the plot is inherited (the default behavior).
#' @family plot functions
#' @export
iso_mark_value_range <- function(p, mean = TRUE, plus_minus_value = c(), plus_minus_sd = sd, sd = 1:2, color = NULL) {

  # safety checks
  if (missing(p)) stop("no base plot provided. If piping to this function make sure to use '%>%' instead of '+'.", call. = FALSE)
  if (length(sd) > 0 && !is.numeric(sd)) stop("the 'sd' parameter must be numeric", call. = FALSE)
  color_expr <- rlang::enexpr(color)
  if (!rlang::is_missing(color_expr) && !rlang::is_syntactic_literal(color_expr))
    stop("if set, the 'color' parameter for value ranges must be a single fixed color (e.g.\"black\")", call. = FALSE)
  color <- rlang::eval_tidy(color_expr)
  if (!is.null(color) && (length(color) != 1L || !is.character(color)))
    stop("if set, the 'color' parameter for value ranges must be a single fixed color (e.g. \"black\")", call. = FALSE)

  # warnings
  if (!missing(sd)) {
    warning("the parameter 'sd' has been renamed to the more descriptive 'plus_minus_sd'. Please use 'plus_minus_sd' to avoid this warning.", immediate. = TRUE, call. = FALSE)
  }

  # find relevant active aesthetics
  mark_mean <- mean
  mutate_quos <- rlang::quos(colour = 1, group = 1)
  if (is.null(color))
    active_aes <- c("colour", "group") %>% intersect(names(p$mapping))
  else
    active_aes <- c("group") %>% intersect(names(p$mapping))
  mutate_quos[active_aes] <- map(active_aes, ~p$mapping[[.x]])
  group_quos <- quos(colour, group)

  # panel asethetics
  if (!is.null(p$facet$params)) {
    group_quos <- c(group_quos, p$facet$params$facets, p$facet$params$cols, p$facet$params$rows)
  }

  # make plus_minus_value quos
  value_quos <- c(
    map(plus_minus_value, ~quo(mean + (!!.x))) %>%
      setNames(map_chr(plus_minus_value, ~paste0("bar(x) + ", .x))),
    map(plus_minus_value, ~quo(mean - (!!.x))) %>%
      setNames(map_chr(plus_minus_value, ~paste0("bar(x) - ", .x)))
  )

  # make plus_minus_sd quos
  std_devs_quos <- c(
    map(plus_minus_sd, ~quo(mean + (!!.x) * sd)) %>%
      setNames(map_chr(plus_minus_sd, ~paste0("bar(x) + ", .x, "*s"))),
    map(plus_minus_sd, ~quo(mean - (!!.x) * sd)) %>%
      setNames(map_chr(plus_minus_sd, ~paste0("bar(x) - ", .x, "*s")))
  )

  # resulting aes
  if ("colour" %in% active_aes) {
    aes_map <- ggplot2::aes(
      yintercept = y,
      colour = colour, linetype = line,
      fill = NULL, shape = NULL, group = group
    )
  } else {
    aes_map <- ggplot2::aes(
      yintercept = y,
      linetype = line,
      color = NULL, fill = NULL, shape = NULL, group = group
    )
  }

  # add mean and std_devs lines
  calc_ranges <- function(df) {
    dplyr::ungroup(df) %>%
      dplyr::mutate(!!!mutate_quos) %>%
      dplyr::group_by(!!!unname(group_quos)) %>%
      dplyr::summarize(
        mean = base::mean(!!p$mapping$y),
        sd = stats::sd(!!p$mapping$y),
        `bar(x)` = mean,
        !!!value_quos,
        !!!std_devs_quos
      ) %>%
      dplyr::ungroup() %>%
      tidyr::gather(key = "line", value = "y", starts_with("bar")) %>%
      dplyr::filter(!!mark_mean | line != "bar(x)") %>%
      dplyr::mutate(
        line = stringr::str_replace(line, "[+-]", "%+-%") %>% factor() %>% forcats::fct_inorder()
      )
  }

  # add layer
  if (!is.null(color)) {
    new_layer <- geom_hline(data = calc_ranges, mapping = aes_map, color = color)
  } else {
    new_layer <- geom_hline(data = calc_ranges, mapping = aes_map)
  }
  p$layers <- c(new_layer, p$layers)

  # add scale info
  p <- p +
    scale_linetype_manual(
      labels = scales::parse_format(),
      values = if (mark_mean) 1:9 else 2:9
    ) +
    labs(linetype = "value range") +
    theme(legend.text.align = 0)

  return(p)
}

#' Mark outliers
#'
#' This is a convenience function to visually highlight values that fall outside the data range via a flexible \code{condition} statement, \code{plus_minus_value} cutoff from the mean, or \code{plus_minus_sd} standard deviation cutoff from the mean. Optionally with a text label to make it easier to identify the analysis.
#'
#' @param p a \link[ggplot2]{ggplot} object, typically generated by \code{\link{iso_plot_data}}
#' @param condition any \link[dplyr]{filter} expression to identify outliers
#' @param plus_minus_value value cutoff to identify outliers (mean +/- value)
#' @param plus_minus_sd standard deviation cutoff to identify outliers (mean +/- sd)
#' @param label optional expression to add a label to the outlier points
#' @param size size of the outlier points
#' @param shape shape of the outlier points
#' @param color color of the outlier points
#' @param sd_cutoff renamed to the more descriptive \code{plus_minus_sd}
#' @family plot functions
#' @export
iso_mark_outliers <- function(p, condition = NULL, plus_minus_value = NULL, plus_minus_sd = sd_cutoff, sd_cutoff = NULL, label = NULL, size = 2, shape = 16, color = "black") {

  # safety checks
  if (missing(p)) stop("no base plot provided. If piping to this function make sure to use '%>%' instead of '+'.", call. = FALSE)

  # warnings
  if (!missing(sd_cutoff)) {
    warning("the parameter 'sd_cutoff' has been renamed to the more descriptive 'plus_minus_sd'. Please use 'plus_minus_sd' to avoid this warning.", immediate. = TRUE, call. = FALSE)
  }

  # quos
  condition_quo <- rlang::enquo(condition)
  label_quo <- rlang::enquo(label)
  use_condition <- !rlang::quo_is_null(condition_quo)
  use_value <- !is.null(plus_minus_value)
  use_sd <- !is.null(plus_minus_sd)

  # safety checks
  if (use_condition + use_value + use_sd > 1) {
    stop("more than one cutoff defined: please provide only one of 'condition', 'plus_minus_value', or 'plus_minus_sd'", call. = FALSE)
  } else if (use_condition + use_value + use_sd == 0) {
    stop("no cutoff provided: please provide one of 'condition', 'plus_minus_value', or 'plus_minus_sd'", call. = FALSE)
  }

  # find relevant active aesthetics
  active_aes <- c("colour", "group") %>% intersect(names(p$mapping))
  mutate_quos <- rlang::quos(colour = 1, group = 1)
  mutate_quos[active_aes] <- map(active_aes, ~p$mapping[[.x]])
  group_quos <- quos(colour, group)

  # panel asethetics
  if (!is.null(p$facet$params)) {
    group_quos <- c(group_quos, p$facet$params$facets, p$facet$params$cols, p$facet$params$rows)
  }

  # function
  find_outliers <- function(df) {
    if (use_condition) {
      return (dplyr::filter(df, !!condition_quo))
    } else {
      return(
        dplyr::ungroup(df) %>%
          dplyr::mutate(!!!mutate_quos) %>%
          dplyr::group_by(!!!unname(group_quos)) %>%
          iso_identify_outliers(
            y = !!p$mapping$y,
            plus_minus_value = plus_minus_value,
            plus_minus_sd = plus_minus_sd
          ) %>%
          dplyr::ungroup() %>%
          dplyr::filter(is_outlier)
      )
    }
  }

  # generate plot
  p <- p + geom_point(
    data = find_outliers,
    size = size, color = color, shape = shape
  )

  # add label?
  if (!rlang::quo_is_null(label_quo)) {
    p <- p +
      ggrepel::geom_text_repel(
        data = find_outliers,
        mapping = aes_q(label = label_quo), hjust = -0.3, vjust = 0.5,
        min.segment.length = 0, color = color, show.legend = FALSE
      )
  }

  return(p)
}

#' Visualize the calibration range
#'
#' This is a convenience function to visualize the calibration ranges in a plot generated by \code{\link{iso_plot_data}} with a gray area behind the data points. Note that \code{\link{iso_evaluate_calibration_range}} must have been called at an earlier point to establish calibration ranges for different terms. Also note that if one of the chosen dimensions (x or y) is not part of the available calibration range for a panel but the other dimension is, it simply shows two lines bracketing the samples in the dimension that is part of the calibration range.
#' @inheritParams iso_plot_calibration_parameters
#' @param p a plot generated by \code{\link{iso_plot_data}} (usually piped to this function)
#' @family plot functions
#' @export
iso_mark_calibration_range <- function(p, calibration = last_calibration(p$data)) {

  # safety checks
  if (missing(p)) stop("no base plot provided", call. = FALSE)


  # x (only one that needs to come from plot)
  x <- rlang::as_label(p$mapping$x)

  # panel aesthetics
  paneling_cols <- c()
  if (!is.null(p$facet$params)) {
    paneling_cols <- c(p$facet$params$facets, p$facet$params$cols, p$facet$params$rows) %>%
      map(rlang::as_label) %>% unlist()
  }

  # add geom rect for calibration range
  p$layers <-
    c(
      geom_rect(
        data = function(df) prepare_range_marker_data(df, xcol = rlang::as_label(p$mapping$x), paneling_cols = paneling_cols, calibration = calibration),
        mapping = aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax,
                      x = NULL, y = NULL, color = NULL, fill = NULL,
                      linetype = NULL, label = NULL, shape = NULL),
        color = "black", fill = "grey", alpha = 0.3,
        show.legend = FALSE),
      p$layers
    )

  return(p)
}

# helper function to prepare range marker data frame
# @param p the plot
prepare_range_marker_data <- function(plot_data, xcol, paneling_cols = c(), calibration = all_calibrations(plot_data)) {

  # y cols
  ycols <- unique(plot_data$variable)

  # available ranges
  calib_ranges <-
    tibble(
      ..calibration = calibration,
      range = map(..calibration, ~{
        calib_vars <- get_calibration_vars(.x)
        check_calibration_cols(
          plot_data, c(calib_vars$model_name, calib_vars$model_params))
        plot_data[unique(c(paneling_cols, calib_vars$model_params))] %>%
          iso_get_calibration_range(quiet = TRUE) %>% unique()
        })
    ) %>%
    unnest(range) %>%
    unique() %>%
    group_by(!!!map(c(paneling_cols, "term"), sym)) %>%
    mutate(..n_calibs = n(), ..calibs = paste(..calibration, collapse = ", ")) %>%
    ungroup() %>%
    select(-..calibration) %>%
    unique()

  # needed ranges
  req_ranges <- tibble(x = xcol, y = ycols)

  # ranges with and without units
  calib_ranges <-
    vctrs::vec_rbind(
      calib_ranges,
      mutate(calib_ranges, term = ifelse(!is.na(units), sprintf("%s [%s]", term, units), term))
    )

  # x-range
  x_range <-
    calib_ranges %>%
    filter(term %in% xcol)

  # y-range
  y_range <-
    calib_ranges %>%
    filter(term %in% ycols)

  # warning if any are defined multiple ways
  if (any(x_range$..n_calibs > 1) || any(y_range$..n_calibs > 1)) {
    ranges <- vctrs::vec_rbind(x_range, y_range) %>% filter(..n_calibs > 1) %>%
      select(term, ..calibs) %>% unique()
    glue::glue(
      "multiple range constraints from different calibrations exist for:\n - ",
      with(ranges, sprintf("'%s' from calibrations: %s", term, ..calibs)) %>%
        paste(collapse = "\n - "),
      "\nPlease specify which calibration range constraints to use via the 'calibration' parameter."
    ) %>% warning(immediate. = TRUE, call. = FALSE)
  }

  # resulting data frame
  x_range <- select(x_range, !!!paneling_cols, x = term, xmin = min, xmax = max)
  y_range <- select(y_range, !!!paneling_cols, y = term, ymin = min, ymax = max)
  if (length(paneling_cols) > 0) {
    ranges <- full_join(x_range, y_range, by = paneling_cols)
  } else {
    ranges <- crossing(x_range, y_range)
  }

  ranges <- ranges %>%
    filter(!(is.na(xmin) & is.na(xmax) & is.na(ymin) & is.na(ymax))) %>%
    mutate(
      xmin = ifelse(is.na(xmin), -Inf, xmin),
      xmax = ifelse(is.na(xmax), Inf, xmax),
      ymin = ifelse(is.na(ymin), -Inf, ymin),
      ymax = ifelse(is.na(ymax), Inf, ymax)
    )

  # if 'panel' part of the faceting, make sure to filter by it to
  # have the ranges highlighted only in the relevant panels
  if ("panel" %in% paneling_cols) {
    ranges <- ranges %>%
      filter(!is.na(x) & panel == x | !is.na(y) & panel == y)
  }

  return(ranges)
}
KopfLab/isoprocessorCUB documentation built on Nov. 8, 2021, 9:54 a.m.