#' Threshold estimation for peak detection
#'
#' Estimates the threshold value for peak detection on an [nmr_dataset_1D] object by examining
#' a range without peaks, by default the 9.5 - 10 ppm range.
#'
#' Two methods can be used:
#'
#' - "mean3sd": The mean3sd method computes the mean and the standard deviation of each spectrum
#' in the 9.5 - 10 ppm range. The mean spectrum and the mean standard deviation are both vectors
#' of length equal to the number of points in the given range. The mean of the mean spectrum
# and the mean of the standard deviations are used to summarize the center and dispersion of
#' the noise. The threshold is defined as `center + 3 dispersion`, and it is one single threshold
#' for the whole dataset. This is the default for backwards compatibility.
#'
#' - "median3mad": First we take the data matrix. If we have estimated a baseline already,
#' subtract it. In the defined region without peaks, estimate the median of each sample and
#' its median absolute deviation. Return a vector of length equal to the number of samples
#' with the `median+3mad` for each sample. This is a new more robust method.
#'
#' @family peak detection functions
#' @param nmr_dataset An [nmr_dataset_1D].
#' @param method Either "mean3sd" or the more robust "median3mad". See the details.
#' @param range_without_peaks A vector with two doubles describing a range without peaks suitable for baseline detection
#' @return Numerical. A threshold value in intensity below that no peak is detected.
#' @export
#' @examples
#' ppm_axis <- seq(from = 0, to = 10, length.out = 1000)
#' data_1r <- matrix(runif(1000, 0, 10), nrow = 1) + 100
#' dataset_1D <- new_nmr_dataset_1D(
#' ppm_axis = ppm_axis,
#' data_1r = data_1r,
#' metadata = list(external=data.frame(NMRExperiment = "10"))
#' )
#' bl_threshold <- nmr_baseline_threshold(dataset_1D, range_without_peaks = c(9.5,10))
#'
nmr_baseline_threshold <- function(nmr_dataset, range_without_peaks = c(9.5, 10), method = c("mean3sd", "median3mad")) {
# FIXME: Maybe a whole baseline would be better, so we can cope with slowly changing baselines better
method <- match.arg(method)
if (length(range_without_peaks) != 2) {
rlang::abort("range_without_peaks must have length 2")
}
r_start <- min(range_without_peaks)
r_end <- max(range_without_peaks)
threshold_ind <- nmr_dataset$axis >= r_start & nmr_dataset$axis < r_end
if (sum(threshold_ind) < 10) {
rlang::abort(
message = c(
"Can't estimate a baseline threshold reliably",
"i" = glue("The selected range_without_peaks [{r_start},{r_end}] ppm contains {sum(threshold_ind)} points."),
"i" = glue("Either change the interpolation axis limits or choose a different range here")
)
)
}
if (method == "mean3sd") {
if (nmr_dataset$num_samples > 1) {
cent <- mean(apply(nmr_dataset$data_1r[, threshold_ind, drop = FALSE], 2, mean))
disp <- mean(apply(nmr_dataset$data_1r[, threshold_ind, drop = FALSE], 2, stats::sd))
} else {
cent <- mean(as.numeric(nmr_dataset$data_1r[, threshold_ind]))
disp <- stats::sd(as.numeric(nmr_dataset$data_1r[, threshold_ind]))
}
return(cent + 3 * disp)
} else if (method == "median3mad") {
out <- rep(NA_real_, nmr_dataset$num_samples)
for (i in seq_len(nmr_dataset$num_samples)) {
if ("data_1r_baseline" %in% names(unclass(nmr_dataset))) {
spec_region <- nmr_dataset$data_1r[i, threshold_ind, drop = FALSE] - nmr_dataset$data_1r_baseline[i, threshold_ind]
} else {
spec_region <- nmr_dataset$data_1r[i, threshold_ind, drop = FALSE]
}
out[i] <- stats::median(spec_region) + 3 * stats::mad(spec_region)
}
names(out) <- names(nmr_dataset)
return(out)
} else {
stop("Unexpected method", method)
}
}
#' Plot the baseline thresholds
#'
#' If you have a lot of samples you can make the plot window bigger (or
#' use "` ```{r fig.height=10, fig.width=10}`" in notebooks), or choose some NMRExperiments.
#'
#' @inheritParams plot.nmr_dataset_1D
#' @param nmr_dataset An [nmr_dataset_1D] object
#' @param thresholds A named vector. The values are baseline thresholds. The names are NMRExperiments.
#' @param NMRExperiment The NMRExperiments to plot (Use `"all"` to plot all of them)
#' @param chemshift_range The range to plot, as a first check use the `range_without_peaks` from [nmr_baseline_threshold]
#'
#' @return A plot.
#' @export
#' @examples
#'
#' ppm_axis <- seq(from = 0, to = 10, length.out = 1000)
#' data_1r <- matrix(runif(1000, 0, 10), nrow = 1) + 100
#' dataset_1D <- new_nmr_dataset_1D(
#' ppm_axis = ppm_axis,
#' data_1r = data_1r,
#' metadata = list(external=data.frame(NMRExperiment = "10"))
#' )
#' bl_threshold <- nmr_baseline_threshold(dataset_1D, range_without_peaks = c(9.5,10))
#' baselineThresh <- nmr_baseline_threshold(dataset_1D)
#' nmr_baseline_threshold_plot(dataset_1D, bl_threshold)
nmr_baseline_threshold_plot <- function(nmr_dataset, thresholds, NMRExperiment = "all", chemshift_range = c(9.5, 10), ...) {
if (is.null(NMRExperiment)) {
if (nmr_dataset$num_samples > 20) {
NMRExperiment <- sample(names(nmr_dataset), size = 10)
} else {
NMRExperiment <- names(nmr_dataset)
}
} else if (identical(NMRExperiment, "all")) {
NMRExperiment <- names(nmr_dataset)
}
if (length(thresholds) == 1L) {
thresholds <- rep(thresholds, length = length(NMRExperiment))
names(thresholds) <- NMRExperiment
}
if (!identical(NMRExperiment, "all")) {
thresholds <- thresholds[NMRExperiment]
}
is_aes_string <- is_using_aes_string(...)
if (is_aes_string) {
cli::cli_warn(
c(
"!" = "Passing aes_string arguments to nmr_baseline_threshold_plot(nmr_dataset, ...) is deprecated.",
"i" = "Please pass aes arguments instead"
),
.frequency = "regularly",
.frequency_id = "nmr_baseline_threshold_plot_plotting_with_aes_string",
)
aes_str <- as.character(list(...))
columns_to_request <- c("NMRExperiment", get_vars_from_aes_string(aes_str))
} else {
columns_to_request <- c("NMRExperiment", get_vars_from_aes(...))
}
tidy_data <- tidy_spectra_baseline_and_threshold(
dataset = nmr_dataset,
thresholds = thresholds,
chemshift_range = chemshift_range,
NMRExperiment = NMRExperiment,
columns = columns_to_request
)
to_plot <- tidy_data$spectra
to_plot_baseline <- tidy_data$baselines
to_plot_threshold <- tidy_data$thresholds
ymax <- 1.5 * max(to_plot_threshold$intensity)
if (is_aes_string) {
return(
nmr_baseline_threshold_plot_aes_string(
to_plot,
to_plot_baseline,
to_plot_threshold,
chemshift_range,
ymax,
NMRExperiment,
...
)
)
}
dots_aes_args <- prepare_aes(...)
gplt <- ggplot2::ggplot() +
# The spectra:
ggplot2::geom_line(mapping = ggplot2::aes(!!!dots_aes_args), data = to_plot)
if (!is.null(to_plot_baseline)) {
# The baseline:
gplt <- gplt +
ggplot2::geom_line(
mapping = ggplot2::aes(!!!dots_aes_args),
data = to_plot_baseline,
linetype = "dashed"
)
}
gplt <- gplt +
# The threshold
ggplot2::geom_line(
mapping = ggplot2::aes(!!!dots_aes_args),
data = to_plot_threshold,
linetype = "dashed",
color = "black"
) +
# Other plotting options
ggplot2::labs(x = "Chemical Shift (ppm)", y = "Intensity (a.u.)") +
ggplot2::scale_x_reverse(limits = rev(chemshift_range[seq_len(2)])) +
ggplot2::scale_y_continuous(labels = scales::label_number(scale_cut = scales::cut_si("")), limits = c(0, ymax)) +
ggplot2::facet_wrap(~ factor(NMRExperiment, levels = unique(NMRExperiment))) +
ggplot2::theme(legend.position = "none")
gplt
}
# deprecated
nmr_baseline_threshold_plot_aes_string <- function(to_plot, to_plot_baseline, to_plot_threshold, chemshift_range, ymax, NMRExperiment, ...) {
dotdotdot_aes <- list(...)
fixed_aes <- list(
x = "chemshift",
y = "intensity",
group = "NMRExperiment"
)
all_aes <- c(fixed_aes, dotdotdot_aes)
if (!"color" %in% names(all_aes) && !"colour" %in% names(all_aes)) {
all_aes <- c(all_aes, list(color = "NMRExperiment"))
}
gplt <- ggplot2::ggplot() +
# The spectra:
ggplot2::geom_line(mapping = do.call(ggplot2::aes_string, all_aes), data = to_plot)
if (!is.null(to_plot_baseline)) {
# The baseline:
gplt <- gplt + ggplot2::geom_line(mapping = do.call(ggplot2::aes_string, all_aes), data = to_plot_baseline, linetype = "dashed")
}
gplt <- gplt +
# The threshold
ggplot2::geom_line(mapping = do.call(ggplot2::aes_string, all_aes), data = to_plot_threshold, linetype = "dashed", color = "black") +
# Other plotting options
ggplot2::labs(x = "Chemical Shift (ppm)", y = "Intensity (a.u.)") +
ggplot2::scale_x_reverse(limits = rev(chemshift_range[seq_len(2)])) +
ggplot2::scale_y_continuous(labels = scales::label_number(scale_cut = scales::cut_si("")), limits = c(0, ymax)) +
ggplot2::facet_wrap(~ factor(NMRExperiment, levels = unique(NMRExperiment))) +
ggplot2::theme(legend.position = "none")
gplt
}
tidy_spectra_baseline_and_threshold <- function(dataset, thresholds, chemshift_range, NMRExperiment, columns = character(0L)) {
to_plot <- tidy(
dataset,
chemshift_range = chemshift_range,
NMRExperiment = NMRExperiment,
columns = columns,
matrix_name = "data_1r"
)
if ("data_1r_baseline" %in% names(unclass(dataset))) {
to_plot_baseline <- tidy(
dataset,
chemshift_range = chemshift_range,
NMRExperiment = NMRExperiment,
columns = columns,
matrix_name = "data_1r_baseline"
)
if (is.null(thresholds)) {
to_plot_threshold <- NULL
} else {
to_plot_threshold <- dplyr::left_join(
to_plot_baseline,
tibble::enframe(
thresholds,
name = "NMRExperiment",
value = "threshold"
),
by = "NMRExperiment"
)
to_plot_threshold$intensity <- to_plot_threshold$intensity + to_plot_threshold$threshold
}
} else {
to_plot_baseline <- NULL
if (is.null(thresholds)) {
to_plot_threshold <- NULL
} else {
to_plot_threshold <- dplyr::left_join(
to_plot,
tibble::enframe(
thresholds,
name = "NMRExperiment",
value = "threshold"
),
by = "NMRExperiment"
)
to_plot_threshold$intensity <- to_plot_threshold$threshold
}
}
list(
spectra = to_plot,
baselines = to_plot_baseline,
thresholds = to_plot_threshold
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.