################################
#
# Extract Peaks
#
################################
# purpose: Extract a target peak using an a priori diameter
# author: Lukas Muenter
# date: 29.07.2021
## analyses ===================
#' Get target peaks per group
#'
#' A wrapper around `coulteR::detect_peaks`. Per group in a dataframe with identifiers, peaks are reported.
#' @param df `dataframe` of coerced measurements-`modules` as produced by `coulteR::bulk_read`.
#' @param diameter The expected diameter of particles
#' @param full_df Should all peaks be reported? default = `FALSE`
#' @importFrom dplyr group_by
#' @importFrom dplyr group_split
#' @return A `dataframe`.
#' @export
bulk_peak_detect = function(df, diameter, full_df = FALSE){
if(!is.data.frame(df)){
stop("Error: argument `df` is no dataframe. Did you coerce your modules when using `coulter::bulk_read`?")
}
df %>%
group_by(sample) %>%
group_split() %>%
lapply(peak_detect, diameter = diameter, full_df = full_df) %>%
do.call("rbind", .)
}
#' Detect target peaks
#'
#' This function detects a specific target peak. Optionally, all peaks are reported.
#' @param df A measurements-`module` as produced by `coulteR::read_accucomp`
#' @param diameter The expected diameter of particles
#' @param full_df Should all peaks be reported? default = `FALSE`
#' @return A `dataframe` with the diameter of the peak, the diameter range, and the total number of cells in the range.
#' @importFrom dplyr left_join
#' @importFrom dplyr select
#' @importFrom dplyr filter
#' @importFrom dplyr pull
#' @importFrom dplyr rename
#' @importFrom pracma findpeaks
#' @importFrom Hmisc wtd.mean
#' @importFrom Hmisc wtd.var
#' @export
peak_detect = function(df, diameter, full_df = FALSE){
peaks = df %>%
na.omit() %>%
pull(number.diff) %>%
findpeaks() %>%
as.data.frame() %>%
setNames(c("number.diff", "bin", "bin.start", "bin.end")) %>%
left_join(df)
## identify Target peak
peaks$target = FALSE
peaks$target[get_nearest_peak(peaks$diameter.bin.um, diameter)] = TRUE
## get size ranges of peaks
peaks$d.range.start = df$diameter.bin.um[peaks$bin.start]
peaks$d.range.end = df$diameter.bin.um[peaks$bin.end]
## get weighted mean of peaks
#peaks.wtdmean = wtd.mean(df$diameter.bin.um[peaks$bin.start], weights = df$number.diff[peaks$bin.start])
peaks$d.wtdmean = get_wtd_mean(df, peaks)
## get weighted sd of means
peaks$d.wtdsd = get_wtd_sd(df, peaks)
## get number of cells in distribution
peaks$n.cells = get_number_cells(df, starts = peaks$bin.start, ends = peaks$bin.end)
if(full_df == FALSE){
return(
peaks %>%
filter(target == TRUE) %>%
select(sample, "d.peak" = diameter.bin.um, d.range.start, d.range.end, n.cells, d.wtdmean, d.wtdsd)
)
} else {
return(
peaks %>% rename("d.peak" = diameter.bin.um)
)
}
}
#' Obtain peak nearest to target
#'
#' @param x A numeric vector
#' @param y The target number
#' @return A `number`
get_nearest_peak = function(x, y){
which(abs(x - y) == min(abs(x - y)))
}
#' Get number of cells in range
#' @param df A `measurements`-module
#' @param starts Indices of start bins
#' @param ends Indices of end bins
#' @return A `vector` of summed cells
get_number_cells = function(df, starts,ends){
Map(":", starts, ends) %>%
lapply(function(x,y) sum(df$number.diff[x]),y = df) %>%
do.call("c", .)
}
#' Get weighted means
#' Computes weighted means for all detected peaks
#' @param df A `measurements`-module
#' @param peaks A `peaks` dataframe
#' @return A numeric `vector`
get_wtd_mean = function(df, peaks){
lapply(c(1:nrow(peaks)), function(x,y,z)
wtd.mean(y$diameter.bin.um[c(x$bin.start[z]:x$bin.end[z])],
weights = y$number.diff[c(x$bin.start[z]:x$bin.end[z])]),
x = peaks,
y = df) %>% do.call("c", .)
}
#' Get weighted SDs
#' Computes weighted standard deviations for all detected peaks
#' @param df A `measurements`-module
#' @param peaks A `peaks` dataframe
#' @return A numeric `vector`
get_wtd_sd = function(df, peaks){
lapply(c(1:nrow(peaks)), function(x,y,z)
sqrt(
wtd.var(y$diameter.bin.um[c(x$bin.start[z]:x$bin.end[z])],
weights = y$number.diff[c(x$bin.start[z]:x$bin.end[z])])
),
x = peaks,
y = df) %>% do.call("c", .)
}
#' Plot target tracks
#' Plots tracks of selected samples. Note that file extensions and the character `#` are to be removed from sample names!
#' This function either plots selected samples, or selected indices, or selects and plots 9 randomly selected tracks.
#' @param df A `A dataframe` as generated by `coulteR::bulk_read`, with the parameter `module = "measurements"`
#' @param peaks A `dataframe` as generated by `coulteR::bulk_peak_detect`
#' @param indices Indices of plots to be plotted. Default: `NULL`
#' @param seed Seed for random selection. Default: `123`
#' @param N Number of plots to be randomly selected. Default: 9
#' @param ... Other variables passed on to geom_line
#' @return A `ggplot`-object
#' @import dplyr
#' @import ggplot2
#' @export
ggtracks = function(df, peaks = NULL, samples = NULL, seed = 123, N = 1, show.legend = TRUE){
if(is.null(samples)){
set.seed(seed)
samples.idx = sample(c(1:nrow(peaks)), N, replace = FALSE)
samples = peaks$sample[samples.idx]
}
a = df %>%
filter(sample%in% samples)
p = ggplot() +
geom_line(data = a, aes(x = diameter.bin.um, y = number.diff, color = sample), show.legend = show.legend) +
theme_bw() +
facet_wrap(sample ~ .)
if(is.null(peaks)) {
p
} else {
## filter the peak dataframe
b = peaks %>%
filter(sample %in% samples)
p + geom_rect(data = b, aes(ymin=0, ymax=Inf, xmin=d.range.start, xmax=d.range.end), alpha = 0.1) +
geom_vline(data = b, aes(xintercept = d.wtdmean)) +
geom_vline(data = b, aes(xintercept = d.wtdmean + d.wtdsd), linetype = "dotted") +
geom_vline(data = b, aes(xintercept = d.wtdmean - d.wtdsd), linetype = "dotted")
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.