Nothing
# WARNING - Generated by {fusen} from dev/dereplicate-spectra.Rmd: do not edit by hand
#' Set a reference spectrum for each cluster
#'
#' Define a high-quality spectrum as the representative
#' spectrum of each cluster, based on the highest median signal-to-noise ratio
#' and, in case of ties, the highest number of detected peaks.
#'
#'
#' @param cluster_df A tibble of *n* rows, one per spectrum, produced by the [delineate_with_similarity] function with at least the following columns:
#' * `name`: the rownames of the similarity matrix indicating the spectra names
#' * `membership`: integers stating the cluster number to which each spectrum belongs. It ranges from 1 to _c_, the total number of clusters.
#' * `cluster_size`: integers indicating the total number of spectra in the corresponding cluster.
#' @param metadata_df A tibble of *n* rows, one per spectrum, produced by the [process_spectra] function with the median signal-to-noise ratio (`SNR`), the number of peaks (`peaks`), and the spectra names in the `name` column.
#'
#'
#' @return A merged tibble in the same order as `cluster_df` with both the columns of `cluster_df` and `metadata_df`, as well as a logical column `is_reference` indicating if the spectrum is the reference spectrum of its cluster.
#'
#' @seealso [delineate_with_similarity], [pick_spectra]
#'
#' @export
#' @examples
#' # Get an example directory of six Bruker MALDI Biotyper spectra
#' # Import the six spectra and
#' # Transform the spectra signals according to Strejcek et al. (2018)
#' processed <- system.file(
#' "toy-species-spectra",
#' package = "maldipickr"
#' ) %>%
#' import_biotyper_spectra() %>%
#' process_spectra()
#'
#' # Toy similarity matrix between the six example spectra of
#' # three species. The cosine metric is used and a value of
#' # zero indicates dissimilar spectra and a value of one
#' # indicates identical spectra.
#' cosine_similarity <- matrix(
#' c(
#' 1, 0.79, 0.77, 0.99, 0.98, 0.98,
#' 0.79, 1, 0.98, 0.79, 0.8, 0.8,
#' 0.77, 0.98, 1, 0.77, 0.77, 0.77,
#' 0.99, 0.79, 0.77, 1, 1, 0.99,
#' 0.98, 0.8, 0.77, 1, 1, 1,
#' 0.98, 0.8, 0.77, 0.99, 1, 1
#' ),
#' nrow = 6,
#' dimnames = list(
#' c(
#' "species1_G2", "species2_E11", "species2_E12",
#' "species3_F7", "species3_F8", "species3_F9"
#' ),
#' c(
#' "species1_G2", "species2_E11", "species2_E12",
#' "species3_F7", "species3_F8", "species3_F9"
#' )
#' )
#' )
#' # Delineate clusters based on a 0.92 threshold applied
#' # to the similarity matrix
#' clusters <- delineate_with_similarity(
#' cosine_similarity,
#' threshold = 0.92
#' )
#'
#' # Set reference spectra with the toy example
#' set_reference_spectra(clusters, processed$metadata)
set_reference_spectra <- function(cluster_df, metadata_df) {
  # Input validation -------------------------------------------------------
  # NOTE: stop() pastes its arguments together without any separator, so
  # every message fragment carries its own trailing space.

  # Both tables are expected to describe the same spectra, so they must have
  # the same number of rows.
  if (base::nrow(cluster_df) != base::nrow(metadata_df)) {
    stop(
      "The tibbles do not have the same number of rows! ",
      "Note: if multiple batches are included in 'cluster_df' ",
      "consider combining the multiple associated metadata tables ",
      "using 'dplyr::bind_rows()'."
    )
  }

  # Columns needed to group the spectra by cluster
  cluster_cols <- c("name", "membership", "cluster_size")
  if (!all(cluster_cols %in% colnames(cluster_df))) {
    stop(
      "'cluster_df' lacks one of the following columns: ",
      "name, membership, cluster_size"
    )
  }

  # Columns needed to rank the spectra within each cluster
  metadata_cols <- c("name", "SNR", "peaks")
  if (!all(metadata_cols %in% colnames(metadata_df))) {
    stop(
      "'metadata_df' lacks one of the following columns: ",
      "name, SNR, peaks"
    )
  }

  # Reference selection ----------------------------------------------------
  # Merge the cluster information with the spectra/peaks metadata, rank the
  # spectra within each cluster and flag the best one as the reference.
  # NOTE(review): inner_join() drops rows whose `name` is absent from either
  # table, so the output can have fewer rows than `cluster_df` when the names
  # do not match exactly -- confirm callers guarantee matching names.
  cluster_df %>%
    # Keep track of the original row order via a numeric id
    tibble::rowid_to_column() %>%
    dplyr::inner_join(metadata_df, by = "name") %>%
    # Rank within clusters: highest SNR first, then highest number of peaks
    dplyr::group_by(.data$membership) %>%
    dplyr::arrange(
      dplyr::desc(.data$SNR),
      dplyr::desc(.data$peaks),
      .by_group = TRUE
    ) %>%
    # The first spectrum of each sorted cluster is its reference; comparing
    # names against it yields the logical flag directly
    dplyr::mutate(
      "is_reference" = .data$name == dplyr::first(.data$name)
    ) %>%
    dplyr::ungroup() %>%
    # Restore the original order of `cluster_df` and drop the helper id
    dplyr::arrange(.data$rowid) %>%
    dplyr::select(-c("rowid"))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.