Nothing
# WARNING - Generated by {fusen} from dev/dereplicate-spectra.Rmd: do not edit by hand
#' Import clusters results generated by SPeDE
#'
#' Reformat the table output from the analysis of raw Bruker MALDI Biotyper
#' spectra by the SPeDE tool from Dumolin et al. (2019) to be consistent with
#' the Strejcek et al. (2018) procedure followed in the [maldipickr] package.
#'
#' @details
#' Only the `SOURCE_FILE`, `QUALITY`, `REFERENCE` and `REFERENCE_NUMBER`
#' columns of the SPeDE table are read. Rows without a `REFERENCE_NUMBER`
#' (the trailing "Not matched:" and "Rejected spectra:" lines of the csv) are
#' dropped. The `REFERENCE_NUMBER` is incremented by one to obtain the
#' `membership`, and a spectra is flagged as reference when its `REFERENCE`
#' value is exactly `"Yes"`. The returned tibble is not grouped.
#'
#' @param path Path to the comma separated table generated by SPeDE
#'
#' @return A tibble with the following columns:
#'
#' * `name`: a character denoting the spectra name (all spaces, dashes and dots are replaced by underscores "_" in SPeDE)
#' * `membership`: integers stating the cluster number to which the spectra belong to. It starts from 1 to _c_, the total number of clusters.
#' * `cluster_size`: integers indicating the total number of spectra in the corresponding cluster.
#' * `quality`: a character indicating the spectra quality category by SPeDE, out of GREEN, ORANGE and RED.
#' * `is_reference`: a logical indicating whether the corresponding spectra is a reference spectra of the cluster.
#'
#' @seealso <https://github.com/LM-UGent/SPeDE>
#' @references Dumolin C, Aerts M, Verheyde B, Schellaert S, Vandamme T, Van Der Jeugt F, De Canck E, Cnockaert M, Wieme AD, Cleenwerck I, Peiren J, Dawyndt P, Vandamme P, & Carlier A. (2019). "Introducing SPeDE: High-Throughput Dereplication and Accurate Determination of Microbial Diversity from Matrix-Assisted Laser Desorption–Ionization Time of Flight Mass Spectrometry Data". *MSystems* 4(5). <doi:10.1128/msystems.00437-19>.
#' @export
#' @examples
#' # Reformat the output from SPeDE table
#' # https://github.com/LM-UGent/SPeDE
#' import_spede_clusters(
#'   system.file("spede.csv", package = "maldipickr")
#' )
import_spede_clusters <- function(path) {
  # Import the SPeDE csv table with only the columns planned to be used
  utils::read.csv(path) %>%
    dplyr::select(
      "SOURCE_FILE",
      "QUALITY",
      "REFERENCE",
      "REFERENCE_NUMBER"
    ) %>%
    # Discard the trailing "Not matched:" and "Rejected spectra:" lines at the
    # end of the csv: they carry no REFERENCE_NUMBER
    dplyr::filter(!is.na(.data$REFERENCE_NUMBER)) %>%
    tibble::as_tibble() %>%
    # Sanitize the column names and variable types to the package convention
    dplyr::rename(
      "name" = "SOURCE_FILE",
      "membership" = "REFERENCE_NUMBER",
      "quality" = "QUALITY"
    ) %>%
    dplyr::mutate(
      # Shift the cluster numbers by one so that the membership starts from 1
      "membership" = .data$membership + 1,
      # The comparison is already a logical vector (NA stays NA), so no
      # if_else() wrapper is needed
      "is_reference" = .data$REFERENCE == "Yes"
    ) %>%
    # Add the cluster_size as the number of spectra sharing a membership
    dplyr::group_by(.data$membership) %>%
    dplyr::mutate("cluster_size" = dplyr::n()) %>%
    # Drop the grouping so that callers receive a plain tibble
    dplyr::ungroup() %>%
    dplyr::select(
      "name", "membership", "cluster_size", "quality", "is_reference"
    )
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.