# Nothing
## ----include = FALSE----------------------------------------------------------
# Global knitr chunk options applied to the chunks of this document.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
## ----setup--------------------------------------------------------------------
library(maldipickr)
## ----examples-process_spectra-------------------------------------------------
# Locate the example directory shipped with the package; it holds six
# Bruker MALDI Biotyper spectra
directory_biotyper_spectra <- system.file(
  "toy-species-spectra",
  package = "maldipickr"
)

# Read the six spectra from that directory
spectra_list <- import_biotyper_spectra(directory_biotyper_spectra)

# Apply the signal transformations of Strejcek et al. (2018)
processed <- process_spectra(spectra_list)

# Show the top two levels of the returned list: the processed spectra,
# the identified peaks and the metadata table
str(processed, max.level = 2)

# Print the metadata, which carries the median signal-to-noise
# ratio (SNR) and the number of peaks
processed$metadata
## ----examples-merge_processed_spectra-----------------------------------------
# Locate the example directory shipped with the package; it holds six
# Bruker MALDI Biotyper spectra
directory_biotyper_spectra <- system.file(
  "toy-species-spectra",
  package = "maldipickr"
)

# Read the six spectra, then apply the signal transformations of
# Strejcek et al. (2018)
spectra_list <- import_biotyper_spectra(directory_biotyper_spectra)
processed <- process_spectra(spectra_list)

# Build the feature matrix from this single run
fm <- merge_processed_spectra(list(processed))

# Expected dimensions: 6 spectra (rows) by 35 peaks (columns)
dim(fm)

# Repeat the merge with interpolation switched off to see the difference
fm_no_interpolation <- merge_processed_spectra(
  list(processed),
  interpolate_missing = FALSE
)
sum(fm == 0) # 0
sum(fm_no_interpolation == 0) # 68

# Several runs are aggregated by passing them together inside list()
fm_all <- merge_processed_spectra(list(processed, processed, processed))

# Expected dimensions: 3 x 6 = 18 spectra (rows) by 35 peaks (columns)
dim(fm_all)

# Names given to the list elements are dropped and are not propagated
# to the matrix. Example (not run):
#
# fm_all <- merge_processed_spectra(
#   list("A" = processed, "B" = processed, "C" = processed)
# )
# any(grepl("A|B|C", rownames(fm_all))) # FALSE
## ----similarity, eval = FALSE-------------------------------------------------
# # A. Compute the similarity matrix on the transposed feature matrix
# # using Pearson correlation coefficient
# sim_matrix <- stats::cor(t(fm), method = "pearson")
#
# # B.1 Install the coop package
# # install.packages("coop")
#
# # B.2 Compute the similarity matrix on the rows of the feature matrix
# sim_matrix <- coop::tcosine(fm)
## ----examples-delineate_with_similarity---------------------------------------
# Toy cosine similarity matrix between the six example spectra of
# three species: zero indicates dissimilar spectra, one indicates
# identical spectra. Rows and columns share the same spectra names,
# so the name vector is written once and repeated for both dimensions.
cosine_similarity <- matrix(
  c(
    1.00, 0.79, 0.77, 0.99, 0.98, 0.98,
    0.79, 1.00, 0.98, 0.79, 0.80, 0.80,
    0.77, 0.98, 1.00, 0.77, 0.77, 0.77,
    0.99, 0.79, 0.77, 1.00, 1.00, 0.99,
    0.98, 0.80, 0.77, 1.00, 1.00, 1.00,
    0.98, 0.80, 0.77, 0.99, 1.00, 1.00
  ),
  nrow = 6,
  dimnames = rep(
    list(
      c(
        "species1_G2", "species2_E11", "species2_E12",
        "species3_F7", "species3_F8", "species3_F9"
      )
    ),
    times = 2
  )
)

# Delineate clusters by applying a 0.92 threshold to the similarity matrix
delineate_with_similarity(cosine_similarity, threshold = 0.92)
## ----examples-set_reference_spectra-------------------------------------------
# In one pipeline: locate the example directory of six Bruker MALDI
# Biotyper spectra, import them, and transform the signals according
# to Strejcek et al. (2018)
processed <- system.file(
  "toy-species-spectra",
  package = "maldipickr"
) %>%
  import_biotyper_spectra() %>%
  process_spectra()

# Toy cosine similarity matrix between the six example spectra of
# three species: zero indicates dissimilar spectra, one indicates
# identical spectra. Rows and columns share the same spectra names,
# so the name vector is written once and repeated for both dimensions.
cosine_similarity <- matrix(
  c(
    1.00, 0.79, 0.77, 0.99, 0.98, 0.98,
    0.79, 1.00, 0.98, 0.79, 0.80, 0.80,
    0.77, 0.98, 1.00, 0.77, 0.77, 0.77,
    0.99, 0.79, 0.77, 1.00, 1.00, 0.99,
    0.98, 0.80, 0.77, 1.00, 1.00, 1.00,
    0.98, 0.80, 0.77, 0.99, 1.00, 1.00
  ),
  nrow = 6,
  dimnames = rep(
    list(
      c(
        "species1_G2", "species2_E11", "species2_E12",
        "species3_F7", "species3_F8", "species3_F9"
      )
    ),
    times = 2
  )
)

# Delineate clusters by applying a 0.92 threshold to the similarity matrix
clusters <- delineate_with_similarity(cosine_similarity, threshold = 0.92)

# Set the reference spectra for the toy example, using the clusters and
# the metadata of the processed spectra
set_reference_spectra(clusters, processed$metadata)
## ----example-delineate_with_identification------------------------------------
# Read the example Biotyper report file shipped with the package
report_unknown <- read_biotyper_report(
  system.file("biotyper_unknown.csv", package = "maldipickr")
)

# Delineate clusters from the content of that report
delineate_with_identification(report_unknown)
## ----examples-import_spede_clusters-------------------------------------------
# Import and reformat the example SPeDE output table shipped with the
# package. SPeDE: https://github.com/LM-UGent/SPeDE
import_spede_clusters(
  system.file("spede.csv", package = "maldipickr")
)
## ----examples-pick_spectra----------------------------------------------------
# 0. Load a toy tibble of clusters, as created by the
#    `delineate_with_similarity` function.
clusters <- readRDS(
  system.file("clusters_tibble.RDS", package = "maldipickr")
)

# 1. With no extra metadata, the default behaviour is to pick the
#    reference spectra of each cluster.
#
#    N.B.: `relocate()` only moves the `name` and `to_pick` columns
#    to the left for readability.
pick_spectra(clusters) %>%
  dplyr::relocate(name, to_pick) # only for clarity

# 2.1 Simulate OD600 values (uniform distribution) for each of the
#     colonies measured with the Bruker MALDI Biotyper
set.seed(104)
metadata <- dplyr::transmute(
  clusters,
  name = name,
  OD600 = runif(n = nrow(clusters))
)
metadata

# 2.2 Pick, within each cluster, the spectra with the highest OD600 value
pick_spectra(clusters, metadata, "OD600") %>%
  dplyr::relocate(name, to_pick) # only for clarity

# 3.1 Suppose the wells on the right side of the plate hold negative
#     controls and should not be picked: extract the well number from
#     the spectra name and flag the wells on the right edge.
metadata <- dplyr::mutate(
  metadata,
  well = strtoi(gsub(".*[A-Z]([0-9]{1,2}$)", "\\1", name)),
  is_edge = is_well_on_edge(
    well_number = well,
    plate_layout = 96,
    edges = "right"
  )
)

# 3.2 Pick the spectra after discarding (soft masking) the spectra
#     flagged by the `is_edge` column.
pick_spectra(
  clusters, metadata, "OD600",
  soft_mask_column = "is_edge"
) %>%
  dplyr::relocate(name, to_pick) # only for clarity

# 4.1 Suppose some spectra (e.g., those in column F) were already
#     picked in a previous experiment. Clusters containing them
#     should not be picked again, to limit redundancy.
metadata <- dplyr::mutate(
  metadata,
  picked_before = grepl("_F", name)
)

# 4.2 Pick the spectra only from clusters without any spectra
#     labeled as `picked_before` (hard masking).
pick_spectra(
  clusters, metadata, "OD600",
  hard_mask_column = "picked_before"
) %>%
  dplyr::relocate(name, to_pick) # only for clarity
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.