#' Get peptides from a list of proteins
#'
#' @description Applies tryptic digestion to every protein in the list and
#' flattens the per-protein results into a single vector. The function itself
#' does not remove duplicates: whatever the digestion of each protein returns
#' is concatenated as is.
#'
#' @param protein_list List of proteins, each written with single letter
#' codes of amino acids.
#'
#' @return The digestion results of all proteins combined into one vector
#' (a character vector of peptides, assuming the digestion helper returns
#' peptide strings).
#'
#' @export
#'
#' @examples
#' get_peptides(list("MKRAQ"))
#'
get_peptides <- function(protein_list) {
  # unlist() is recursive by default, so nested digestion results are
  # flattened completely.
  unlist(lapply(protein_list, tryptic_digestion_single))
}
#' Summary of peptides
#'
#' @description Returns a data.frame that indicates how many proteins are
#' matched by each peptide.
#'
#' @param peptides_proteins_matrix List returned by the
#' get_peptide_protein_matrix function. Only its peptide_protein_matrix
#' element is used.
#'
#' @return data.frame with a column of peptides (the row names of the
#' inclusion matrix, i.e. AA sequences) and a column protein_count holding
#' the row sums of that matrix, i.e. the number of proteins in which each
#' peptide occurs.
#'
#' @export
#'
#' @examples
#' summarise_peptides(get_peptide_protein_matrix("CARAQ", c("ARCAQP", "CARAQPP")))
#'
summarise_peptides <- function(peptides_proteins_matrix) {
  inclusion_matrix <- peptides_proteins_matrix$peptide_protein_matrix
  data.frame(
    peptides = rownames(inclusion_matrix),
    # Each row is one peptide, so the row sum counts the matching proteins.
    protein_count = rowSums(inclusion_matrix)
  )
}
#' Plot the number of proteins matched by shared peptides
#'
#' @description For peptides that match more than one protein, draws a
#' horizontal bar plot of the number of matched proteins, with peptides
#' ordered by that number.
#'
#' @param peptides_summary data.frame returned by the summarise_peptides
#' function (columns peptides and protein_count).
#'
#' @return ggplot2 object
#'
#' @export
#'
#' @examples
#' summ <- summarise_peptides(get_peptide_protein_matrix(c("CARAQ", "ARC", "AR"),
#' list(x = "ARCAQP", y = "CARAQPP")))
#' plot_shared_peptides(summ)
#'
#' @importFrom ggplot2 ggplot geom_bar theme_bw aes coord_flip xlab ylab
#'
plot_shared_peptides <- function(peptides_summary) {
  # Keep only peptides that occur in more than one protein.
  is_shared <- peptides_summary$protein_count > 1
  shared_summary <- peptides_summary[is_shared, ]

  bar_mapping <- aes(
    x = reorder(peptides, protein_count),
    y = protein_count
  )

  ggplot(shared_summary, bar_mapping) +
    geom_bar(stat = "identity") +
    theme_bw() +
    # Flip the axes so the peptide sequences are listed along the vertical axis.
    coord_flip() +
    xlab("") +
    ylab("number of proteins matched by the peptide")
}
#' Calculate the matrix of peptide-protein inclusions.
#'
#' @description Builds a logical matrix whose i,j element is TRUE if the
#' i-th unique peptide is part of (a substring of) the j-th protein, and
#' computes the monoisotopic mass of every unique peptide.
#'
#' @param peptides Character vector of peptides. Duplicated entries are
#' allowed; every distinct peptide is used exactly once.
#' @param proteins A list of proteins.
#'
#' @return A list with two elements:
#' \describe{
#'   \item{peptide_protein_matrix}{Logical matrix with one row per unique
#'   peptide (in order of first appearance, peptides as row names) and one
#'   column per protein. Column names go through as.data.frame(), so they
#'   are converted to syntactically valid names.}
#'   \item{peptide_masses}{One-dimensional array of masses named by peptide,
#'   as returned by tapply(); its entries follow the sorted peptide order,
#'   which may differ from the row order of the matrix.}
#' }
#'
#' @export
#'
#' @examples
#' get_peptide_protein_matrix(c("CARAQ", "ARC"), list(x = "ARCAQP", y = "CARAQPP"))
get_peptide_protein_matrix <- function(peptides, proteins) {
  unique_peptides <- unique(peptides)

  # Match peptides literally: they are sequences, not regular expressions, so
  # characters such as "*" or "." must not be interpreted as regex syntax.
  peptide_patterns <- stringr::fixed(unique_peptides)
  peptide_detections <- lapply(proteins, function(protein) {
    stringr::str_detect(protein, peptide_patterns)
  })
  peptide_protein_matrix <- as.matrix(as.data.frame(peptide_detections))
  rownames(peptide_protein_matrix) <- unique_peptides

  # Compute each mass once per distinct peptide. Working on the raw input
  # and summing per peptide would multiply the mass of a peptide by the
  # number of times it is listed.
  # vapply() guarantees one numeric value per peptide.
  peptide_masses <- vapply(
    unique_peptides,
    calculate_monoisotopic_mass,
    numeric(1),
    USE.NAMES = FALSE
  )

  list(
    peptide_protein_matrix = peptide_protein_matrix,
    # Every group holds a single value, so sum() only serves to keep the
    # original return shape (array named by sorted peptide).
    peptide_masses = tapply(peptide_masses, unique_peptides, sum)
  )
}
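# NOTE: the two lines below are leftover text from the web page this file
# was copied from; they are not part of the package code.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.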