# R/peptide_protein_matrix.R

# Defines functions: get_peptides, summarise_peptides, plot_shared_peptides, get_peptide_protein_matrix

# Documented in: get_peptide_protein_matrix, get_peptides, plot_shared_peptides, summarise_peptides

#' Get peptides from a list of proteins
#'
#' @description Applies \code{tryptic_digestion_single} to every protein in
#' the list and flattens the per-protein results into a single vector.
#' No de-duplication is performed here, so a peptide produced by several
#' proteins appears once per occurrence.
#'
#' @param protein_list List of proteins, each given as a string of single
#' letter amino acid codes.
#'
#' @return Vector of peptides obtained by concatenating the digestion results
#' of all proteins, in the order of \code{protein_list}.
#'
#' @export
#'
#' @examples
#' get_peptides(list("MKRAQ"))
#'
get_peptides <- function(protein_list) {
  # Digest each protein separately, then collapse the nested result.
  unlist(lapply(protein_list, tryptic_digestion_single), recursive = TRUE)
}


#' Summary of peptides
#'
#' @description Returns a data.frame that tells, for each peptide, how many
#' proteins it is matched to.
#'
#' @param peptides_proteins_matrix List returned by the
#' get_peptide_protein_matrix function. Only its
#' \code{peptide_protein_matrix} element is used.
#'
#' @return data.frame with two columns: \code{peptides} (row names of the
#' inclusion matrix, i.e. AA sequences) and \code{protein_count} (row sums of
#' the inclusion matrix, i.e. the number of proteins in which each peptide
#' occurs).
#'
#' @export
#'
#' @examples
#' summarise_peptides(get_peptide_protein_matrix("CARAQ", c("ARCAQP", "CARAQPP")))
#'
summarise_peptides <- function(peptides_proteins_matrix) {
  # Rows are peptides, columns are proteins; a row sum counts matched proteins.
  inclusion <- peptides_proteins_matrix$peptide_protein_matrix
  data.frame(
    peptides = rownames(inclusion),
    protein_count = rowSums(inclusion)
  )
}


#' Plot the number of proteins matched by shared peptides
#'
#' @description Keeps only peptides that match more than one protein and
#' draws a horizontal bar plot of the number of matched proteins, with
#' peptides ordered by that number.
#'
#' @param peptides_summary data.frame returned by the summarise_peptides
#' function. The columns \code{peptides} and \code{protein_count} are used.
#'
#' @return ggplot2 object
#'
#' @export
#'
#' @examples
#' summ <- summarise_peptides(get_peptide_protein_matrix(c("CARAQ", "ARC", "AR"),
#'                                                       list(x = "ARCAQP", y = "CARAQPP")))
#' plot_shared_peptides(summ)
#'
#' @importFrom ggplot2 ggplot geom_bar theme_bw aes coord_flip xlab ylab
#'
plot_shared_peptides <- function(peptides_summary) {
  # A peptide is "shared" when it occurs in more than one protein.
  is_shared <- peptides_summary$protein_count > 1
  shared_peptides <- peptides_summary[is_shared, ]

  # reorder() sorts the peptide axis by the number of matched proteins.
  bar_mapping <- aes(x = reorder(peptides, protein_count), y = protein_count)

  ggplot(shared_peptides, bar_mapping) +
    geom_bar(stat = "identity") +
    theme_bw() +
    coord_flip() +
    xlab("") +
    ylab("number of proteins matched by the peptide")
}


#' Calculate the matrix of peptide-protein inclusions.
#'
#' @description Checks, for every distinct peptide, whether it occurs as a
#' literal substring of each protein, and collects the values returned by
#' \code{calculate_monoisotopic_mass} for the supplied peptides.
#'
#' @param peptides Character vector of peptides. Duplicates are allowed; the
#' matrix has one row per distinct peptide.
#' @param proteins A list (or character vector) of proteins, one sequence per
#' element. Names, if present, are used unchanged as column names.
#'
#' @return A list with two elements:
#' \describe{
#'   \item{peptide_protein_matrix}{Logical matrix with one row per distinct
#'   peptide (row names are the peptides, in order of first appearance) and
#'   one column per protein. The i,j element is \code{TRUE} if the i-th
#'   peptide is part of the j-th protein.}
#'   \item{peptide_masses}{Result of \code{tapply}: one entry per distinct
#'   peptide, holding the sum of \code{calculate_monoisotopic_mass} over all
#'   occurrences of that peptide in \code{peptides}.}
#' }
#'
#' @export
#'
#' @examples
#' get_peptide_protein_matrix(c("CARAQ", "ARC"), list(x = "ARCAQP", y = "CARAQPP"))
get_peptide_protein_matrix <- function(peptides, proteins) {
  unique_peptides <- unique(peptides)
  n_peptides <- length(unique_peptides)

  # Peptides are sequences, not regular expressions, so match them literally
  # with fixed(). vapply() additionally guarantees that every protein yields
  # exactly one logical value per distinct peptide.
  peptide_detections <- vapply(
    proteins,
    function(protein) {
      stringr::str_detect(protein, stringr::fixed(unique_peptides))
    },
    logical(n_peptides),
    USE.NAMES = FALSE
  )

  # Build the matrix directly instead of going through a data.frame: this
  # keeps protein names as given (no check.names mangling, no invented names
  # for unnamed input) and keeps the peptides x proteins shape when there is
  # only one peptide or no proteins at all.
  peptide_protein_matrix <- matrix(
    peptide_detections,
    nrow = n_peptides,
    ncol = length(proteins),
    dimnames = list(unique_peptides, names(proteins))
  )

  # One mass per supplied peptide (duplicates included).
  peptide_masses <- vapply(
    peptides,
    calculate_monoisotopic_mass,
    numeric(1),
    USE.NAMES = FALSE
  )
  names(peptide_masses) <- peptides

  # NOTE(review): tapply() sums the masses of repeated peptides and orders
  # the result by sorted peptide, not by matrix row order - confirm that both
  # are intended before relying on positional alignment with the matrix.
  list(peptide_protein_matrix = peptide_protein_matrix,
       peptide_masses = tapply(peptide_masses, peptides, sum))
}
# mstaniak/proteomics_examples documentation built on Aug. 27, 2019, 9:56 a.m.