R/peptic_digestion.R

Defines functions calculate_monoisotopic_mass tryptic_digestion_single

Documented in calculate_monoisotopic_mass tryptic_digestion_single

#' In silico (perfect) tryptic digestion
#'
#' @description Performs tryptic digestion without missed or erroneous
#' cleavage sites. The protein is cut after every R or K residue that is not
#' immediately followed by P (cleavage scheme [RK].<P>).
#'
#' @param protein Amino acid sequence as a single character string of
#' single-letter amino acid codes. Lower-case letters are converted to
#' upper case before digestion.
#'
#' @return A list of character strings, one per peptide, in the order in
#' which the peptides occur in the sequence. An empty string yields an
#' empty list.
#'
#' @export
#'
#' @examples
#' tryptic_digestion_single("DIRVAL")
#' # list("DIR", "VAL")
#' tryptic_digestion_single("AKPR")
#' # list("AKPR")
#'
tryptic_digestion_single <- function(protein) {
  protein <- stringr::str_to_upper(protein)
  protein_length <- stringr::str_length(protein)

  rk_positions <- stringr::str_locate_all(string = protein,
                                          pattern = "[RK]")[[1]][, "start"]
  p_positions <- stringr::str_locate_all(string = protein,
                                         pattern = "P")[[1]][, "start"]

  # An R/K directly followed by P is not a cleavage site.
  cleavage_sites_ends <- rk_positions[!((rk_positions + 1) %in% p_positions)]

  # Every peptide starts either at position 1 or right after a cleavage site.
  # A cleavage at the last residue would start a peptide past the end of the
  # sequence, so such starts are dropped.
  cleavage_sites_starts <- unique(c(1, cleavage_sites_ends + 1))
  cleavage_sites_starts <-
    cleavage_sites_starts[cleavage_sites_starts <= protein_length]

  # The last peptide always ends at the end of the sequence.
  cleavage_sites_ends <- unique(c(cleavage_sites_ends, protein_length))

  # seq_along() (rather than 1:length()) keeps the iteration empty when there
  # are no peptides, e.g. for an empty input string.
  lapply(seq_along(cleavage_sites_starts),
         function(x) stringr::str_sub(protein,
                                      start = cleavage_sites_starts[x],
                                      end = cleavage_sites_ends[x]))
}


#' Find monoisotopic mass of a neutral peptide
#'
#' @description Sums the monoisotopic masses of the amino acid residues in
#' the chain and adds the mass of two hydrogen atoms and one oxygen atom.
#' For information about the masses of amino acid residues, please refer to
#' the documentation for the monoisotopic_masses dataset.
#'
#' @param aa_chain character, a single string of amino acids
#' (single letter codes); lower-case letters are converted to upper case
#'
#' @return numeric monoisotopic mass of the peptide
#'
#' @export
#'
#' @examples
#' calculate_monoisotopic_mass("YILSCHCEECNS")
#' # 1381.51
#'
calculate_monoisotopic_mass <- function(aa_chain) {
  # Monoisotopic atomic masses used for the terminal H and OH groups.
  hydrogen_mass <- 1.007825035
  oxygen_mass <- 15.99491463

  upper_chain <- stringr::str_to_upper(aa_chain)
  residues <- stringr::str_split(upper_chain, "", simplify = TRUE)

  # Look up each residue by its single-letter code.
  residue_masses <- ProteomicsExamples::monoisotopic_masses_vec[residues]

  sum(residue_masses) + 2 * hydrogen_mass + oxygen_mass
}
mstaniak/proteomics_examples documentation built on Aug. 27, 2019, 9:56 a.m.