#' Creates the "Peptide Coverage" Object
#'
#' @description Returns an object with two data tables used to visualize where
#' identified peptides map to a specific literature sequence. These data tables
#' include one that contains sequences with start and stop positions, and the
#' other contains counts of identified residues.
#'
#' @param ScanMetadata Object of the scan_metadata class from get_scan_metadata. Required.
#' @param ProteinTable A "protein_table" object generated by get_protein_table. Required.
#' @param ProteinID The ID of the protein to pull protein coverage information from. Required.
#'
#' @details
#' The two outputted data tables are used by the three coverage plots.
#'
#' The first is called "PeptidesByPosition" and is used by Coverage Plot and Coverage Lit Seq Plot.
#' \tabular{ll}{
#' Scan Number \tab MS Scan Number from ScanMetadata \cr
#' \tab \cr
#' Sequence \tab Peptide or protein sequence of the fragment or literature sequence. \cr
#' \tab \cr
#' Peptide Start Position \tab A single number to indicate where the fragment begins on the literature sequence (from N to C) \cr
#' \tab \cr
#' Q Value \tab The Q value (adjusted P value) of the peptide-spectral match provided by a database search tool. \cr
#' \tab \cr
#' Score \tab The score of the peptide-spectral match (typically a small value where smaller = better match) \cr
#' \tab \cr
#' }
#'
#' The second is called "ResidueCount" and is used by coverage bar plot.
#' \tabular{ll}{
#' Residue \tab The residue from the literature sequence written in letter annotation and position format. i.e. "M1" is the first methionine. \cr
#' \tab \cr
#' Count \tab The number of times that residue was identified across all fragments. \cr
#' \tab \cr
#' Position \tab The residue's position on the literature sequence \cr
#' \tab \cr
#' }
#'
#' @examples
#' \dontrun{
#'
#' # Make Peptide Coverage Object
#' PeptideCoverage <- get_peptide_coverage(ScanMetadata = BU_ScanMetadata, ProteinTable = ProteinTable1, ProteinID = "SO_0225")
#'
#' }
#' @export
get_peptide_coverage <- function(ScanMetadata,
ProteinTable,
ProteinID) {
##################
## CHECK INPUTS ##
##################
# Assert that ScanMetadata is a ScanMetadata object.
if ("scan_metadata" %in% class(ScanMetadata) == FALSE) {
stop("ScanMetadata must be a scan_metadata object generated by get_scan_metadata.")
}
# Assert that ProteinTable is a "protein_table" object
if ("protein_table" %in% class(ProteinTable) == FALSE) {
stop("ProteinTable must be a protein_table object generated by get_protein_table.")
}
# Assert that Protein is a single string
if (is.character(ProteinID) == FALSE || length(ProteinID) != 1) {
stop("Protein must be a single string.")
}
# Assert that Protein is in the the ProteinTable
if (ProteinID %in% ProteinTable$Protein == FALSE) {
stop(paste0("Protein must be in the ProteinTable. ", ProteinID, " was not found."))
}
##########################
## GENERATE DATA TABLES ##
##########################
# Get the reference sequence
RefSeq <- attr(ProteinTable, "pspecter")$LitSeq %>%
dplyr::filter(Protein == ProteinID) %>%
dplyr::select(`Literature Sequence`) %>%
unlist()
# Get the Q Value Maximum. If it's 1, set the filter to NULL.
QValueMaximum <- if (attr(ProteinTable, "pspecter")$QValueMaximum == 1) {
NULL
} else {
attr(ProteinTable, "pspecter")$QValueMaximum
}
# Get the Score Maximum. If it's 1, set the filter to NULL.
ScoreMaximum <- if (attr(ProteinTable, "pspecter")$ScoreMaximum == 1) {
NULL
} else {
attr(ProteinTable, "pspecter")$ScoreMaximum
}
## Data Table 1: Peptides by Position-----------------------------------------
# Pull scan number, sequence, and start/end position per Protein ID. Pull the
# fragment table and order it so that subset works correctly
FragmentTable <- ScanMetadata
class(FragmentTable) <- c("data.table", "data.frame")
FragmentTable <- FragmentTable[FragmentTable$`Protein ID` == ProteinID & is.na(FragmentTable$`Protein ID`) == FALSE,]
# Get Peptides by Position
PeptidesByPosition <- FragmentTable %>%
dplyr::select(c(`Scan Number`, Sequence, `Peptide Start Position`, `Q Value`, Score)) %>%
dplyr::mutate(
"Peptide End Position" = `Peptide Start Position` + nchar(Sequence)
)
# Determine whether filters can be applied given a Q Value and a Score
if (is.na(PeptidesByPosition$`Q Value`) %>% any()) {QValueMaximum <- NULL}
if (is.na(PeptidesByPosition$Score) %>% any()) {ScoreMaximum <- NULL}
# Apply filters that are not NULL
if (is.null(QValueMaximum) == FALSE) {
PeptidesByPosition <- PeptidesByPosition %>% dplyr::filter(`Q Value` <= QValueMaximum)
}
if (is.null(ScoreMaximum) == FALSE) {
PeptidesByPosition <- PeptidesByPosition %>% dplyr::filter(Score <= ScoreMaximum)
}
# Add the reference sequence
PeptidesByPosition <- rbind(PeptidesByPosition,
data.table::data.table("Scan Number" = 0, "Sequence" = RefSeq,
"Peptide Start Position" = 0, "Q Value" = NA,
"Score" = NA, "Peptide End Position" = nchar(RefSeq)))
## Data Table 2: Generate the counts of identified residues-------------------
# Pull Sequence and Peptide Start Position information
Residues <- PeptidesByPosition %>%
dplyr::filter(`Scan Number` != 0) %>%
dplyr::select(c(Sequence, `Peptide Start Position`))
# Get positional number of each residue
ResidueCount <- lapply(1:nrow(Residues), function(row) {
# Split the sequence
SplitSeq <- Residues[row, "Sequence"] %>% unlist() %>% strsplit("") %>% unlist()
# Create positional vector
Position <- (Residues[row, "Peptide Start Position"] %>% unlist()) + 0:(length(SplitSeq)-1)
return(paste0(SplitSeq, Position))
}) %>%
unlist() %>%
table(dnn = "Residue") %>%
data.frame() %>%
dplyr::mutate(
"Position" = gsub("[[:alpha:]]", "", Residue) %>% as.numeric()
) %>%
dplyr::arrange(Position) %>%
data.table::data.table()
# Rename the second column "count"
colnames(ResidueCount)[2] <- "Count"
#######################
## RETURN THE OBJECT ##
#######################
# Store results as two data tables
res <- list(
"PeptidesByPosition" = PeptidesByPosition,
"ResidueCount" = ResidueCount
)
# Add attributes
attr(res, "pspecter")$QValueMaximum <- attr(ProteinTable, "pspecter")$QValueMaximum
attr(res, "pspecter")$ScoreMaximum <- attr(ProteinTable, "pspecter")$ScoreMaximum
attr(res, "pspecter")$RemoveContaminants <- attr(ProteinTable, "pspecter")$RemoveContaminants
attr(res, "pspecter")$ProteinID <- ProteinID
# Store the class information of this object
class(res) = "peptide_coverage"
# Return results
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.