# Nothing
# Register the column symbols that filter_on_max_peptides() uses unquoted
# inside data.table expressions, so that R CMD check does not report them as
# undefined global variables.
utils::globalVariables(names = c("PROTEIN", "PEPTIDE"))
#' Filter only for the highest intense peptides
#'
#' In order to reduce the data, only the peptides with the highest summed
#' intensity are kept for each protein.
#'
#' The intensity of every protein/peptide pair is summed over all rows of
#' \code{data}. For each protein the \code{n_peptides} pairs with the largest
#' sum are selected, and only rows belonging to a selected protein/peptide
#' pair are returned. A summary of the number of proteins and peptides before
#' and after filtering is emitted with \code{message()}.
#'
#' @param data A data frame containing SWATH data. It must contain an
#'   \code{Intensity} column as well as the protein and peptide identifier
#'   columns named by \code{protein_col} and \code{peptide_col}.
#' @param n_peptides Maximum number of highest intense peptides to keep per
#'   protein. Must be a single non-negative number.
#' @param protein_col Column with protein identifiers. Default: ProteinName
#' @param peptide_col Column with peptide identifiers. Default:
#'   Peptide.Sequence or FullPeptideName (the column actually used is
#'   resolved by \code{validate_columns()}).
#' @param rm.decoy Option to remove the decoys during filtering. Decoys are
#'   only removed when this is \code{TRUE}.
#' @return Returns a data frame of the filtered data, with rows ordered by
#'   protein and peptide identifier and with the original protein and peptide
#'   column names restored.
#' @author Peter Blattmann
#' @examples
#' data("OpenSWATH_data", package="SWATH2stats")
#' data("Study_design", package="SWATH2stats")
#' data <- sample_annotation(OpenSWATH_data, Study_design)
#' data.filtered <- filter_mscore_freqobs(data, 0.01,0.8)
#' data.max <- filter_on_max_peptides(data.filtered, 5)
#' @importFrom data.table setkey setkeyv setnames
#' @importFrom utils head
#' @export
filter_on_max_peptides <- function(data,
                                   n_peptides,
                                   protein_col = "ProteinName",
                                   peptide_col = c("Peptide.Sequence",
                                                   "FullPeptideName"),
                                   rm.decoy = TRUE) {
  # A negative count would silently switch head() to "drop the last n rows",
  # which is never what "maximum number of peptides" means.
  if (!is.numeric(n_peptides) || length(n_peptides) != 1L ||
      is.na(n_peptides) || n_peptides < 0) {
    stop("'n_peptides' must be a single non-negative number.", call. = FALSE)
  }

  data <- unifyProteinGroupLabels(data)
  if (isTRUE(rm.decoy)) {
    data <- removeDecoyProteins(data)
  }

  # Resolve which of the candidate column names are actually used.
  columns <- validate_columns(data, list(Protein = protein_col,
                                         Peptide = peptide_col))
  protein_name <- columns[["Protein"]]
  peptide_name <- columns[["Peptide"]]

  # data.table() copies the input, so the by-reference operations below do
  # not touch the caller's object.
  data <- data.table::data.table(data)
  setnames(data, protein_name, "PROTEIN")
  setnames(data, peptide_name, "PEPTIDE")

  # Checked explicitly: the local `Intensity <- NULL` binding below would
  # otherwise mask a missing column and turn every sum into 0.
  if (!"Intensity" %in% names(data)) {
    stop("Column 'Intensity' not found in 'data'.", call. = FALSE)
  }

  # Summed intensity per protein/peptide pair. The local NULL binding only
  # silences the R CMD check note about the unquoted column name.
  Intensity <- NULL
  peptide_totals <- data[, list(SUM.INTENSITY = sum(Intensity)),
                         by = c("PROTEIN", "PEPTIDE")]
  peptide_totals <- peptide_totals[order(peptide_totals$SUM.INTENSITY,
                                         decreasing = TRUE), ]

  # With the totals sorted in decreasing order, the first n_peptides rows of
  # every protein group are its most intense peptides.
  .SD <- NULL
  top_pairs <- peptide_totals[, head(.SD, n_peptides), by = "PROTEIN"]

  # Sort the full table by protein and peptide; this defines the row order
  # of the returned data frame.
  setkeyv(data, cols = c("PROTEIN", "PEPTIDE"))

  # Keep rows by protein/peptide PAIR. Matching on the peptide alone would
  # also retain a shared peptide for proteins in which it is not among the
  # top n_peptides, letting a protein exceed the requested maximum.
  keep_rows <- data[top_pairs, on = c("PROTEIN", "PEPTIDE"),
                    which = TRUE, nomatch = 0L]
  data.filtered <- data.frame(data[sort(keep_rows), ])

  proteins_before <- length(unique(data$PROTEIN))
  peptides_before <- length(unique(data$PEPTIDE))
  proteins_after <- length(unique(data.filtered$PROTEIN))
  peptides_after <- length(unique(data.filtered$PEPTIDE))
  # Avoid reporting NaN when the input holds no peptides at all.
  pct_removed <- if (peptides_before > 0) {
    round((peptides_before - peptides_after) / peptides_before * 100,
          digits = 2)
  } else {
    0
  }

  message("Before filtering: ", "\n",
          " Number of proteins: ", proteins_before, "\n",
          " Number of peptides: ", peptides_before, "\n\n",
          "Percentage of peptides removed: ",
          pct_removed, "%", "\n\n",
          "After filtering: ", "\n", " Number of proteins: ", proteins_after, "\n",
          " Number of peptides: ", peptides_after, "\n")

  # Restore the caller's column names by exact match; a regex substitution
  # would also rewrite unrelated columns that merely contain "PROTEIN" or
  # "PEPTIDE" in their name.
  restored_names <- colnames(data.filtered)
  restored_names[restored_names == "PROTEIN"] <- protein_name
  restored_names[restored_names == "PEPTIDE"] <- peptide_name
  colnames(data.filtered) <- restored_names

  data.filtered
}
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.