R/comet.R

Defines functions summary.cometdecoy .is.cometdecoy

Documented in summary.cometdecoy

#R

.is.cometdecoy <- function(object){
    if(sum(c('num', 'sp_score', 'protein', 'plain_peptide') %in% names(object))==4) return(TRUE)

    FALSE
}


summary.cometdecoy <- function(object, psmFdrCutoff = 0.05, idx = rev(order(object[['sp_score']])), decoyPattern = "^REV_", ...){
  stopifnot(.is.cometdecoy(object), length(idx) == nrow(object))
  
  # consider only first matches
  object <- object[object$num ==1, ]
  
  ## TODO(cp): use `e-value` instead of `sp_score`
  n <- nrow(object)
  object <- object[idx, ]
  
  object$REV <- grepl(decoyPattern, object$protein)
  if(sum(object$REV) == 0){
    warning("no decoy hit found. consider different decoy pattern or enable decoy search in comet.")
    data.frame(nPSM = n,
                   psmFdrCutoff = psmFdrCutoff,
                   nDecoyPSM = 0,
                   nConfidentPSM = NA,
                   nDecoyPeptide = 0,
                   nConfidentPeptide = NA,
                   nDecoyProteins = 0,
                   nConfidentProteins = NA,
                   fdrPSM = NA,
                   fdrPeptide = NA,
                   fdrProtein = NA) -> df
    return(df)
  }
  
  object$nREVhits <- cumsum(object$REV)
  object$FDR <- object$nREVhits / (seq(1, n) - object$nREVhits)
  
  nConfidentPSM <- which(object$FDR > psmFdrCutoff)[1]
  nDecoyPSM <- object$nREVhits[which(object$FDR > psmFdrCutoff)[1]]
  confidentPeptide <- as.character(object$plain_peptide[seq(1, nConfidentPSM)])
  decoyPeptide <- grepl(decoyPattern, as.character(object$protein[seq(1, nConfidentPSM)]))
  
  nDecoyPeptide <- length(unique(confidentPeptide[decoyPeptide]))
  nConfidentPeptide <- length(unique(confidentPeptide[!decoyPeptide]))
  
  ## Keep 1st accession only to avoid almost redundant groups. 
  ## Expect to receive less protein groups but higher FDR.
  ## by <jg@fgcz.ethz.ch>
  confidentProteins <- unique(sapply(strsplit(as.character(object$protein[seq(1, nConfidentPSM)]), ','), function(y)y[1]))
  nDecoyProteins <- length(grep(decoyPattern, confidentProteins))
  nConfidentProteins <- length(confidentProteins) - nDecoyProteins
  
  data.frame(nPSM = n,
                   psmFdrCutoff = psmFdrCutoff,
                   nDecoyPSM = nDecoyPSM,
                   nConfidentPSM = nConfidentPSM,
                   nDecoyPeptide = nDecoyPeptide,
                   nConfidentPeptide = nConfidentPeptide,
                   nDecoyProteins = nDecoyProteins,
                   nConfidentProteins = nConfidentProteins,
                   fdrPSM = nDecoyPSM / nConfidentPSM,
                   fdrPeptide = nDecoyPeptide / nConfidentPeptide,
                   fdrProtein = nDecoyProteins / nConfidentProteins
  ) -> df
  df     
}
cpanse/protViz documentation built on July 4, 2025, 10:34 p.m.