R/annotateClass.R

letter1 <- c('A','B','C','D','E','F','G','H','I','K','L','M','N','P','Q','R','S','T','U','V','W','X','Y','Z')
letter3 <- c( 'Ala','Asx','Cys','Asp','Glu','Phe','Gly','His','Ile','Lys','Leu','Met','Asn','Pro','Gln','Arg','Ser','Thr','SeC','Val','Trp','Unknown','Tyr','Glx')
formula <- c('C3H5ON','','C3H5ONS','C4H5O3N','C5H7O3N','C9H9ON','C2H3ON','C6H7ON3','C6H11ON','C6H12ON2','C6H11ON','C5H9ONS','C4H6O2N2','C5H7ON','C5H8O2N2','C6H12ON4','C3H5O2N','C4H7O2N','C3H5NOSe','C5H9ON','C11H10ON2','','C9H9O2N','')
Monoisotopic <- c(71.03711,NA,103.00919,115.02694,129.04259,147.06841,57.02146,137.05891,113.08406,128.09496,113.08406,131.04049,114.04293,97.05276,128.05858,156.10111,87.03203,101.04768,150.95363,99.06841,186.07931,NA,163.06333,NA)
Average <- c(71.0788,NA,103.1388,115.0886,129.1155,147.1766,57.0519,137.1411,113.1594,128.1741,113.1594,131.1926,114.1038,97.1167,128.1307,156.1875,87.0782,101.1051,150.0379,99.1326,186.2132,NA,163.176,NA)

.AminoAcids <- data.frame(letter1,letter3, formula, Monoisotopic, Average, stringsAsFactors = FALSE)
#' test dummy
#' 
#' @export
aaMassesFun <- function(){
  aaMasses = bibliospec::.AminoAcids
}

#' Data frame with amino acid masses
#'
#' @name AminoAcids
#' @docType data
#' @keywords data
NULL

#' Data frame with iRTpeptides
#'
#' @name iRTpeptides
#' @docType data
#' @keywords data
NULL
#' Data frame with CiRTpeptides
#' 
#' See Parker Aebersold for more details
#' @name CiRTpeptides
#' @docType data
#' @keywords data
NULL

#'A list with functions usefull for annotating peptide sequences
#'
#' @export Annotate
#' @import stringr
#' @import plyr
#' @import reshape2
#' @useDynLib bibliospec
#' @examples
#'
#'
#' masses <-c(  71.03711,  71.03711,  71.03711,  71.03711,  71.03711,
#'    71.03711,  71.03711,  57.02146,  71.03711,  71.03711, 57.02146,  57.02146, 156.10111)
#' ann <- Annotate()
#' ann$aaMasses
#' ann$atomMass
#' ann$fragmentIonBY(masses)
#' ann$parentMassWithLoss(masses, 2)
#' ann$parentMassWithLoss(masses, 2, neutrallossMass =-43.005814,
#'   neutrallossName = "Citrulline_Ornithine" )
#' sequence <- "AAADLMTYCDAHACEDPLITPVPTSENPFR"
#' ann$aa2mass(sequence)
#' ann$upIonsCharge(ann$fragmentIonBY(masses), charge=2) 
#' ann$fragmentIonBY(masses)
#' ann$fragmentBYIonsUpTO(masses)
#' ann$fragmentBYIonsUpTO(masses ,charge= 2)
#' res <- ann$fragmentBYIonsUpTO(masses ,charge= 3)
#' res <- ann$fragmentBYIonsUpTO(masses ,charge= 2, wide=TRUE)
#' ann$parentIonMass(masses)
#' ann$psmEasy(1:100,1:20)
#' ann$parentMassWithLoss(ann$aa2mass(sequence),
#'                       2,neutrallossMass =-43.005814, neutrallossName= "Citrulline_Ornithine")
#'
Annotate <- setRefClass("Annotate",
                        fields = list( atomMass = "list",
                                       aaMasses  = "data.frame",
                                       proton = "numeric"),
                        methods =  list(
                          initialize = function(){
                            .self$atomMass <- list(Hydrogen=1.007825, Oxygen=15.994915, Nitrogen=14.003074,Electron = 0.000549)
                            
                            .self$aaMasses <- .AminoAcids
                            .self$proton <- atomMass$Hydrogen - atomMass$Electron
                          },
                          modifyMass = function(letter1,Monoisotopic, Average=NULL ){
                            "modify mass of existing amino acid"
                            .self$aaMasses[which(letter1 == .self$aaMasses$letter1), c("Monoisotopic","Average")] <- c(Monoisotopic,Average) 
                          },
                          addAA=function(letter1, Monoisotopic, formula =NULL, letter3=NULL,  Average=NULL){
                            "add new amino acid"
                            newrow <- c(letter1 = letter1,Monoisotopic=Monoisotopic,formula=formula, letter3=letter3,Average=Average)
                            .self$aaMasses <- rbind(.self$aaMasses, newrow) 
                          },
                          aa2mass = function(sequence){
                            str <- stringr::str_split(sequence,"")[[1]]
                            res <- .self$aaMasses$Monoisotopic[match(str, .self$aaMasses$letter1)]
                            return(res)
                          },
                          parentMassWithLoss = function(masses, charge, neutrallossMass = NULL, neutrallossName = NULL  ){
                            " compute parent ion and neutral loss if any "
                            mass <- .self$parentIonMass(masses,charge)
                            lossmass <- mass + neutrallossMass/charge
                            
                            massWithLosses <- c(mass, lossmass )
                            names(massWithLosses) <- c(names(mass),neutrallossName)
                            massWithLosses <- data.frame(mass = massWithLosses,
                                                         ion = names(massWithLosses),
                                                         charge = charge ,
                                                         pos = 0)
                            return(massWithLosses)
                          },
                          fragmentIonBY =function(masses){
                            "compute fragment ions"
                            b <- cumsum(masses) + proton
                            y <- cumsum(rev(masses)) + atomMass$Oxygen + 2*atomMass$Hydrogen + proton
                            res<- rbind(data.frame(mass=y, ion="y", charge = 1 , pos = 1:length(y) ),
                                        data.frame(mass=b, ion="b", charge = 1 , pos = 1:length(b)))
                            return(res)
                          },
                          upIonsCharge  = function(ions, charge = 2) {
                            mass <- ions$mass
                            mass <- (mass + (charge - 1) *proton) / charge
                            res <- data.frame(mass= mass , ion = ions$ion , charge = charge, pos = ions$pos)
                            return(res)
                          },
                          fragmentBYIonsUpTO = function(masses, charge = 1, wide =FALSE, genSeries = .self$fragmentIonBY){
                            "generate fragment ions"
                            res <- genSeries(masses)
                            tmp <- list()
                            tmp[[1]] <- res
                            if(charge > 1){
                              for(i in 2:charge){
                                tmp[[i]] <- upIonsCharge(res, charge = i)
                              }
                            }
                            res <- plyr::rbind.fill( tmp )
                            if(wide){
                              res <- reshape2::dcast(res,  pos ~ ion + charge , value.var = "mass")
                              rownames(res) <- res$pos
                              res <- res[,-1]
                            }
                            return(res)
                          },
                          
                          parentIonMass= function(masses, charge = 2){
                            "get mass of parent ion"
                            parent <- sum(masses)
                            parent <- (parent + charge * proton)/charge
                            names(parent) <- "parent" 
                            return(parent)
                          },
                          psmEasy =function(mz , series.mZ) {
                            if(is.unsorted( mz )){
                              stop("error in psmEasy! The mz argument must be sorted.")
                            }
                            "match peaks in mz with those in series.mZ, mz must be sorted"
                            out <- .C("findNN_",PACKAGE = 'bibliospec',
                                      nbyion=as.integer(length(series.mZ)),
                                      nmZ=as.integer(length(mz)),
                                      byion=as.double(series.mZ),
                                      mZ=as.double(mz),
                                      NN=as.integer(rep(-1, length(series.mZ))))
                            
                            mZ.error <- mz[out$NN+1] - series.mZ
                            
                            tmp <- (data.frame(mZ_Da_error = mZ.error, 
                                               mZ_ppm_error = 1E+6*mZ.error/series.mZ,
                                               idx = out$NN+1,
                                               stringsAsFactors = FALSE
                            ))
                            return(tmp)
                          }  
                        )
)
#' annotate spectrum
#' 
#' @name Annotate_annotateSpectrum
#' @param psm peptide spectrum match table
#' @param modification modification table
#' @param spectrum spectrum  table
#' @examples
#' library(bibliospec)
#' rm(list=ls())
#'  dbfile <- file.path(path.package("bibliospec"),
#'  "extdata/peptideStd.sqlite")
#' 
#' # call constructor
#' BS <- Bibliospec(dbfile=dbfile)
#' tmp <- BS$getAnnotationLists()
#' names(tmp)
#' tmp$psml[[1]]
#' tmp$mod[[1]]
#' tmp$spectruml[[1]]
#' ann <- Annotate()
#' 
#' 
#' ann$aa2mass(tmp$psml[[1]]$peptideSeq)
#' 
#' spectrum <- tmp$spectruml[[1]]$mz
#' masses <- ann$aa2mass(tmp$psm[[1]]$peptideSeq)
#' xx<-ann$fragmentBYIonsUpTO( masses , charge = (tmp$psm[[1]]$precursorCharge -1))
#' head(xx)
#' x<-ann$psmEasy(spectrum, xx$mass )
#' head(x)
#' 
#' matches <- ann$annotateSpectrum(psm = tmp$psml[[1]],
#' modification = tmp$modl[[1]], spectrum = tmp$spectruml[[1]])
#' head(matches)
#' 
NULL
Annotate$methods(
  annotateSpectrum= function(psm ,
                             modification,
                             spectrum ,
                             error = 10,
                             ppm = TRUE,
                             neutrallossMass = -43.005814, neutrallossName = "Citrulline_Ornithine"){
    
    stopifnot(psm$RefSpectraId == spectrum$RefSpectraId[1])
    if(!is.null(modification)){
      stopifnot(psm$RefSpectraId == modification$RefSpectraId[1])
    }
    ### compute masses
    masses <- .self$aa2mass(psm$peptideSeq)
    
    massesmod <- masses
    massesmod[ modification$position ] <- masses[ modification$position ] + modification$mass
    
    fragtableBY23 <- .self$fragmentBYIonsUpTO( massesmod , charge = (psm$precursorCharge -1) )
    
    ### match peptides
    pepmatches <- .self$psmEasy(spectrum$mz, fragtableBY23$mass )
    pepmatches <- cbind(fragtableBY23,pepmatches, spectrum[pepmatches$idx,])
    ### parent match
    
    parent <- .self$parentMassWithLoss(massesmod , psm$precursorCharge, neutrallossMass , neutrallossName )
    parentmatches <- .self$psmEasy(spectrum$mz, parent$mass )
    parentmatches <- cbind(parent,parentmatches, spectrum[parentmatches$idx,])
    
    
    matches <- rbind.fill(parentmatches, pepmatches)
    
    if(ppm){
      matches <- matches[abs(matches$mZ_ppm_error) < error,]
    }else{
      matches <- matches[abs(matches$mZ_Da_error) < error,]
    }
    return(matches)
  }
)
#' compute which fragments carry modification
#' 
#' @name Annotate_isModification
#' @param psm data.frame
#' @param modification data.frame
#'
#' @examples 
#' 
#' library(bibliospec)
#' rm(list=ls())
#'  dbfile <- file.path(path.package("bibliospec"),
#'  "extdata/peptideStd.sqlite")
#' 
#' # call constructor
#' BS <- Bibliospec(dbfile=dbfile)
#' tmp <- BS$getAnnotationLists()
#' ann <- Annotate()
#' 
#' ann$isModification(tmp$psml[[1]], tmp$modl[[1]])
#' 
NULL
Annotate$methods(
isModification=function( psm , modification){
  if(is.null(modification)){
    return(NULL)
  }
  masses <- .self$aa2mass(psm$peptideSeq)
  aaseq <- stringr::str_split(psm$peptideSeq, "")[[1]]
  res <- list()
  for(i in 1:nrow(modification))
  {
    massesmod <- masses
    massesmod[ modification$position[i] ] <- masses[ modification$position[i] ] + modification$mass[i]
    fragtableBY23 <- .self$fragmentBYIonsUpTO( massesmod, charge =  psm$precursorCharge - 1 )
    fragtableBY23nonMod <- .self$fragmentBYIonsUpTO(masses , charge = psm$precursorCharge - 1 )
    fragtableBY23nonMod <- rename(fragtableBY23nonMod,c("mass"= "massnonmod"))
    fragtable <- fragtableBY23
    fragtable$massnonmod <- fragtableBY23nonMod$massnonmod
    fragtable$ismod  <- fragtable$mass != fragtable$massnonmod
    fragtable$nrAA <- nchar(psm$peptideSeq)
    fragtable$modmass <- modification$mass[i]
    fragtable$modposition <- modification$position[i]
    fragtable$modaa <- aaseq[modification$position[i]]
    fragtable$RefSpectraId <- psm$RefSpectraId
    
    name <- paste("mod.", i, round(modification$mass[i],digits=2),sep="_")
    res[[ name ]] <- fragtable
  }
  res <- rbind.fill( res)
  return(res)
}
)
protViz/bibliospec documentation built on May 26, 2019, 9:37 a.m.