R/bibliospec.R

Defines functions convertMZToNumeric convertIntToNumeric blobs2Frame newPsm

#R
.convertMZToNumeric<-function(mzBlob, numPeaks){
  mZ <- try(readBin(memDecompress(as.raw(mzBlob),'g'), double(), n=numPeaks), TRUE)
  if (!is.numeric(mZ)){
    mZ <- try(readBin(as.raw(mzBlob), double(),numPeaks), FALSE)
  }
  return(mZ)
}

.convertIntToNumeric<-function(intensityBlob, numPeaks){
  intensity <- try(readBin(memDecompress(as.raw(intensityBlob),'g'), 
                           numeric(), n = numPeaks, size = 4), TRUE)
  if (!is.numeric(intensity)){
    intensity <- try(readBin(as.raw(intensityBlob), 
                             numeric(), n = numPeaks, size = 4), FALSE)
  }
  return(intensity)
}

.querySpectra <- c(
  "SELECT RefSpectra.id as RefSpectraId, numPeaks,  peptideSeq,",
  "precursorCharge, precursorMZ, retentionTime,",
  "peptideModSeq, score, SpectrumSourceFiles.fileName",
  "FROM SpectrumSourceFiles, RefSpectra",
  "WHERE SpectrumSourceFiles.id = RefSpectra.fileID;")

.mergeQuery <- c(
  "SELECT RefSpectra.id as RefSpectraId, numPeaks,",  
  "peptideSeq, precursorCharge, precursorMZ, retentionTime, peptideModSeq, score," ,
  "SpectrumSourceFiles.fileName, SpectraPeaks.mz, SpectraPeaks.intensity",
  "FROM RefSpectra, SpectraPeaks, SpectrumSourceFiles", 
  "WHERE RefSpectra.id = SpectraPeaks.RefSpectraId and  SpectrumSourceFiles.id = RefSpectra.fileID;")


.queryPeaks <- c(
  "SELECT RefSpectra.id  as  RefSpectraId, numPeaks, peakMZ, peakIntensity",
  "FROM RefSpectraPeaks, RefSpectra",
  "WHERE RefSpectra.id=RefSpectraPeaks.RefSpectraID"
)

.getModicications <- c("SELECT RefSpectraID, position, mass FROM Modifications;")

.blobs2Frame<-function(xx, asList= FALSE){
  resMZ <- mcmapply(.convertMZToNumeric, xx$peakMZ, xx$numPeaks )
  message(" extracted MZ")
  resIntensity <- mcmapply(.convertIntToNumeric, xx$peakIntensity, xx$numPeaks )
  message(" extracted Intensity")
  stopifnot(sapply(resMZ,length) == sapply(resIntensity,length))
  if(asList){
    res <- mcmapply(data.frame, RefSpectraId = xx$RefSpectraId, mz = resMZ, intensity = resIntensity, SIMPLIFY=FALSE)
    names(res)<-xx$RefSpectraId
    return(res)
    
  }
  repspecID <- rep(xx$RefSpectraId , xx$numPeaks)
  res <- data.frame(RefSpectraId = repspecID ,mz = unlist(resMZ), intensity = unlist(resIntensity) )
  message("to data frame")
  ## res <- plyr::rbind.fill(res)
  return(res)
}

.newPsm <- function(x , peaks ){
  res <- list(
    id = x$RefSpectraId,
    peaks = x$numPeaks,
    mZ = peaks$mz, 
    intensity = peaks$intensity, 
    peptideSequence = x$peptideSeq,
    peptideModSeq = x$peptideModSeq,
    charge = x$precursorCharge, 
    pepmass = x$precursorMZ,
    fileName = x$fileName,
    rt = x$retentionTime,
    varModification = rep(0.0, nchar(x$peptideSeq)),
    score = x$score
  )
  
  class(res) = "psm"
  return(res)
}


#' R access to Bibliospec File 
#' 
#' @description 
#' This class implements an R referenz class for BiblioSpec 
#' generated sqlite files and can return the data contained as data.frames
#' or as list of tandem mass spectra peptide assignments objects (psm).
#' 
#' 
#' @field dbfile database file location
#' @import parallel
#' @import DBI
#' @import RSQLite
#' @import methods
#' @export Bibliospec
#' @exportClass Bibliospec
#' @author Witold E. Wolski and Christian Panse
#' @aliases blib Bibliospec bibliospec
#' @details The function performs a SQL query on the SQLite
#' files generated by BiblioSpec using the RSQLite package. 
#' 
#' BiblioSpec files are generated by using Skyline or the BiblioSpec command
#' line tool available from protwiz.
#' @references 
#' \itemize{
#' 
#' \item{UNIT 13.7 Using BiblioSpec for Creating and Searching Tandem MS,
#' Peptide Libraries. Barbara Frewen, Michael J. MacCoss.
#' Current Protocols in Bioinformatics Current Protocols in Bioinformatics.
#' \url{http://dx.doi.org/10.1002/0471250953.bi1307s20}.}
#' 
#' \item{
#' The predecessor of the method \code{getPsmSet} was implemented in 
#' the bioconductor package \href{http://bioconductor.org/packages/specL/}{specL}:
#' 
#' Panse C, Trachsel C, Grossmann J and Schlapbach R. (2015). 
#' specL - An R/Bioconductor package to prepare peptide spectrum matches 
#' for use in targeted proteomics. 
#' Bioinformatics. 
#' \url{http://dx.doi.org/10.1093/bioinformatics/btv105}.}}
#' @seealso 
#' \itemize{
#' \item{\url{https://skyline.gs.washington.edu/labkey/project/home/software/Skyline/begin.view}}
#' \item{\url{https://skyline.gs.washington.edu/labkey/project/home/software/BiblioSpec/begin.view}}
#' }
#' 
#' @examples
#'
#' library(bibliospec)
#' 
#' # use the sqlite file provided in the package
#' dbfile <- file.path(path.package("bibliospec"),
#'  "extdata/peptideStd.sqlite")
#' 
#' # call constructor
#' BS <- Bibliospec(dbfile=dbfile)
#' 
#' # test; should return TRUE
#' BS$getNrPSM() == 137
#' 
#' S <- BS$getPsmSet()
#' 
#' \dontrun{
#'  library(specL)
#'  print(S)
#'  lapply(S[1:10], plot)
#' }
#' 
#' peaks <- BS$getPeaks()
#' 
#' print(BS$summary())
#' head(peaks)
#' 
#' colnames(peaks)
#' 
#' spectrMet <- BS$getSpectraMeta()
#' 
#' dim(spectrMet)
#' alldata <- BS$getSpectraWithMeta()
#' alldata<- merge(spectrMet, peaks)
#' 
#' modification <- BS$getModification()
#' 
#' head(modification)
#' 
#' table(table(modification$RefSpectraID))
#' 
Bibliospec <- setRefClass("Bibliospec",
                          fields = list( .data = "list",
                                         dbfile = "character",
                                         peaksTable = "character"
                          ),
                          methods = list(
                            initialize = function(dbfile="", ...) {
                              dbfile <<- dbfile
                              m <- DBI::dbDriver("SQLite")
                              .data$connection <<- DBI::dbConnect( m , dbname=dbfile, flags = SQLITE_RWC )
                              peaksTable <<- "SpectraPeaks"
                              
                              message('database connected')
                            },
                            finalize  = function(){
                              message("finalize")
                              dbDisconnect(.data$connection)
                              print("disconnected")
                            },
                            getNrPSM = function() {
                              "Get number of psm's in database."
                              query <- "Select count(*) from Refspectra;"
                              res <- DBI::dbGetQuery(.data$connection, statement = query)
                              return(res)
                            },
                            getPeaks = function(asList= FALSE){
                              "Get peaks - mz and intensity and spectra id as data.frame."
                              message("getspectra")
                              if(asList){
                                message("preparing Peaks as a list, will take a while if you blib file is large.")
                                query <- paste(.queryPeaks,collapse=" ")
                                peaks <- ( dbGetQuery(.data$connection,query) )
                                peaks <- .blobs2Frame(peaks, asList= asList)
                                return(peaks)
                              }
                              if(dbExistsTable(.data$connection,peaksTable )){
                                peaks <- ( dbGetQuery(.data$connection,paste("Select * from", peaksTable)) )
                                return(peaks)
                              }else{
                                message("preparing Peaks, will take a while if you blib file is large.")
                                query <- paste(.queryPeaks,collapse=" ")
                                peaks <- ( dbGetQuery(.data$connection,query) )
                                peaks <- .blobs2Frame(peaks)
                                DBI::dbWriteTable(.data$connection, peaksTable,peaks)
                                return(peaks)
                              }
                            },
                            getSpectraMeta =   function(){
                              "Get spectra meta information - retention time , file name, num peaks as data.frame"
                              query <- paste(.querySpectra,collapse=" ")
                              message(paste(.querySpectra, collapse="\n"))
                              return( dbGetQuery(.data$connection,query) ) 
                              
                            },
                            getSpectraWithMeta = function(){
                              "Get peaks with all the meta information as data.frame"
                              if(!dbExistsTable(.data$connection,peaksTable )){
                                getPeaks()
                              }
                              query <- paste(.mergeQuery,collapse=" ")
                              message(paste(.mergeQuery, collapse="\n"))
                              return( dbGetQuery(.data$connection,query) ) 
                              
                            },
                            getModification = function(){
                              return( dbGetQuery(.data$connection,.getModicications) ) 
                            },
                            getPsmSet = function(){
                              "get class psmSet S3 (list of psm objects).
                              psm objects can be viewed by using the peakplot
                              method of package protViz.
                              "
                              spectrumMeta <- getSpectraMeta()
                              peaksList <- getPeaks(asList=TRUE)
                              
                              res <- vector(mode="list", length=nrow(spectrumMeta))
                              for(i in 1:nrow(spectrumMeta)){
                                res[[i]]<-.newPsm( spectrumMeta[i,], peaksList[[i]])
                              }
                              modifications <- getModification()
                              message(paste("assigning", nrow(modifications), "modifications ..."))
                              for (i in 1:nrow(modifications)){
                                x <- modifications[1,]
                                res[[ x$RefSpectraID ]]$varModification[x$position] <- x$mass
                              }
                              class(res) <- 'psmSet'
                              return(res)
                            },
                            summary = function(){
                              "summary of bibliospec file"
                              query <- c(
                                "SELECT SpectrumSourceFiles.fileName , sum(numPeaks) as nrPeaks,",
                                "min(RefSpectra.retentionTime) as minRT, max(RefSpectra.retentionTime) as maxRT,",
                                "min(RefSpectra.precursorMZ) as minPrecursorMZ, max(RefSpectra.precursorMZ) as maxPrecursorMZ,",
                                "min(RefSpectra.precursorCharge) as minPrecursorCharge,",
                                "max(RefSpectra.precursorCharge) as maxPrecursorCharge",
                                "from RefSpectra, SpectrumSourceFiles", 
                                "where RefSpectra.fileID = SpectrumSourceFiles.id group by RefSpectra.fileID;"
                              )
                              return( dbGetQuery(.data$connection,paste(query, collapse=" ")) ) 
                            }
                          ))

Try the bibliospec package in your browser

Any scripts or data that you put into this service are public.

bibliospec documentation built on May 29, 2017, 6:40 p.m.