#R
.convertMZToNumeric<-function(mzBlob, numPeaks){
mZ <- try(readBin(memDecompress(as.raw(mzBlob),'g'), double(), n=numPeaks), TRUE)
if (!is.numeric(mZ)){
mZ <- try(readBin(as.raw(mzBlob), double(),numPeaks), FALSE)
}
return(mZ)
}
.convertIntToNumeric<-function(intensityBlob, numPeaks){
intensity <- try(readBin(memDecompress(as.raw(intensityBlob),'g'),
numeric(), n = numPeaks, size = 4), TRUE)
if (!is.numeric(intensity)){
intensity <- try(readBin(as.raw(intensityBlob),
numeric(), n = numPeaks, size = 4), FALSE)
}
return(intensity)
}
.querySpectra <- c(
"SELECT RefSpectra.id as RefSpectraId, numPeaks, peptideSeq,",
"precursorCharge, precursorMZ, retentionTime, copies,",
"peptideModSeq, score, length(peptideseq) as lengthPepSeq, SpectrumSourceFiles.fileName, SpecIDinFile",
"FROM SpectrumSourceFiles, RefSpectra",
"WHERE SpectrumSourceFiles.id = RefSpectra.fileID;"
)
.mergeQuery <- c(
"SELECT RefSpectra.id as RefSpectraId, numPeaks,",
"peptideSeq, precursorCharge, precursorMZ, retentionTime, peptideModSeq, score," ,
"SpectrumSourceFiles.fileName, SpectraPeaks.mz, SpectraPeaks.intensity",
"FROM RefSpectra, SpectraPeaks, SpectrumSourceFiles",
"WHERE RefSpectra.id = SpectraPeaks.RefSpectraId and SpectrumSourceFiles.id = RefSpectra.fileID;"
)
.queryPeaks <- c(
"SELECT RefSpectra.id as RefSpectraId, numPeaks, peakMZ, peakIntensity",
"FROM RefSpectraPeaks, RefSpectra",
"WHERE RefSpectra.id=RefSpectraPeaks.RefSpectraID"
)
.summarizePeaks <- c(
"SELECT SpectraPeaks.RefSpectraId, count(*) as nrMS2Peaks, sum(SpectraPeaks.intensity) as PeakIntensity,",
"min(SpectraPeaks.mz) as minMZ, max(SpectraPeaks.mz) as maxMZ",
"FROM SpectraPeaks GROUP BY SpectraPeaks.RefSpectraId;"
)
.getModicications <- c("SELECT RefSpectraID, position, mass FROM Modifications;")
.blobs2Frame<-function(xx, asList= FALSE){
resMZ <- mcmapply(.convertMZToNumeric, xx$peakMZ, xx$numPeaks )
message(" extracted MZ")
resIntensity <- mcmapply(.convertIntToNumeric, xx$peakIntensity, xx$numPeaks )
message(" extracted Intensity")
stopifnot(sapply(resMZ,length) == sapply(resIntensity,length))
if(asList){
res <- mcmapply(data.frame, RefSpectraId = xx$RefSpectraId, mz = resMZ, intensity = resIntensity, SIMPLIFY=FALSE)
names(res)<-xx$RefSpectraId
return(res)
}
repspecID <- rep(xx$RefSpectraId , xx$numPeaks)
res <- data.frame(RefSpectraId = repspecID ,mz = unlist(resMZ), intensity = unlist(resIntensity) )
message("to data frame")
## res <- plyr::rbind.fill(res)
return(res)
}
.newPsm <- function(x , peaks ){
res <- list(
id = x$RefSpectraId,
peaks = x$numPeaks,
mZ = peaks$mz,
intensity = peaks$intensity,
peptideSequence = x$peptideSeq,
peptideModSeq = x$peptideModSeq,
charge = x$precursorCharge,
pepmass = x$precursorMZ,
fileName = x$fileName,
rt = x$retentionTime,
varModification = rep(0.0, nchar(x$peptideSeq)),
score = x$score
)
class(res) = "psm"
return(res)
}
#' R access to Bibliospec File
#'
#' @description
#' This class implements an R referenz class for BiblioSpec
#' generated sqlite files and can return the data contained as data.frames
#' or as list of tandem mass spectra peptide assignments objects (psm).
#'
#' @field .data private
#' @field dbfile database file location
#' @field peaksTable name of peaks table. do not modify
#' @import parallel
#' @import prozor
#' @import DBI
#' @import RSQLite
#' @import methods
#' @include annotateClass.R
#' @include FurtherAnalysis.R
#' @export Bibliospec
#' @exportClass Bibliospec
#' @author Witold E. Wolski and Christian Panse
#' @aliases blib Bibliospec bibliospec
#' @details The function performs a SQL query on the SQLite
#' files generated by BiblioSpec using the RSQLite package.
#'
#' BiblioSpec files are generated by using Skyline or the BiblioSpec command
#' line tool available from protwiz.
#' @references
#' \itemize{
#'
#' \item{UNIT 13.7 Using BiblioSpec for Creating and Searching Tandem MS,
#' Peptide Libraries. Barbara Frewen, Michael J. MacCoss.
#' Current Protocols in Bioinformatics Current Protocols in Bioinformatics.
#' \url{http://dx.doi.org/10.1002/0471250953.bi1307s20}.}
#'
#' \item{
#' The predecessor of the method \code{getPsmSet} was implemented in
#' the bioconductor package \href{http://bioconductor.org/packages/specL/}{specL}:
#'
#' Panse C, Trachsel C, Grossmann J and Schlapbach R. (2015).
#' specL - An R/Bioconductor package to prepare peptide spectrum matches
#' for use in targeted proteomics.
#' Bioinformatics.
#' \url{http://dx.doi.org/10.1093/bioinformatics/btv105}.}}
#' @seealso
#' \itemize{
#' \item{\url{https://skyline.gs.washington.edu/labkey/project/home/software/Skyline/begin.view}}
#' \item{\url{https://skyline.gs.washington.edu/labkey/project/home/software/BiblioSpec/begin.view}}
#' }
#'
#' @examples
#'
#' library(bibliospec)
#'
#' # use the sqlite file provided in the package
#' dbfile <- file.path(path.package("bibliospec"),
#' "extdata/peptideStd.sqlite")
#'
#' # call constructor
#' BS <- Bibliospec(dbfile=dbfile)
#' stopifnot(BS$getNrPSM() == 137)
#' S <- BS$getPsmSet()
#'
#' \dontrun{
#' library(specL)
#' print(S)
#' lapply(S[1:10], plot)
#' }
#'
#' peaks <- BS$getPeaks()
#'
#' print(BS$summary())
#' head(peaks)
#'
#' colnames(peaks)
#'
#' spectrMet <- BS$getSpectraMeta()
#'
#' # have a look at RT algiment
#' dim(spectrMet)
#' data("iRTpeptides")
#' head(iRTpeptides)
#' colnames(spectrMet)
#' x <- merge(spectrMet, iRTpeptides , by.x="peptideSeq", by.y="peptide" )
#' plot(x$iRT, x$retentionTime)
#'
#' alldata <- BS$getPeaksWithMeta()
#' alldata <- merge(spectrMet, peaks)
#' colnames(BS$getSpectraMeta())
#' head(BS$getPeaksSummaries())
#'
#' tmp <-merge(BS$getSpectraMeta(),BS$getPeaksSummaries(), by="RefSpectraId" )
#' head(tmp)
#'
Bibliospec <- setRefClass("Bibliospec",
fields = list( .data = "list",
dbfile = "character",
peaksTable = "character",
annotatedPeaksTable = "character",
annotate = "Annotate"
),
methods = list(
initialize = function(dbfile="", annotate=Annotate(), ...) {
dbfile <<- dbfile
m <- DBI::dbDriver("SQLite")
.self$.data$connection <- DBI::dbConnect( m , dbname=dbfile, flags = SQLITE_RWC )
.self$peaksTable <- "SpectraPeaks"
.self$annotate <- annotate
.self$annotatedPeaksTable <- "AnnotatedPeaks"
message('database connected')
},
finalize = function(){
message("finalize")
dbDisconnect(.data$connection)
print("disconnected")
},
getNrPSM = function() {
"Get number of psm's in database."
query <- "Select count(*) from Refspectra;"
res <- DBI::dbGetQuery(.data$connection, statement = query)
return(res)
},
getPeaksAsList = function(){
"preparing Peaks as a list, will take a while if you blib file is large."
query <- paste(.queryPeaks,collapse=" ")
peaks <- ( dbGetQuery(.data$connection,query) )
peaks <- .blobs2Frame(peaks, asList= TRUE)
return(peaks)
},
getPeaks = function(){
"Get peaks - mz and intensity and spectra id as data.frame."
message("getspectra")
if(dbExistsTable(.data$connection,peaksTable )){
peaks <- ( dbGetQuery(.data$connection,paste("Select * from", peaksTable)) )
return(peaks)
}else{
message("preparing Peaks, will take a while if you blib file is large.")
query <- paste(.queryPeaks,collapse=" ")
peaks <- ( dbGetQuery(.data$connection,query) )
peaks <- .blobs2Frame(peaks)
DBI::dbWriteTable(.data$connection, peaksTable,peaks)
return(peaks)
}
},
getSpectraMeta = function(){
"Get spectra meta information - retention time , file name, num peaks as data.frame"
query <- paste(.querySpectra,collapse=" ")
message(paste(.querySpectra, collapse="\n"))
return( DBI::dbGetQuery(.data$connection,query) )
},
getPeaksWithMeta = function(){
"Get peaks with all the meta information as data.frame. Might be huge!"
if(!dbExistsTable(.data$connection,peaksTable )){
getPeaks()
}
query <- paste(.mergeQuery,collapse=" ")
message(paste(.mergeQuery, collapse="\n"))
return( dbGetQuery(.data$connection,query) )
},
getPeaksSummaries = function(){
"Get summaries for all peaks. Might be huge!"
if(!dbExistsTable(.data$connection,peaksTable )){
getPeaks()
}
query <- paste(.summarizePeaks, collapse=" ")
message(paste(.summarizePeaks, collapse="\n"))
return( dbGetQuery(.data$connection,query) )
},
getModification = function(){
"modification table"
return( dbGetQuery(.data$connection,.getModicications) )
},
getPsmSet = function(){
"get class psmSet S3 (list of psm objects).
psm objects can be viewed by using the peakplot
method of package protViz.
"
spectrumMeta <- getSpectraMeta()
peaksList <- getPeaksAsList()
res <- vector(mode="list", length=nrow(spectrumMeta))
for(i in 1:nrow(spectrumMeta)){
res[[i]]<-.newPsm( spectrumMeta[i,], peaksList[[i]])
}
modifications <- getModification()
message(paste("assigning", nrow(modifications), "modifications ..."))
for (i in 1:nrow(modifications)){
x <- modifications[1,]
res[[ x$RefSpectraID ]]$varModification[x$position] <- x$mass
}
class(res) <- 'psmSet'
return(res)
},
summary = function(){
"summary of bibliospec file"
query <- c(
"SELECT SpectrumSourceFiles.fileName , sum(numPeaks) as nrPeaks,",
"min(RefSpectra.retentionTime) as minRT, max(RefSpectra.retentionTime) as maxRT,",
"min(RefSpectra.precursorMZ) as minPrecursorMZ, max(RefSpectra.precursorMZ) as maxPrecursorMZ,",
"min(RefSpectra.precursorCharge) as minPrecursorCharge,",
"max(RefSpectra.precursorCharge) as maxPrecursorCharge",
"from RefSpectra, SpectrumSourceFiles",
"where RefSpectra.fileID = SpectrumSourceFiles.id group by RefSpectra.fileID;"
)
return( dbGetQuery(.data$connection,paste(query, collapse=" ")) )
},
sql=function(x){
return( dbGetQuery(.data$connection,x) )
},
listTables = function(){
"List tables in db"
return( dbListTables(.data$connection) )
})
)
#' Annotate naked peptide seqeuences with protein IDs using fasta file and creates table PeptideAnnotation
#'
#' @name Bibliospec_annotateDB
#' @return table with annotated peptides
#' @param fasta fasta file either a list generated using prozor::readPeptideFasta or a path to a fasta file
#' @import prozor
#' @examples
#' library(bibliospec)
#' dbfile <- file.path(path.package("bibliospec"), "extdata/peptideStd.sqlite")
#'
#' # call constructor
#' BS <- Bibliospec(dbfile=dbfile)
#' BS$listTables()
#' library(prozor)
#' human <- prozor::readPeptideFasta(file=file.path(path.package("bibliospec"),"extdata/uniprot.fasta.gz" ))
#' tmp <- BS$annotateDB(fasta=c(human))
#'
#' mods <- BS$sql("select * from PeptideAnnotation" )
#'
#'
Bibliospec$methods(
annotateDB=function(fasta, prefix= "(([RK])|(^)|(^M))", suffix=""){
pep <- dbGetQuery(.data$connection,"select distinct peptideSeq from RefSpectra ")$peptideSeq
if(class(fasta) == "character"){
fasta <- prozor::readPeptideFasta( fasta )
}
annotation <- annotatePeptides(pep, fasta=fasta, prefix = prefix, suffix = suffix)
if(DBI::dbExistsTable(.self$.data$connection, "PeptideAnnotation")){
DBI::dbRemoveTable(.self$.data$connection, "PeptideAnnotation")
}
DBI::dbWriteTable(.self$.data$connection, "PeptideAnnotation", annotation)
try(DBI::dbGetQuery(.self$.data$connection, "CREATE INDEX RefpeptideSeqIDX ON RefSpectra (peptideSeq)"),silent = TRUE)
try(DBI::dbGetQuery(.self$.data$connection, "CREATE INDEX AnnotpeptideSeqIDX ON PeptideAnnotation (peptideSeq)"),silent=TRUE)
try(DBI::dbGetQuery(.self$.data$connection, .ProteotypicCountView),silent = TRUE)
try(DBI::dbGetQuery(.self$.data$connection, .ProteinsView), silent=TRUE)
invisible(annotation)
}
)
#' annotate precursors
#' @name Bibliospec_annotatePrecursors
#' @return table with annotated precursors
#' @import prozor
#' @examples
#' library(bibliospec)
#' library(prozor)
#'
#' dbfile <- file.path(path.package("bibliospec"),
#' "extdata/peptideStd.sqlite")
#'
#' # call constructor
#' BS <- Bibliospec(dbfile=dbfile)
#' BS$annotateDB(fasta=file.path(path.package("bibliospec"),"extdata/uniprot.fasta.gz" ))
#' xx <- BS$annotatePrecursors()
#' head(xx)
#'
Bibliospec$methods(
annotatePrecursors = function(){
if(!DBI::dbExistsTable(.self$.data$connection, "PeptideAnnotation")){
warning("run annotateDB on Bibliospec object first!")
}
precursors <- unique(subset(.self$getSpectraMeta(),select = c(peptideModSeq,precursorCharge,peptideSeq )))
pepProteinMapping <- .self$sql("select peptideSeq, proteinID from PeptideAnnotation")
annotatedPrecursors <- merge(precursors , pepProteinMapping, by.x="peptideSeq",
by.y="peptideSeq")
annotatedPrecursorsMerge <- plyr::ddply(annotatedPrecursors, plyr::.(peptideSeq, peptideModSeq, precursorCharge),
function(x){
data.frame(nrProteins = length(x$proteinID),
proteinIDmerged = paste( x$proteinID,collapse=","))
}
)
#print(colnames(annotatedPrecursors))
annotatedPrecursors <- dplyr::mutate(annotatedPrecursors, precursorID = paste(peptideModSeq,precursorCharge, sep="|" ))
#print(colnames(annotatedPrecursors))
precRazorProtMapping <- prepareMatrix(annotatedPrecursors, peptideID = "precursorID")
precRazorProtMapping <- greedy(precRazorProtMapping)
peptideAndCharge <- quantable::split2table(names(precRazorProtMapping), split="\\|" )
colnames(peptideAndCharge) <- c("peptideModSeq", "precursorCharge" )
precRazorProtMapping <- data.frame(peptideAndCharge, proteinID = unlist(precRazorProtMapping) )
annotationRazor <- merge(annotatedPrecursorsMerge, precRazorProtMapping)
if(DBI::dbExistsTable(.self$.data$connection, "PrecursorRazorProteinMapping")){
DBI::dbRemoveTable(.self$.data$connection, "PrecursorRazorProteinMapping")
}
DBI::dbWriteTable(.self$.data$connection, "PrecursorRazorProteinMapping", annotationRazor)
}
)
#' get spectra table annotated with razor proteins
#'
#' @name Bibliospec_getSpectraMetaWithAnnotation
#' @return table with annotated peptide spectrum matches
#' @import prozor
#' @examples
#' library(prozor)
#' library(bibliospec)
#'
#' dbfile <- file.path(path.package("bibliospec"),
#' "extdata/peptideStd.sqlite")
#'
#' # call constructor
#' BS <- Bibliospec(dbfile=dbfile)
#' BS$annotateDB(fasta=file.path(path.package("bibliospec"),"extdata/uniprot.fasta.gz" ))
#' specMet <- BS$getSpectraMetaWithAnnotation()
Bibliospec$methods(
getSpectraMetaWithAnnotation = function(all=FALSE){
if(!DBI::dbExistsTable(.self$.data$connection, "PrecursorRazorProteinMapping")){
warning("runing annotatePrecursors first")
.self$annotatePrecursors()
}
spectrMeta <- .self$getSpectraMeta()
precursorRazorAnnot <- .self$sql("Select * from PrecursorRazorProteinMapping")
res <- merge(spectrMeta, precursorRazorAnnot ,all.x = all)
}
)
#' get peptide anntotation list
#'
#' @name Bibliospec_getAnnotationLists
#' @param excludeAA Which AA to exclude - this argument is required
#' because including amino acids with no mass defined will crash annotatePeaksWithFragments function, defualt is B, X and Z.
#' @return list with psml - peptide spectrum match, modl - modification information, spectruml - the spectrum
#' @examples
#'
#' # use the sqlite file provided in the package
#' dbfile <- file.path(path.package("bibliospec"),
#' "extdata/peptideStd.sqlite")
#'
#' # call constructor
#' BS <- Bibliospec(dbfile=dbfile)
#' tmp <- BS$getAnnotationLists()
#' length(tmp$psml)
#' names(tmp)
#' tmp <- BS$getAnnotationLists(excludeAA = "Y|R")
#' length(tmp$psml)
#' stopifnot(length(tmp$psml) == 44)
Bibliospec$methods(
getAnnotationLists = function(excludeAA = "B|X|Z"){
mod <- .self$getModification()
peaks <- .self$getPeaks()
spectrMet <- .self$getSpectraMeta()
# convert to lists with plyr
message("preparing lists")
spectruml <- plyr::dlply( peaks, .(RefSpectraId), function(x){x} )
psml <- plyr::dlply( spectrMet,.(RefSpectraId), function(x){x} )
modl <-vector(mode="list",length=length(psml))
names(modl)<-1:length(psml)
modltmp <- plyr::dlply(mod, .(RefSpectraID), function(x){x} )
message("plyrd all data")
modl[names(modltmp)] <- modltmp
message("l(spectruml):", length(spectruml),", l(psml):", length(psml),", l(modl):", length(modl))
stopifnot(length(psml) == length(modl))
#Remove some special peptides (those without masses)
toExclude <-sapply(psml,function(x){grepl(excludeAA,x$peptideSeq)})
return(list(psml =psml[!toExclude], modl = modl[!toExclude], spectruml= spectruml[!toExclude]))
}
)
#' Annotate the MS2 with fragment ion inforamtion, creates a new table "AnnotatedPeaks".
#'
#' @name Bibliospec_annotatePeaksWithFragments
#' @param error error size
#' @param ppm error type (default TRUE ppm, if FALSE than DA)
#' @param neutralloss named vector with netral losses to add
#' @param excludeAA remove all entries with special amino acids (default = "B|X|Z")
#'
#' @examples
#' dbfile <- file.path(path.package("bibliospec"),
#' "extdata/peptideStd.sqlite")
#'
#' #call constructor
#' BS <- Bibliospec(dbfile=dbfile)
#' BS$annotatePeaksWithFragments()
#' annotPeaksTable <- (BS$getAnnotedPeaks())
#' head(annotPeaksTable)
#'
#' psml <- BS$getAnnotationLists()$psml
#' xx <- plyr::rbind.fill(psml)
#' head(xx)
#' xdf <- merge(xx, annotPeaksTable, by="RefSpectraId")
#' head(xdf)
Bibliospec$methods(
annotatePeaksWithFragments=function(error = 10,
ppm = TRUE,
neutralloss = c("Citrulline_Ornithine"=-43.005814),
excludeAA = "B|X|Z")
{
"annotate database table, be carefull might run for a while"
message("loading data")
data <-getAnnotationLists(excludeAA= excludeAA)
#annotates the peaklists in bibliopsec file#
message("running annotation")
annotation <- mapply( annotate$annotateSpectrum , data$psml , data$modl , data$spectruml, error = error, ppm = ppm,
neutrallossMass = neutralloss, neutrallossName= names(neutralloss),
SIMPLIFY = FALSE )
message("mapping modifications")
ismodified <- mapply( annotate$isModification , data$psml, data$modl)
message("merging data")
annotation <-rbind.fill(annotation)
ismodified <-rbind.fill(ismodified)
x <- merge(annotation, ismodified, all=TRUE)
x <- x[order(x$RefSpectraId, x$charge, x$ion, x$pos ),]
x <- x[!is.na(x$mZ_Da_error),]
if(dbExistsTable(.data$connection,annotatedPeaksTable )){
dbRemoveTable(.data$connection,annotatedPeaksTable )
}
DBI::dbWriteTable(.self$.data$connection, annotatedPeaksTable, x)
invisible(x)
},
getAnnotedPeaks = function(){
"Get peak lists with Ion annotation."
if(!dbExistsTable(.data$connection,peaksTable )){
getPeaks()
}
query <- paste("Select * from ",annotatedPeaksTable,sep=" ")
return( dbGetQuery(.data$connection, query) )
}
)
#' Compute FDR and add it to RefSpectra Table
#'
#' @name Bibliospec_getScoreForPSM
#' @import prozor
#' @examples
#' library(bibliospec)
#' library(prozor)
#'
#' dbfile <- file.path(path.package("bibliospec"),
#' "extdata/peptideStd.sqlite")
#'
#' #call constructor
#' BS <- Bibliospec(dbfile=dbfile)
#' BS$listTables()
#' BS$annotateDB(fasta=file.path(path.package("bibliospec"),"extdata/uniprot.fasta.gz" ))
#' (specMeta <- BS$getSpectraMetaWithAnnotation())
#' specMeta <- BS$getSpectraMeta()
#'
#' dim(specMeta)
#' specMeta
#' fdr <- prozor::computeFDR(specMeta$score, specMeta$proteinID,larger_better=FALSE)
#' head(fdr)
Bibliospec$methods(
getScoreForPSM = function(FDR = 1, plot=TRUE)
{
specMeta <- .self$sql("Select score, proteinID from RefSpectra")
fdr <- prozor::computeFDR(specMeta$score, specMeta$proteinID,larger_better=FALSE)
if(plot){
prozor::plotFDR(fdr)
}
prozor::predictScoreFDR(fdr, fdr = FDR)
},
getScoreForPrecursor = function(FDR , plot = TRUE)
{
},
getScoreForPeptide = function(FDR, plot = TRUE)
{
},
getScoreForProtein = function(FDR, plot = TRUE){
}
)
#' Get naked peptide list
#'
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.