#' Adduct quantification for adductomicsR
#'@import foreach
#'@import ade4
#'@import pastecs
#'@import DT
#'@import fpc
#'@import stats
#'@import utils
#'@import graphics
#'@import grDevices
#'@import methods
#'@import datasets
#'@import rvest
#'@import reshape2
#'@param nCores number of cores to use for analysis.
#'If NULL thenumber of cores detected will be used.
#'@param targTable is the fullpath to the target table.
#'See inst/extdata/examplePeptideTargetTable.csv for an example.
#'@param intStdRtDrift the maximum drift for the internal standard in seconds.
#'Default = NULL and therefore no RT correction is applied to the internal
#'standard.
#'@param rtDevModels is the full path to the rtDevModels.RData file
#'from rtDevModels(). default is NULL and therefore has no RT correction.
#'@param filePaths required list of mzXML files for analysis.
#'If all files are in the
#'same directory these can be accessed using
#'list.files('J:\\parentdirectory\\directoryContainingfiles',
#'pattern='.mzXML', all.files=FALSE, full.names=TRUE).
#'@param quantObject character string for filepath to an
#'AdductQuantif object to be integrated.
#'@param maxPpm numeric for the maximum parts per million to be used.
#'@param minSimScore a numeric between 0
# and 1 for peak quality stringency.
#'@param spikeScans a numeric for the number of scans that a spike
#'must be seen in for it to be integrated. Default is 2.
#'@param indivAdduct numeric vector of AdductQuantif targets to re-integrate
#'@param minPeakHeight numeric to determine the minimum height for a
#'peak to be integrated. Default
#'is set low at 100.
#'@param maxRtDrift numeric for the maximum retention time
#'drift to be considered. Default is 20.
#'@param maxRtWindow numeric in
#'seconds for the retention time window (total window will be 2 times
#'this value)
#'@param isoWindow numeric for the pepide isotope window in seconds,
#'default is 80
#'@param hkPeptide is capitalized string for the housekeeping peptide.
#'The default is 'LVNEVTEFAK' from human serum albumin.
#'@param gaussAlpha numeric for the gaussian smoothing parameter to
#'smooth the peaks. Default is 16.
#'Output is an adductQuantf object saved to the working directory
#'@description reads mzXML files from a directory, corrects RT according
#'to RT correction model and quantifies peaks.
#'@return adductQuant object
#'@examples
#'\dontrun{
#'adductQuant(nCores=4, targTable=paste0(system.file("extdata",
#'package = "adductomicsR"),'/exampletargTable2.csv'), intStdRtDrift=30,
#'rtDevModels=system.file("extdata", "rtDevModels.RData", package =
#'"adductData"),
#'filePaths=list.files(system.file("extdata", package =
#'"adductData"),pattern=".mzXML", all.files=FALSE,full.names=TRUE),
#'quantObject=NULL,indivAdduct=NULL,maxPpm=5,minSimScore=0.8,spikeScans=1,
#'minPeakHeight=100,maxRtDrift=20,maxRtWindow=240,isoWindow=80,
#'hkPeptide='LVNEVTEFAK', gaussAlpha=16)
#'}
#'@usage adductQuant(nCores = NULL, targTable = NULL,
#'intStdRtDrift = NULL, rtDevModels = NULL,
#'filePaths = NULL, quantObject = NULL, indivAdduct = NULL, maxPpm = 4,
#'minSimScore = 0.8, spikeScans = 2, minPeakHeight = 100, maxRtDrift = 20,
#'maxRtWindow = 120, isoWindow = 80,
#'hkPeptide = "LVNEVTEFAK", gaussAlpha = 16)
#'@export
adductQuant <- function(nCores = NULL,
targTable = NULL,
intStdRtDrift = NULL,
rtDevModels = NULL,
filePaths = NULL,
quantObject = NULL,
indivAdduct = NULL,
maxPpm = 4,
minSimScore = 0.8,
spikeScans = 2,
minPeakHeight = 100,
maxRtDrift = 20,
maxRtWindow = 120,
isoWindow = 80,
hkPeptide = "LVNEVTEFAK",
gaussAlpha = 16) {
if (is.null(rtDevModels)) {
stop("Please provide an rtDevModels file...\n")
}
if (is.character(rtDevModels)) {
rtDevModelsDir <- dirname(rtDevModels)
message("loading rtDevModels .RData file...Please wait.\n")
objectName <- load(rtDevModels, envir = environment())
# if different object then assign object name eval parse
rtDevModels <- eval(parse(text = objectName))
}
# error handling
if (is.null(quantObject)) {
if (is.null(targTable)) {
stop("Please provide a target table file ")
}
if (is.character(targTable)) {
# read in table from character string if necessary
targTable <- as.data.frame(data.table::fread(targTable,
header = TRUE), stringsAsFactors = FALSE)
}
if (!is.data.frame(targTable)) {
stop("targTable is not a data.frame")
}
if (!all(c("mass", "RT", "peptide", "chargeState") %in%
colnames(targTable))) {
stop(
"The Quantification target table must have the
column names:
\"mass\", \"RT\", \"peptide\", \"chargeState\"
(the retention time must appear in minutes
and the peptide(s) as single letter sequences)"
)
}
if (is.null(filePaths)) {
stop("a character vector of file paths to mzXML
or mzML files must be supplied.")
}
# peptide isotopic distribution prediction
indxTmp <- !duplicated(targTable$peptide)
pepSeqs <- as.character(targTable$peptide[indxTmp])
eleForm <-
lapply(pepSeqs, OrgMassSpecR::ConvertPeptide,
IAA = FALSE)
eleForm <-
lapply(seq_along(eleForm), function(ele) {
eleForm[[ele]]$H <- eleForm[[ele]]$H +
targTable$chargeState[indxTmp][ele]
return(eleForm[[ele]])
})
predIsoDist <-
mapply(IsotopicDistributionMod,
eleForm,
targTable$chargeState[indxTmp],
SIMPLIFY = FALSE)
predIsoDist <- lapply(predIsoDist,
function(x)
data.frame(
x,
id = c("MIM",
paste0("iso", seq_len((
nrow(x) - 1
)))),
sign = c(0, sign(diff(x$percent)))
))
names(predIsoDist) <- pepSeqs
# min/ max rt diff
minMaxRt <- c((min(as.numeric(
targTable$RT
)) * 60) -
maxRtWindow,
(max(as.numeric(
targTable$RT
)) *
60) + maxRtWindow)
# empty list for results
results <-
matrix(0,
ncol = 24,
nrow = length(filePaths)
* nrow(targTable))
colnames(results) <-
c(
"file",
"featureName",
"expMass",
"expRt",
"peptide",
"dotProd",
"isoDet",
"avOrApex",
"massAcc",
"rtDev",
"peakWidth",
"scanRange",
"nScans",
"peakArea",
"height",
"mzmed",
"mzmax",
"mzmin",
"corrRtMed",
"corrRtMax",
"corrRtMin",
"rtmed",
"rtmin",
"rtmax"
)
# add file and feature names to table
results[, "file"] <- rep(basename(filePaths),
each = nrow(targTable))
results[, "featureName"] <- rep(paste0(
"M",
round(targTable$mass, 3),
"_RT",
round(targTable$RT, 2)
),
length(filePaths))
results[, "expMass"] <-
rep(targTable$mass, length(filePaths))
results[, "expRt"] <-
rep(targTable$RT, length(filePaths))
results[, "peptide"] <- rep(targTable$peptide,
length(filePaths))
resultsList <- vector("list", nrow(results))
# integration sequence
intSeq <- seq_len(nrow(targTable))
} else {
# error handling
if (!is(quantObject, 'AdductQuantif')) {
stop("argument object is not an
AdductQuantif class object")
} else if (is.null(indivAdduct)) {
stop("a numeric vector of AdductQuantif
targets to re-integrate must be provided")
}
targTable <- targTable(quantObject)
intSeq <- indivAdduct
minMaxRt <- c((min(as.numeric(
targTable$RT
)) * 60) -
maxRtWindow, (max(as.numeric(
targTable$RT
)) * 60) +
maxRtWindow)
# extract previous results
predIsoDist <- predIsoDist(quantObject)
results <- peakQuantTable(quantObject)
# empty the results rows necessary
keepCols <-
grepl("file|featureName|expMass|expRt|peptide",
colnames(results))
keepCols <- which(keepCols == FALSE)
emptyResultRows <- rep(((seq_len(
length(file.paths(quantObject))
) - 1)
* nrow(targTable)),
length(indivAdduct)) + rep(indivAdduct,
each = length(file.paths(quantObject)))
results[emptyResultRows, keepCols] <- 0
resultsList <- peakIdData(quantObject)
filePaths <- file.paths(quantObject)
}
for (i in seq_along(filePaths)) {
filePathTmp <- filePaths[i]
# mass drift correction
massDriftFile <- gsub("\\.mzXML", ".massDrift.csv",
filePathTmp)
if (file.exists(massDriftFile)) {
massDriftTable <- read.csv(massDriftFile,
header = TRUE,
stringsAsFactors = FALSE)
}
MSfile <- mzR::openMSfile(filePathTmp)
metaSubTmp <- mzR::header(MSfile)
# max gap ms1 scans
maxGapMs1Scan <- max(diff(metaSubTmp$retentionTime[
metaSubTmp$msLevel == 1])) + 1
# single point rt drift
if (!is.null(intStdRtDrift)) {
metaSubTmp$retentionTime <- as.numeric(metaSubTmp$retentionTime)
+ (intStdRtDrift * -1)
}
# scan numbers
indxScans <-
which(
metaSubTmp$retentionTime < minMaxRt[2] &
metaSubTmp$retentionTime >
minMaxRt[1] & metaSubTmp$msLevel == 1
)
peakRangeAll <-
do.call(rbind, mzR::peaks(MSfile, indxScans))
# add retention times
peakRangeAll <- cbind(
peakRangeAll,
rep(metaSubTmp$retentionTime[indxScans],
metaSubTmp$peaksCount[indxScans]),
rep(metaSubTmp$seqNum[indxScans],
metaSubTmp$peaksCount[indxScans])
)
# retention time deviation model
if (!is.null(rtDevModels)) {
rtDevModel <- rtDevModels[[i]]
} else {
rtDevModel <- NULL
}
nCores <- ifelse(is.null(nCores), 1, nCores)
if (nCores > 1) {
pmt <- proc.time()
message(paste0(
"Starting SNOW cluster with ",
nCores,
" local sockets...\n"
))
message("identifying and quantifying ",
nrow(targTable),
" features...\n")
cl <- parallel::makeCluster(nCores)
doSNOW::registerDoSNOW(cl)
# foreach and dopar from foreach package
resultsTmp <- foreach(j =
seq_len(nrow(targTable))) %dopar% {
mzTmp <- as.numeric(targTable[j, 1])
rtTmp <- as.numeric(targTable[j, 2]) * 60
isoPat <- 1.0032 / targTable$chargeState[j]
isoPatPred <- predIsoDist[[as.character(targTable$peptide[
j])]]
isoPat <-
seq(0, isoPat * (nrow(isoPatPred) - 1),
isoPat)
names(isoPat) <- isoPatPred$id
# retention time deviation model
if (!is.null(rtDevModel)) {
rtDevTmp <- predict(rtDevModel, newdata = rtTmp / 60)
predRtTmp <- rtTmp + (rtDevTmp * 60)
} else {
predRtTmp <- rtTmp
}
# different parameters for hk peptide
hkIndx <-
targTable[j, "peptide"] %in% hkPeptide
if (hkIndx) {
peakRangeRtSub <- peakRangeAll[which(peakRangeAll[
, 3] < {
predRtTmp + 240
} & peakRangeAll[, 3] > {
predRtTmp - 240
}), , drop = FALSE]
} else {
peakRangeRtSub <- peakRangeAll[which(
peakRangeAll[, 3] <
(predRtTmp +
maxRtWindow) & peakRangeAll[, 3] >
(predRtTmp - maxRtWindow)
),
, drop = FALSE]
}
# mass drift correction
if (file.exists(massDriftFile)) {
peakRangeRtSub[, 1] <- peakRangeRtSub[, 1] - {
{
peakRangeRtSub[, 1] / 1e+06
} * massDriftTable[peakRangeRtSub[, 4],
"ppmDrift"]
}
}
resultTMP <-
peakIdQuant_newMethod(
mzTmp = mzTmp,
rtTmp = rtTmp,
peakRangeRtSub = peakRangeRtSub,
rtDevModel = rtDevModel,
isoPat = isoPat,
isoPatPred = isoPatPred,
minSimScore = minSimScore,
maxPpm = maxPpm,
gaussAlpha = gaussAlpha,
spikeScans = spikeScans,
minPeakHeight = minPeakHeight,
maxRtDrift = ifelse(hkIndx, 60, maxRtDrift),
isoWindow = isoWindow,
maxGapMs1Scan = maxGapMs1Scan,
intMaxPeak = hkIndx
)
}
# stop SNOW cluster
parallel::stopCluster(cl)
proc.time() - pmt
for (res in seq_along(resultsTmp)) {
resultRow <- ((i - 1) * nrow(targTable)) + res
if (length(resultsTmp[[res]]) == 3) {
results[resultRow, 6:ncol(results)] <-
resultsTmp[[res]]$results
resultsTmp[[res]]$results <- NULL
}
resultsList[[resultRow]] <-
resultsTmp[[res]]
}
# single threaded
} else {
pmt <- proc.time()
for (j in intSeq) {
resultRow <- ((i - 1) * nrow(targTable)) + j
mzTmp <- as.numeric(targTable[j, 1])
rtTmp <- as.numeric(targTable[j, 2]) * 60
isoPat <- 1.0032 / targTable$chargeState[j]
isoPatPred <- predIsoDist[[as.character(targTable$peptide[j])]]
isoPat <-
seq(0, isoPat * (nrow(isoPatPred) - 1),
isoPat)
names(isoPat) <- isoPatPred$id
# retention time deviation model
if (!is.null(rtDevModel)) {
rtDevTmp <- predict(rtDevModel, newdata = rtTmp / 60)
predRtTmp <- rtTmp + (rtDevTmp * 60)
} else {
predRtTmp <- rtTmp
}
# different parameters for hk peptide
hkIndx <-
targTable[j, "peptide"] %in% hkPeptide
if (hkIndx) {
peakRangeRtSub <- peakRangeAll[which(peakRangeAll[, 3] < {
predRtTmp + 240
} & peakRangeAll[, 3] > {
predRtTmp - 240
}), , drop = FALSE]
} else {
peakRangeRtSub <- peakRangeAll[which(
peakRangeAll[, 3] <
(predRtTmp + maxRtWindow) &
peakRangeAll[, 3] >
(predRtTmp - maxRtWindow)
),
, drop = FALSE]
}
# mass drift correction
if (file.exists(massDriftFile)) {
peakRangeRtSub[, 1] <- peakRangeRtSub[, 1] - {
{
peakRangeRtSub[, 1] / 1e+06
} * massDriftTable[peakRangeRtSub[, 4],
"ppmDrift"]
}
}
resultsList[[resultRow]] <-
peakIdQuant_newMethod(
mzTmp
= mzTmp,
rtTmp = rtTmp,
peakRangeRtSub = peakRangeRtSub,
rtDevModel = rtDevModel,
isoPat = isoPat,
isoPatPred = isoPatPred,
minSimScore = minSimScore,
maxPpm = maxPpm,
spikeScans = spikeScans,
minPeakHeight = minPeakHeight,
maxRtDrift = ifelse(hkIndx, 60, maxRtDrift),
isoWindow = isoWindow,
maxGapMs1Scan = maxGapMs1Scan,
intMaxPeak = hkIndx
)
########################### PLOTS #############
colnames(peakRangeRtSub) = c("MIM", "int",
"adjRT", "seq Number")
print(head(peakRangeRtSub))
} # end feature loop
proc.time() - pmt
} # if parallel
} # end file loop
# create new adductQuant object
if (!is.null(quantObject)) {
object <- quantObject
} else {
object <- new("AdductQuantif")
predIsoDist(object) <- predIsoDist
targTable(object) <- targTable
file.paths(object) <- filePaths
}
peakQuantTable(object) <- results
peakIdData(object) <- resultsList
save(object, file = "adductQuantResults.Rdata")
return(object)
} # end Function
setMethod("show", "AdductQuantif", function(object) {
if (length(file.paths(object)) > 0) {
message(
"A \"AdductQuantif\" class object derived from ",
length(file.paths(object)),
" MS files \n\n"
)
message(
"Consisting of:\n",
sum(peakQuantTable(object)[,
"peakArea"] != "0"),
" quantified peaks\n",
"and ",
sum(peakQuantTable(object)[, "peakArea"] ==
"0"),
" missing peaks (i.e. zero peak area)\n",
"Derived from a target list of: ",
nrow(targTable(object)),
" targets.\n\n"
)
if (any(grepl("^possOutPeak$",
colnames(peakQuantTable(object))))) {
message("Potentially outlying peaks:")
message(
sum(peakQuantTable(object)[, "possOutPeak"] == 1),
paste0("(", round((
sum(peakQuantTable(object)[,
"possOutPeak"] == "1") /
sum(peakQuantTable(object)[, "peakArea"] !="0") * 100
), 1), "%)", collapse = ""),
"of a total of",
sum(peakQuantTable(object)[,
"peakArea"] != "0"),
"peaks quantified were potentially non-gaussian,
long tailed,
or deviant from the peak group
retention time or peak area.\n\n"
)
}
memsize <- object.size(object)
message("Memory usage:", signif(memsize / 2 ^ 20, 3), "MB")
} else {
message("A new empty\"AdductQuantif\" class object")
}
})
setMethod("c", signature(x = "AdductQuantif"), function(x, ...) {
elements = list(...)
# error handling check if all adductSpec object
if (any(vapply(elements, function(ele)
is(ele, 'AdductQuantif'),
FUN.VALUE = logical(1)) == FALSE)) {
stop("all elements must be an AdductQuantif class object")
}
emptyAdductQuantif <- new("AdductQuantif")
for (i in seq_along(elements)) {
if (ncol(peakQuantTable(emptyAdductQuantif))==0) {
peakQuantTable(emptyAdductQuantif) <-
peakQuantTable(elements[[i]])
} else {
peakQuantTable(emptyAdductQuantif) <-
rbind(peakQuantTable(emptyAdductQuantif),
peakQuantTable(elements[[i]]))
}
peakIdData(emptyAdductQuantif) <-
c(peakIdData(emptyAdductQuantif),
peakIdData(elements[[i]]))
predIsoDist(emptyAdductQuantif) <-
c(predIsoDist(emptyAdductQuantif),
predIsoDist(elements[[i]]))
# keep unique
predIsoDist(emptyAdductQuantif) <-
predIsoDist(emptyAdductQuantif)[!duplicated(names(predIsoDist(
emptyAdductQuantif)))]
# target table
targTable(emptyAdductQuantif) <-
rbind(targTable(emptyAdductQuantif),
targTable(elements[[i]]))
# remove duplicates
uniEntries <-
apply(targTable(emptyAdductQuantif)[, seq_len(3)],
1, paste, collapse = "")
targTable(emptyAdductQuantif) <-
targTable(emptyAdductQuantif)[duplicated(uniEntries) ==
FALSE, , drop = FALSE]
file.paths(emptyAdductQuantif) <-
c(file.paths(emptyAdductQuantif),
file.paths(elements[[i]]))
}
return(emptyAdductQuantif)
}) # end function
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.