###############################################################
#This is the main script to perform Integrated data Processing and automated metabolite Annotation (JPA)
#Tao Huan, Sam Shen, Jian Guo 2020-07-28
#Copyright @ University of British Columbia
###############################################################
#Helper function: match ms2 to features
matchMS2 <- function(x,featuretable, rt.tol = 0, mz.tol = 0) {
pks <- featuretable
pks <- cbind(pks, 0)
colnames(pks)[ncol(pks)] <- "mzmin"
pks <- cbind(pks, 0)
colnames(pks)[ncol(pks)] <- "mzmax"
pks[, "mzmin"] <- pks[, "mz"] - mz.tol
pks[, "mzmax"] <- pks[, "mz"] + mz.tol
pks[, "rtmin"] <- pks[, "rt"] - rt.tol
pks[, "rtmax"] <- pks[, "rt"] + rt.tol
peak_ids <- rownames(pks)
sps <- spectra(x)
pmz <- precursorMz(x)
rtm <- rtime(x)
res <- vector(mode = "list", nrow(pks))
for (i in 1:nrow(pks)) {
if (is.na(pks[i, "mz"])) next()
idx <- which(pmz >= pks[i, "mzmin"] & pmz <= pks[i, "mzmax"] &
rtm >= pks[i, "rtmin"] & rtm <= pks[i, "rtmax"])
if (length(idx)) {
res[[i]] <- lapply(sps[idx], function(z) {
z
})
}
}
names(res) <- peak_ids
return(res)
}
#XCMS Feature Extraction
XCMS.featureTable <- function(dir, mz.tol = 50, ppm=50, peakwidth=c(5,20), mzdiff = 0.01, snthresh = 6, integrate = 1,
prefilter = c(3,100), noise = 100){
#XCMS feature detection
cwp <- CentWaveParam(ppm = ppm,
peakwidth = peakwidth,
mzdiff = mzdiff,
snthresh = snthresh,
integrate = integrate,
prefilter = prefilter,
noise = noise)
# Calculate the number of cores
no_cores <- detectCores() - 1
# print("Using cores:")
# print(no_cores)
# Initiate cluster
registerDoParallel(no_cores)
setwd(dir)
input.files <- list.files(pattern = ".mzXML|.mzML")
data <- readMSData(input.files, centroided = TRUE, mode = "onDisk")
#PICK LEVEL 1 AND 2 FEATURES
data <- findChromPeaks(data, param = cwp)
featureTable <- as.data.frame(chromPeaks(data))
featureTable <- cbind(featureTable, 2)
colnames(featureTable)[ncol(featureTable)] <- "level"
if(length(input.files) == 1){
#Label Level 1,2 features
premass.matrix <- data@featureData@data[data@featureData@data$msLevel == 2 &
data@featureData@data$basePeakIntensity > 0,c(10, 18, 20)]
colnames(premass.matrix) <- c("rt", "mz", "int")
is.level12 <- logical(length = nrow(premass.matrix))
for(i in 1:nrow(premass.matrix)){
mass.lower.limit <- premass.matrix$mz[i] * (1 - mz.tol * 1e-6)
mass.upper.limit <- premass.matrix$mz[i] * (1 + mz.tol * 1e-6)
short.list <- featureTable[featureTable$mz >= mass.lower.limit & featureTable$mz <= mass.upper.limit,]
short.list <- short.list[short.list$rtmin <= premass.matrix$rt[i] & short.list$rtmax >= premass.matrix$rt[i],]
if(nrow (short.list) > 0){
featureTable$level[featureTable$mz >= mass.lower.limit &
featureTable$mz <= mass.upper.limit &
featureTable$rtmin <= premass.matrix$rt[i] &
featureTable$rtmax >= premass.matrix$rt[i]] <- 1
is.level12[i] <- TRUE
}
}
}else{
width <- floor(log10(length(input.files))) + 1
#Label Level 1,2 features
label12 <- foreach(n = 1:length(input.files)) %dopar% {
currFeatureTable <- featureTable[featureTable$sample == n,]
premass.matrix <- data@featureData@data[data@featureData@data$msLevel == 2 &
data@featureData@data$basePeakIntensity > 0,c(10, 18, 20)]
sample.extract <- grep(paste("F", formatC(n, width=width, flag="0"), sep=""), rownames(premass.matrix))
premass.matrix <- premass.matrix[sample.extract,]
colnames(premass.matrix) <- c("rt", "mz", "int")
level12Vector <- logical(length = nrow(premass.matrix))
for(i in 1:nrow(premass.matrix)){
if(is.na(premass.matrix$mz[i])) next
mass.lower.limit <- premass.matrix$mz[i] * (1 - mz.tol * 1e-6)
mass.upper.limit <- premass.matrix$mz[i] * (1 + mz.tol * 1e-6)
short.list <- featureTable[featureTable$mz >= mass.lower.limit &
featureTable$mz <= mass.upper.limit &
featureTable$sample == n,]
short.list <- short.list[short.list$rtmin <= premass.matrix$rt[i] & short.list$rtmax >= premass.matrix$rt[i],]
if(nrow (short.list) > 0){
level12Vector[i] <- TRUE
currFeatureTable[,12][currFeatureTable[,1] >= mass.lower.limit &
currFeatureTable[,1] <= mass.upper.limit &
currFeatureTable[,5] <= premass.matrix$rt[i] &
currFeatureTable[,6] >= premass.matrix$rt[i]] <- 1
}
}
return(currFeatureTable)
}
featureTable <- as.data.frame(bind_rows(label12))
}
#clean up the cluster
stopImplicitCluster()
featureTable <- as.data.frame(featureTable)
featureTable <- featureTable[order(featureTable$mz, decreasing = F ),]
digits <- floor(log10(nrow(featureTable))) + 1
rownames(featureTable) <- paste0("CP", sprintf(paste0("%0", digits, "d"), c(1:nrow(featureTable))))
chromPeaks(data) <- as.matrix(featureTable)
rownames(featureTable) <- paste("F", 1:nrow(featureTable), sep="")
featureTable <- featureTable[, c("mz", "rt", "rtmin", "rtmax", "maxo", "sample", "level")]
MSdata <<- data
return(featureTable)
}
#Read csv feature table for plotting
custom.featureTable <- function(dir, FTdir, mz.tol = 10){
# Calculate the number of cores
no_cores <- detectCores() - 1
# print("Using cores:")
# print(no_cores)
# Initiate cluster
registerDoParallel(no_cores)
setwd(dir)
cwp <- CentWaveParam(ppm = 50,
peakwidth = c(5,20),
mzdiff = 0.01,
snthresh = 6,
integrate = 1,
prefilter = c(3,100),
noise = 100)
input.files <- list.files(pattern = ".mzXML|.mzML")
data <- readMSData(input.files, centroided = TRUE, mode = "onDisk")
#PICK LEVEL 1 AND 2 FEATURES
data <- findChromPeaks(data, param = cwp)
setwd(FTdir)
input.csv <- list.files(pattern = ".csv")
samp1 <- read.csv(file = input.csv[1], header = T, stringsAsFactors = F)
featureTable <- data.frame(matrix(nrow = nrow(samp1), ncol = ncol(chromPeaks(data))))
colnames(featureTable) <- colnames(chromPeaks(data))
featureTable$mz <- samp1[,1]
featureTable$mzmin <- samp1[,1]
featureTable$mzmax <- samp1[,1]
featureTable$rt <- samp1[,2]
featureTable$rtmin <- samp1[,3]
featureTable$rtmax <- samp1[,4]
featureTable$into <- 0
featureTable$intb <- 0
featureTable$maxo <- samp1[,5]
featureTable$sn <- 0
featureTable$sample <- 1
if(length(input.csv) > 1){
for(i in 1:length(input.csv)){
addFT <- read.csv(file = input.csv[i], header = T, stringsAsFactors = F)
newFT <- data.frame(matrix(nrow = nrow(addFT), ncol = ncol(chromPeaks(data))))
colnames(newFT) <- colnames(chromPeaks(data))
newFT$mz <- addFT[,1]
newFT$mzmin <- addFT[,1]
newFT$mzmax <- addFT[,1]
newFT$rt <- addFT[,2]
newFT$rtmin <- addFT[,3]
newFT$rtmax <- addFT[,4]
newFT$into <- 0
newFT$intb <- 0
newFT$maxo <- addFT[,5]
newFT$sn <- 0
newFT$sample <- i
featureTable <- rbind(featureTable, newFT)
}
}
featureTable <- as.data.frame(featureTable)
featureTable <- featureTable[order(featureTable$mz, decreasing = F ),]
digits <- floor(log10(nrow(featureTable))) + 1
rownames(featureTable) <- paste0("CP", sprintf(paste0("%0", digits, "d"), c(1:nrow(featureTable))))
chromPeaks(data) <- as.matrix(featureTable)
featureTable <- cbind(featureTable, 2)
colnames(featureTable)[ncol(featureTable)] <- "level"
if(length(input.files) == 1){
#Label Level 1,2 features
premass.matrix <- data@featureData@data[data@featureData@data$msLevel == 2 &
data@featureData@data$basePeakIntensity > 0,c(10, 18, 20)]
colnames(premass.matrix) <- c("rt", "mz", "int")
is.level12 <- logical(length = nrow(premass.matrix))
for(i in 1:nrow(premass.matrix)){
mass.lower.limit <- premass.matrix$mz[i] * (1 - mz.tol * 1e-6)
mass.upper.limit <- premass.matrix$mz[i] * (1 + mz.tol * 1e-6)
short.list <- featureTable[featureTable$mz >= mass.lower.limit & featureTable$mz <= mass.upper.limit,]
short.list <- short.list[short.list$rtmin <= premass.matrix$rt[i] & short.list$rtmax >= premass.matrix$rt[i],]
if(nrow (short.list) > 0){
featureTable$level[featureTable$mz >= mass.lower.limit &
featureTable$mz <= mass.upper.limit &
featureTable$rtmin <= premass.matrix$rt[i] &
featureTable$rtmax >= premass.matrix$rt[i]] <- 1
is.level12[i] <- TRUE
}
}
}else{
width <- floor(log10(length(input.files))) + 1
#Label Level 1,2 features
label12 <- foreach(n = 1:length(input.files)) %dopar% {
currFeatureTable <- featureTable[featureTable$sample == n,]
premass.matrix <- data@featureData@data[data@featureData@data$msLevel == 2 &
data@featureData@data$basePeakIntensity > 0,c(10, 18, 20)]
sample.extract <- grep(paste("F", formatC(n, width=width, flag="0"), sep=""), rownames(premass.matrix))
premass.matrix <- premass.matrix[sample.extract,]
colnames(premass.matrix) <- c("rt", "mz", "int")
level12Vector <- logical(length = nrow(premass.matrix))
for(i in 1:nrow(premass.matrix)){
if(is.na(premass.matrix$mz[i])) next
mass.lower.limit <- premass.matrix$mz[i] * (1 - mz.tol * 1e-6)
mass.upper.limit <- premass.matrix$mz[i] * (1 + mz.tol * 1e-6)
short.list <- featureTable[featureTable$mz >= mass.lower.limit &
featureTable$mz <= mass.upper.limit &
featureTable$sample == n,]
short.list <- short.list[short.list$rtmin <= premass.matrix$rt[i] & short.list$rtmax >= premass.matrix$rt[i],]
if(nrow (short.list) > 0){
level12Vector[i] <- TRUE
currFeatureTable[,12][currFeatureTable[,1] >= mass.lower.limit &
currFeatureTable[,1] <= mass.upper.limit &
currFeatureTable[,5] <= premass.matrix$rt[i] &
currFeatureTable[,6] >= premass.matrix$rt[i]] <- 1
}
}
return(currFeatureTable)
}
featureTable <- as.data.frame(bind_rows(label12))
}
#clean up the cluster
stopImplicitCluster()
featureTable <- as.data.frame(featureTable)
featureTable <- featureTable[order(featureTable$mz, decreasing = F ),]
digits <- floor(log10(nrow(featureTable))) + 1
rownames(featureTable) <- paste0("CP", sprintf(paste0("%0", digits, "d"), c(1:nrow(featureTable))))
chromPeaks(data) <- as.matrix(featureTable)
rownames(featureTable) <- paste("F", 1:nrow(featureTable), sep="")
featureTable <- featureTable[, c("mz", "rt", "rtmin", "rtmax", "maxo", "sample", "level")]
MSdata <<- data
return(featureTable)
}
#Add additional level 3 features to featureTable
find.level3features <- function(data, mz.tol = 50, mass.tol = 0.01, rt.tol = 60, derep.mass.tol = 0.01, derep.rt.tol = 30,
level3.threshold = 2){
# Calculate the number of cores
no_cores <- detectCores() - 1
# print("Using cores:")
# print(no_cores)
# Initiate cluster
registerDoParallel(no_cores)
input.files <- gsub("\\", "/", data@processingData@files, fixed=TRUE)
if(length(input.files) == 1){
#Label Level 1,2 features
premass.matrix <- data@featureData@data[data@featureData@data$msLevel == 2 &
data@featureData@data$basePeakIntensity > 0,c(10, 18, 20)]
colnames(premass.matrix) <- c("rt", "mz", "int")
# Dereplication of premass
premass.matrix <- premass.matrix[order(premass.matrix$int, decreasing = T ),]
dereplicate.level3 <- data.frame(matrix(ncol = ncol(premass.matrix), nrow = 1))
colnames(dereplicate.level3) <- colnames(premass.matrix)
for(q in 1:nrow(premass.matrix)){
mass.lower.limit <- premass.matrix$mz[q] - derep.mass.tol
mass.upper.limit <- premass.matrix$mz[q] + derep.mass.tol
rt.lower.limit <- premass.matrix$rt[q] - derep.rt.tol
rt.upper.limit <- premass.matrix$rt[q] + derep.rt.tol
temp <- dereplicate.level3[(dereplicate.level3$mz >= mass.lower.limit &
dereplicate.level3$mz <= mass.upper.limit &
dereplicate.level3$rt >= rt.lower.limit &
dereplicate.level3$rt <= rt.upper.limit),]
temp <- temp[complete.cases(temp),]
if(nrow(temp) == 0) {
dereplicate.level3 <- rbind(dereplicate.level3, premass.matrix[q,])
}
}
dereplicate.level3 <- dereplicate.level3[complete.cases(dereplicate.level3),]
# Confirm level 3 features
is.level12 <- logical(length = nrow(dereplicate.level3))
for(i in 1:nrow(dereplicate.level3)){
mass.lower.limit <- dereplicate.level3$mz[i] * (1 - mz.tol * 1e-6)
mass.upper.limit <- dereplicate.level3$mz[i] * (1 + mz.tol * 1e-6)
tmpFT <- as.data.frame(chromPeaks(data))
short.list <- tmpFT[tmpFT$mz >= mass.lower.limit & tmpFT$mz <= mass.upper.limit,]
short.list <- short.list[short.list$rtmin <= dereplicate.level3$rt[i] & short.list$rtmax >= dereplicate.level3$rt[i],]
if(nrow (short.list) > 0){
is.level12[i] <- TRUE
}
}
xraw <- xcmsRaw(input.files[1],profstep=0, mslevel = 1)
putative.level3 <- dereplicate.level3[is.level12 == FALSE,]
level3.matrix <- data.frame(matrix(nrow = nrow(putative.level3), ncol = ncol(tmpFT)))
colnames(level3.matrix) <- colnames(tmpFT)
if(nrow(putative.level3 != 0)){
for (j in 1:nrow(putative.level3)){
if(putative.level3$mz[j] > xraw@mzrange[2] | putative.level3$mz[j] < xraw@mzrange[1]) next
if(putative.level3$rt[j] > (tail(xraw@scantime, n=1)-5) | putative.level3$rt[j] < 5) next
rt.lower.limit <- putative.level3$rt[j] - rt.tol
rt.upper.limit <- putative.level3$rt[j] + rt.tol
mass.lower.limit <- putative.level3$mz[j] - mass.tol
mass.upper.limit <- putative.level3$mz[j] + mass.tol
mzRange <- as.double(cbind(mass.lower.limit, mass.upper.limit))
RTRangeFront <- as.integer(cbind(rt.lower.limit, putative.level3$rt[j]))
RTRangeBehind <- as.integer(cbind(putative.level3$rt[j], rt.upper.limit))
RTRange <- as.integer(cbind(rt.lower.limit, rt.upper.limit))
eeicFront <- rawEIC(xraw, mzrange=mzRange, rtrange=RTRangeFront)
eeicBehind <- rawEIC(xraw, mzrange=mzRange, rtrange=RTRangeBehind)
eic.matrixFront <- eeicFront[["intensity"]]
eic.matrixBehind <- eeicBehind[["intensity"]]
RTIndex <- max(which(xraw@scantime < putative.level3$rt[j]))
premassRT <- xraw@scantime[RTIndex]
if(premassRT > as.integer(putative.level3$rt[j])){
###uphill
if(putative.level3$int[j] < eic.matrixBehind[2] & putative.level3$int[j] > eic.matrixFront[length(eic.matrixFront)]){
for(x in 2:length(eic.matrixBehind)){
TempInt <- eic.matrixBehind[x]
if(putative.level3$int[j] > TempInt) break
putative.level3$int[j] <- TempInt
RTindex <- eeicBehind[["scan"]][x]
putative.level3$rt[j] <- xraw@scantime[RTindex]
}
}
###downhill
if(putative.level3$int[j] > eic.matrixBehind[2] & putative.level3$int[j] < eic.matrixFront[length(eic.matrixFront)]){
for(x in length(eic.matrixFront):1){
TempInt <- eic.matrixFront[x]
if(putative.level3$int[j] > TempInt) break
putative.level3$int[j] <- TempInt
RTindex <- eeicFront[["scan"]][x]
putative.level3$rt[j] <- xraw@scantime[RTindex]
}
}
}
if(premassRT <= as.integer(putative.level3$rt[j])){
###uphill
if(putative.level3$int[j] < eic.matrixBehind[1] & putative.level3$int[j] > eic.matrixFront[length(eic.matrixFront)-1]){
for(x in 1:length(eic.matrixBehind)){
TempInt <- eic.matrixBehind[x]
if(putative.level3$int[j] > TempInt) break
putative.level3$int[j] <- TempInt
RTindex <- eeicBehind[["scan"]][x]
putative.level3$rt[j] <- xraw@scantime[RTindex]
}
}
###downhill
if(putative.level3$int[j] > eic.matrixBehind[1] & putative.level3$int[j] < eic.matrixFront[length(eic.matrixFront)-1]){
for(x in (length(eic.matrixFront)-1):1){
TempInt <- eic.matrixFront[x]
if(putative.level3$int[j] > TempInt) break
putative.level3$int[j] <- TempInt
RTindex <- eeicFront[["scan"]][x]
putative.level3$rt[j] <- xraw@scantime[RTindex]
}
}
}
eeic <- rawEIC(xraw, mzrange=mzRange, rtrange=RTRange)
eic.matrix <- eeic[["intensity"]]
avg.int <- (sum(eic.matrix) - putative.level3$int[j]) / (length(eic.matrix) - 1)
if(is.na(avg.int) || length(eic.matrix) < 4) next
if(putative.level3$int[j] >= level3.threshold * avg.int){
#Put level 3 features in featureTable
level3.matrix[j,1] <- putative.level3$mz[j]
level3.matrix[j,2] <- putative.level3$mz[j]
level3.matrix[j,3] <- putative.level3$mz[j]
level3.matrix[j,4] <- putative.level3$rt[j]
level3.matrix[j,5] <- putative.level3$rt[j]
level3.matrix[j,6] <- putative.level3$rt[j]
level3.matrix[j,7] <- 0
level3.matrix[j,8] <- 0
level3.matrix[j,9] <- putative.level3$int[j]
level3.matrix[j,10] <- 0
level3.matrix[j,11] <- 1
level3.matrix[j,12] <- 3
}
}
level3.matrix <- level3.matrix[is.na(level3.matrix$mz)==FALSE,]
}
}else{
width <- floor(log10(length(input.files))) + 1
featureT <- as.data.frame(chromPeaks(data))
label12 <- foreach(n = 1:length(input.files), .packages = c("xcms", "dplyr")) %dopar% {
currFeatureTable <- chromPeaks(data)[chromPeaks(data)[,11] == n,]
premass.matrix <- data@featureData@data[data@featureData@data$msLevel == 2 &
data@featureData@data$basePeakIntensity > 0,c(10, 18, 20)]
sample.extract <- grep(paste("F", formatC(n, width=width, flag="0"), sep=""), rownames(premass.matrix))
premass.matrix <- premass.matrix[sample.extract,]
colnames(premass.matrix) <- c("rt", "mz", "int")
# Dereplication of premass
premass.matrix <- premass.matrix[order(premass.matrix$int, decreasing = T ),]
dereplicate.level3 <- data.frame(matrix(ncol = ncol(premass.matrix), nrow = 1))
colnames(dereplicate.level3) <- colnames(premass.matrix)
for(q in 1:nrow(premass.matrix)){
mass.lower.limit <- premass.matrix$mz[q] - derep.mass.tol
mass.upper.limit <- premass.matrix$mz[q] + derep.mass.tol
rt.lower.limit <- premass.matrix$rt[q] - derep.rt.tol
rt.upper.limit <- premass.matrix$rt[q] + derep.rt.tol
temp <- dereplicate.level3[(dereplicate.level3$mz >= mass.lower.limit &
dereplicate.level3$mz <= mass.upper.limit &
dereplicate.level3$rt >= rt.lower.limit &
dereplicate.level3$rt <= rt.upper.limit),]
temp <- temp[complete.cases(temp),]
if(nrow(temp) == 0) {
dereplicate.level3 <- rbind(dereplicate.level3, premass.matrix[q,])
}
}
dereplicate.level3 <- dereplicate.level3[complete.cases(dereplicate.level3),]
# Confirm level 3 features
level12Vector <- logical(length = nrow(dereplicate.level3))
for(i in 1:nrow(dereplicate.level3)){
if(is.na(dereplicate.level3$mz[i])) next
mass.lower.limit <- dereplicate.level3$mz[i] * (1 - mz.tol * 1e-6)
mass.upper.limit <- dereplicate.level3$mz[i] * (1 + mz.tol * 1e-6)
short.list <- featureT[featureT$mz >= mass.lower.limit &
featureT$mz <= mass.upper.limit &
featureT$sample == n,]
short.list <- short.list[short.list$rtmin <= dereplicate.level3$rt[i] & short.list$rtmax >= dereplicate.level3$rt[i],]
if(nrow (short.list) > 0){
level12Vector[i] <- TRUE
}
}
outList <- list(level12Vector, dereplicate.level3)
return(outList)
}
is.level12List <- list()
dereplicate.level3List <- list()
for(n in 1:length(input.files)){
is.level12List[[n]] <- label12[[n]][[1]]
dereplicate.level3List[[n]] <- label12[[n]][[2]]
}
#PICK LEVEL 3 FEATURES AND ADD TO XCMS RESULTS
level3List <- foreach(n = (1:length(input.files)), .packages = c("xcms", "dplyr")) %dopar% {
# Find potential level 3 features
xraw <- xcmsRaw(input.files[n],profstep=0, mslevel = 1)
dereplicate.level3 <- dereplicate.level3List[[n]]
is.level12 <- is.level12List[[n]]
# Confirm level 3 features
putative.level3 <- dereplicate.level3[is.level12 == FALSE,]
putative.level3 <- putative.level3[is.na(putative.level3$mz) == FALSE,]
is.level3 <- logical(length = nrow(putative.level3))
level3.matrix <- data.frame(matrix(nrow = nrow(putative.level3), ncol = ncol(chromPeaks(data))))
colnames(level3.matrix) <- colnames(chromPeaks(data))
if(nrow(putative.level3 != 0)){
for (j in 1:nrow(putative.level3)){
if(putative.level3$mz[j] > xraw@mzrange[2] | putative.level3$mz[j] < xraw@mzrange[1]) next
if(putative.level3$rt[j] > (tail(xraw@scantime, n=1)-5) | putative.level3$rt[j] < 5) next
rt.lower.limit <- putative.level3$rt[j] - rt.tol
rt.upper.limit <- putative.level3$rt[j] + rt.tol
mass.lower.limit <- putative.level3$mz[j] - mass.tol
mass.upper.limit <- putative.level3$mz[j] + mass.tol
mzRange <- as.double(cbind(mass.lower.limit, mass.upper.limit))
RTRangeFront <- as.integer(cbind(rt.lower.limit, putative.level3$rt[j]))
RTRangeBehind <- as.integer(cbind(putative.level3$rt[j], rt.upper.limit))
RTRange <- as.integer(cbind(rt.lower.limit, rt.upper.limit))
eeicFront <- rawEIC(xraw, mzrange=mzRange, rtrange=RTRangeFront)
eeicBehind <- rawEIC(xraw, mzrange=mzRange, rtrange=RTRangeBehind)
eic.matrixFront <- eeicFront[["intensity"]]
eic.matrixBehind <- eeicBehind[["intensity"]]
RTIndex <- max(which(xraw@scantime < putative.level3$rt[j]))
premassRT <- xraw@scantime[RTIndex]
if(premassRT > as.integer(putative.level3$rt[j])){
###uphill
if(putative.level3$int[j] < eic.matrixBehind[2] & putative.level3$int[j] > eic.matrixFront[length(eic.matrixFront)]){
for(x in 2:length(eic.matrixBehind)){
TempInt <- eic.matrixBehind[x]
if(putative.level3$int[j] > TempInt) break
putative.level3$int[j] <- TempInt
RTindex <- eeicBehind[["scan"]][x]
putative.level3$rt[j] <- xraw@scantime[RTindex]
}
}
###downhill
if(putative.level3$int[j] > eic.matrixBehind[2] & putative.level3$int[j] < eic.matrixFront[length(eic.matrixFront)]){
for(x in length(eic.matrixFront):1){
TempInt <- eic.matrixFront[x]
if(putative.level3$int[j] > TempInt) break
putative.level3$int[j] <- TempInt
RTindex <- eeicFront[["scan"]][x]
putative.level3$rt[j] <- xraw@scantime[RTindex]
}
}
}
if(premassRT <= as.integer(putative.level3$rt[j])){
###uphill
if(putative.level3$int[j] < eic.matrixBehind[1] & putative.level3$int[j] > eic.matrixFront[length(eic.matrixFront)-1]){
for(x in 1:length(eic.matrixBehind)){
TempInt <- eic.matrixBehind[x]
if(putative.level3$int[j] > TempInt) break
putative.level3$int[j] <- TempInt
RTindex <- eeicBehind[["scan"]][x]
putative.level3$rt[j] <- xraw@scantime[RTindex]
}
}
###downhill
if(putative.level3$int[j] > eic.matrixBehind[1] & putative.level3$int[j] < eic.matrixFront[length(eic.matrixFront)-1]){
for(x in (length(eic.matrixFront)-1):1){
TempInt <- eic.matrixFront[x]
if(putative.level3$int[j] > TempInt) break
putative.level3$int[j] <- TempInt
RTindex <- eeicFront[["scan"]][x]
putative.level3$rt[j] <- xraw@scantime[RTindex]
}
}
}
eeic <- rawEIC(xraw, mzrange=mzRange, rtrange=RTRange)
eic.matrix <- eeic[["intensity"]]
avg.int <- (sum(eic.matrix) - putative.level3$int[j]) / (length(eic.matrix) - 1)
if(is.na(avg.int) || length(eic.matrix) < 4) next
if(putative.level3$int[j] >= level3.threshold * avg.int){
#Put level 3 features in featureTable
is.level3 <- TRUE
level3.matrix[j,1] <- putative.level3$mz[j]
level3.matrix[j,2] <- putative.level3$mz[j]
level3.matrix[j,3] <- putative.level3$mz[j]
level3.matrix[j,4] <- putative.level3$rt[j]
level3.matrix[j,5] <- putative.level3$rt[j]
level3.matrix[j,6] <- putative.level3$rt[j]
level3.matrix[j,7] <- 0
level3.matrix[j,8] <- 0
level3.matrix[j,9] <- putative.level3$int[j]
level3.matrix[j,10] <- 0
level3.matrix[j,11] <- n
level3.matrix[j,12] <- 3
}
}
level3.matrix <- level3.matrix[is.na(level3.matrix$mz)==FALSE,]
return(level3.matrix)
}
}
level3.matrix <- as.data.frame(bind_rows(level3List))
}
#clean up the cluster
stopImplicitCluster()
featureTable <- rbind(chromPeaks(data), as.matrix(level3.matrix))
featureTable <- as.data.frame(featureTable)
featureTable <- featureTable[order(featureTable$mz, decreasing = F ),]
digits <- floor(log10(nrow(featureTable))) + 1
rownames(featureTable) <- paste0("CP", sprintf(paste0("%0", digits, "d"), c(1:nrow(featureTable))))
chromPeaks(data) <- as.matrix(featureTable)
rownames(featureTable) <- paste("F", 1:nrow(featureTable), sep="")
featureTable <- featureTable[, c("mz", "rt", "rtmin", "rtmax", "maxo", "sample", "level")]
MSdata <<- data
return(featureTable)
}
#Add additional features
find.targetfeatures <- function(data, tarFTdir, tarFTname, mass.tol = 0.01, rt.tol = 30, target.threshold = 2){
# Calculate the number of cores
no_cores <- detectCores() - 1
# print("Using cores:")
# print(no_cores)
# Initiate cluster
registerDoParallel(no_cores)
input.files <- gsub("\\", "/", data@processingData@files, fixed=TRUE)
featureTable <- as.data.frame(chromPeaks(data))
setwd(tarFTdir)
input <- read.csv(file = tarFTname, header = T, stringsAsFactors = F)
addFT <- data.frame(matrix(ncol = ncol(featureTable), nrow = nrow(input)))
colnames(addFT) <- colnames(featureTable)
addFT$mz <- input[,1]
addFT$rt <- input[,2]
if(length(input.files) == 1){
xraw <- xcmsRaw(input.files[1],profstep=0, mslevel = 1)
for(i in 1:nrow(addFT)){
rt.lower.limit <- addFT$rt[i] - rt.tol
rt.upper.limit <- addFT$rt[i] + rt.tol
mass.lower.limit <- addFT$mz[i] - mass.tol
mass.upper.limit <- addFT$mz[i] + mass.tol
temp <- featureTable[featureTable$mz >= mass.lower.limit & featureTable$mz <= mass.upper.limit,]
temp <- temp[temp$rt >= rt.lower.limit & temp$rt <= rt.upper.limit,]
if(nrow(temp) != 0) next
if(addFT$mz[i] > xraw@mzrange[2] | addFT$mz[i] < xraw@mzrange[1]) next
if(addFT$rt[i] > tail(xraw@scantime, n=1) | addFT$rt[i] < xraw@scantime[1]) next
mzRange <- as.double(cbind(mass.lower.limit, mass.upper.limit))
RTRange <- as.integer(cbind(rt.lower.limit, rt.upper.limit))
eeic <- rawEIC(xraw, mzrange=mzRange, rtrange=RTRange)
eic.matrix <- eeic[["intensity"]]
max.int <- max(eic.matrix)
if(is.na(max.int) | max.int == 0) next
avg.int <- (sum(eic.matrix) - max.int) / (length(eic.matrix) - 1)
if(max.int >= target.threshold * avg.int){
addFT[i,1] <- addFT$mz[i]
addFT[i,2] <- addFT$mz[i]
addFT[i,3] <- addFT$mz[i]
addFT[i,4] <- addFT$rt[i]
addFT[i,5] <- addFT$rt[i]
addFT[i,6] <- addFT$rt[i]
addFT[i,7] <- 0
addFT[i,8] <- 0
addFT[i,9] <- max.int
addFT[i,10] <- 0
addFT[i,11] <- 1
addFT[i,12] <- 4
featureTable <- rbind(featureTable, addFT[i,])
}
}
featureTable <- featureTable[complete.cases(featureTable),]
# dereplicatedFT <- data.frame(matrix(ncol = ncol(featureTable), nrow = 0)) #generate data frame with dereplicated features
# colnames(dereplicatedFT) <- colnames(featureTable)
# for(m in (1:nrow(featureTable))) {
# mass.lower.limit <- featureTable$mz[m] - 0.01
# mass.upper.limit <- featureTable$mz[m] + 0.01
# rt.lower.limit <- featureTable$rt[m] - 30
# rt.upper.limit <- featureTable$rt[m] + 30
# temp <- dereplicatedFT[dereplicatedFT$mz >= mass.lower.limit & dereplicatedFT$mz <= mass.upper.limit,]
# temp <- temp[temp$rt >= rt.lower.limit & temp$rt <= rt.upper.limit,]
# if(nrow(temp) == 0) {
# dereplicatedFT[nrow(dereplicatedFT) + 1,] = featureTable[m,]
# }else{
# index <- which(dereplicatedFT$mz == temp$mz[1])[1]
# if(sum(dereplicatedFT$maxo[index]) < temp$maxo[1]){
# dereplicatedFT[index, ] <- temp[1, ]
# }
# }
# }
}else{
newFTlist <- foreach(n = (1:length(input.files)), .packages = c("xcms", "dplyr")) %dopar% {
FT <- featureTable[featureTable$sample == n, ]
xraw <- xcmsRaw(input.files[n],profstep=0, mslevel = 1)
for (i in 1:nrow(addFT)){
rt.lower.limit <- addFT$rt[i] - rt.tol
rt.upper.limit <- addFT$rt[i] + rt.tol
mass.lower.limit <- addFT$mz[i] - mass.tol
mass.upper.limit <- addFT$mz[i] + mass.tol
temp <- featureTable[featureTable$mz >= mass.lower.limit & featureTable$mz <= mass.upper.limit,]
temp <- temp[temp$rt >= rt.lower.limit & temp$rt <= rt.upper.limit,]
if(nrow(temp) != 0) next
if(addFT$mz[i] > xraw@mzrange[2] | addFT$mz[i] < xraw@mzrange[1]) next
if(addFT$rt[i] > tail(xraw@scantime, n=1) | addFT$rt[i] < xraw@scantime[1]) next
mzRange <- as.double(cbind(mass.lower.limit, mass.upper.limit))
RTRange <- as.integer(cbind(rt.lower.limit, rt.upper.limit))
eeic <- rawEIC(xraw, mzrange=mzRange, rtrange=RTRange)
eic.matrix <- eeic[["intensity"]]
max.int <- max(eic.matrix)
if(is.na(max.int) | max.int == 0) next
avg.int <- (sum(eic.matrix) - max.int) / (length(eic.matrix) - 1)
if(max.int >= target.threshold * avg.int){
addFT[i,1] <- addFT$mz[i]
addFT[i,2] <- addFT$mz[i]
addFT[i,3] <- addFT$mz[i]
addFT[i,4] <- addFT$rt[i]
addFT[i,5] <- addFT$rt[i]
addFT[i,6] <- addFT$rt[i]
addFT[i,7] <- 0
addFT[i,8] <- 0
addFT[i,9] <- max.int
addFT[i,10] <- 0
addFT[i,11] <- n
addFT[i,12] <- 4
FT <- rbind(FT, addFT[i,])
}
}
# newFT <- newFT[complete.cases(newFT),]
# FT <- rbind(featureTable[featureTable$sample == n, ], newFT)
# dereplicatedFT <- data.frame(matrix(ncol = ncol(FT), nrow = 0)) #generate data frame with dereplicated features
# colnames(dereplicatedFT) <- colnames(FT)
# for(m in (1:nrow(FT))) {
# mass.lower.limit <- FT$mz[m] - 0.01
# mass.upper.limit <- FT$mz[m] + 0.01
# rt.lower.limit <- FT$rt[m] - 30
# rt.upper.limit <- FT$rt[m] + 30
# temp <- dereplicatedFT[dereplicatedFT$mz >= mass.lower.limit & dereplicatedFT$mz <= mass.upper.limit,]
# temp <- temp[temp$rt >= rt.lower.limit & temp$rt <= rt.upper.limit,]
# if(nrow(temp) == 0) {
# dereplicatedFT[nrow(dereplicatedFT) + 1,] = FT[m,]
# }else{
# index <- which(dereplicatedFT$mz == temp$mz[1])[1]
# if(sum(dereplicatedFT$maxo[index]) < temp$maxo[1]){
# dereplicatedFT[index, ] <- temp[1, ]
# }
# }
# }
# dereplicatedFT <- dereplicatedFT[complete.cases(dereplicatedFT),]
FT <- FT[complete.cases(FT),]
return(FT)
}
featureTable <- as.data.frame(bind_rows(newFTlist))
}
#clean up the cluster
stopImplicitCluster()
featureTable <- as.data.frame(featureTable)
featureTable <- featureTable[order(featureTable$mz, decreasing = F ),]
digits <- floor(log10(nrow(featureTable))) + 1
rownames(featureTable) <- paste0("CP", sprintf(paste0("%0", digits, "d"), c(1:nrow(featureTable))))
chromPeaks(data) <- as.matrix(featureTable)
rownames(featureTable) <- paste("F", 1:nrow(featureTable), sep="")
featureTable <- featureTable[, c("mz", "rt", "rtmin", "rtmax", "maxo", "sample", "level")]
MSdata <<- data
featureTable$level[featureTable$level == 4] <- "target"
return(featureTable)
}
#CAMERA Annotation
adduct.isotope.annotation <- function(data, polarity){
input.files <- gsub("\\", "/", data@processingData@files, fixed=TRUE)
featureTable <- as.data.frame(chromPeaks(data))
featureTable$level[featureTable$level == "target"] <- 4
featureTable$level <- as.numeric(featureTable$level)
if(length(input.files) == 1){
data_filtered <- filterMsLevel(data, msLevel = 1L)
xset <- as(data_filtered, 'xcmsSet')
xsa <- xsAnnotate(xset)
anF <- groupFWHM(xsa, perfwhm = 0.6)
anI <- findIsotopes(anF, mzabs = 0.01)
anIC <- groupCorr(anI, cor_eic_th = 0.75)
anFA <- findAdducts(anIC, polarity=polarity)
peaklist <- getPeaklist(anFA)
peaklist <- peaklist[order(peaklist$mz),]
featureTable <- cbind(featureTable, peaklist$isotopes)
colnames(featureTable)[ncol(featureTable)] <- "Isotopes"
featureTable <- cbind(featureTable, peaklist$adduct)
colnames(featureTable)[ncol(featureTable)] <- "Adduct"
featureTable <- cbind(featureTable, as.numeric(peaklist$pcgroup))
colnames(featureTable)[ncol(featureTable)] <- "pcgroup"
}else{
# Calculate the number of cores
no_cores <- detectCores() - 1
# print("Using cores:")
# print(no_cores)
# Initiate cluster
registerDoParallel(no_cores)
newFTlist <- foreach(n = (1:length(input.files)), .packages = c("xcms", "CAMERA")) %dopar% {
FT <- featureTable[featureTable$sample == n, ]
currData <- readMSData(input.files[n], centroided = TRUE, mode = "onDisk")
#PICK LEVEL 1 AND 2 FEATURES
cwp <- CentWaveParam(ppm = 50,
peakwidth = c(5,20),
mzdiff = 0.01,
snthresh = 6,
integrate = 1,
prefilter = c(3,100),
noise = 100)
currData <- findChromPeaks(currData, param = cwp)
FT <- FT[order(FT$mz, decreasing = F ),]
digits <- floor(log10(nrow(FT))) + 1
rownames(FT) <- paste0("CP", sprintf(paste0("%0", digits, "d"), c(1:nrow(FT))))
FT$sample <- 1
chromPeaks(currData) <- as.matrix(FT)
data_filtered <- filterMsLevel(currData, msLevel = 1L)
xset <- as(data_filtered, 'xcmsSet')
xsa <- xsAnnotate(xset)
anF <- groupFWHM(xsa, perfwhm = 0.6)
anI <- findIsotopes(anF, mzabs = 0.01)
anIC <- groupCorr(anI, cor_eic_th = 0.75)
anFA <- findAdducts(anIC, polarity=polarity)
peaklist <- getPeaklist(anFA)
peaklist <- peaklist[order(peaklist$mz),]
FT <- cbind(FT, peaklist$isotopes)
colnames(FT)[ncol(FT)] <- "isotopes"
FT <- cbind(FT, peaklist$adduct)
colnames(FT)[ncol(FT)] <- "adduct"
FT <- cbind(FT, as.numeric(peaklist$pcgroup))
colnames(FT)[ncol(FT)] <- "pcgroup"
FT$sample <- n
# FT <- FT[complete.cases(FT),]
return(FT)
}
featureTable <- as.data.frame(bind_rows(newFTlist))
#clean up the cluster
stopImplicitCluster()
featureTable <- as.data.frame(featureTable)
userFT <- featureTable
if(polarity == "positive"){
logicalISP <- grepl("[M]+", featureTable$isotopes, fixed = T)
}else{
logicalISP <- grepl("[M]-", featureTable$isotopes, fixed = T)
}
featureTable$isotopes[featureTable$isotopes == "" | logicalISP] <- 8
featureTable$isotopes[featureTable$isotopes != 8 ] <- 0
featureTable$isotopes <- as.numeric(featureTable$isotopes)
if(polarity == "positive"){
logicalADD <- grepl("[M+H]+", featureTable$adduct, fixed = T)
}else{
logicalADD <- grepl("[M-H]-", featureTable$adduct, fixed = T)
}
featureTable$adduct[featureTable$adduct == "" | logicalADD] <- 8
featureTable$adduct[featureTable$adduct != 8 ] <- 0
featureTable$adduct <- as.numeric(featureTable$adduct)
featureTable <- featureTable[order(featureTable$mz, decreasing = F ),]
digits <- floor(log10(nrow(featureTable))) + 1
rownames(featureTable) <- paste0("CP", sprintf(paste0("%0", digits, "d"), c(1:nrow(featureTable))))
chromPeaks(data) <- as.matrix(featureTable)
MSdata <<- data
userFT <- userFT[order(userFT$mz, decreasing = F ),]
rownames(userFT) <- paste("F", 1:nrow(userFT), sep="")
userFT <- userFT[, c("mz", "rt", "rtmin", "rtmax", "maxo", "sample", "level",
"isotopes", "adduct", "pcgroup")]
userFT$level[userFT$level == 4] <- "target"
return(userFT)
}
}
#Performs feature alignment for multi-sample analysis
peak.alignment <- function(data, bw = 5, minfrac = 0.5, mzwid = 0.015,
minsamp = 1, max = 100, quantitative.method = "maxo"){
#ALIGNMENT
groupParam <- PeakDensityParam(sampleGroups = rep(1, length(fileNames(data))),
bw = bw, minFraction = minfrac, binSize = mzwid, minSamples = minsamp, maxFeatures = max)
data <- groupChromPeaks(data, param = groupParam)
# data <- adjustRtime(data, param = ObiwarpParam(binSize = 1))
data <- adjustRtime(data, param = PeakGroupsParam(minFraction = 1))
data <- groupChromPeaks(data, param = groupParam)
data <- fillChromPeaks(data, param = FillChromPeaksParam())
XCMI <- as.data.frame(featureValues(data, method = "maxint", value = quantitative.method,
intensity = quantitative.method, filled = T))
if("isotopes" %in% colnames(chromPeaks(data))){
XCMISP <- as.data.frame(featureValues(data, method = "maxint", value = "isotopes",
intensity = quantitative.method, filled = T))
XCMISP <- cbind(XCMISP, 0)
colnames(XCMISP)[ncol(XCMISP)] <- "isotopePercent"
XCMISP[is.na(XCMISP)] <- 8
for(i in 1:nrow(XCMISP)){
XCMISP$isotopePercent[i] <- sum(XCMISP[i,-ncol(XCMISP)] == 0) / (ncol(XCMISP) - 1)
}
XCMADD <- as.data.frame(featureValues(data, method = "maxint", value = "adduct",
intensity = quantitative.method, filled = T))
XCMADD <- cbind(XCMADD, 0)
colnames(XCMADD)[ncol(XCMADD)] <- "adductPercentage"
XCMADD[is.na(XCMADD)] <- 8
for(i in 1:nrow(XCMADD)){
XCMADD$adductPercentage[i] <- sum(XCMADD[i,-ncol(XCMADD)] == 0) / (ncol(XCMADD) - 1)
}
XCMPCG <- as.data.frame(featureValues(data, method = "maxint", value = "pcgroup",
intensity = quantitative.method, filled = T))
colnames(XCMPCG) <- paste0(colnames(XCMPCG), "_pcgroup")
XCMt <- as.data.frame(featureDefinitions(data))
featureTable <- cbind(XCMt$mzmed, XCMt$rtmed, XCMt$rtmin, XCMt$rtmax, XCMI,
XCMISP$isotopePercent, XCMADD$adductPercentage, XCMPCG)
colnames(featureTable)[1:4] <- c("mz", "rt", "rtmin", "rtmax")
colnames(featureTable)[(5+ncol(XCMPCG)):(6+ncol(XCMPCG))] <- c("isotopePercent", "adductPercentage")
}else{
XCMt <- as.data.frame(featureDefinitions(data))
featureTable <- cbind(XCMt$mzmed, XCMt$rtmed, XCMt$rtmin, XCMt$rtmax, XCMI)
colnames(featureTable)[1:4] <- c("mz", "rt", "rtmin", "rtmax")
}
#Output
featureTable <- as.data.frame(featureTable)
featureTable <- featureTable[order(featureTable$mz, decreasing = F ),]
rownames(featureTable) <- paste("F", 1:nrow(featureTable), sep="")
featureTable[is.na(featureTable)] <- 0
return(featureTable)
}
#MS2 Assignment
ms2.tofeaturetable <- function(data, featureTable, rt.tol = 30, mz.tol = 0.01){
MS2spectra <- matchMS2(data, featureTable, rt.tol = rt.tol, mz.tol = mz.tol)
featureTable <- cbind(featureTable,F,0,0,0,0)
colnames(featureTable)[(ncol(featureTable)-4):ncol(featureTable)] <- c("MS2_match", "MS2mz", "MS2int",
"PeaksCount", "fromFile")
for (i in 1:nrow(featureTable)) {
if(!is.null(MS2spectra[[i]])){
tmpSpectra <- MS2spectra[[i]]
for (j in 1:length(tmpSpectra)){
if(tmpSpectra[[j]]@peaksCount == 0){
tmpSpectra[[j]] <- NA
}
}
tmpSpectra <- tmpSpectra[is.na(tmpSpectra)==FALSE]
if(length(tmpSpectra) > 0){
currInt = tmpSpectra[[1]]@precursorIntensity
currIdx = 1
for(k in 1:length(tmpSpectra)){
if(tmpSpectra[[k]]@precursorIntensity > currInt){
currIdx = k
currInt = tmpSpectra[[k]]@precursorIntensity
}
}
finalSpectra = tmpSpectra[[currIdx]]
indices <- finalSpectra@intensity >= 0;
finalSpectra@intensity <- finalSpectra@intensity[indices];
finalSpectra@mz <- finalSpectra@mz[indices];
featureTable$MS2_match[i] <- TRUE
featureTable$MS2mz[i] <- paste(round(finalSpectra@mz,4),collapse = ";")
featureTable$MS2int[i] <- paste(finalSpectra@intensity, collapse = ";")
featureTable$PeaksCount[i] <- finalSpectra@peaksCount
featureTable$fromFile[i] <- finalSpectra@fromFile
}
}
}
featureTable[featureTable == ""] <- 0
return(featureTable)
}
#Feature Annotation
feature.annotation <- function(featureTable, lib_directory, lib_name, dp = 0.7, ms1.tol = 0.01, ms2.tol = 0.02){
# Dot product function
dp.score <- function(x,y){
if(nrow(x)==0 | nrow(y)==0){return(0)}
x[,2] <- 100*x[,2]/max(x[,2])
y[,2] <- 100*y[,2]/max(y[,2])
alignment <- data.frame(matrix(nrow=nrow(x), ncol=3))
alignment[,1:2] <- x[,1:2]
y1 <- y ##in case one row in y can be selected multiple times
for(i in 1:nrow(x)){
mass.diff <- abs(y1[,1] - x[i,1])
if(min(mass.diff) <= ms2.tol){
alignment[i,3] <- y1[mass.diff==min(mass.diff),2][1]
y1[mass.diff==min(mass.diff),1][1] <- NA # after matched, NA assigned
y1 <- y1[complete.cases(y1),]
if(is.null(nrow(y1)) ==TRUE) break
if(nrow(y1)==0) break
}
}
alignment <- alignment[complete.cases(alignment),]
if(nrow(alignment)==0){score <- 0}
if(nrow(alignment)>0){
#dot product calculation
AB <- sum(alignment[,2]*alignment[,3])
A <- sum(x[,2]^2)
B <- sum(y[,2]^2)
dp.score <- AB/sqrt(A*B)
score <- as.numeric(dp.score)
}
match_No <- nrow(alignment)
return <- c(score,match_No)
return(return)
}
# Calculate the number of cores
no_cores <- detectCores() - 1
# print("Using cores:")
# print(no_cores)
# Initiate cluster
registerDoParallel(no_cores)
setwd(lib_directory)
database <- read.msp(lib_name, only.org = FALSE)
featureTable <- cbind(featureTable, 0)
colnames(featureTable)[ncol(featureTable)] <- "Annotation"
featureTable <- cbind(featureTable, 0)
colnames(featureTable)[ncol(featureTable)] <- "DPscore"
rez <- foreach(x = 1:nrow(featureTable)) %dopar% {
premass.Q <- featureTable$mz[x] ###query precursor ion mass
if(featureTable$MS2mz[x] == 0){
# featureTable$Annotation[x] <- "unknown"
return(c("unknown", 0))
}
ms2.Q <- data.frame(m.z = strsplit(featureTable$MS2mz[x], ";")[[1]],
int = strsplit(featureTable$MS2int[x], ";")[[1]]) ###query ms2 input, ncol = 2, m.z & int
ms2.Q$m.z <- as.numeric(as.character(ms2.Q$m.z))
ms2.Q$int <- as.numeric(as.character(ms2.Q$int))
output <- data.frame(matrix(ncol=3))
colnames(output) <- c('std.name','DP.score','match_No')
h <- 1
for(i in 1:length(database)){
if(is.null(database[[i]]$PrecursorMZ)==TRUE) next # no precursor mass
premass.L <- database[[i]]$PrecursorMZ # database precursor
if(abs(premass.L-premass.Q) > ms1.tol) next # precursor filter
ms2.L <- as.data.frame(database[[i]]$pspectrum) # database spectrum
name.L <- database[[i]]$Name
output[h,1] <- name.L
output[h,2] <- dp.score(ms2.Q,ms2.L)[1]
output[h,3] <- dp.score(ms2.Q,ms2.L)[2]
h <- h + 1
}
output <- output[complete.cases(output),]
# Dp score threshold, Dp score >= 0.7 , match_No >= 6 (used in GNPS identification)
output <- output[output[,2] >= dp,]
# output <- output[output[,3] >= match.number.threshold,]
if(nrow(output)==0) {
# featureTable$Annotation[x] <- "unknown"
return(c("unknown", 0))
} else {
output <- output[order(-output[,2]),] # sort by scores
# featureTable$Annotation[x] <- output[1,1]
# featureTable$DPscore[x] <- output[1,2]
return(c(output[1,1], output[1,2]))
}
}
for(x in 1:nrow(featureTable)){
featureTable[x, c("Annotation", "DPscore")] <- rez[[x]]
}
featureTable$DPscore <- as.numeric(featureTable$DPscore)
#clean up the cluster
stopImplicitCluster()
return(featureTable)
}
#Plot level 1, 2, 3, target, and aligned features
plot.features <- function(dir, featureTable, data, plot.type, plotmz.tol = 0.01, plotrt.tol = 60, smooth = 2){
#peak smooth function
peak_smooth <- function(x,level=smooth){
n <- level
if(length(x) < 2*n){
return(x)
} else if(length(unique(x))==1){
return(x)
} else{
y <- vector(length=length(x))
for(i in 1:n){
y[i] <- sum(c((n-i+2):(n+1),n:1)*x[1:(i+n)])/sum(c((n-i+2):(n+1),n:1))
}
for(i in (n+1):(length(y)-n)){
y[i] <- sum(c(1:(n+1),n:1)*x[(i-n):(i+n)])/sum(c(1:(n+1),n:1))
}
for(i in (length(y)-n+1):length(y)){
y[i] <- sum(c(1:n,(n+1):(n+i-length(x)+1))*x[(i-n):length(x)])/sum(c(1:n,(n+1):(n+i-length(x)+1)))
}
return(y)
}
}
input.files <- gsub("\\", "/", data@processingData@files, fixed=TRUE)
setwd(dir)
if(plot.type == 1 | plot.type == 2 | plot.type == 3 | plot.type == "target"){
xraw <- xcmsRaw(input.files[1],profstep=0, mslevel = 1)
dir.create(paste0("level_", plot.type))
setwd(paste0("level_", plot.type))
plot.matrix <- featureTable[featureTable$level == plot.type,]
if(nrow(plot.matrix)!=0){
for(k in 1:nrow(plot.matrix)){
rt.lower.limit <- plot.matrix$rt[k] - plotrt.tol
rt.upper.limit <- plot.matrix$rt[k] + plotrt.tol
mass.lower.limit <- plot.matrix$mz[k] - plotmz.tol
mass.upper.limit <- plot.matrix$mz[k] + plotmz.tol
mzRange <- as.double(cbind(mass.lower.limit, mass.upper.limit))
RTRange <- as.integer(cbind(rt.lower.limit, rt.upper.limit))
eeic <- rawEIC(xraw, mzrange=mzRange, rtrange=RTRange) #extracted EIC object
points <- cbind(xraw@scantime[eeic$scan], peak_smooth(eeic$intensity))
png(file = paste0(rownames(plot.matrix)[k], "_",
round(plot.matrix$mz[k], digits = 4), "_",
round(plot.matrix$rt[k], digits = 0), ".png"),
width = 480, height = 480)
eic <- plot(points, type="l", main=paste("Extracted Ion Chromatogram m/z ",mzRange[1]," - ",mzRange[2],sep=""), xlab="Seconds",
ylab="Intensity", xlim=RTRange)
dev.off()
}
}
}else{
dir.create("JPA_EIC")
setwd("JPA_EIC")
plot.matrix <- featureTable[,5:(ncol(featureTable))]
xrawList <- list()
for(n in 1:length(input.files)){
xrawList[n] <- xcmsRaw(input.files[n],profstep=0)
}
for(k in 1:nrow(plot.matrix)){
rt.lower.limit <- featureTable$rt[k] - plotrt.tol
rt.upper.limit <- featureTable$rt[k] + plotrt.tol
mass.lower.limit <- featureTable$mz[k] - plotmz.tol
mass.upper.limit <- featureTable$mz[k] + plotmz.tol
maxIndex <- as.numeric(which.max(plot.matrix[k,]))
mzRange <- as.double(cbind(mass.lower.limit, mass.upper.limit))
RTRange <- as.integer(cbind(rt.lower.limit, rt.upper.limit))
eeic <- rawEIC(xrawList[[maxIndex]], mzrange=mzRange, rtrange=RTRange) #extracted EIC object
points <- cbind(xrawList[[maxIndex]]@scantime[eeic$scan], peak_smooth(eeic$intensity))
png(file = paste0(rownames(featureTable)[k], "_",
round(featureTable$mz[k], digits = 4), "_",
round(featureTable$rt[k], digits = 0), ".png"),
width = 480, height = 480)
eic <- plot(points, type="l", main=paste("Extracted Ion Chromatogram m/z ",mzRange[1]," - ",mzRange[2],sep=""), xlab="Seconds",
ylab="Intensity", xlim=RTRange)
dev.off()
}
}
setwd(dir)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.