# This script is used to search tandem MS spectrum
#' processMSMSupload
#'
#' @param mSetObj mSetObj
#' @param spectrum spectrum for uploading
#' @export
#' @author Zhiqiang Pang
processMSMSupload <- function(mSetObj=NA, spectrum){
#
mSetObj <- .get.mSet(mSetObj);
mSetObj$dataSet$spectrum_str <- spectrum;
spm_vec <- strsplit(spectrum, "\n")[[1]]
spm_lst <- lapply(spm_vec, function(x){
allv <- strsplit(x, "\t")[[1]]
data.frame(mz = as.numeric(allv[1]), int = as.numeric(allv[2]))
})
spm_dtf <- do.call(rbind,spm_lst)
mSetObj$dataSet$spectrum_dataframe <- spm_dtf;
# save results into R mem for extraction/display later
return(.set.mSet(mSetObj));
}
#' performMS2searchSingle
#'
#' @param mSetObj mSetObj
#' @param ppm1 ppm value for ms1
#' @param ppm2 ppm value for ms2
#' @param dbpath database path
#' @param frgdbpath fragmentation database path
#' @param database database option
#' @param similarity_meth similarity computing method
#' @param precMZ mz of precursor
#' @param sim_cutoff filtration cutoff of similarity score. will be enabled soon.
#' @param ionMode ion mode, for ESI+, is should be 1. for ESI-, it should be 0
#' @param unit1 ppm or da for ms1 matching
#' @param unit2 ppm or da for ms2
#' @export
performMS2searchSingle <- function(mSetObj=NA, ppm1 = 10, ppm2 = 25,
dbpath = "",
frgdbpath = "",
database = "all",
similarity_meth = 0,
precMZ = NA, sim_cutoff = 30, ionMode = "positive",
unit1 = "ppm", unit2 = "ppm"){
require("OptiLCMS")
mSetObj <- .get.mSet(mSetObj);
# fetch the searching function
SpectraSearchingSingle <- OptiLCMS:::SpectraSearchingSingle;
# record parameters
mSetObj$dataSet$params <-
data.frame(ppm1 = ppm1, ppm2 = ppm2, database = database,
similarity_meth = similarity_meth, precMZ = precMZ,
ionMode = ionMode, unit1 = unit1, unit2 = unit2,
OptiLCMSVersion = packageVersion("OptiLCMS"))
# configure ppm/da param
if(unit1 == "da"){
ppm1 <- ppm1/precMZ*1e6;
}
if(unit2 == "da"){
ppm2 <- ppm2/precMZ*1e6;
}
if(ionMode == "positive"){
ion_mode_idx = 1;
} else {
ion_mode_idx = 0;
}
if(dbpath =="" || !file.exists(dbpath)){
stop("Database file does not exist! Please check!")
}
# now set up the database option
database <- gsub("\\[|\\]", "", database)
database <- gsub(" ", "", database)
database <- strsplit(database, ",")[[1]]
database_opt <- generateMS2dbOpt(database, ionMode);
# now need to check if need to convert to NL
if(mSetObj[["dataSet"]]$MSMS_db_option == "nl"){
mSetObj$dataSet$spectrum_dataframe$mz <- precMZ - mSetObj$dataSet$spectrum_dataframe$mz
mSetObj$dataSet$spectrum_dataframe <- mSetObj$dataSet$spectrum_dataframe[mSetObj$dataSet$spectrum_dataframe$mz>0, ]
}
# if use entropy
if(similarity_meth == 1){
useEntropy <- TRUE;
} else {
useEntropy <- FALSE;
}
# now let call OPTILCMS to search the database
df <- as.matrix(mSetObj$dataSet$spectrum_dataframe)
Concensus_spec <- list(as.vector(0L), list(list(df)))
peak_mtx <- matrix(c(precMZ-1e-10, precMZ+1e-10, NA, NA), ncol = 4)
colnames(peak_mtx) <- c("mzmin", "mzmax", "rtmin", "rtmax")
results <- SpectraSearchingSingle(Concensus_spec, 0, peak_mtx, ppm1, ppm2,
ion_mode_idx, dbpath, database_opt, useEntropy = useEntropy)
results_clean <- lapply(results, msmsResClean)
mSetObj$dataSet$msms_result <- results_clean
# to extract fragments
require(RSQLite)
require(DBI)
require(progress)
con <- dbConnect(SQLite(), frgdbpath)
dt_idx <- dbGetQuery(con, "SELECT * FROM Index_table")
dbDisconnect(con)
frgs_list <- lapply(results_clean[[1]][["IDs"]], function(n){
dbs <- dt_idx$DB_Tables[which((dt_idx$Min_ID <= n) & (n <= dt_idx$Max_ID))]
con <- dbConnect(SQLite(), frgdbpath)
res <- dbGetQuery(con, paste0("SELECT Fragments FROM ", dbs, " WHERE ID=",n))
dbDisconnect(con)
strsplit(res$Fragments, "\t")[[1]]
})
mSetObj$dataSet$frgs_result[[1]] <- frgs_list
res_df <- data.frame(IDs = results_clean[[1]][["IDs"]],
Scores = results_clean[[1]][["Scores"]][[1]],
Similarity_score = results_clean[[1]][["dot_product"]][[1]],
CompoundName = results_clean[[1]][["Compounds"]],
Formula = results_clean[[1]][["Formulas"]],
SMILES = results_clean[[1]][["SMILEs"]],
InchiKeys = results_clean[[1]][["InchiKeys"]],
Precursors = results_clean[[1]][["Precursors"]],
MS2_reference = results_clean[[1]][["MS2refs"]])
qs::qsave(results_clean, file = "MS2searchSingle_results.qs")
write.csv(res_df, file = "MS2searchSingle_results.csv", row.names = F)
return(.set.mSet(mSetObj));
}
msmsResClean <- function(res){
allcmpds <- res[["Compounds"]];
allscore <- res[["Scores"]][[1]];
scores_idxs <- is.finite(allscore);
allcmpds <- allcmpds[scores_idxs];
allscore <- allscore[scores_idxs];
allcmpds_unique <- unique(allcmpds);
idx <- vapply(allcmpds_unique, function(cmpd){
idx1 <- (allcmpds == cmpd)
msco <- max(allscore[idx1], na.rm = TRUE)
idx2 <- (allscore == msco)
which(idx1 & idx2)[1]
}, FUN.VALUE = integer(1L))
res[["IDs"]] <- res[["IDs"]][idx]
res[["Scores"]][[1]] <- res[["Scores"]][[1]][idx]
res[["dot_product"]][[1]] <- res[["dot_product"]][[1]][idx]
res[["Neutral_loss"]][[1]] <- res[["Neutral_loss"]][[1]][idx]
res[["Compounds"]] <- res[["Compounds"]][idx]
res[["Formulas"]] <- res[["Formulas"]][idx]
res[["SMILEs"]] <- res[["SMILEs"]][idx]
res[["InchiKeys"]] <- res[["InchiKeys"]][idx]
res[["Precursors"]] <- res[["Precursors"]][idx]
res[["MS2refs"]] <- res[["MS2refs"]][idx]
# sort based on score (high -> low)
iddx <- order(res[["Scores"]][[1]], decreasing = T)
res[["IDs"]] <- res[["IDs"]][iddx]
res[["Scores"]][[1]] <- res[["Scores"]][[1]][iddx]
res[["dot_product"]][[1]] <- res[["dot_product"]][[1]][iddx]
res[["Neutral_loss"]][[1]] <- res[["Neutral_loss"]][[1]][iddx]
res[["Compounds"]] <- res[["Compounds"]][iddx]
res[["Formulas"]] <- res[["Formulas"]][iddx]
res[["SMILEs"]] <- res[["SMILEs"]][iddx]
res[["InchiKeys"]] <- res[["InchiKeys"]][iddx]
res[["Precursors"]] <- res[["Precursors"]][iddx]
res[["MS2refs"]] <- res[["MS2refs"]][iddx]
return(res)
}
Updatecurrentmsmsidx <- function(mSetObj=NA, label = ""){
mSetObj <- .get.mSet(mSetObj);
if(label != ""){
idx <- which(label == mSetObj[["dataSet"]][["prec_mzrt_included"]])
cat("Now the Updatecurrentmsmsidx ===> ", idx, "\n")
idx -> mSetObj[["dataSet"]][["current_msms_idx"]]
}
return(.set.mSet(mSetObj))
}
Setcurrentmsmsidx <- function(mSetObj=NA, idx = 1){
mSetObj <- .get.mSet(mSetObj);
cat("Now the Setcurrentmsmsidx ===> ", idx, "\n")
idx -> mSetObj[["dataSet"]][["current_msms_idx"]]
return(.set.mSet(mSetObj))
}
GetMSMSFeatureLabel <- function(mSetObj=NA, idx = 1){
mSetObj <- .get.mSet(mSetObj);
llb <- mSetObj[["dataSet"]][["prec_mzrt_included"]][idx]
return(llb)
}
GetMSMSCompoundNames_single <- function(mSetObj=NA, idx = 1){
mSetObj <- .get.mSet(mSetObj);
idx -> mSetObj[["dataSet"]][["current_msms_idx"]]
.set.mSet(mSetObj)
return(mSetObj[["dataSet"]][["msms_result"]][[idx]][["Compounds"]])
}
GetMSMSFormulas_single <- function(mSetObj=NA, idx = 1){
mSetObj <- .get.mSet(mSetObj);
return(mSetObj[["dataSet"]][["msms_result"]][[idx]][["Formulas"]])
}
GetMSMSSimScores_single <- function(mSetObj=NA, idx = 1){
mSetObj <- .get.mSet(mSetObj);
return(round(mSetObj[["dataSet"]][["msms_result"]][[idx]][["Scores"]][[1]],2))
}
GetMSMSSmiles_single <- function(mSetObj=NA, idx = 1){
mSetObj <- .get.mSet(mSetObj);
return(mSetObj[["dataSet"]][["msms_result"]][[idx]][["SMILEs"]])
}
GetMSMSInchiKeys_single <- function(mSetObj=NA, idx = 1){
mSetObj <- .get.mSet(mSetObj);
return(mSetObj[["dataSet"]][["msms_result"]][[idx]][["InchiKeys"]])
}
GetMSMSPrecs_single <- function(mSetObj=NA, idx = 1){
mSetObj <- .get.mSet(mSetObj);
return(round(mSetObj[["dataSet"]][["msms_result"]][[idx]][["Precursors"]],4))
}
GetMSMSDot_single <- function(mSetObj=NA, idx = 1){
mSetObj <- .get.mSet(mSetObj);
return(round(mSetObj[["dataSet"]][["msms_result"]][[idx]][["dot_product"]][[1]],2))
}
#' plotMirror
#' @param mSetObj mSetObj
#' @param featureidx index of feature
#' @param precMZ mz of precursor
#' @param ppm ppm for ms2 fragment matching mz error
#' @param imageNM image name
#' @param dpi dpi of images
#' @param format format of images
#' @param width width of images
#' @param height height of images
#' @param cutoff_relative cutoff of relative intensity to filter out
#' @export
#' @author Zhiqiang Pang
plotMirror <- function(mSetObj=NA, featureidx = 1,
precMZ, ppm,
imageNM = "",
dpi = 300, format = "png", width = 8, height = 8,
cutoff_relative = 5){
# Fetch mSetobj
mSetObj <- .get.mSet(mSetObj);
#cat("Now the height is == > ", height, "\n")
# get plotting function
MirrorPlotting <- OptiLCMS:::MirrorPlotting
# Fetch data for plotting
if(!is.null(mSetObj[["dataSet"]][["spectrum_dataframe"]])){
spec_df <- mSetObj[["dataSet"]][["spectrum_dataframe"]] # query spec
spec_top <- spec_df
ref_str <- mSetObj[["dataSet"]][["msms_result"]][[1]][["MS2refs"]][featureidx]
frgs_vec <- mSetObj$dataSet$frgs_result[[1]][[featureidx]]
if(is.na(ref_str)){
return (1);
}
spec_bottom <- OptiLCMS:::parse_ms2peaks(ref_str)
# compoundName, score
compoundName <- mSetObj[["dataSet"]][["msms_result"]][[1]][["Compounds"]][featureidx]
score <- mSetObj[["dataSet"]][["msms_result"]][[1]][["Scores"]][[1]][featureidx]
smiles <- mSetObj[["dataSet"]][["msms_result"]][[1]][["SMILEs"]][featureidx]
inchikeys <- mSetObj[["dataSet"]][["msms_result"]][[1]][["InchiKeys"]][featureidx]
formulas <- mSetObj[["dataSet"]][["msms_result"]][[1]][["Formulas"]][featureidx]
cat("compoundName ==> ", compoundName, "\n")
cat("score ==> ", score, "\n")
current_msms_idx <- 1;
} else {
if(!is.null(mSetObj[["dataSet"]][["current_msms_idx"]])){
current_msms_idx <- mSetObj[["dataSet"]][["current_msms_idx"]]
} else {
current_msms_idx <- 1
}
cat("Now the current_msms_idx ==> ", current_msms_idx, "\n")
cat("Now the featureidx ==> ", featureidx, "\n")
spec_df <- mSetObj[["dataSet"]][["spectrum_set"]][[current_msms_idx]][["ms2_spec"]]
spec_top <- spec_df
ref_str <- mSetObj[["dataSet"]][["msms_result"]][[current_msms_idx]][["MS2refs"]][featureidx]
frgs_vec <- mSetObj$dataSet$frgs_result[[current_msms_idx]][[featureidx]]
if(is.na(ref_str)){
return (1);
}
spec_bottom <- OptiLCMS:::parse_ms2peaks(ref_str)
# compoundName, score
compoundName <- mSetObj[["dataSet"]][["msms_result"]][[current_msms_idx]][["Compounds"]][featureidx]
score <- mSetObj[["dataSet"]][["msms_result"]][[current_msms_idx]][["Scores"]][[1]][featureidx]
smiles <- mSetObj[["dataSet"]][["msms_result"]][[current_msms_idx]][["SMILEs"]][featureidx]
inchikeys <- mSetObj[["dataSet"]][["msms_result"]][[current_msms_idx]][["InchiKeys"]][featureidx]
formulas <- mSetObj[["dataSet"]][["msms_result"]][[current_msms_idx]][["Formulas"]][featureidx]
cat("compoundName ==> ", compoundName, "\n")
cat("score ==> ", score, "\n")
}
# prepare fragments
normalize_spectrum <- OptiLCMS:::normalize_spectrum
bottom <- normalize_spectrum(spec_bottom, cutoff_relative)
frgs_vec_done <- vapply(bottom$mz, function(x){
y <- frgs_vec[spec_bottom[,1] == x]
if(is.na(y[1])){return("")}
return(y[1])
}, FUN.VALUE = character(1L))
write.table(bottom[,c(1:2)],
file = paste0("reference_spectrum_",featureidx-1, "_", precMZ,".txt"),
row.names = F, col.names = T)
df_cmpd <- data.frame(CompoundName = compoundName,
score = score,
SMILES = smiles,
InchiKeys = inchikeys,
Formula = formulas)
write.table(df_cmpd,
file = paste0("compound_info_",featureidx-1, "_", precMZ,".txt"),
row.names = F, col.names = T)
# now, let's plot
title <- paste0("Precursor: ", precMZ)
subtitle <- paste0(compoundName, "\nMatching Score: ", round(score,3))
p1 <- MirrorPlotting(spec_top,
spec_bottom,
ppm = ppm,
title= title,
subtitle = subtitle,
cutoff_relative = cutoff_relative)
p1 <- p1 + theme(
axis.title.x = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
axis.title.y = element_text(size = 14),
text=element_text(family="Helvetica", face = "plain"),
plot.subtitle=element_text(size=13, face="plain", color="black"),
plot.title=element_text(size=18, face="plain", color="black"))
# Save the static plot with Cairo
Cairo::Cairo(
file = imageNM, #paste0("mirrorplot_", precMZ, "_", dpi, ".", format),
unit = "in", dpi = dpi, width = width, height = height, type = format, bg = "white")
print(p1)
dev.off()
# px <- plotly::ggplotly(p1);
px <- ggplotly_modified(p1, tempfile_path = paste0(getwd(), "/temp_file4plotly"));
#pxl <- list(px$x$data,px$x$layout,px$x$config);
#names(pxl) <- c("data","layout","config");
px[["x"]][["layout"]][["width"]] <- px[["width"]] <- 850
px[["x"]][["layout"]][["height"]] <- px[["height"]] <- 425
px[["x"]][["layout"]][["yaxis"]][["title"]][["font"]][["size"]] <- 16
px[["x"]][["layout"]][["xaxis"]][["title"]][["font"]][["size"]] <- 16
#px[["x"]][["layout"]][["title"]] <-NULL
px[["x"]][["layout"]][["title"]][["font"]][["size"]] <- 16
if(length(px[["x"]][["data"]])>2){
px[["x"]][["data"]][[3]][["marker"]][["size"]] <- px[["x"]][["data"]][[3]][["marker"]][["size"]]/2
}
## update the top data -data1
if(length(px[["x"]][["data"]][[1]])>0){
if(length(px[["x"]][["data"]][[1]][["text"]])>0){
px[["x"]][["data"]][[1]][["text"]] <- gsub("normalized|-normalized","Relative Intensity",px[["x"]][["data"]][[1]][["text"]])
px[["x"]][["data"]][[1]][["text"]] <- gsub("y: 0<br />","",px[["x"]][["data"]][[1]][["text"]])
px[["x"]][["data"]][[1]][["text"]] <- gsub("mz: [0-9]+.[0-9]+<br />mz","mz",px[["x"]][["data"]][[1]][["text"]])
px[["x"]][["data"]][[1]][["text"]] <- gsub("-","",px[["x"]][["data"]][[1]][["text"]])
}
}
## update the bottom data -data2
if(length(px[["x"]][["data"]][[2]])>0){
if(length(px[["x"]][["data"]][[2]][["text"]])>0){
px[["x"]][["data"]][[2]][["text"]] <- gsub("normalized|-normalized","Relative Intensity",px[["x"]][["data"]][[2]][["text"]])
px[["x"]][["data"]][[2]][["text"]] <- gsub("y: 0<br />","",px[["x"]][["data"]][[2]][["text"]])
px[["x"]][["data"]][[2]][["text"]] <- gsub("mz: [0-9]+.[0-9]+<br />mz","mz",px[["x"]][["data"]][[2]][["text"]])
px[["x"]][["data"]][[2]][["text"]] <- gsub("-","",px[["x"]][["data"]][[2]][["text"]])
non_na_txt <- px[["x"]][["data"]][[2]][["text"]][!is.na(px[["x"]][["data"]][[2]][["text"]])]
frgs_vec_done[frgs_vec_done==""] <- "Unknown"
new_labels <- sapply(1:length(non_na_txt), function(x){
paste0(non_na_txt[x], "<br />Fragment Formula: ", frgs_vec_done[ceiling(x/2)])
})
px[["x"]][["data"]][[2]][["text"]][!is.na(px[["x"]][["data"]][[2]][["text"]])] <- new_labels
}
}
## update the matched star -data3
if(length(px[["x"]][["data"]])>2){
if(length(px[["x"]][["data"]][[3]])>0){
if(length(px[["x"]][["data"]][[3]][["text"]])>0){
px[["x"]][["data"]][[3]][["text"]] <- gsub("<br />star_intensity.*","",px[["x"]][["data"]][[3]][["text"]])
px[["x"]][["data"]][[3]][["text"]] <- paste0("<b>Matched Fragment: </b><br />", px[["x"]][["data"]][[3]][["text"]])
}
}
}
# Save the interactive plot with ggplot
saveRDS(px, file = paste0(gsub(".png|.svg|.pdf", "", imageNM), ".rds"))
jsonlist <- RJSONIO::toJSON(px, pretty = T,force = TRUE,.na = "null");
sink(paste0(gsub(".png|.svg|.pdf", "", imageNM),".json"));
cat(jsonlist);
sink();
if(is.null(mSetObj[["imgSet"]][["msmsmirror"]])){
df <- data.frame(indx = featureidx, imageNM = imageNM, ft_idx = current_msms_idx)
mSetObj[["imgSet"]][["msmsmirror"]] <- df
} else {
mSetObj[["imgSet"]][["msmsmirror"]] -> df0
df <- data.frame(indx = featureidx, imageNM = imageNM, ft_idx = current_msms_idx)
mSetObj[["imgSet"]][["msmsmirror"]] <- rbind(df, df0)
}
return(.set.mSet(mSetObj))
}
readMgfSpectra <- function(filename) {
require("MSnbase")
cat("Scanning", filename, "...\n")
mgf <- scan(file = filename, what = "",
sep = "\n", quote = "",
allowEscapes = FALSE,
quiet = TRUE)
## From http://www.matrixscience.com/help/data_file_help.html#GEN
## Comment lines beginning with one of the symbols #;!/ can be included,
## but only outside of the BEGIN IONS and END IONS statements that delimit an MS/MS dataset.
cmts <- grep("^[#;!/]", mgf)
if (length(cmts))
mgf <- mgf[-cmts]
begin <- grep("BEGIN IONS", mgf) + 1L
end <- grep("END IONS", mgf) - 1L
n <- length(begin)
spectra <- vector("list", length = n)
fdata <- vector("list", length = n)
for (i in seq(along = spectra)) {
specInfo <- MSnbase:::extractMgfSpectrum2Info(mgf[begin[i]:end[i]],
centroided = TRUE)
spectra[[i]] <- specInfo$spectrum
fdata[[i]] <- specInfo$fdata
}
fdata <- do.call(rbind, fdata)
names(spectra) <- paste0("X", seq_along(spectra))
assaydata <- list2env(spectra)
process <- new("MSnProcess",
processing = paste("Data loaded:", date()),
files = filename,
smoothed = FALSE)
pdata <- new("AnnotatedDataFrame",
data = data.frame(sampleNames = filename,
fileNumbers = 1))
rownames(fdata) <- names(spectra)
fdata <- AnnotatedDataFrame(data = data.frame(fdata))
fdata <- fdata[ls(assaydata), ] ## reorder features
## only levels 0 and 1 for mgf peak lists
cache <- MSnbase:::testCacheArg(1, maxCache = 1)
tmp <- new("MSnExp",
assayData = assaydata,
phenoData = pdata,
featureData = fdata,
processingData = process)
newhd <- MSnbase:::.header(tmp)
.cacheEnv <- MSnbase:::setCacheEnv(list(assaydata = assaydata,
hd = newhd),
cache, lock = TRUE)
toReturn <- new("MSnExp",
assayData = assaydata,
phenoData = pdata,
featureData = fdata,
processingData = process,
.cache = .cacheEnv)
if (validObject(toReturn))
return(toReturn)
}
#' SaintyCheckMSPfile
#'
#' @param mSetObj mSetObj
#' @param filename filename with path
#' @param format_type format type, can be 'mgf' or 'msp'
#' @param keepAllspec if you want to search all spectra from your local, set keepAllspec to TRUE. it is FALSE by default.
#' @export
#' @author Zhiqiang Pang
SaintyCheckMSPfile <- function(mSetObj=NA, filename = "", format_type = "msp", keepAllspec = FALSE){
# Fetch mSetobj
mSetObj <- .get.mSet(mSetObj);
if(filename == ""){
AddErrMsg("The msp file is empty!");
}
Msg <- vector(mode = "character");
if(format_type == "mzmine"){
data_table <- read.delim(filename, header = F)
mSetObj[["dataSet"]][["spectrum_raw"]] <- data_table
Msg <- c(Msg, "Your msp file is generated by mzMine.")
start_idxs <- vapply(data_table[,1], function(x){x == "BEGIN IONS"},
FUN.VALUE = logical(1L), USE.NAMES = F)
Msg <- c(Msg, paste0("A total of ", length(which(start_idxs)), " MS/MS spectra included in your data."))
if((length(which(start_idxs)) > 20) & !keepAllspec){
AddMsg("Only first 20 tandem spectra will be searched!")
Msg <- c(Msg, paste0("Only first 20 tandem spectra will be searched by default!"))
keep20 = TRUE;
} else {
keep20 = FALSE;
}
end_idxs <- vapply(data_table[,1], function(x){x == "END IONS"},
FUN.VALUE = logical(1L), USE.NAMES = F)
rt_idxs <- vapply(data_table[,1], function(x){x == "RTINSECONDS"},
FUN.VALUE = logical(1L), USE.NAMES = F)
ms2_idxs <- vapply(data_table[,1], function(x){grepl(pattern = "Num peaks", x)},
FUN.VALUE = logical(1L), USE.NAMES = F)#Num peaks
ms2_num <- vapply(data_table[ms2_idxs,1], function(x){
as.integer(sub("Num peaks=", "",x))
}, FUN.VALUE = integer(1L), USE.NAMES = F)
start_idxs <- which(start_idxs)
end_idxs <- which(end_idxs)
rt_idxs <- which(rt_idxs)
ms2_idxs <- which(ms2_idxs)
idx_prec0 <- grep("PEPMASS=", data_table[,1])
prec_mz_all <- as.double(sub("PEPMASS=", "", data_table[idx_prec0,1]))
idx_precrt0 <- grep("RTINSECONDS=", data_table[,1])
prec_rt_all <- as.double(sub("RTINSECONDS=", "", data_table[idx_precrt0,1]))
mSetObj[["dataSet"]][["prec_mz_all"]] <- prec_mz_all
mSetObj[["dataSet"]][["prec_rt_all"]] <- prec_rt_all
mSetObj[["dataSet"]][["prec_mzrt_all"]] <- paste0(prec_mz_all, "mz@", prec_rt_all, "sec")
ms2spec_full <- vector(mode = "list", length(ms2_num))
for(i in 1:length(ms2_num)){
this_spec <- data_table[c(start_idxs[i]:end_idxs[i]),]
idx_prec <- grep("PEPMASS=", this_spec)
prec_mz <- as.double(sub("PEPMASS=", "", this_spec[idx_prec]))
idx_precrt <- grep("RTINSECONDS=", this_spec)
prec_rt <- as.double(sub("RTINSECONDS=", "", this_spec[idx_precrt]))
len_ms2_spec <- ms2_num[i]
ms2_this_idx <- grep("Num peaks=", this_spec)
spec_strs <- this_spec[c((ms2_this_idx+1):(length(this_spec)-1))]
spec_list <- lapply(spec_strs, function(x){
vals <- as.double(strsplit(x, " ")[[1]])
data.frame(mz = vals[1], int = vals[2])
})
ms2_spec <- do.call(rbind, spec_list)
ms2spec_full[[i]] <- list(precursor = prec_mz, prec_rt = prec_rt, ms2_spec = ms2_spec)
}
if(keep20){
start_idxs <- start_idxs[1:20]
end_idxs <- end_idxs[1:20]
ms2_idxs <- ms2_idxs[1:20]
ms2_num <- ms2_num[1:20]
}
ms2spec <- vector(mode = "list", length(ms2_num))
for(i in 1:length(ms2_num)){
this_spec <- data_table[c(start_idxs[i]:end_idxs[i]),]
idx_prec <- grep("PEPMASS=", this_spec)
prec_mz <- as.double(sub("PEPMASS=", "", this_spec[idx_prec]))
idx_precrt <- grep("RTINSECONDS=", this_spec)
prec_rt <- as.double(sub("RTINSECONDS=", "", this_spec[idx_precrt]))
len_ms2_spec <- ms2_num[i]
ms2_this_idx <- grep("Num peaks=", this_spec)
spec_strs <- this_spec[c((ms2_this_idx+1):(length(this_spec)-1))]
spec_list <- lapply(spec_strs, function(x){
vals <- as.double(strsplit(x, " ")[[1]])
data.frame(mz = vals[1], int = vals[2])
})
ms2_spec <- do.call(rbind, spec_list)
ms2spec[[i]] <- list(precursor = prec_mz, prec_rt = prec_rt, ms2_spec = ms2_spec)
}
all_precmz <- vapply(ms2spec, function(x){x[[1]]}, FUN.VALUE = double(1L))
all_precrt <- vapply(ms2spec, function(x){x[[2]]}, FUN.VALUE = double(1L))
msms_frg_num <- vapply(ms2spec, function(x){nrow(x[[3]])}, FUN.VALUE = double(1L))
Msg <- c(Msg, paste0("The m/z range of all included precursors in your data is from ", min(all_precmz), " to ", max(all_precmz), "."))
Msg <- c(Msg, paste0("The retention time range of all included precursors in your data is from ", min(all_precrt), " to ", max(all_precrt), "."))
Msg <- c(Msg, paste0("The minimum number of MS/MS fragments is ", min(msms_frg_num), "."))
Msg <- c(Msg, paste0("The maximum number of MS/MS fragments is ", max(msms_frg_num), "."))
mSetObj[["dataSet"]][["prec_mz_included"]] <- all_precmz
mSetObj[["dataSet"]][["prec_rt_included"]] <- all_precrt
mSetObj[["dataSet"]][["prec_mzrt_included"]] <- paste0(all_precmz, "mz@", all_precrt, "sec")
} else if(format_type == "msp"){
data_table <- read.delim(filename, header = F,sep = "\n")
mSetObj[["dataSet"]][["spectrum_raw"]] <- data_table
Msg <- c(Msg, "Your msp file is exported by MS-DIAL")
start_idxs <- vapply(data_table[,1], function(x){grepl("NAME: ",x)},
FUN.VALUE = logical(1L), USE.NAMES = F)
ms2_idxs <- vapply(data_table[,1], function(x){grepl("Num Peaks: ",x)},
FUN.VALUE = logical(1L), USE.NAMES = F)
rt_idxs <- vapply(data_table[,1], function(x){x == "RETENTIONTIME"},
FUN.VALUE = logical(1L), USE.NAMES = F)
non0_idxs <- vapply(data_table[ms2_idxs,1], function(x){x == "Num Peaks: 0"},
FUN.VALUE = logical(1L), USE.NAMES = F)
ms2_num <- vapply(data_table[ms2_idxs,1], function(x){
as.integer(sub("Num Peaks: ", "",x))
}, FUN.VALUE = integer(1L), USE.NAMES = F)
Msg <- c(Msg, paste0("A total of ", length(which(start_idxs)), " MS/MS records detected in your data."))
Msg <- c(Msg, paste0("A total of ", length(which(!non0_idxs)), " non-empty MS/MS spectra found in your data."))
if((length(which(!non0_idxs)) > 20) & (!keepAllspec)){
AddMsg("Only first 20 tandem spectra will be searched!")
Msg <- c(Msg, paste0("Only first 20 tandem spectra will be searched by default!"))
keep20 = TRUE;
} else {
keep20 = FALSE;
}
start_idxs <- which(start_idxs)
rt_idxs <- which(rt_idxs)
end_idxs <- c(start_idxs[-1]-1, nrow(data_table))
ms2_idxs <- which(ms2_idxs)
start_idxs <- start_idxs[!non0_idxs]
rt_idxs <- rt_idxs[!non0_idxs]
end_idxs <- end_idxs[!non0_idxs]
ms2_idxs <- ms2_idxs[!non0_idxs]
ms2_num <- ms2_num[!non0_idxs]
idx_prec0 <- grep("PRECURSORMZ: ", data_table[,1])
idx_precrt0 <- grep("RETENTIONTIME: ", data_table[,1])
prec_mz_all <- as.double(sub("PRECURSORMZ: ", "", data_table[idx_prec0,1]))
prec_rt_all <- as.double(sub("RETENTIONTIME: ", "", data_table[idx_precrt0,1]))
mSetObj[["dataSet"]][["prec_mz_all"]] <- prec_mz_all[!non0_idxs]
mSetObj[["dataSet"]][["prec_rt_all"]] <- prec_rt_all[!non0_idxs]
mSetObj[["dataSet"]][["prec_mzrt_all"]] <- paste0(prec_mz_all[!non0_idxs], "mz@", prec_rt_all[!non0_idxs], "min")
ms2spec_full <- vector(mode = "list", length(ms2_num))
for(i in 1:length(ms2_num)){
this_spec <- data_table[c(start_idxs[i]:end_idxs[i]),]
idx_prec <- grep("PRECURSORMZ: ", this_spec)
prec_mz <- as.double(sub("PRECURSORMZ: ", "", this_spec[idx_prec]))
idx_precrt <- grep("RETENTIONTIME: ", this_spec)
prec_rt <- as.double(sub("RETENTIONTIME: ", "", this_spec[idx_precrt]))
len_ms2_spec <- ms2_num[i]
ms2_this_idx <- grep("Num Peaks: ", this_spec)
spec_strs <- this_spec[c((ms2_this_idx+1):(length(this_spec)))]
spec_list <- lapply(spec_strs, function(x){
vals <- as.double(strsplit(x, "\t")[[1]])
data.frame(mz = vals[1], int = vals[2])
})
ms2_spec <- do.call(rbind, spec_list)
ms2spec_full[[i]] <- list(precursor = prec_mz, prec_rt = prec_rt, ms2_spec = ms2_spec)
}
if(keep20){
start_idxs <- start_idxs[1:20]
end_idxs <- end_idxs[1:20]
ms2_idxs <- ms2_idxs[1:20]
ms2_num <- ms2_num[1:20]
}
ms2spec <- vector(mode = "list", length(ms2_num))
for(i in 1:length(ms2_num)){
this_spec <- data_table[c(start_idxs[i]:end_idxs[i]),]
idx_prec <- grep("PRECURSORMZ: ", this_spec)
prec_mz <- as.double(sub("PRECURSORMZ: ", "", this_spec[idx_prec]))
idx_precrt <- grep("RETENTIONTIME: ", this_spec)
prec_rt <- as.double(sub("RETENTIONTIME: ", "", this_spec[idx_precrt]))
len_ms2_spec <- ms2_num[i]
ms2_this_idx <- grep("Num Peaks: ", this_spec)
spec_strs <- this_spec[c((ms2_this_idx+1):(length(this_spec)))]
spec_list <- lapply(spec_strs, function(x){
vals <- as.double(strsplit(x, "\t")[[1]])
data.frame(mz = vals[1], int = vals[2])
})
ms2_spec <- do.call(rbind, spec_list)
ms2spec[[i]] <- list(precursor = prec_mz,prec_rt = prec_rt, ms2_spec = ms2_spec)
}
all_precmz <- vapply(ms2spec, function(x){x[[1]]}, FUN.VALUE = double(1L))
all_precrt <- vapply(ms2spec, function(x){x[[2]]}, FUN.VALUE = double(1L))
msms_frg_num <- vapply(ms2spec, function(x){nrow(x[[3]])}, FUN.VALUE = double(1L))
Msg <- c(Msg, paste0("The m/z range of all precursors in your data is from ", min(all_precmz), " to ", max(all_precmz), "."))
Msg <- c(Msg, paste0("The retention time range of all included precursors in your data is from ", min(all_precrt), " to ", max(all_precrt), "."))
Msg <- c(Msg, paste0("The minimum number of MS/MS fragments is ", min(msms_frg_num), "."))
Msg <- c(Msg, paste0("The maximum number of MS/MS fragments is ", max(msms_frg_num), "."))
mSetObj[["dataSet"]][["prec_mz_included"]] <- all_precmz
mSetObj[["dataSet"]][["prec_rt_included"]] <- all_precrt
mSetObj[["dataSet"]][["prec_mzrt_included"]] <- paste0(all_precmz, "mz@", all_precrt, "min")
} else if(format_type == "mgf") {
require(MSnbase)
mgf_dt <- readMgfSpectra(filename)
total_size <- length(mgf_dt@assayData)
Msg <- c(Msg, "Your MGF file is a standard MGF file!")
scannames <- names(mgf_dt@assayData)
counts <- vapply(1:total_size, FUN = function(x){
mgf_dt@assayData[[scannames[x]]]@peaksCount
}, FUN.VALUE = integer(length = 1L))
non0_idxs <- which(counts != 0)
scannames <- scannames[non0_idxs]
Msg <- c(Msg, paste0("A total of ",total_size, " MS/MS records detected in your data."))
Msg <- c(Msg, paste0("A total of ", length(non0_idxs), " non-empty MS/MS spectra found in your data."))
if((length(non0_idxs) > 20) & (!keepAllspec)) {
AddMsg("Only first 20 tandem spectra will be searched!")
Msg <- c(Msg, paste0("Only first 20 tandem spectra will be searched by default!"))
keep20 = TRUE;
} else {
keep20 = FALSE;
}
ms2spec_full <- lapply(1:total_size, function(x){
prec_mz <- round(mgf_dt@assayData[[scannames[x]]]@precursorMz,5)
prec_rt <-round(mgf_dt@assayData[[scannames[x]]]@rt,3)
ms2_spec <- cbind(mgf_dt@assayData[[scannames[x]]]@mz,
mgf_dt@assayData[[scannames[x]]]@intensity)
list(precursor = prec_mz, prec_rt = prec_rt, ms2_spec = ms2_spec)
})
prec_mz_all <- vapply(ms2spec_full, function(x){x[[1]]}, FUN.VALUE = double(1L))
prec_rt_all <- vapply(ms2spec_full, function(x){x[[2]]}, FUN.VALUE = double(1L))
mSetObj[["dataSet"]][["prec_mz_all"]] <- prec_mz_all
mSetObj[["dataSet"]][["prec_rt_all"]] <- prec_rt_all
mSetObj[["dataSet"]][["prec_mzrt_all"]] <- paste0(prec_mz_all, "mz@", prec_rt_all, "min")
if(keep20){
scannames <- scannames[1:20]
}
ms2spec <- lapply(1:length(scannames), FUN = function(x){
prec_mz <- round(mgf_dt@assayData[[scannames[x]]]@precursorMz,5)
prec_rt <-round(mgf_dt@assayData[[scannames[x]]]@rt,3)
ms2_spec <- cbind(mgf_dt@assayData[[scannames[x]]]@mz,
mgf_dt@assayData[[scannames[x]]]@intensity)
list(precursor = prec_mz,prec_rt = prec_rt, ms2_spec = ms2_spec)
})
all_precmz <- vapply(ms2spec, function(x){x[[1]]}, FUN.VALUE = double(1L))
all_precrt <- vapply(ms2spec, function(x){x[[2]]}, FUN.VALUE = double(1L))
msms_frg_num <- vapply(ms2spec, function(x){nrow(x[[3]])}, FUN.VALUE = double(1L))
Msg <- c(Msg, paste0("The m/z range of all precursors in your data is from ", min(all_precmz), " to ", max(all_precmz), "."))
Msg <- c(Msg, paste0("The retention time range of all included precursors in your data is from ", min(all_precrt), " to ", max(all_precrt), "."))
Msg <- c(Msg, paste0("The minimum number of MS/MS fragments is ", min(msms_frg_num), "."))
Msg <- c(Msg, paste0("The maximum number of MS/MS fragments is ", max(msms_frg_num), "."))
mSetObj[["dataSet"]][["prec_mz_included"]] <- all_precmz
mSetObj[["dataSet"]][["prec_rt_included"]] <- all_precrt
mSetObj[["dataSet"]][["prec_mzrt_included"]] <- paste0(all_precmz, "mz@", all_precrt, "min")
}
if(mSetObj[["dataSet"]][["MSMS_db_option"]] == "nl"){
Msg <- c(Msg, paste0("Database searching is based on <u>Neutral Loss</u> spectra of corresponding MS/MS spectra."))
}
if(keepAllspec){
Msg <- c(Msg, paste0("You can use <b>Edit</b> button below to manually update the inclusion list for database searching!"))
}
if(keep20){
Msg <- c(Msg, paste0("Please use <b>Edit</b> button below to manually update the inclusion list for database searching!"))
Msg <- c(Msg, paste0("Please click <b>Proceed</b> button to start database searching."))
} else {
if(length(ms2spec)>20){
Msg <- c(Msg, paste0("Please click <b>Submit</b> button to start database searching."))
} else {
Msg <- c(Msg, paste0("Please click <b>Proceed</b> button to start database searching."))
}
}
if(mSetObj[["dataSet"]][["MSMS_db_option"]] == "nl"){
ms2spec <- lapply(ms2spec, convert2NLspec)
ms2spec_full <- lapply(ms2spec_full, convert2NLspec)
}
mSetObj[["msgSet"]][["sanity_msgvec"]] <- Msg
mSetObj[["dataSet"]][["spectrum_set"]] <- ms2spec
mSetObj[["dataSet"]][["spectrum_set_full"]] <- ms2spec_full
return(.set.mSet(mSetObj))
}
convert2NLspec <- function(data_spec){
spectrum_dataframe <- data_spec$ms2_spec
precMZ <- data_spec$precursor
spectrum_dataframe$mz <- precMZ - spectrum_dataframe$mz
spectrum_dataframe <- spectrum_dataframe[spectrum_dataframe$mz>0, ]
data_spec$ms2_spec <- spectrum_dataframe
return(data_spec)
}
#' performMS2searchBatch
#'
#' @param mSetObj mSetObj
#' @param ppm1 ppm value for ms1
#' @param ppm2 ppm value for ms2
#' @param dbpath database path
#' @param frgdbpath fragmentation database path
#' @param database database option
#' @param similarity_meth similarity computing method
#' @param precMZ mz of precursor
#' @param sim_cutoff filtration cutoff of similarity score. will be enabled soon.
#' @param ionMode ion mode, for ESI+, is should be 1. for ESI-, it should be 0
#' @param unit1 ppm or da for ms1 matching
#' @param unit2 ppm or da for ms2
#' @param ncores number of cpu cores used to search
#' @export
#' @author Zhiqiang Pang
performMS2searchBatch <- function(mSetObj=NA, ppm1 = 10, ppm2 = 25,
dbpath = "",
frgdbpath = "",
database = "all",
similarity_meth = 0,
precMZ = NA, sim_cutoff = 30, ionMode = "positive",
unit1 = "ppm", unit2 = "ppm", ncores = 1){
cat(10, file = "progress_value.txt")
require("OptiLCMS")
mSetObj <- .get.mSet(mSetObj);
cat(15, file = "progress_value.txt")
#save(mSetObj, ppm1, ppm2, dbpath, frgdbpath, database, similarity_meth, precMZ, sim_cutoff, ionMode, unit1, unit2, ncores,
# file = "performMS2searchBatch__input.rda")
mSetObj$dataSet$params <-
data.frame(ppm1 = ppm1, ppm2 = ppm2, database = database,
similarity_meth = similarity_meth, precMZ = precMZ,
ionMode = ionMode, unit1 = unit1, unit2 = unit2,
OptiLCMSVersion = packageVersion("OptiLCMS"))
# fetch the searching function
SpectraSearchingBatch <- OptiLCMS:::SpectraSearchingBatch
# configure ppm/da param
if(unit1 == "da"){
ppm1 <- ppm1/precMZ*1e6;
}
if(unit2 == "da"){
ppm2 <- ppm2/precMZ*1e6;
}
if(ionMode == "positive"){
ion_mode_idx = 1;
} else {
ion_mode_idx = 0;
}
if(dbpath =="" || !file.exists(dbpath)){
stop("Database file does not exist! Please check!")
}
# now set up the database option
database <- gsub("\\[|\\]", "", database)
database <- gsub(" ", "", database)
database <- strsplit(database, ",")[[1]]
database_opt <- generateMS2dbOpt(database, ionMode);
# now let call OPTILCMS to search the database
# df <- as.matrix(mSetObj$dataSet$spectrum_dataframe)
spec_set <- mSetObj[["dataSet"]][["spectrum_set"]]
spec_set_ms2 <- lapply(spec_set, function(x){list(as.matrix(x[[3]]))})
spec_set_prec <- sapply(spec_set, function(x){x[[1]]})
Concensus_spec <- list(as.vector(seq(0,length(spec_set)-1)), spec_set_ms2)
peak_mtx <- cbind(spec_set_prec-1e-10, spec_set_prec+1e-10, NA, NA)
colnames(peak_mtx) <- c("mzmin", "mzmax", "rtmin", "rtmax")
cat(18, file = "progress_value.txt")
cat("", file = "progress_value_parallel.txt")
cat("\nSearching the database now. This step may take a long time....\n", file = "metaboanalyst_ms2_search.txt", append = TRUE)
# if use entropy
if(similarity_meth == 1){
useEntropy <- TRUE;
} else {
useEntropy <- FALSE;
}
if(ncores == 1){
results <- SpectraSearchingBatch(Concensus_spec, 0:(length(spec_set_prec)-1), peak_mtx, ppm1, ppm2,
ion_mode_idx, dbpath, database_opt, useEntropy = useEntropy)
} else {
# for multiple cores
require(parallel);
cl <- makeCluster(getOption("cl.cores", ncores))
clusterExport(cl, c("Concensus_spec", "peak_mtx", "spec_set_prec",
"ppm1", "ppm2", "ion_mode_idx", "dbpath",
"database_opt", "SpectraSearchingBatch", "useEntropy"), envir = environment())
res1 <- list()
res1 <- parLapply(cl,
1:ncores,
function(x, Concensus_spec, peak_mtx,
ppm1, ppm2, ion_mode_idx, dbpath,
database_opt, ncores, useEntropy){
length_data <- length(Concensus_spec[[1]])
lg_pcore <- ceiling(length_data/ncores)
if(ncores == x){
vec_idx <- ((x-1)*lg_pcore+1):length_data
} else {
vec_idx <- ((x-1)*lg_pcore+1):(x*lg_pcore)
}
Concensus_spec0 <- Concensus_spec
Concensus_spec0[[1]] <- 0:length(vec_idx)
Concensus_spec0[[2]] <- Concensus_spec0[[2]][vec_idx]
if(length(vec_idx)==1){
peak_mtx_input <- matrix(peak_mtx[vec_idx,], ncol = 4)
} else {
peak_mtx_input <- peak_mtx[vec_idx,]
}
res0 <- SpectraSearchingBatch(Concensus_spec0,
0:(length(vec_idx)-1),
peak_mtx_input,
ppm1, ppm2, ion_mode_idx, dbpath, database_opt, useEntropy = useEntropy)
cat("=", file = "progress_value_parallel.txt", append = TRUE)
res0},
Concensus_spec = Concensus_spec,
peak_mtx = peak_mtx,
ppm2 = ppm2,
ppm1 = ppm1,
ion_mode_idx = ion_mode_idx,
dbpath = dbpath,
database_opt = database_opt,
ncores = ncores,
useEntropy = useEntropy)
stopCluster(cl)
cat(70, file = "progress_value.txt")
res <- list()
for(i in 1:ncores){
res <- c(res, res1[[i]])
}
res -> results
}
results_clean <- lapply(results, msmsResClean)
mSetObj$dataSet$msms_result <- results_clean
cat(80, file = "progress_value.txt")
# to extract fragments
require(RSQLite)
require(DBI)
require(progress)
con <- dbConnect(SQLite(), frgdbpath)
dt_idx <- dbGetQuery(con, "SELECT * FROM Index_table")
dbDisconnect(con)
cat(90, file = "progress_value.txt")
for(k in 1:length(results_clean)){
frgs_list <- lapply(results_clean[[k]][["IDs"]], function(n){
dbs <- dt_idx$DB_Tables[which((dt_idx$Min_ID <= n) & (n <= dt_idx$Max_ID))]
con <- dbConnect(SQLite(), frgdbpath)
res <- dbGetQuery(con, paste0("SELECT Fragments FROM ", dbs, " WHERE ID=",n))
dbDisconnect(con)
strsplit(res$Fragments, "\t")[[1]]
})
mSetObj$dataSet$frgs_result[[k]] <- frgs_list
}
cat(100, file = "progress_value.txt")
mSetObj$dataSet$spec_set_prec <- spec_set_prec;
return(.set.mSet(mSetObj));
}
DataUpdatefromInclusionList <- function(mSetObj=NA, included_str = ""){
if(included_str == ""){
return (1);
}
mSetObj <- .get.mSet(mSetObj);
Msg = vector()
included_list <- strsplit(included_str, "\n")[[1]]
spectrum_set_full <- mSetObj[["dataSet"]][["spectrum_set_full"]]
prec_mzrt_all <- mSetObj[["dataSet"]][["prec_mzrt_all"]]
idxs <- vapply(included_list, function(x){
which(x == prec_mzrt_all)[1]
}, FUN.VALUE = integer(1L))
if(length(included_list)>20){
mSetObj[["dataSet"]][["prec_mz_included"]] <-
mSetObj[["dataSet"]][["prec_mz_all"]][idxs][1:20];
mSetObj[["dataSet"]][["prec_rt_included"]] <-
mSetObj[["dataSet"]][["prec_rt_all"]][idxs][1:20];
mSetObj[["dataSet"]][["prec_mzrt_included"]] <-
mSetObj[["dataSet"]][["prec_mzrt_all"]][idxs][1:20];
mSetObj[["dataSet"]][["spectrum_set"]] <-
mSetObj[["dataSet"]][["spectrum_set_full"]][idxs][1:20];
ms2spec <- mSetObj[["dataSet"]][["spectrum_set"]];
all_precmz <- vapply(ms2spec, function(x){x[[1]]}, FUN.VALUE = double(1L))
all_precrt <- vapply(ms2spec, function(x){x[[2]]}, FUN.VALUE = double(1L))
msms_frg_num <- vapply(ms2spec, function(x){nrow(x[[3]])}, FUN.VALUE = double(1L))
Msg <- c(Msg, paste0("Only first 20 tandem spectra will be searched by default!"))
Msg <- c(Msg, paste0("The m/z range of all precursors in your data is from ", min(all_precmz), " to ", max(all_precmz), "."))
Msg <- c(Msg, paste0("The retention time range of all included precursors in your data is from ", min(all_precrt), " to ", max(all_precrt), "."))
Msg <- c(Msg, paste0("The minimum number of MS/MS fragments is ", min(msms_frg_num), "."))
Msg <- c(Msg, paste0("The maximum number of MS/MS fragments is ", max(msms_frg_num), "."))
Msg <- c(Msg, paste0("Please use <b>Edit</b> button below to manually update the inclusion list for database searching!"))
Msg <- c(Msg, paste0("Please click <b>Submit</b> button to start database searching."))
} else {
mSetObj[["dataSet"]][["prec_mz_included"]] <-
mSetObj[["dataSet"]][["prec_mz_all"]][idxs];
mSetObj[["dataSet"]][["prec_rt_included"]] <-
mSetObj[["dataSet"]][["prec_rt_all"]][idxs];
mSetObj[["dataSet"]][["prec_mzrt_included"]] <-
mSetObj[["dataSet"]][["prec_mzrt_all"]][idxs];
mSetObj[["dataSet"]][["spectrum_set"]] <-
mSetObj[["dataSet"]][["spectrum_set_full"]][idxs];
ms2spec <- mSetObj[["dataSet"]][["spectrum_set"]];
all_precmz <- vapply(ms2spec, function(x){x[[1]]}, FUN.VALUE = double(1L))
all_precrt <- vapply(ms2spec, function(x){x[[2]]}, FUN.VALUE = double(1L))
msms_frg_num <- vapply(ms2spec, function(x){nrow(x[[3]])}, FUN.VALUE = double(1L))
Msg <- c(Msg, paste0("A total of ", length(idxs), " tandem spectra will be searched!"))
Msg <- c(Msg, paste0("The m/z range of all precursors in your data is from ", min(all_precmz), " to ", max(all_precmz), "."))
Msg <- c(Msg, paste0("The retention time range of all included precursors in your data is from ", min(all_precrt), " to ", max(all_precrt), "."))
Msg <- c(Msg, paste0("The minimum number of MS/MS fragments is ", min(msms_frg_num), "."))
Msg <- c(Msg, paste0("The maximum number of MS/MS fragments is ", max(msms_frg_num), "."))
Msg <- c(Msg, paste0("Please click <b>Proceed</b> button to start database searching."))
}
mSetObj[["msgSet"]][["sanity_msgvec"]] <- c(mSetObj[["msgSet"]][["sanity_msgvec"]][1:2], Msg)
return(.set.mSet(mSetObj))
}
DataUpdatefromInclusionListPro <- function(mSetObj=NA, included_str = ""){
if(included_str == ""){
return (1);
}
mSetObj <- .get.mSet(mSetObj);
Msg = vector()
included_list <- strsplit(included_str, "\n")[[1]]
spectrum_set_full <- mSetObj[["dataSet"]][["spectrum_set_full"]]
prec_mzrt_all <- mSetObj[["dataSet"]][["prec_mzrt_all"]]
idxs <- vapply(included_list, function(x){
which(x == prec_mzrt_all)[1]
}, FUN.VALUE = integer(1L))
mSetObj[["dataSet"]][["prec_mz_included"]] <-
mSetObj[["dataSet"]][["prec_mz_all"]][idxs];
mSetObj[["dataSet"]][["prec_rt_included"]] <-
mSetObj[["dataSet"]][["prec_rt_all"]][idxs];
mSetObj[["dataSet"]][["prec_mzrt_included"]] <-
mSetObj[["dataSet"]][["prec_mzrt_all"]][idxs];
mSetObj[["dataSet"]][["spectrum_set"]] <-
mSetObj[["dataSet"]][["spectrum_set_full"]][idxs];
ms2spec <- mSetObj[["dataSet"]][["spectrum_set"]];
all_precmz <- vapply(ms2spec, function(x){x[[1]]}, FUN.VALUE = double(1L))
all_precrt <- vapply(ms2spec, function(x){x[[2]]}, FUN.VALUE = double(1L))
msms_frg_num <- vapply(ms2spec, function(x){nrow(x[[3]])}, FUN.VALUE = double(1L))
Msg <- c(Msg, paste0("A total of ", length(idxs), " tandem spectra will be searched!"))
Msg <- c(Msg, paste0("The m/z range of all precursors in your data is from ", min(all_precmz), " to ", max(all_precmz), "."))
Msg <- c(Msg, paste0("The retention time range of all included precursors in your data is from ", min(all_precrt), " to ", max(all_precrt), "."))
Msg <- c(Msg, paste0("The minimum number of MS/MS fragments is ", min(msms_frg_num), "."))
Msg <- c(Msg, paste0("The maximum number of MS/MS fragments is ", max(msms_frg_num), "."))
Msg <- c(Msg, paste0("Please click <b>Proceed</b> button to start database searching."))
mSetObj[["msgSet"]][["sanity_msgvec"]] <- c(mSetObj[["msgSet"]][["sanity_msgvec"]][1:2], Msg)
return(.set.mSet(mSetObj))
}
SummarizeCMPDResults <- function(mSetObj=NA, top_cutoff = 60, low_cutoff = 20){
mSetObj <- .get.mSet(mSetObj);
msms_result <- mSetObj[["dataSet"]][["msms_result"]]
top_scores <- vapply(msms_result, function(x){
if(length(x[["Scores"]][[1]]) == 0){
return(0.0)
}
max(x[["Scores"]][[1]])
}, FUN.VALUE = double(1L))
res <- vector(mode = "integer", length = 3L)
res[1] <- length(which(top_scores >= top_cutoff))
res[2] <- length(which((top_scores < top_cutoff) & (top_scores >= low_cutoff)))
res[3] <- length(which(top_scores < low_cutoff))
return(res)
}
GetCountsOfIncludedIons <- function(mSetObj=NA){
mSetObj <- .get.mSet(mSetObj);
mSetObj[["dataSet"]][["prec_mz_included"]] -> all_precmz;
return(length(all_precmz))
}
GetMSMSPrecMZvec_msp <- function(mSetObj=NA){
mSetObj <- .get.mSet(mSetObj);
mSetObj$dataSet$spec_set_prec -> spec_set_prec
return(spec_set_prec)
}
GetMSMSPrecMZvec_msp_min <- function(mSetObj=NA){
mSetObj <- .get.mSet(mSetObj);
mSetObj$dataSet$spec_set_prec -> spec_set_prec
return(min(spec_set_prec))
}
FetchExampleMSP <- function(URL = ""){
download.file(url = URL, destfile = basename(URL), method = "libcurl")
return(1)
}
GetMSPSanityCheckMsg <- function(mSetObj=NA){
mSetObj <- .get.mSet(mSetObj);
mSetObj[["msgSet"]][["sanity_msgvec"]] -> Msg;
return(Msg)
}
GetAllPrecMZs <- function(mSetObj=NA){
mSetObj <- .get.mSet(mSetObj);
mSetObj[["dataSet"]][["prec_mz_all"]] -> prec_mz_all;
return(prec_mz_all)
}
GetIncludedPrecMZs <- function(mSetObj=NA){
mSetObj <- .get.mSet(mSetObj);
mSetObj[["dataSet"]][["prec_mz_included"]] -> all_precmz
#cat("all_precmz ==> ", all_precmz, "\n")
return(all_precmz)
}
GetNonIncludedPrecMZs <- function(mSetObj=NA){
mSetObj <- .get.mSet(mSetObj);
mSetObj[["dataSet"]][["prec_mz_included"]] -> all_precmz;
mSetObj[["dataSet"]][["prec_mz_all"]] -> prec_mz_all;
all_nonprecmz <- prec_mz_all[!(prec_mz_all %in% all_precmz)]
#cat("all_nonprecmz ==> ", all_nonprecmz, "\n")
return(all_nonprecmz)
}
GetAllPrecMZRTs <- function(mSetObj=NA){
mSetObj <- .get.mSet(mSetObj);
mSetObj[["dataSet"]][["prec_mzrt_all"]] -> prec_mz_allrt;
return(prec_mz_allrt)
}
GetIncludedPrecMZRTs <- function(mSetObj=NA){
mSetObj <- .get.mSet(mSetObj);
mSetObj[["dataSet"]][["prec_mzrt_included"]] -> all_precmzrt
#cat("all_precmz ==> ", all_precmz, "\n")
return(all_precmzrt)
}
GetNonIncludedPrecMZRTs <- function(mSetObj=NA){
mSetObj <- .get.mSet(mSetObj);
mSetObj[["dataSet"]][["prec_mzrt_included"]] -> all_precmzrt;
mSetObj[["dataSet"]][["prec_mzrt_all"]] -> prec_mzrt_all;
all_nonprecmzrt <- prec_mzrt_all[!(prec_mzrt_all %in% all_precmzrt)]
#cat("all_nonprecmz ==> ", all_nonprecmz, "\n")
return(all_nonprecmzrt)
}
PlotMS2SummarySingle <- function(mSetObj=NA, imageNM = "", option = 0L, dpi = 72, format="png", width = 12, height = 8){
mSetObj <- .get.mSet(mSetObj);
if(is.null(mSetObj[["dataSet"]][["msms_result"]])){
if(file.exists("MS2searchSingle_results.qs")){
ms2_res <- qs::qread("MS2searchSingle_results.qs")[[1]]
} else {
return(0)
}
} else {
ms2_res <- mSetObj[["dataSet"]][["msms_result"]][[1]]
}
if(option == 0L){
ms2_res[["dot_product"]][[1]] -> simi_vec;
num <- c(length(which(simi_vec>=0.8)),
length(which(simi_vec>=0.6 & simi_vec<0.8)),
length(which(simi_vec<0.6)))
levls <- factor(c("High(>0.8)", "Medium(0.6-0.8)", "Low(<0.6)"), levels = c("High(>0.8)", "Medium(0.6-0.8)", "Low(<0.6)"))
df <- data.frame(levls = levls,
Number = num)
cbPalette <- c("#009E73", "#E69F00", "#999999")
require(ggplot2)
p1 <- ggplot(data=df, aes(x=levls, y=Number)) +
geom_bar(stat="identity", aes(fill=levls), width = 0.618)+
geom_text(aes(label=Number), vjust=1.6, color="white", size=6)+
theme_minimal() + scale_fill_manual(values=cbPalette) +
scale_color_manual(values=cbPalette) +
guides(fill=guide_legend(title="Confidence Level"))+
theme(axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
axis.title.x = element_blank(),
axis.title.y = element_text(size = 14))
} else {
ms2_res[["Scores"]][[1]] -> simi_vec;
num <- c(length(which(simi_vec>=80)),
length(which(simi_vec>=60 & simi_vec<80)),
length(which(simi_vec<60)))
levls <- factor(c("High(>80)", "Medium(60-80)", "Low(<60)"), levels = c("High(>80)", "Medium(60-80)", "Low(<60)"))
df <- data.frame(levls = levls,
Number = num)
cbPalette <- c("#009E73", "#E69F00", "#999999")
require(ggplot2)
p1 <- ggplot(data=df, aes(x=levls, y=Number)) +
geom_bar(stat="identity", aes(fill=levls), width = 0.618)+
geom_text(aes(label=Number), vjust=1.6, color="white", size=6)+
theme_minimal() + scale_fill_manual(values=cbPalette) +
scale_color_manual(values=cbPalette) +
guides(fill=guide_legend(title="Confidence Level"))+
theme(axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.title.x = element_blank())
}
imageNM <- paste0(imageNM, "_", dpi, ".", format)
Cairo::Cairo(
file = imageNM,
unit = "in", dpi = dpi, width = width, height = height, type = format, bg = "white")
print(p1)
dev.off()
mSetObj[["imgSet"]]$ms2sumsingle <- imageNM
return(.set.mSet(mSetObj))
}
#' setMS2DBOpt
#'
#' @param mSetObj mSetObj object
#' @param DBoption database option to define neutral loss or not, can be either 'regualr" or 'nl'.
#' @export
#' @author Zhiqiang Pang
#'
setMS2DBOpt <- function(mSetObj=NA, DBoption = "regular") {
mSetObj <- .get.mSet(mSetObj);
if(DBoption!="regular" & DBoption!="nl"){
stop("Wrong MS2DBopt. Must be either regular or nl");
}
mSetObj[["dataSet"]]$MSMS_db_option <- DBoption;
return(.set.mSet(mSetObj))
}
generateMS2dbOpt <- function(database = "all", ionMode = "positive"){
prefix = ""
database_opts = ""
if(length(database)>1){
prefix = "mcst_\t"; # multiple customized
}
for(i in database){
if(i == "all"){
database_opt <- "all";
return("all")
} else if(i == "hmdb_exp") {
if(ionMode == "positive"){
database_opt <- "HMDB_experimental_PosDB";
} else if(ionMode == "negative") {
database_opt <- "HMDB_experimental_NegDB";
} else {
database_opt <- "all";
}
} else if(i == "hmdb_pre"){
if(ionMode == "positive"){
database_opt <- "HMDB_predicted_PosDB";
} else if(ionMode == "negative") {
database_opt <- "HMDB_predicted_NegDB";
} else {
database_opt <- "all";
}
} else if(i == "gnps"){
if(ionMode == "positive"){
database_opt <- "GNPS_PosDB";
} else if(ionMode == "negative") {
database_opt <- "GNPS_NegDB";
} else {
database_opt <- "all";
}
} else if(i == "mines"){
if(ionMode == "positive"){
database_opt <- "MINEs_PosDB";
} else if(ionMode == "negative") {
database_opt <- "MINEs_NegDB";
} else {
database_opt <- "all";
}
} else if(i == "lipidblast"){
if(ionMode == "positive"){
database_opt <- "LipidBlast_PosDB";
} else if(ionMode == "negative") {
database_opt <- "LipidBlast_NegDB";
} else {
database_opt <- "all";
}
} else if(i == "mona"){
if(ionMode == "positive"){
database_opt <- "MoNA_PosDB";
} else if(ionMode == "negative") {
database_opt <- "MoNA_NegDB";
} else {
database_opt <- "all";
}
} else if(i == "massbank"){
if(ionMode == "positive"){
database_opt <- "MassBank_PosDB";
} else if(ionMode == "negative") {
database_opt <- "MassBank_NegDB";
} else {
database_opt <- "all";
}
} else if(i == "riken"){
if(ionMode == "positive"){
database_opt <- "RIKEN_PosDB";
} else if(ionMode == "negative") {
database_opt <- "RIKEN_NegDB";
} else {
database_opt <- "all";
}
} else if(i == "respect"){
if(ionMode == "positive"){
database_opt <- "ReSpect_PosDB";
} else if(ionMode == "negative") {
database_opt <- "ReSpect_NegDB";
} else {
database_opt <- "all";
}
} else if(i == "msdial"){
if(ionMode == "positive"){
database_opt <- "MSDIAL_PosDB";
} else if(ionMode == "negative") {
database_opt <- "MSDIAL_NegDB";
} else {
database_opt <- "all";
}
} else if(i == "bmdms"){
if(ionMode == "positive"){
database_opt <- "BMDMS_PosDB";
} else if(ionMode == "negative") {
database_opt <- "BMDMS_PosDB";
} else {
database_opt <- "all";
}
}
database_opts <- paste0(database_opts, database_opt, "\t")
}
database_str <- paste0(prefix, database_opts)
return(database_str)
}
saveCurrentSession <- function(path){
save.image(file = paste0(path, "/MS2_rsession.RData"))
}
saveParams4Processing <- function(ppm_val1, ppm_val2,
database_path, fragmentDB_pth,
msmsDBOpt, simlarity_meth, precMZ, simi_cutoff, ionMode, unit1, unit2, path){
write.table(0, quote = FALSE, append = FALSE, row.names = FALSE, col.names = FALSE, file = paste0(path, "/JobKill"));
save(ppm_val1, ppm_val2,
database_path, fragmentDB_pth,
msmsDBOpt, simlarity_meth, precMZ, simi_cutoff,
ionMode, unit1, unit2, file = paste0(path, "/MS2_searching_params.rda"))
}
cancelMS2Job <- function(path){
write.table(1, quote = FALSE, append = FALSE, row.names = FALSE, col.names = FALSE, file = paste0(path, "/JobKill"));
}
finishsearchingjob <- function(mSet){
qs::qsave(mSet, file = 'MS2_searching_results.qs')
cat("Everything has been finished successfully!\n", file = "metaboanalyst_ms2_search.txt", append = TRUE)
}
createSLURMBash <- function(path_str){
## Prepare Configuration script for slurm running
conf_inf <- paste0("#!/bin/bash\n#\n#SBATCH --job-name=MS2_searching\n#\n#SBATCH --ntasks=1\n#SBATCH --time=720:00\n#SBATCH --mem-per-cpu=5G\n#SBATCH --cpus-per-task=2\n#SBATCH --output=", path_str, "/metaboanalyst_ms2_search.txt\n")
## Prepare R script for running
# need to require("OptiLCMS")
str <- paste0('library(OptiLCMS)');
# Set working dir & init env & files included
str <- paste0(str, ";\n", "metaboanalyst_env <- new.env()");
str <- paste0(str, ";\n", "setwd(\'",path_str,"\')");
str <- paste0(str, ";\n", "load(\'MS2_rsession.RData\')");
str <- paste0(str, ";\n", "load(\'MS2_searching_params.rda\')");
# start exe performMS2searchBatch
str <- paste0(str, ";\n", "performMS2searchBatch(NA, ppm_val1, ppm_val2, database_path, fragmentDB_pth, msmsDBOpt, simlarity_meth, precMZ, simi_cutoff, ionMode, unit1, unit2, 4)");
str <- paste0(str, ";\n", "finishsearchingjob(mSet)");
# Write down the exe sh file
sink(paste0(path_str, "/ExecuteRawSpec.sh"));
cat(conf_inf);
cat(paste0("\nsrun R -e \"\n", str, "\n\""));
sink();
}
readProgressSec <- function(path_str){
chr <- readLines(paste0(path_str, "/progress_value_parallel.txt"), warn = F)
return(nchar(chr));
}
loadMS2ResultmSet <- function(){
mSet <<- qs::qread("MS2_searching_results.qs");
}
plotms2Mirror <- function(mSetObj=NA, feature, index){
mSetObj <- .get.mSet(mSetObj);
if(grepl("mz@NonSpecified", feature)){
featurelabel_idx <- 1;
candidate_idx <- which(mSetObj[["dataSet"]][["msms_result"]][[featurelabel_idx]][["IDs"]] == index)
mz <- as.numeric(strsplit(feature, "mz@")[[1]][1])
} else {
featurelabel_idx <- which(mSetObj[["dataSet"]][["prec_mzrt_included"]] == feature)
candidate_idx <- which(mSetObj[["dataSet"]][["msms_result"]][[featurelabel_idx]][["IDs"]] == index)
mz <- as.numeric(strsplit(feature, "mz@")[[1]][1])
}
# need to set current ms indx
mSet[["dataSet"]][["current_msms_idx"]] <<- featurelabel_idx
plotMirror(mSetObj, candidate_idx, mz, 5.0, paste0("dyn_mplot_", feature,"_", index, ".svg"), 72.0, "svg", 10, 6)
}
plotrawms2Mirror <- function(mSetObj=NA, feature, index){
mSetObj <- .get.mSet(mSetObj);
save(mSetObj, file = "mSetObj___plotrawms2Mirror.rda")
cat("plotrawms2Mirror == feature ===> ", feature, "\n")
cat("plotrawms2Mirror == index ===> ", index, "\n")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.