# param1 <- metIdentifyParam(database = "msDatabase_rplc0.0.1")
# param2 <- metIdentifyParam(database = "orbitrapDatabase0.0.1")
# param3 <- mzIdentifyParam(database = "HMDB.metabolite.data")
#
# ms2.data <- grep("mgf", dir(), value = TRUE)
# result <- metIdentify4all(ms1.data = "ms1.peak.table.csv",
# ms2.data = ms2.data,
# parameter.list = c(param3, param2, param1),
# path = ".")
#' @title metIdentify4all
#' @description Identify metabolites using multiple databases one time.
#' @author Xiaotao Shen
#' \email{shenxt1990@@163.com}
#' @param ms1.data The name of ms1 peak table (csv format). Column 1 is "name", column 2 is
#' "mz" and column 3 is "rt" (second).
#' @param ms2.data MS2 data, must be mgf, msp or mzXML format. For example, ms2.data = c("test.mgf", "test2.msp").
#' @param parameter.list A list contains paramters for each processing.
#' The parameter must get using metIdentifyParam or mzIdentifyParam.
#' @param path Work directory.
#' @return A list containing mzIdentifyClass object.
#' @export
#' @seealso The example and demo data of this function can be found
#' https://jaspershen.github.io/metIdentify/articles/metIdentify.html
setGeneric(name = "metIdentify4all",
def = function(
ms1.data,
ms2.data,
parameter.list,
path = "."
){
threads = parameter.list[[1]]$threads
ms1.data.name <- ms1.data
ms2.data.name <- ms2.data
file <- dir(path)
if(all(c("ms1.info", "ms2.info") %in% file)){
cat("Use old data\n")
load(file.path(path, "ms1.info"))
load(file.path(path, "ms2.info"))
}else{
##read MS2 data
cat("Reading MS2 data...\n")
temp.ms2.type <- stringr::str_split(string = ms2.data.name,
pattern = "\\.")[[1]]
temp.ms2.type <- temp.ms2.type[length(temp.ms2.type)]
if(temp.ms2.type %in% c("mzXML", "mzML")){
ms2.data <- readMZXML(file = file.path(path, ms2.data.name), threads = threads)
}else{
ms2.data <- lapply(ms2.data.name, function(temp.ms2.data){
temp.ms2.type <- stringr::str_split(string = temp.ms2.data,
pattern = "\\.")[[1]]
temp.ms2.type <- temp.ms2.type[length(temp.ms2.type)]
if(!temp.ms2.type %in% c("mgf", "msp")) stop("We only support mgf or msp.\n")
if(temp.ms2.type == "msp"){
temp.ms2.data <- readMSP(file = file.path(path, temp.ms2.data))
}else{
temp.ms2.data <- readMGF(file = file.path(path, temp.ms2.data))
}
temp.ms2.data
})
names(ms2.data) <- ms2.data.name
###prepare data for metIdentification function
cat("Preparing MS2 data for identification...\n")
ms2.data <- mapply(FUN = function(temp.ms2.data, temp.ms2.data.name){
temp.ms2.data <- lapply(temp.ms2.data, function(x){
info <- x$info
info <- data.frame(name = paste("mz", info[1],"rt", info[2], sep = ""),
"mz" = info[1], "rt" = info[2],
"file" = temp.ms2.data.name,
stringsAsFactors = FALSE)
rownames(info) <- NULL
x$info <- info
x
})
temp.ms2.data
},
temp.ms2.data = ms2.data,
temp.ms2.data.name = ms2.data.name)
if(class(ms2.data) == "matrix"){
ms2.data <- ms2.data[,1]
}else{
ms2.data <- do.call(what = c, args = ms2.data)
}
}
ms1.info <- lapply(ms2.data, function(x){
x[[1]]
})
ms2.info <- lapply(ms2.data, function(x){
x[[2]]
})
ms1.info <- do.call(what = rbind, args = ms1.info)
ms1.info <- as.data.frame(ms1.info)
rownames(ms1.info) <- NULL
duplicated.name <- unique(ms1.info$name[duplicated(ms1.info$name)])
if(length(duplicated.name) > 0){
lapply(duplicated.name, function(x){
ms1.info$name[which(ms1.info$name == x)] <- paste(x, c(1:sum(ms1.info$name == x)), sep = "_")
})
}
names(ms2.info) <- ms1.info$name
##save intermediate data
save(ms1.info, file = file.path(path, "ms1.info"), compress = "xz")
save(ms2.info, file = file.path(path, "ms2.info"), compress = "xz")
}
database.name <- unlist(lapply(parameter.list, function(x){
x$database
}))
if(!all(database.name %in% dir(path))){
stop("The database: ", paste(database.name[!database.name %in% dir(path)], collapse = ", "),
"\n",
" you want to use are not in you directory: \n", path)
}
identification.result <- vector(mode = "list", length = length(database.name))
names(identification.result) <- database.name
for(i in 1:length(database.name)){
cat("-------------------------------\n")
cat('Database', i, ":", database.name[i], "\n")
cat("-------------------------------\n")
new.path <- file.path(path, paste(database.name[i], "Result", sep= '_'))
dir.create(new.path)
if(any(dir(new.path) == "result")){
load(file.path(new.path, "result"))
identification.result[[i]] <- result
rm(list = "result")
next()
}
if(length(parameter.list[[i]]) < 20){
result <- mzIdentify(
ms1.data = ms1.data.name,
ms1.match.ppm = parameter.list[[i]]$ms1.match.ppm,
polarity = parameter.list[[i]]$polarity,
column = parameter.list[[i]]$column,
path = path,
candidate.num = parameter.list[[i]]$candidate.num,
database = parameter.list[[i]]$database,
threads = parameter.list[[i]]$threads
)
}else{
result <- metIdentify(ms1.data = ms1.data.name,
ms2.data = ms2.data.name,
ms1.ms2.match.mz.tol = parameter.list[[i]]$ms1.ms2.match.mz.tol,
ms1.ms2.match.rt.tol = parameter.list[[i]]$ms1.ms2.match.rt.tol,
ms1.match.ppm = parameter.list[[i]]$ms1.match.ppm,
ms2.match.ppm = parameter.list[[i]]$ms2.match.ppm,
mz.ppm.thr = parameter.list[[i]]$mz.ppm.thr,
ms2.match.tol = parameter.list[[i]]$ms2.match.tol,
fraction.weight = parameter.list[[i]]$fraction.weight,
dp.forward.weight = parameter.list[[i]]$dp.forward.weight,
dp.reverse.weight = parameter.list[[i]]$dp.reverse.weight,
rt.match.tol = parameter.list[[i]]$rt.match.tol,
polarity = parameter.list[[i]]$polarity,
ce = parameter.list[[i]]$ce,
column = parameter.list[[i]]$column,
ms1.match.weight = parameter.list[[i]]$ms1.match.weight,
rt.match.weight = parameter.list[[i]]$rt.match.weight,
ms2.match.weight = parameter.list[[i]]$ms2.match.weight,
path = path,
total.score.tol = parameter.list[[i]]$total.score.tol,
candidate.num = parameter.list[[i]]$candidate.num,
database = parameter.list[[i]]$database,
threads = parameter.list[[i]]$threads
)
}
# unlink(x = new.path, recursive = TRUE, force = TRUE)
identification.result[[i]] <- result
save(result, file = file.path(new.path, "result"))
rm(list = "result")
}
invisible(identification.result)
})
#' @title metIdentifyParam
#' @description Generate the parameter list for metIdentify function.
#' @author Xiaotao Shen
#' \email{shenxt1990@@163.com}
#' @param ms1.ms2.match.mz.tol MS1 peak and MS2 spectrum matching m/z tolerance. Default is 25 pm.
#' @param ms1.ms2.match.rt.tol MS1 peak and MS2 spectrum matching RT tolerance. Default is 10 s.
#' @param ms1.match.ppm Precursor match ppm tolerance.
#' @param ms2.match.ppm Fragment ion match ppm tolerance.
#' @param mz.ppm.thr Accurate mass tolerance for m/z error calculation.
#' @param ms2.match.tol MS2 match (MS2 similarity) tolerance.
#' @param fraction.weight The weight for matched fragments.
#' @param dp.forward.weight Forward dot product weight.
#' @param dp.reverse.weight Reverse dot product weight.
#' @param rt.match.tol RT match tolerance.
#' @param polarity The polarity of data, "positive"or "negative".
#' @param ce Collision energy. Please confirm the CE values in your database. Default is "all".
#' @param column "hilic" (HILIC column) or "rp" (reverse phase).
#' @param ms1.match.weight The weight of MS1 match for total score calculation.
#' @param rt.match.weight The weight of RT match for total score calculation.
#' @param ms2.match.weight The weight of MS2 match for total score calculation.
#' @param total.score.tol Total score tolerance. The total score are refering to MS-DIAL.
#' @param candidate.num The number of candidate.
#' @param database MS2 database name.
#' @param threads Number of threads
#' @return A metIdentifyClass object.
#' @export
#' @seealso The example and demo data of this function can be found
#' https://jaspershen.github.io/metIdentify/articles/metIdentify.html
setGeneric(name = "metIdentifyParam",
def = function(
ms1.ms2.match.mz.tol = 25,
ms1.ms2.match.rt.tol = 10,
ms1.match.ppm = 25,
ms2.match.ppm = 30,
mz.ppm.thr = 400,
ms2.match.tol = 0.5,
fraction.weight = 0.3,
dp.forward.weight = 0.6,
dp.reverse.weight = 0.1,
rt.match.tol = 30,
polarity = c("positive", "negative"),
ce = "all",
column = c("hilic", "rp"),
ms1.match.weight = 0.25,
rt.match.weight = 0.25,
ms2.match.weight = 0.5,
total.score.tol = 0.5,
candidate.num = 3,
database,
threads = 3
){
if(missing(database)){
stop("The database name must be provided.\n")
}
# if(class(database) != "metIdentifyClass"){
# stop("database must be metIdentifyClass object\n")
# }
polarity <- match.arg(polarity)
column <- match.arg(column)
param <- list(ms1.ms2.match.mz.tol = ms1.ms2.match.mz.tol,
ms1.ms2.match.rt.tol = ms1.ms2.match.rt.tol,
ms1.ms2.match.mz.tol = ms1.ms2.match.mz.tol,
ms1.ms2.match.rt.tol = ms1.ms2.match.rt.tol,
ms1.match.ppm = ms1.match.ppm,
ms2.match.ppm = ms2.match.ppm,
mz.ppm.thr = mz.ppm.thr,
ms2.match.tol = ms2.match.tol,
fraction.weight = fraction.weight,
dp.forward.weight = dp.forward.weight,
dp.reverse.weight = dp.reverse.weight,
rt.match.tol = rt.match.tol,
polarity = polarity,
ce = ce,
column = column,
ms1.match.weight = ms1.match.weight,
rt.match.weight = rt.match.weight,
ms2.match.weight = ms2.match.weight,
total.score.tol = total.score.tol,
candidate.num = candidate.num,
database = database,
threads = threads
)
list("metIdentifyParam" = param)
})
#' @title mzIdentify
#' @description Generate the mzIdentify parameter list.
#' @author Xiaotao Shen
#' \email{shenxt1990@@163.com}
#' @param ms1.match.ppm Precursor match ppm tolerance.
#' @param polarity The polarity of data, "positive"or "negative".
#' @param column "hilic" (HILIC column) or "rp" (reverse phase).
#' @param candidate.num The number of candidates.
#' @param database MS1 database name.
#' @param threads Number of threads
#' @return A mzIdentifyClass object.
#' @export
#' @seealso The example and demo data of this function can be found
#' https://jaspershen.github.io/metIdentify/articles/metIdentify.html
setGeneric(name = "mzIdentifyParam",
def = function(
ms1.match.ppm = 25,
polarity = c("positive", "negative"),
column = c("hilic", "rp"),
candidate.num = 3,
database,
threads = 3
){
if(missing(database)){
stop("The database name must be provided.\n")
}
if(database != "HMDB.metabolite.data"){
stop("database can only be HMDB.metabolite.data\n")
}
polarity <- match.arg(polarity)
column <- match.arg(column)
param <- list(
ms1.match.ppm = ms1.match.ppm,
polarity = polarity,
column = column,
candidate.num = candidate.num,
database = database,
threads = threads
)
list("mzIdentifyParam" = param)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.