#' Searching and managing the spectral library
#'
#' The function queries and selects or removes scans from the spectral library that satisfy user-defined conditions (query metadata)
#'
#' @param library A list generated by the function library_generator() or the name of mgf spectral library file
#' @param query Vector of characters. Vector of conditions used for querying the library. e.g. c("IONMODE=Positive","PEPMASS=325.19"). The left-hand side must match the metadata items of the searched library.
#' @param logical Character. "AND" for selecting scans that satisfy all conditions, "OR" when selecting records that satisfy at least one condition
#' @param ppm_search Numeric. Mass tolerance in ppm. Only used when searching by precursor mass "PEPMASS=..."
#' @param rt_search Numeric. Retention time tolerance in seconds (although RT in the query and in the metadata is given in minutes). Only used when searching by retention time "RT=..."
#' @return
#' \itemize{
#' \item{SELECTED:}{ Library object that only contains selected scans}
#' \item{ID_SELECTED:}{ IDs of selected scans}
#' \item{LEFT:}{ Library object that only contains unselected scans}
#' \item{ID_LEFT:}{ IDs of unselected scans}
#' }
#'
#' @examples
#'
#' data(DRUG_THERMO_LIBRARY)
#'
#' # Search library using query command lines:
#' query = library_manager(library2,query=c("IONMODE=Positive","RT=1.2"), logical="AND", rt_search=6)
#'
#' # Create a new library from query:
#' new_library1 = query$SELECTED
#'
#' # Summary of found compounds:
#' library_reporter(new_library1)
#'
#' # Remove scans from current library according to query:
#' new_library2 = query$LEFT
#'
#' # Add another filter:
#' query = library_manager(new_library1,query=c("IONMODE=Positive","MSLEVEL=2","RT=1.2"))
#' new_library3 = query$SELECTED
#'
#' @export
#'
#' @importFrom MSnbase fData readMgfData
#' @importFrom tools file_ext
#' @importFrom stringr str_replace_all fixed
#'
library_manager <- function(library, query = "", logical = c("AND", "OR"),
                            ppm_search = 20, rt_search = 12) {
  # Warnings (e.g. NA coercion in as.numeric) are silenced only while this
  # function runs; the caller's options are restored on exit.
  old_options <- options(stringsAsFactors = FALSE, warn = -1)
  on.exit(options(old_options), add = TRUE)

  #################
  ### Check inputs:
  #################
  if (missing(library)) {
    stop("Please provide the output of library_generator() or a .mgf file as input library!")
  }
  if (is.character(library)) {
    if (tools::file_ext(library) != "mgf") {
      stop("The file extension of your input library must be mgf!")
    }
  }
  if (is.list(library)) {
    # A two-element list carrying a "complete" entry is unwrapped to that entry.
    if (length(library) == 2 && "complete" %in% names(library)) {
      library <- library$complete
    }
    if (length(library) != 2 || !is.list(library$sp) ||
        !is.data.frame(library$metadata)) {
      stop("Please make sure your input library is a valid output of library_generator()!")
    }
  }
  logical <- match.arg(logical, choices = c("AND", "OR"), several.ok = FALSE)
  if (!is.character(query)) {
    stop("Query expression is not valid!")
  }

  #####################################
  ### Reading from spectral library:
  #####################################
  if (is.character(library)) { # If input is a mgf file name
    library <- readMGF2(library)
  }
  metadata <- library$metadata
  spectrum_list <- library$sp
  prec_mz <- as.numeric(metadata$PEPMASS)
  prec_rt <- as.numeric(metadata$RT)

  ###########################
  ### Run query expressions:
  ###########################

  # Evaluate one "KEY=VALUE" condition.
  # Returns the row indexes of matching scans (possibly integer(0)), or NULL
  # when the condition cannot be evaluated (no "=", empty value, non-numeric
  # mass/rt, or a key that is not exactly one metadata column).
  find_matches <- function(condition) {
    condition <- gsub(" ", "", condition, fixed = TRUE) # Remove white space
    # Split on the FIRST "=" only, so values that contain "=" stay intact.
    eq_pos <- regexpr("=", condition, fixed = TRUE)[1]
    if (is.na(eq_pos) || eq_pos < 1) {
      return(NULL)
    }
    key <- substr(condition, 1, eq_pos - 1)
    value <- substr(condition, eq_pos + 1, nchar(condition))
    if (!nzchar(value)) {
      return(NULL)
    }
    if (key == "PEPMASS") {
      target_mass <- as.numeric(value)
      if (is.na(target_mass)) {
        return(NULL)
      }
      return(which(ppm_distance(target_mass, prec_mz) <= ppm_search))
    }
    if (key == "RT") {
      target_rt <- as.numeric(value)
      if (is.na(target_rt)) {
        return(NULL)
      }
      # Both sides are multiplied by 60 before comparing with rt_search.
      return(which(abs(target_rt * 60 - prec_rt * 60) <= rt_search))
    }
    # Any other key is compared against the metadata column of the same name.
    column <- which(colnames(metadata) == key)
    if (length(column) != 1) {
      return(NULL)
    }
    which(metadata[[column]] == value)
  }

  # Drop empty/NA conditions; an empty query selects nothing.
  query <- query[!is.na(query) & nzchar(gsub(" ", "", query, fixed = TRUE))]
  hits <- Filter(Negate(is.null), lapply(query, find_matches))

  # A valid condition with zero hits is kept on purpose: under "AND" it must
  # empty the selection rather than being ignored.
  if (length(hits) == 0) {
    selected <- integer(0)
  } else if (logical == "AND") {
    selected <- Reduce(intersect, hits)
  } else {
    selected <- Reduce(union, hits)
  }

  # Output results:
  left <- setdiff(seq_along(spectrum_list), selected)
  SELECTED_LIBRARY <- library
  LEFT_LIBRARY <- library
  SELECTED_LIBRARY$sp <- library$sp[selected]
  SELECTED_LIBRARY$metadata <- library$metadata[selected, , drop = FALSE]
  LEFT_LIBRARY$sp <- library$sp[left]
  LEFT_LIBRARY$metadata <- library$metadata[left, , drop = FALSE]

  list(
    SELECTED = SELECTED_LIBRARY,
    ID_SELECTED = unique(SELECTED_LIBRARY$metadata$ID),
    LEFT = LEFT_LIBRARY,
    ID_LEFT = unique(LEFT_LIBRARY$metadata$ID)
  )
}
############################
### Internal functions:
###########################
ppm_distance <- function(x, y) {
  # Absolute deviation of x from y, relative to y, in parts per million.
  relative_error <- (x - y) / y
  abs(relative_error * 1000000)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.