#' Matching query spectrum to existing library
#'
#' The function searches unknown MS/MS spectra in a spectral library
#'
#' @param library A list generated by the function library_generator() or the name of mgf spectral library file
#' @param query_spectrum Two-column data matrix. Two columns represent m/z and intensity of query tandem spectrum
#' @param method Character. Method to compare the similarity between query spectrum and library spectra.
#' \itemize{
#' \item{Fragment:}{ Counting only number of fragment matches}
#' \item{Simple:}{ Conting number of fragment and neutral loss matches}
#' \item{All:}{ Conting number of fragment, neutral loss and mass differences matches.}
#' \item{Cosine:}{ cosine similarity score from OrgMassSpecR package.}
#' }
#' @param prec_mz Numeric. Precursor mass of query spectrum (if known). Default value is 0. Must NOT be 0 if method = "All" or use.prec = TRUE (see below)
#' @param use.prec Boolean. If set to TRUE, precursor mass is used to "pre-query" the library
#' @param ppm_search Numeric. Mass tolerance in ppm for precursor/fragment search.
#' @param relative Numeric between 0 and 100. The relative intensity threshold of the highest peak in each spectrum). Peaks in query spectrum below relative thresholds are not taken into account.
#' @param mirror.plot Boolean. True if the query-library comparison is visualized as mirror plot
#' @param png.out Boolean. True if plotted mirror spectra are exported as png images!
#'
#' @return
#' \itemize{
#' \item{<plot>:}{ Comparing query spectrum to ordered "hits" in the spectrum library}
#' \item{SELECTED:}{ Library object that contain found scans.}
#' \item{ID_SELECTED:}{ IDs of found compounds.}
#' \item{SCORES:}{ Similarity scores between query spectrum and each }
#' }
#'
#' @author Youzhong Liu, \email{Youzhong.Liu@uantwerpen.be}
#'
#' @examples
#'
#' data(DRUG_THERMO_LIBRARY)
#' dat = cbind(c(136.073,149.071,151.0991,180.047),c(1,1,20,3))
#' query = library_similarity(library2, dat, method = "Cosine", prec_mz = 0, use.prec =FALSE, ppm_search = 20, relative = 1, mirror.plot = F)
#'
#' # Query-library comparison via mirror plot:
#' library_visualizer_similarity(query$SELECTED, id= query$ID_SELECTED[1], query_spectrum = dat)
#'
#' @importFrom MSnbase fData readMgfData
#' @importFrom tools file_ext
#' @importFrom stringr str_replace_all fixed
#' @importFrom OrgMassSpecR SpectrumSimilarity
#'
#' @export
library_similarity<-function(library, query_spectrum=NULL, method = c("Fragment", "Simple", "All", "Cosine"),
prec_mz = 0, use.prec = FALSE, ppm_search = 20, relative = 1, mirror.plot = T, png.out=F){
options(stringsAsFactors = FALSE)
options(warn=-1)
SELECTED_LIBRARY = NULL
#################
### Check inputs:
#################
if (missing(library)){
stop("Please provide the output of library_generator() or a .mgf file as input library!")}
if (is.character(library)){
if (file_ext(library)!="mgf"){
stop("The file extension of your input library must be mgf!")
}}
if (is.list(library)){
if (length(library)==2 & "complete" %in% names(library)){
library = library$complete
}
if (length(library)!=2 || (!is.list(library$sp)) || !is.data.frame(library$metadata)){
stop("Please make sure your input library is a valid output of library_generator()!")
}}
if (is.null(query_spectrum)){
stop("Please provide a 2 column query spectrum!")
} else {
if (ncol(query_spectrum)<2){
stop("Spectrum must have 2 columns m/z and intensity!")
}
}
if ((prec_mz == 0) && use.prec){
stop("Please provide the precursor mass if it is used for library search!")
}
if ((prec_mz == 0) && (method == "All")){
stop("Please provide the precursor mass if neutral loss are considered!")
}
if ((prec_mz == 0) && (method == "Simple")){
stop("Please provide the precursor mass if neutral loss are considered!")
}
if ((prec_mz!=0) && use.prec){
query = paste0("PEPMASS = ", prec_mz)
library = library_manager(library, query = query, ppm_search = ppm_search)
library = library$SELECTED # Filter library according to precursor mass
if (nrow(library$metadata)==0){stop("No record with specified precursor mass found!")}
}
library = library_manager(library, query = "MSLEVEL = 1")$LEFT
if (nrow(library$metadata)==0){stop("No MS/MS records!")}
###############################
### Preprocess query spectrum:
###############################
dat = query_spectrum[,1:2, drop = FALSE]
# Normalize, cut only masses smaller than precursor and filter background noise:
dat[,2]=dat[,2]/max(dat[,2])*100
if (prec_mz==0){prec_mz = 10000} # Not consider the precursor mass
selected = which((dat[,1] < prec_mz+1)
& (dat[,2]>relative)
& ppm_distance(dat[,2], prec_mz)> ppm_search) # Remove non-precursor masses
if (length(selected)>0){
dat = dat[selected,,drop = FALSE]
#dat = data.matrix(matrix(dat,ncol=2))
} else {
stop("Spectrum not valid!")
}
dat_mdiff = unique(unlist(dat[,1]))
#####################################
### Reading from spectral library:
#####################################
if (is.character(library)){ # If input is a mgf file name
library=readMGF2(library)
}
metadata = library$metadata
spectrum_list = library$sp
NS = nrow(metadata)
nloss_list = lapply(1:NS, function(i) as.numeric(metadata$PEPMASS[i])-spectrum_list[[i]][,1])
mdiff_list = lapply(1:NS, function(i) unique(unlist(dist(spectrum_list[[i]][,1]))))
#print(nloss_list)
#nloss_list = lapply(nloss_list, function(x) x[x>0])
###########################
### Run spectra search:
###########################
nb_matched_scores = rep(0,NS)
sim_scores = rep(0,NS)
for (i in 1:NS){
if (method == "Fragment"){
dist_spec = sapply(spectrum_list[[i]][,1],ppm_distance, dat[,1])
dist_spec[lower.tri(dist_spec)] = 10000000 # Remove redundant matches
sim_scores[i] = sum(dist_spec<=ppm_search)
}
if (method == "Simple"){
dist_spec = sapply(spectrum_list[[i]][,1], ppm_distance, dat[,1])
dist_spec[lower.tri(dist_spec)] = 10000000 # Remove redundant matches
dist_loss = sapply(nloss_list[[i]], ppm_distance, prec_mz - dat[,1])
dist_loss[lower.tri(dist_loss)] = 10000000 # Remove redundant matches
sim_scores[i] = sum(dist_spec<=ppm_search) + sum(dist_loss<=ppm_search)
}
if (method == "All"){
dist_spec = sapply(spectrum_list[[i]][,1], ppm_distance, dat[,1])
dist_spec[lower.tri(dist_spec)] = 10000000 # Remove redundant matches
dist_loss = sapply(nloss_list[[i]], ppm_distance, prec_mz - dat[,1])
dist_loss[lower.tri(dist_loss)] = 10000000 # Remove redundant matches
dist_mdiff = sapply(mdiff_list[[i]], ppm_distance, dat_mdiff)
dist_mdiff[lower.tri(dist_mdiff)] = 10000000 # Remove redundant matches
sim_scores[i] = sum(dist_spec<=ppm_search) + sum(dist_loss<=ppm_search) + sum(dist_mdiff<=ppm_search)
}
if (method == "Cosine"){
abs_search = ppm_search/1000000*median(dat[,1]) # Da window for cosine spectra search
sim_scores[i] = SpectrumSimilarity(dat, spectrum_list[[i]], t = abs_search)
}
graphics.off()
}
###########################
### Plot and output results:
###########################
# Top scores and output:
indexes = order(sim_scores,decreasing=T)
indexes = intersect(indexes,which(sim_scores>0))
if (length(indexes>0)){
SELECTED_LIBRARY = list()
SELECTED_LIBRARY$sp = library$sp[indexes]
SELECTED_LIBRARY$metadata = library$metadata[indexes,]
sim_scores = sim_scores[indexes]
# Plot:
if (mirror.plot){
for (i in 1:length(indexes)){
if (png.out){png(paste0("SIM_SCAN_", SELECTED_LIBRARY$metadata$SCANS[i],".png"), width = 700, height = 480)}
bottom.label = paste0("Library spectrum of ID = ", SELECTED_LIBRARY$metadata$ID[i])
xlim = c(min(dat[,1])*0.8, max(dat[,1])*1.2)
SpectrumSimilarity(dat, SELECTED_LIBRARY$sp[[i]], top.label="Query spectrum",bottom.label= bottom.label,xlim=xlim, print.graphic = T)
if (png.out){dev.off()}
}
}
}
return(list(SELECTED = SELECTED_LIBRARY,
ID_SELECTED = unique(SELECTED_LIBRARY$metadata$ID),
SCORES = sim_scores))
}
############################
### Internal functions:
###########################
ppm_distance<-function(x,y){
return(abs((x-y)/y*1000000))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.