#' Read and extract an existing library
#'
#' The function creates or extracts a list object containing dataframe of metadata and a list of spectra from library object
#'
#' @param raw_library Library file name (character) or object generated by library_generator. If character, the file extension must be mgf, msp or rdata
#'
#' @return
#' \itemize{
#' \item{sp:}{ List of all extracted spectra. Each spectrum is a data matrix with two columns: m/z and intensity}
#' \item{metadata:}{ Data frame containing metadata of extracted scans.}
#' }
#'
#' @importFrom tools file_ext
#' @importFrom MSnbase fData readMgfData
#' @importFrom plyr rbind.fill
#'
#' @export
#'
library_reader<-function(raw_library){
options(stringsAsFactors = FALSE)
options(warn=-1)
library_complete = NULL
library_consensus = NULL
library_network = NULL
########################
### Check input format:#
########################
if (is.character(raw_library)){
input_format = tolower(file_ext(raw_library))
if (!input_format %in% c("msp", "rdata", "mgf")){
stop("The input library must be mgf, msp or RData format!")
}
}
if (is.null(raw_library)){
stop("Please provide file name or a library object as library input!")
}
if (is.list(raw_library)){
input_format = "list"
if ("SELECTED" %in% names(raw_library)){raw_library = raw_library$SELECTED}
tmp_labels = names(raw_library)
if (!("complete" %in% tmp_labels) || !("consensus" %in% tmp_labels) & !("network" %in% tmp_labels)){
if ("metadata" %in% tmp_labels & "sp" %in% tmp_labels){
raw_library = list(complete = raw_library, consensus = raw_library, network = NULL)
} else {
stop("Format of the library is not valid")
}
}
}
##################
### Read inputs:##
##################
if (input_format=="list"){
library_complete = raw_library$complete
library_consensus = raw_library$consensus
library_network = raw_library$network
}
if (input_format=="mgf"){library_complete = readMGF2(raw_library)}
if (input_format=="rdata"){
raw_library = load_object(raw_library)
library_complete = raw_library$complete
library_consensus = raw_library$consensus
library_network = raw_library$network
}
if (input_format=="msp"){library_complete = readMSP2(raw_library)}
#############################################
### Clean and standardize complete library:##
#############################################
#library_complete = standarlize_library(library_complete)
#if (!is.null(library_consensus)){library_consensus = standarlize_library(library_consensus)}
####################
### Output inputs:##
####################
input_library = list(complete = library_complete, consensus = library_consensus, network=library_network)
return(input_library)
}
######################
### Internal function:
######################
load_object <- function(file) {
tmp <- new.env()
load(file = file, envir = tmp)
tmp[[ls(tmp)[1]]]
}
readMGF2<-function(con){
### Reading from spectral library
db = readMgfData(con, verbose = FALSE)
metadata = fData(db)
N = nrow(metadata)
# From a MSnBase object to a list of spectra m/z intensity
spectrum_list=list()
for (i in 1:N){
spectrum_list[[i]]=cbind(db[[i]]@mz,db[[i]]@intensity)
}
### Return new results:
library = list()
if (N>0){
library$sp = spectrum_list
library$metadata = metadata
}
return(library)
}
readMSP2<-function(con){
### Reading from spectral library
msp <- file(con,open="r")
msp_lines <- readLines(msp,warn = F)
msp_lines = msp_lines[msp_lines!=""]
close(msp)
begin_ind = which(grepl("Name:",msp_lines))
#end_ind = which(grepl("Num Peaks:",msp_lines))
N = length(begin_ind)
begin_ind = c(begin_ind, length(msp_lines)+1)
metadata = c() # New metadata
splist = list()
for (i in 1:N){
# Find metadata ranges:
checked = FALSE
ki = begin_ind[i]
while (!checked){
ki = ki + 1
checked = !is.na(as.numeric(strsplit(msp_lines[ki], " ")[[1]][1]))
}
end_ind = ki - 1
metadata_ind = begin_ind[i]:end_ind
metadata_range = msp_lines[metadata_ind]
NI = length(metadata_ind)
new_metadata_names = sapply(metadata_range, function(x) strsplit(x,": ")[[1]][1])
new_metadata_names = as.character(new_metadata_names)
new_metadata = sapply(metadata_range, function(x) strsplit(x,": ")[[1]][2])
new_metadata = as.data.frame(t(new_metadata))
colnames(new_metadata) = new_metadata_names
metadata = rbind.fill(metadata, new_metadata)
spectrum_ind = (end_ind+1):(begin_ind[i+1]-1)
nb_peak = length(spectrum_ind)
spectrum_range = msp_lines[spectrum_ind]
mzlist = sapply(spectrum_range,function(x) strsplit(x," ")[[1]][1])
intlist = sapply(spectrum_range,function(x) strsplit(x," ")[[1]][2])
spectrum = cbind(as.numeric(mzlist),as.numeric(intlist))
splist[[i]] = spectrum
}
### Return results
rownames(metadata) = NULL
metadata= data.frame(metadata)
return(list(metadata = metadata, sp = splist))
}
standarlize_library<-function(library0){
metadata = library0$metadata
sp = library0$sp
temp_col = toupper(colnames(metadata))
# ID:
ind1 = which(temp_col=="ID")
ind2 = which(temp_col=="NAME")
if (length(ind1)==0 & length(ind2)==1){colnames(metadata)[ind2] = "ID"}
if (!("ID" %in% colnames(metadata))){stop("Metadata must contain a column ID!")}
metadata$ID = as.character(metadata$ID)
#PEPMASS:
ind = which(temp_col=="PRECURSORMZ")
if (length(ind)==1){colnames(metadata)[ind] = "PEPMASS"}
if ("PEPMASS" %in% colnames(metadata)){
temp_mz = round(as.numeric(metadata$PEPMASS),4)
temp_mz[is.na(temp_mz)]="N/A"
metadata$PEPMASS = temp_mz
}
#RT:
ind = which(temp_col=="RTINSECONDS")
if (length(ind)==1){
colnames(metadata)[ind] = "RT"
metadata$RT = as.numeric(metadata$RT)/60 # From second to minute
}
if ("RT" %in% colnames(metadata)){
temp_rt = round(as.numeric(metadata$RT),2)
temp_rt[is.na(temp_rt)]="N/A"
metadata$RT = temp_rt
}
#MSLEVEL:
ind = which(temp_col=="SPECTRUM_TYPE")
if (length(ind)==1){colnames(metadata)[ind] = "MSLEVEL"}
if ("MSLEVEL" %in% colnames(metadata)){
temp = metadata$MSLEVEL
temp[temp=="MS2"] = 2
temp[temp=="MS1"] = 1
temp = as.numeric(temp)
temp[is.na(temp)]="N/A"
metadata$MSLEVEL = temp
}
# ADDUCT:
ind = which(temp_col=="PRECURSOR_TYPE")
if (length(ind)==1){colnames(metadata)[ind] = "ADDUCT"}
# IONDMODE:
ind = which(temp_col=="ION_MODE")
if (length(ind)==1){colnames(metadata)[ind] = "IONMODE"}
if ("IONMODE" %in% colnames(metadata)){
temp = as.character(metadata$IONMODE)
temp[temp=="P"] = "Positive"
temp[temp=="N"] = "Negative"
temp[which(!(temp %in% c("Positive", "Negative")))] = "N/A"
metadata$IONMODE = temp
}
# SCANS:
ind = which(temp_col %in% "SCAN")
if (length(ind)==1){colnames(metadata)[ind] = "SCANS"}
if (!("SCANS" %in% colnames(metadata))){metadata$SCANS = 1:nrow(metadata)}
if (NA %in% as.numeric(metadata$SCANS)){metadata$SCANS = 1:nrow(metadata)}
# COMMENTS:
ind = which(temp_col=="COMMENTS")
if (length(ind)==1){metadata = metadata[,-ind]}
# sp
if (!is.list(sp)){sp = list(sp)}
sp = lapply(sp, function(x) data.matrix(x[,1:2,drop=FALSE]))
library1 = list(metadata = metadata, sp = sp)
return(library1)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.