Nothing
#' MassBank-record Parser
#'
#' Can parse MassBank-records(only V2)
#'
#' @aliases parseMassBank
#' @usage parseMassBank(Files)
#' @param Files A path to the plaintext-record that should be read
#' @return The \code{mbWorkspace} that the plaintext-record creates.
#' @seealso \code{\link{validate}}
#' @author Erik Mueller
#' @examples \dontrun{
#' parseMassBank("filepath_to_records/RC00001.txt")
#' }
#' @export
parseMassBank <- function(Files){
mb <- new("mbWorkspace")
mb@compiled_ok <- list()
i <- 1
fileConnection <- file(Files[i])
record <- readLines(fileConnection)
close(fileConnection)
mb@compiled_ok[[i]] <- list()
mb@compiled_ok[[i]][['ACCESSION']] <- substring(grep('ACCESSION:',record, value = TRUE, fixed = TRUE),12)
mb@compiled_ok[[i]][['RECORD_TITLE']] <- substring(grep('RECORD_TITLE:',record, value = TRUE),12)
mb@compiled_ok[[i]][['DATE']] <- format(as.Date(substring(grep('DATE:',record, value = TRUE, fixed = TRUE),7), format = "%Y.%m.%d"), "%Y.%m.%d")
mb@compiled_ok[[i]][['AUTHORS']] <- substring(grep('AUTHORS:',record, value = TRUE, fixed = TRUE),10)
mb@compiled_ok[[i]][['LICENSE']] <- substring(grep('LICENSE:',record, value = TRUE, fixed = TRUE),10)
mb@compiled_ok[[i]][['COPYRIGHT']] <- substring(grep('COPYRIGHT:',record, value = TRUE, fixed = TRUE),12)
##publication <- substring(grep('PUBLICATION:',record, fixed = TRUE),14)
##if(length(publication) > 0){
#mb@compiled_ok[[i]][['PUBLICATION']] <- publication
##}
##The list of comments is handled differently
##in RMassBank, but the flattening should work anyway, if I'm correct(RMassBank uses internal values for comments)
commentlist <- list()
commentlist <- as.list(substring(grep('COMMENT:',record, value = TRUE, fixed = TRUE),10))
mb@compiled_ok[[i]][['COMMENT']] <- list()
mb@compiled_ok[[i]][['COMMENT']] <- commentlist
chnames <- list()
chnames <- as.list(substring(grep('CH$NAME:',record, value = TRUE, fixed = TRUE),10))
mb@compiled_ok[[i]][['CH$NAME']] <- chnames
mb@compiled_ok[[i]][['CH$COMPOUND_CLASS']] <- substring(grep('CH$COMPOUND_CLASS:',record, value = TRUE, fixed = TRUE),20)
mb@compiled_ok[[i]][['CH$FORMULA']] <- substring(grep('CH$FORMULA:',record, value = TRUE, fixed = TRUE),13)
mb@compiled_ok[[i]][['CH$EXACT_MASS']] <- as.numeric(substring(grep('CH$EXACT_MASS:',record, value = TRUE, fixed = TRUE),16))
mb@compiled_ok[[i]][['CH$SMILES']] <- substring(grep('CH$SMILES:',record, value = TRUE, fixed = TRUE),12)
mb@compiled_ok[[i]][['CH$IUPAC']] <- substring(grep('CH$IUPAC:',record, value = TRUE, fixed = TRUE),11)
##Again: Flattening this should be no Problem, although the structure is different -
##RMassBank names every type of link, but this isn't necessary here since we're only
##reading, not creating. If that's a problem, I'll change it.
links <- list()
links <- as.list(substring(grep('CH$LINK:',record, value = TRUE, fixed = TRUE),10))
mb@compiled_ok[[i]][['CH$LINK']] <- links
##SP$ will be included later since it's kind of rarely used
mb@compiled_ok[[i]][['AC$INSTRUMENT']] <- substring(grep('AC$INSTRUMENT:',record, value = TRUE, fixed = TRUE),16)
mb@compiled_ok[[i]][['AC$INSTRUMENT_TYPE']] <- substring(grep('AC$INSTRUMENT_TYPE:',record, value = TRUE, fixed = TRUE),21)
##Get the Subvalues just like in RMassBank
##RECORD VERSION SPECIFIC READING INCLUDED
##This could convert Version 1 -> Version 2 if used right,
##Although I have no idea how well it'd do that
##I'll have to find the old specifications to do this right, until then it should only kind of work
##well enough to do some tests
Version <- 2
ac_ms <- list()
ac_ms[['MS_TYPE']] <- substring(grep('AC$MASS_SPECTROMETRY: MS_TYPE',record, value = TRUE, fixed = TRUE),31)
if(length(ac_ms[['MS_TYPE']]) == 0){
ac_ms[['MS_TYPE']] <- substring(grep('AC$ANALYTICAL_CONDITION: MS_TYPE',record, value = TRUE, fixed = TRUE),34)
Version <- 1
}
if(Version == 1){
##This not a real tag anymore(according to the specifications) but RMassBank still writes it...?
##I'll include it for the case that I'm reading V1-records
ac_ms[['ION_MODE']] <- substring(grep('AC$ANALYTICAL_CONDITION: MODE',record, value = TRUE, fixed = TRUE),31)
ac_ms[['IONIZATION']] <- substring(grep('AC$MASS_SPECTROMETRY: IONIZATION',record, value = TRUE, fixed = TRUE),34)
} else{
ac_ms[['ION_MODE']] <- substring(grep('AC$MASS_SPECTROMETRY: ION_MODE',record, value = TRUE, fixed = TRUE),32)
##Some of the following are part of the (optional) specification, but NOT in RMassBank(!)
##This is just for the sake of completeness
ac_ms[['COLLISION_ENERGY']] <- substring(grep('AC$MASS_SPECTROMETRY: COLLISION_ENERGY',record, value = TRUE, fixed = TRUE),40)
ac_ms[['COLLISION_GAS']] <- substring(grep('AC$MASS_SPECTROMETRY: COLLISION_GAS',record, value = TRUE, fixed = TRUE),37)
ac_ms[['DATE']] <- substring(grep('AC$MASS_SPECTROMETRY: DATE',record, value = TRUE, fixed = TRUE),28)
ac_ms[['DESOLVATION_GAS_FLOW']] <- substring(grep('AC$MASS_SPECTROMETRY: DESOLVATION_GAS_FLOW',record, value = TRUE, fixed = TRUE),44)
ac_ms[['DESOLVATION_TEMPERATURE']] <- substring(grep('AC$MASS_SPECTROMETRY: DESOLVATION_TEMPERATURE',record, value = TRUE, fixed = TRUE),47)
ac_ms[['IONIZATION']] <- substring(grep('AC$MASS_SPECTROMETRY: IONIZATION',record, value = TRUE, fixed = TRUE),34)
ac_ms[['IONIZATION_ENERGY']] <- substring(grep('AC$MASS_SPECTROMETRY: IONIZATION_ENERGY',record, value = TRUE, fixed = TRUE),41)
ac_ms[['LASER']] <- substring(grep('AC$MASS_SPECTROMETRY: LASER',record, value = TRUE, fixed = TRUE),29)
ac_ms[['MATRIX']] <- substring(grep('AC$MASS_SPECTROMETRY: MATRIX',record, value = TRUE, fixed = TRUE),30)
ac_ms[['MASS_ACCURACY']] <- substring(grep('AC$MASS_SPECTROMETRY: MASS_ACCURACY',record, value = TRUE, fixed = TRUE),37)
ac_ms[['REAGENT_GAS']] <- substring(grep('AC$MASS_SPECTROMETRY: REAGENT_GAS',record, value = TRUE, fixed = TRUE),35)
ac_ms[['SCANNING']] <- substring(grep('AC$MASS_SPECTROMETRY: SCANNING',record, value = TRUE, fixed = TRUE),32)
##These are in RMassBank, but not part of the specification?
##I think I'm misreading something...
#ac_ms[['FRAGMENTATION_MODE']] <- msmsdata$info$mode
#ac_ms[['PRECURSOR_TYPE']] <- precursor_types[spec$mode]
#ac_ms[['RESOLUTION']] <- msmsdata$info$res
ac_lc <- list();
ac_lc[['CAPILLARY_VOLTAGE']] <- substring(grep('AC$CHROMATOGRAPHY: CAPILLARY_VOLTAGE',record, value = TRUE, fixed = TRUE),36)
ac_lc[['COLUMN_NAME']] <- substring(grep('AC$CHROMATOGRAPHY: COLUMN_NAME',record, value = TRUE, fixed = TRUE),32)
ac_lc[['COLUMN_TEMPERATURE']] <- substring(grep('AC$CHROMATOGRAPHY: COLUMN_TEMPERATURE',record, value = TRUE, fixed = TRUE),39)
ac_lc[['FLOW_GRADIENT']] <- substring(grep('AC$CHROMATOGRAPHY: FLOW_GRADIENT',record, value = TRUE, fixed = TRUE),34)
ac_lc[['FLOW_RATE']] <- substring(grep('AC$CHROMATOGRAPHY: FLOW_RATE',record, value = TRUE, fixed = TRUE),30)
ac_lc[['RETENTION_TIME']] <- substring(grep('AC$CHROMATOGRAPHY: RETENTION_TIME',record, value = TRUE, fixed = TRUE),35)
ac_lc[['SOLVENT A']] <- substring(grep('AC$CHROMATOGRAPHY: SOLVENT A',record, value = TRUE, fixed = TRUE),30)
ac_lc[['SOLVENT B']] <- substring(grep('AC$CHROMATOGRAPHY: SOLVENT B',record, value = TRUE, fixed = TRUE),30)
ms_fi <- list()
ms_fi[['BASE_PEAK']] <- as.double(substring(grep('MS$FOCUSED_ION: BASE_PEAK',record, value = TRUE, fixed = TRUE),27))
ms_fi[['PRECURSOR_M/Z']] <- substring(grep('MS$FOCUSED_ION: PRECURSOR_M/Z',record, value = TRUE, fixed = TRUE),31)
ms_fi[['PRECURSOR_TYPE']] <- substring(grep('MS$FOCUSED_ION: PRECURSOR_TYPE',record, value = TRUE, fixed = TRUE),32)
if(ac_ms[['MS_TYPE']] == 'MS2'){
ms_fi[['PRECURSOR_M/Z']] <- as.double(ms_fi[['PRECURSOR_M/Z']])
}
}
namesAcms <- names(ac_ms)
namesAclc <- names(ac_lc)
namesMsfi <- names(ms_fi)
for(k in 1:length(ac_ms)){
if(length(ac_ms[[namesAcms[k]]]) == 0){
ac_ms[[namesAcms[k]]] <- NA
}
}
for(k in 1:length(ac_lc)){
if(length(ac_lc[[namesAclc[k]]]) == 0){
ac_lc[[namesAclc[k]]] <- NA
}
}
for(k in 1:length(ms_fi)){
if(length(ms_fi[[namesMsfi[k]]]) == 0){
ms_fi[[namesMsfi[k]]] <- NA
}
}
mb@compiled_ok[[i]][['AC$MASS_SPECTROMETRY']] <- list()
mb@compiled_ok[[i]][['AC$MASS_SPECTROMETRY']] <- ac_ms
mb@compiled_ok[[i]][['AC$CHROMATOGRAPHY']] <- list()
mb@compiled_ok[[i]][['AC$CHROMATOGRAPHY']] <- ac_lc
mb@compiled_ok[[i]][['MS$FOCUSED_ION']] <- list()
mb@compiled_ok[[i]][['MS$FOCUSED_ION']] <- ms_fi
##Can currently only read annotations of the type "m/z num {formula mass error(ppm)}"
##and'll only read it properly if there is only one annotation
##the strange conversion of the data.frames is there so RMassBank can actually write it again
PKannotationStart <- grep('PK$ANNOTATION:',record, fixed = TRUE) + 1
numpeak <- grep('PK$NUM_PEAK:',record, fixed = TRUE)
if(length(PKannotationStart) > 0 && ReadAnnotation == TRUE){
if(PKannotationStart < numpeak){
splitted <- strsplit(record[PKannotationStart:(numpeak-1)]," ")
PKannotation <- matrix(nrow = numpeak - PKannotationStart, ncol = 5)
for(k in 1:length(splitted)){
splitted[[k]] <- splitted[[k]][which(splitted[[k]] != "")]
PKannotation[k,] <- splitted[[k]]
}
PKannotation <- as.data.frame(PKannotation, stringsAsFactors = FALSE)
PKannotation[] <- lapply(PKannotation, type.convert)
colnames(PKannotation) <- c("m/z", "num", "{formula", "mass", "error(ppm)}")
PKannotation$"{formula" <- as.character(PKannotation$"{formula")
}
mb@compiled_ok[[i]][['PK$ANNOTATION']] <- PKannotation
}
##Extract the peaks and write the data into a data.frame
PKStart <- grep('PK$PEAK:',record, fixed = TRUE) + 1
endslash <- tail(grep('//',record, fixed = TRUE),1)
if(PKStart < endslash){
splitted <- strsplit(record[PKStart:(endslash-1)]," ")
PKPeak <- matrix(nrow = endslash - PKStart, ncol = 3)
for(k in 1:length(splitted)){
splitted[[k]] <- splitted[[k]][which(splitted[[k]] != "")]
PKPeak[k,] <- splitted[[k]]
}
PKPeak <- as.data.frame(PKPeak, stringsAsFactors = FALSE)
PKPeak[] <- lapply(PKPeak, type.convert)
colnames(PKPeak) <- c("m/z", "int", "rel.int.")
}
mb@compiled_ok[[i]][['PK$PEAK']] <- PKPeak
namesComp <- names(mb@compiled_ok[[i]])
for(k in 1:length(mb@compiled_ok[[i]])){
if(length(mb@compiled_ok[[i]][[namesComp[k]]]) == 0){
mb@compiled_ok[[i]][[namesComp[k]]] <- NA
}
}
print(paste("Read",Files[i]))
flush.console()
return(mb)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.