Nothing
#' @title Read and write standard mzXML files.
#' @description `read.mzXML` and `write.mzXML` are copied from the caMassClass
#' package which is no longer actively on CRAN.
#' @details The main task of `read.mzXML` and `write.mzXML` functions
#' is to extract and save scan data of mzXML files. In addition attempt is
#' made to keep all other sections of mzXML file as unparsed XML code, so
#' the data can be extracted latter or saved into new mzXML files. Those
#' unparsed sections are stored as XML text.
#' @param filename The mzXML file name to be read or written.
#' @return A generic R object of class mzXML for `read.mzXML` and NULL for `write.mzXML`.
#' @export
#' @rdname mzXML
read.mzXML <- function(filename) {
# ===========================================================================#
# Written by Jarek Tuszynski. Copyright 2001-2003 SAIC. #
# Software developed in conjunction with the National Cancer Institute #
# Distributed under GNU General Public License version 3 #
# ===========================================================================#
# verify that suggested packages are available
verify_suggested(c("XML", "digest", "bitops"))
# supporting internal functions
Paste <- function(...) paste(..., sep = "", collapse = "")
strtrunc <- function(Str, Sub) {
lp <- attr(regexpr(paste(".*", Sub, sep = ""), Str), "match.length")
return(substring(Str, 1, lp))
# y = unlist(strsplit(Str,Sub)) # other way of doing it
# return( paste(y[-length(y)], sub, sep="", collapse="") )
}
fregexpr <- function(pattern, filename) { # similar to gregexpr but operating on files not strings
buf.size <- 1024
n <- file.info(filename)$size
pos <- NULL
fp <- file(filename, "rb")
for (d in seq(1, n, by = buf.size)) {
m <- if (n - d > buf.size) buf.size else n - d
p <- gregexpr(pattern, readChar(fp, m))[[1]]
if (p[1] > 0) pos <- c(pos, p + d - 1)
}
close(fp)
if (is.null(pos)) pos <- -1
return(pos)
}
#-------------------------------
# define XML handler function
#-------------------------------
mzXMLhandlers <- function() {
#---------------------------------------------------------
# local variables
#---------------------------------------------------------
obj <- new.mzXML() # create new mzXML object
iScan <- 0
ParentID <- vector(mode = "integer")
sha1 <- vector(mode = "list", length = 2) # optional - element - sha-1 sums
sha1[1] <- sha1[2] <- 0
# Optional attributes that might come with a scan that will be stored
OptScanAttr <- c(
"polarity", "scanType", "centroided", "deisotoped",
"chargeDeconvoluted", "retentionTime", "ionisationEnergy",
"collisionEnergy", "cidGasPressure", "totIonCurrent"
)
#-------------------------------
# local functions
#-------------------------------
ToString <- function(x, indent = 1) { # converts content of a node to a string
if (is.null(x)) {
return(NULL)
}
spaces <- if (indent > 0) Paste(rep(" ", indent)) else ""
Name <- XML::xmlName(x, TRUE)
val <- XML::xmlValue(x)
if (Name == "text") {
return(Paste(spaces, val, "\n"))
}
if (!is.null(XML::xmlAttrs(x))) {
att <- paste(names(XML::xmlAttrs(x)), paste("\"", XML::xmlAttrs(x),
"\"",
sep = ""
), sep = "=", collapse = " ")
att <- paste(" ", att, sep = "")
} else {
att <- ""
}
chl <- ""
for (i in XML::xmlChildren(x)) chl <- Paste(chl, ToString(i, indent + 1))
if (chl == "") {
Str <- Paste(spaces, "<", Name, att, "/>\n")
} else {
Str <- Paste(spaces, "<", Name, att, ">\n", chl, spaces, "</", Name, ">\n")
}
return(Str)
}
CatNodes <- function(x, Name, indent = 2) { # concatinate strings of several nodes
Str <- NULL
for (y in XML::xmlElementsByTagName(x, Name)) {
Str <- paste(Str, ToString(y, indent), sep = "")
}
return(Str)
}
read.mzXML.scan <- function(x) { # process scan section of mzXML file
if (is.null(x)) {
return(NULL)
}
if (XML::xmlName(x) != "scan") {
return(NULL)
}
scanOrigin <- precursorMz <- nameValue <- maldi <- mass <- peaks <- NULL
att <- XML::xmlAttrs(x)
num <- as.integer(att["num"])
msLevel <- as.integer(att["msLevel"])
peaksCount <- as.integer(att["peaksCount"]) # Total number of m/z-intensity pairs in the scan
msk <- names(att) %in% OptScanAttr
if (sum(msk) == 0) {
scanAttr <- ""
} else {
scanAttr <- paste(names(att[msk]), paste("\"", att[msk], "\"", sep = ""), sep = "=", collapse = " ")
scanAttr <- paste(" ", scanAttr, sep = "")
}
maldi <- ToString(x[["maldi"]])
scanOrigin <- CatNodes(x, "scanOrigin", 3)
nameValue <- CatNodes(x, "nameValue", 3)
precursorMz <- CatNodes(x, "precursorMz", 3)
precursorMz <- gsub("\n ", " ", precursorMz)
for (y in XML::xmlElementsByTagName(x, "scan")) ParentID[as.integer(XML::xmlAttrs(y)["num"])] <<- num
y <- x[["peaks"]]
att <- XML::xmlAttrs(y)
peaks <- XML::xmlValue(y) # This is the actual data encoded using base64
precision <- att["precision"] # nr of bits used by each component (32 or 64)
byteOrder <- att["byteOrder"] # Byte order of the encoded binary information (must be network)
# [Modification_by_JaLi:] ProteoWizard exported "contentType" instead of "pairOrder" field
if ("contentType" %in% names(att)) pairOrder <- att["contentType"] # Order of the m/z intensity pairs (must be m/z-int)
if ("pairOrder" %in% names(att)) pairOrder <- att["pairOrder"] # Order of the m/z intensity pairs (must be m/z-int)
endian <- ifelse(byteOrder == "network", "big", "little")
if (precision == "32") {
size <- 4
} else if (precision == "64") {
size <- 8
} else {
stop("read.mzXML.scan: incorrect precision attribute of peaks field")
}
if (is.na(pairOrder) | pairOrder != "m/z-int") {
warning("read.mzXML.scan: incorrect pairOrder attribute of peaks field")
}
if (peaksCount > 0) {
# [Modification_by_JaLi:]
p <- base64decode(peaks, "double", endian = endian, size = size)
np <- length(p) %/% 2
if (np != peaksCount) {
warning(
"read.mzXML.scan: incorrect 'peakCount' attribute of 'peaks' field: expected ",
peaksCount, ", found ", np, " ", (3 * ((nchar(peaks) * size) / 4)) / 2, " (scan #", num, ")"
)
}
dim(p) <- c(2, np)
mass <- p[1, ]
peaks <- p[2, ]
}
# x$children=NULL; # needed to capture the header
# header <<- toString(x)
return(list(
mass = mass, peaks = peaks, num = num, parentNum = num,
msLevel = msLevel, scanAttr = scanAttr, maldi = maldi,
scanOrigin = scanOrigin, precursorMz = precursorMz, nameValue = nameValue
))
}
#---------------------------------------------------------
# the instructions how to parse each section of mzXML file
#---------------------------------------------------------
list(
mzXML = function(x, ...) {
y <- x[["sha1"]]
sha1[1] <<- if (!is.null(y)) XML::xmlValue(y) else 0
x$children <- NULL
obj$header <<- toString(x, terminate = FALSE)
NULL
},
msRun = function(x, ...) {
y <- x[["sha1"]]
sha1[2] <<- if (!is.null(y)) XML::xmlValue(y) else 0
obj$msInstrument <<- ToString(x[["msInstrument"]], 2)
obj$separation <<- ToString(x[["separation"]], 2)
obj$spotting <<- ToString(x[["spotting"]], 2)
obj$parentFile <<- CatNodes(x, "parentFile")
obj$dataProcessing <<- CatNodes(x, "dataProcessing")
NULL
},
scan = function(x, ...) {
iScan <<- iScan + 1
obj$scan[[iScan]] <<- read.mzXML.scan(x)
x$children <- NULL
x
},
data = function() {
if (is.null(obj$header)) {
NULL
} else {
list(mzXML = obj, ParentID = ParentID, sha1 = sha1)
}
}
) # end of list of handler functions
} # done with local functions
#---------------------------------
# begining of read.mzXML function
#---------------------------------
if (!is.character(filename)) stop("read.mzXML: 'filename' has to be a string")
if (length(filename) > 1) filename <- paste(filename, collapse = "") # combine characters into a string
sha1File <- digest::digest(filename, algo = "sha1", file = TRUE)
x <- XML::xmlTreeParse(
file = filename, handlers = mzXMLhandlers(),
addAttributeNamespaces = TRUE
)$ data()
if (is.null(x)) { # is this file a mzXML file ?
stop("read.mzXML: This is not mzXML file")
}
mzXML <- x$mzXML
sha1Read <- x$sha1
# sort scans into correct order; find parent numbers of recursive nodes
n <- length(mzXML$scan)
NumID <- integer(n)
for (i in 1:n) {
NumID[i] <- mzXML$scan[[i]]$num
mzXML$scan[[i]]$scanOrigin <- paste("<scanOrigin parentFileID='", sha1File,
"' num='", NumID[i], "'/>\n",
sep = ""
)
}
mzXML$scan <- mzXML$scan[order(NumID)]
for (i in 1:n) {
if (!is.na(x$ParentID[i])) {
mzXML$scan[[i]]$parentNum <- x$ParentID[i]
} else {
x$ParentID[i] <- mzXML$scan[[i]]$parentNum
}
}
mzXML$scan <- mzXML$scan[order(x$ParentID)]
## read sha1 section
n <- sum(as.integer(lapply(sha1Read, is.character))) # how many sha1 were found
if (n > 0) {
## sha1 - sha-1 sum for this file (from the beginning of the file up to
## (and including) the opening tag of sha1
if (is.null(sha1Read[[1]])) sha1Read[[1]] <- sha1Read[[2]]
sha1Pos <- fregexpr("<sha1>", filename) + 6 # 6 = length("<sha1>")
for (i in n) { # multiple sha1 sections are possible
sha1Calc <- digest::digest(filename, algo = "sha1", file = TRUE, length = sha1Pos[i] - 1)
if (sha1Read[[i]] != sha1Calc) {
warning(
"Stored and calculated Sha-1 sums do not match (stored '",
sha1Read[[i]], "'; calculated '", sha1Calc, "')"
)
}
}
}
# strip mzXML terminator from header section
mzXML$header <- gsub("/>", ">\n", mzXML$header)
mzXML$header <- gsub("^ +", "", mzXML$header)
# Remove incorrect "-quotes inserted in 2.10.0
mzXML$header <- gsub("[\u0093\u0094\u201C\u201D]", '"', mzXML$header)
# add info about parent file (the file we just read)
mzXML$parentFile <- Paste(
mzXML$parentFile, " <parentFile filename='file://",
filename, "' fileType='processedData' fileSha1='", sha1File, "'/>\n"
)
return(mzXML)
}
#' @param mzXML The generic mzXML object
#' @param precision Either '32' or '64' byte.
#'
#' @return
#' Nothing. mzXML object is exported to filename. If it was imported using \code{\link{read.mzXML}} it should contain all previous fields.
#'
#' @export
#' @rdname mzXML
write.mzXML <- function(mzXML, filename, precision = c("32", "64")) {
# ===========================================================================#
# Written by Jarek Tuszynski. Copyright 2001-2003 SAIC. #
# Software developed in conjunction with the National Cancer Institute #
# Distributed under GNU General Public License version 3 #
# ===========================================================================#
# verify that suggested packages are available
verify_suggested(c("digest", "bitops"))
# helper functions
Paste <- function(...) paste(..., sep = "", collapse = "")
fprintf <- function(fp, level, ..., append = TRUE) {
x <- paste(..., sep = "")
if (length(x) == 0 || is.null(x)) {
return(NULL)
}
spaces <- if (level > 0) Paste(rep(" ", level)) else ""
x <- gsub("'", "\"", x)
cat(spaces, x, file = fp, sep = "")
NULL
}
# done with local functions
precision <- match.arg(precision)
if (!is.character(filename)) stop("read.mzXML: 'filename' has to be a string")
if (length(filename) > 1) filename <- paste(filename, collapse = "") # combine characters into a string
fp <- file(filename, "w")
if (is.null(mzXML)) {
stop("write.mzXML: Variable mzXML has to be an instance of class mzXML")
}
if (attr(mzXML, "class") != "mzXML") {
stop("write.mzXML: Variable mzXML has to be an instance of class mzXML")
}
#------------------------------------
# Fill-in required sections if empty
#------------------------------------
if (is.null(mzXML$header)) {
Str <- "http://sashimi.sourceforge.net/schema_revision/mzXML_2.1"
mzXML$header <- Paste(
"<mzXML xmlns='", Str, "'\n ",
"xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'\n ",
"xsi:schemaLocation='", Str, " ", Str, "/mzXML_idx_2.1.xsd'>\n"
)
}
if (is.null(mzXML$parentFile)) {
mzXML$parentFile <- Paste(
" <parentFile filename='file://unknown' ",
"fileType='RAWData' fileSha1='0000000000000000000000000000000000000000'/>\n"
)
}
if (is.null(mzXML$dataProcessing)) {
Version <- utils::packageDescription("caMassClass")$Version
Time <- format(Sys.time(), "%Y-%m-%dT%H:%M:%S")
mzXML$dataProcessing <- Paste(
" <dataProcessing>\n",
" <software type='processing' name='cran.r-project.org/caMassClass' ",
"version='", Version, "' completionTime='", Time, "'/>\n </dataProcessing>"
)
}
#-----------------------------
# Write beggining of file
#-----------------------------
fprintf(fp, 0, "<?xml version='1.0' encoding='ISO-8859-1'?>\n", append = FALSE)
fprintf(fp, 0, mzXML$header)
fprintf(fp, 1, "<msRun scanCount='", length(mzXML$scan), "'>\n")
fprintf(fp, 0, mzXML$parentFile)
fprintf(fp, 0, mzXML$msInstrument)
fprintf(fp, 0, mzXML$dataProcessing)
fprintf(fp, 0, mzXML$separation)
fprintf(fp, 0, mzXML$spotting)
#---------------------------------
# Write scan Section
#---------------------------------
indexScan <- " <index name='scan'>\n"
n <- length(mzXML$scan)
Num <- integer(n)
for (i in 1:n) Num[i] <- mzXML$scan[[i]]$num
mzXML$scan <- mzXML$scan[order(Num)]
for (i in 1:n) Num[i] <- mzXML$scan[[i]]$parentNum
mzXML$scan <- mzXML$scan[order(Num)]
for (i in 1:n) Num[i] <- mzXML$scan[[i]]$msLevel
Num <- 1 - diff(c(Num, 1)) # number of </scan> after each scan
size <- (if (precision == "32") 4 else 8)
for (i in 1:n) {
if (!is.null(mzXML$scan[[i]])) {
indexScan <- Paste(
indexScan, " <offset id='", mzXML$scan[[i]]$num, "'>",
seek(fp), "</offset>\n"
)
mass <- mzXML$scan[[i]]$mass
peaks <- mzXML$scan[[i]]$peaks
stopifnot(length(mass) == length(peaks))
ScanHeader <- Paste(
"<scan num='", mzXML$scan[[i]]$num,
"' msLevel='", mzXML$scan[[i]]$msLevel,
"' peaksCount='", length(peaks),
"' lowMz='", min(mass),
"' highMz='", max(mass),
"'", mzXML$scan[[i]]$scanAttr, ">\n"
)
fprintf(fp, 2, ScanHeader)
fprintf(fp, 3, mzXML$scan[[i]]$scanOrigin)
fprintf(fp, 0, mzXML$scan[[i]]$precursorMz)
fprintf(fp, 0, mzXML$scan[[i]]$maldi)
p <- as.vector(rbind(mass, peaks))
fprintf(fp, 3, Paste("<peaks precision='", precision, "' byteOrder='network' pairOrder='m/z-int'>", base64encode(p, endian = "big", size = size), "</peaks>\n"))
fprintf(fp, 0, mzXML$scan[[i]]$nameValue)
if (Num[i]) for (j in 1:Num[i]) fprintf(fp, 2, "</scan>\n")
}
}
indexScan <- Paste(indexScan, " </index>\n")
#---------------------------------
# Write end of file
#---------------------------------
fprintf(fp, 1, "</msRun>\n")
n <- seek(fp)
fprintf(fp, 0, indexScan)
fprintf(fp, 1, "<indexOffset>", n, "</indexOffset>\n")
cat(" <sha1>", file = fp, sep = "")
n <- seek(fp)
close(fp)
sha1 <- digest::digest(filename, algo = "sha1", file = TRUE, length = n)
cat(sha1, "</sha1>\n</mzXML>\n", file = filename, append = TRUE, sep = "")
invisible(NULL)
}
#' @rdname mzXML
new.mzXML = function()
{
object = list(
header = NULL, # required - list - Path to all the ancestor files (up to the native acquisition file) used to generate the current XML instance document.
parentFile = NULL, # required - list - Path to all the ancestor files (up to the native acquisition file) used to generate the current XML instance document.
dataProcessing = NULL, # required - list - Description of any manipulation (from the first conversion to mzXML format until the creation of the current mzXML instance document) applied to the data.
msInstrument = NULL, # optional - element - General information about the MS instrument.
separation = NULL, # optional - element - Information about the separation technique, if any, used right before the acquisition.
spotting = NULL, # optional - element - Acquisition independent properties of a MALDI experiment.
scan = vector(mode="list")
)
class(object) <- "mzXML"
return(object)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.