#' @title Convert mz data to disk.frame
#'
#' @description Convert .mzML file to disk.frame using mzR, data.table, and
#' disk.frame
#'
#' @details The file is opened once with \code{mzR::openMSfile()}, the scans
#' are split into chunks by \code{.scanChunker()}, and each chunk is imported
#' and appended to the disk.frame in turn. The mzR file handle is closed when
#' the function exits, whether it returns normally or stops with an error.
#'
#' As a side effect the function calls \code{disk.frame::setup_disk.frame()}
#' and sets \code{options(future.globals.maxSize = Inf)}; neither is restored
#' on exit.
#'
#' @param path path to the mzML file
#' @param diskFramePath path and fileName.df specifying write location of .df
#' directory. Must not point to a directory that already exists, otherwise an
#' error is raised.
#' @param scans Optional parameter. Provide a numeric vector to import select
#' scans and write to a disk.frame. If not provided, the whole file is
#' converted. Default is NULL.
#' @param chunkSize number of scans to be extracted and written to the
#' disk.frame at a time. Useful for breaking up large data files and
#' converting to disk.frame. Default is 100. If NULL, imports all scans in
#' single operation and you will probably run out of memory.
#'
#' @return Returns a disk.frame reference object.
#'
#' @export
#'
#' @examples
#' \dontrun{
#' #read .mzML file from system path and write to disk.frame
#' #100 scans (default) at a time
#' mzML2diskFrame(path = path_2_mzML,
#' diskFramePath = "dfPath.df")
#'
#' #read scans 100-200 from .mzML file specified by a system path
#' #and write to disk.frame 20 scans at a time
#' mzML2diskFrame(path = path_2_mzML,
#' diskFramePath = "dfPath.df",
#' scans = c(100:200),
#' chunkSize = 20)
#' }
#'
mzML2diskFrame <- function(path, diskFramePath, scans = NULL, chunkSize = 100){
  #Refuse to write into an existing directory
  if(dir.exists(diskFramePath)){
    stop("diskFramePath leads to directory location that already exists")
  }
  #Link to the file
  file <- mzR::openMSfile(filename = path, verbose = TRUE)
  #Release the file handle on every exit path (normal return or error)
  on.exit(mzR::close(file), add = TRUE)
  #Construct index to group the scans into chunks
  scanChunks <- .scanChunker(scans = scans,
                             mzRfilePointer = file,
                             chunkSize = chunkSize)
  #Setup disk.frame backend
  disk.frame::setup_disk.frame()
  #Session-wide option; intentionally left set after the function returns
  options(future.globals.maxSize = Inf)
  #Create disk.frame
  diskF <- disk.frame::disk.frame(path = diskFramePath)
  #Get scans and write to disk.frame, one chunk at a time. The per-chunk
  #results are not used, so they are not stored.
  mapply(FUN = .mzML2diskFrameChunk,
         scans = scanChunks,
         MoreArgs = list(path = file,
                         diskFrame = diskF),
         SIMPLIFY = FALSE)
  #Cleanup
  gc()
  diskF
}
#' @title Write data.table of mzML/mzXML data to a disk.frame
#'
#' @description Internal function. Imports the requested scans with
#' mzML2dataTable(), prints a one-line progress report, and appends the
#' imported table to the supplied disk.frame as a new chunk.
#' @param path path or mzR pointer to the mzML file
#' @param diskFrame disk.frame pointer object created by disk.frame()
#' @param scans a numeric vector to import select scans and write to a
#' disk.frame - likely generated by .scanChunker(). If NULL, the progress
#' report states that all scans are being written.
#'
#' @return NULL (invisibly)
#'
.mzML2diskFrameChunk <- function(path, diskFrame, scans){
  #Import first, so the progress line is only printed once data is in hand
  chunkData <- mzML2dataTable(path = path, scans = scans)
  #Build the progress line for this chunk
  progressLine <- if(is.null(scans)){
    "Writing all scans to disk.frame"
  }else{
    paste("Writing scans from:", min(scans), "to", max(scans), "to disk.frame")
  }
  print(progressLine)
  #Append the imported table to the disk.frame
  disk.frame::add_chunk(df = diskFrame, chunk = chunkData)
  #Drop the local copy of the chunk
  rm(chunkData)
  invisible(NULL)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.