#'@title InitDataObjects
#'@description This functions handles the construction of a mSetObj object for storing data for further processing and analysis.
#'It is necessary to utilize this function to specify to MetaboAnalystR the type of data and the type of analysis you will perform.
#'@usage InitDataObjects(data.type, anal.type, paired=FALSE)
#'@param data.type The type of data, either list (Compound lists), conc (Compound concentration data),
#'specbin (Binned spectra data), pktable (Peak intensity table), nmrpeak (NMR peak lists), mspeak (MS peak lists),
#'or msspec (MS spectra data)
#'@param anal.type Indicate the analysis module to be performed: stat, pathora, pathqea, msetora, msetssp, msetqea, ts,
#'cmpdmap, smpmap, or pathinteg
#'@param paired Indicate if the data is paired or not. Logical, default set to FALSE
#'@author Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#' @return will initialize an mSet object
#' @export
#' @import methods
#' @import BiocParallel
#' @importFrom Cairo CairoFonts
#' @examples
#' mSet<-InitDataObjects("spec", "raw", FALSE)
InitDataObjects <- function(data.type, anal.type, paired=FALSE){
if(anal.type == "raw" & data.type == "spec") {
MessageOutput("OptiLCMS R objects initialized ...\n", SuppressWeb = TRUE);
return(new("mSet"))
}
}
#' @title Import raw MS data
#' @description This function handles the reading in of
#' raw MS data (.mzML, .CDF and .mzXML). Users must set
#' their working directory to the folder containing their raw
#' data, divided into two subfolders named their desired group labels. The
#' function will output two chromatograms into the user's working directory, a
#' base peak intensity chromatogram (BPIC) and a total ion
#' chromatogram (TIC). Further, this function sets the number of cores
#' to be used for parallel processing. It first determines the number of cores
#' within a user's computer and then sets it that number/2.
#' @param mSet mSet Object, can be optional. Usually generated by InitDataObjects("spec", "raw", FALSE) before the data import.
#' @param path Character, input the path to the folder containing
#' the raw MS spectra to be processed. Or a character vector containing all raw files absolute paths.
#' @param metadata Data.frame or character. A phenotype data frame or a absolute path of the metadata file (.txt) for all samples, optional.
#' In the option, first column should be the sample name, while second column is the corresponding group name. If ommited, all samples in the same sub-folder will be
#' considered as one group.
#' @param mode Character, the data input mode. Default is "onDisk" to avoid memory crash. "inMemory" will
#' read data into memory.
#' @param plotSettings List, plotting parameters produced by SetPlotParam Function. "plot.opts" can be added through this
#' function for samples numbers for plotting. Defalut is "default", "all" will apply all samples for plotting and may cause
#' memory crash, especially for large sample dataset.
#' @param running.controller The resuming pipeline running controller. Optional. Don't need to define by hand.
#' @author Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca}, Jasmine Chong \email{jasmine.chong@mail.mcgill.ca},
#' Mai Yamamoto \email{yamamoto.mai@mail.mcgill.ca}, and Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)
#' @export
#' @import MSnbase
#' @import BiocParallel
#' @import parallel
#' @import RColorBrewer
#' @importFrom tools file_path_as_absolute file_ext
#' @importFrom Cairo Cairo
#' @importFrom stats na.omit
#' @importFrom graphics legend
#' @importFrom grDevices dev.off
#' @return will return a mSet object will raw data read inside.
#' @examples
#' ##' Get raw spectra files
#' DataFiles <- dir(system.file("mzData", package = "mtbls2"), full.names = TRUE,
#' recursive = TRUE)[c(10:12, 14:16)]
#' ##' Create a phenodata data.frame
#' pd <- data.frame(sample_name = sub(basename(DataFiles), pattern = ".mzData",
#' replacement = "", fixed = TRUE),
#' sample_group = c(rep("col0", 3), rep("cyp79", 3)),
#' stringsAsFactors = FALSE)
#' ##' Import raw spectra
#' mSet <- ImportRawMSData(path = DataFiles, metadata = pd);
ImportRawMSData <-
function(mSet = NULL,
path = getwd(),
metadata = NULL,
mode = "onDisk",
plotSettings = SetPlotParam(),
running.controller = NULL) {
#Initialize a new mSet if missing
if(missing(mSet) & !.on.public.web()){
message("No initialized mSet found, will initialize one automatically !")
mSet <- new("mSet")
} else if(is.null(mSet) & !.on.public.web()){
message("Not a real initialized mSet found, will re-initialize one automatically !")
mSet <- new("mSet")
} else if(.on.public.web() & file.exists("mSet.rda")) {
load("mSet.rda")
} else {
message("Something wierd happened, don't worry. We will re-initialize one automatically !")
mSet <- new("mSet")
}
#Build Running plan for data import - Indentify the controller
if (is.null(running.controller)) {
c1 <- TRUE;
c2 <- TRUE;
plan_switch <- FALSE;
} else {
plan_switch <- TRUE;
c1 <-
running.controller@data_import[["c1"]] # used to control data import
c2 <-
running.controller@data_import[["c2"]] # used to control plotting option
}
if(!exists(".SwapEnv")){
.SwapEnv <<- new.env(parent = .GlobalEnv);
.SwapEnv$.optimize_switch <- FALSE;
.SwapEnv$count_current_sample <- 0;
.SwapEnv$count_total_sample <- 120; # maximum number for on.public.web
.SwapEnv$envir <- new.env();
}
start.time <- Sys.time()
## Deal with the different file path cases
if(.on.public.web() && !dir.exists(path)){
path <- "/home/glassfish/projects/MetaboDemoRawData/upload/"
}
files <- Path2Files(path = path);
if (length(files) == 0) {
MessageOutput(
mes = paste0(
"<font color=\"red\">",
"\nERROR: No standard MS file found ! Please check the extension of your data.",
"</font>"
),
ecol = "\n",
progress = NULL
)
stop()
} else if (length(files) < 3) {
MessageOutput(
mes = paste0(
"<font color=\"red\">",
"\nERROR: At least 3 samples should be provided.",
"</font>"
),
ecol = "\n",
progress = NULL
)
}
.SwapEnv$count_total_sample <- length(files);
.SwapEnv$count_current_sample <- 0;
toRemove = vector();
# Update first
if(length(mSet@rawfiles) == 0){
rawfilenms <- basename(files);
mSet@rawfiles <- files;
} else {
rawfilenms <- basename(mSet@rawfiles);
}
for (i in seq_along(files)) {
file = basename(files[i])
if (!(file %in% rawfilenms)) {
toRemove = c(toRemove, files[i])
}
}
toKeepInx = !(files %in% toRemove);
files = files[toKeepInx];
####### -------------- Files ready ------------------#
if(missing(metadata) | is.null(metadata)){
warning("No metadata provided, all files in one sub-folder will be considered a group!")
snames <- gsub("\\.[^.]*$", "", basename(files))
# msg <- c(msg, paste("A total of ", length(files), "samples were found."))
sclass <- gsub("^\\.$", "sample", dirname(files))
scomp <- strsplit(substr(sclass, 1, min(nchar(sclass))), "", fixed = TRUE)
scomp <- matrix(c(scomp, recursive = TRUE), ncol = length(scomp))
i <- 1
while (all(scomp[i, 1] == scomp[i, -1]) && i < nrow(scomp)) {
i <- i + 1
}
i <-
min(i, tail(c(0, which(
scomp[seq_len(i), 1] == .Platform$file.sep
)), n = 1) + 1)
if (i > 1 && i <= nrow(scomp)) {
sclass <- substr(sclass, i, max(nchar(sclass)))
}
if (.on.public.web() &
unique(sclass)[1] == "upload" &
length(unique(sclass)) == 1) {
sclass <- rep("Unknown", length(sclass))
}
} else if(is.data.frame(metadata)) {
metadata <- metadata;
} else if(is.character(metadata)){
metadata <- read.table(metadata)
}
filesNM <- sub(pattern = "(.*)\\..*$", replacement = "\\1", basename(files));
if (!is.null(metadata)){
if(!all(basename(files) %in% metadata[,1])){
if(!all(basename(filesNM) %in% metadata[,1])){
warning("More files detected than metadata, will omit the extra files not in metadata!")
}
}
snames <- metadata[metadata[,1] %in% c(basename(files), filesNM), 1]
sclass <- metadata[metadata[,1] %in% c(basename(files), filesNM), 2]
}
if(length(snames) == 0 | length(sclass) == 0){
stop("No correct sample/sclass found!")
}
# remove all MS2 files when doing MS1 data processing
if(any(sclass=="MS2")){
ms2_idx <- (sclass=="MS2");
sclass <- sclass[!ms2_idx]
snames <- snames[!ms2_idx]
file_ms1_idx <- vapply(files, FUN = function(x){
res <- vapply(snames, function(y){grepl(y, x)}, FUN.VALUE = logical(1L))
any(res)
}, FUN.VALUE = logical(1L))
files <- files[file_ms1_idx]
}
pd <- data.frame(
sample_name = snames,
sample_group = sclass,
stringsAsFactors = FALSE
)
if(.SwapEnv$.optimize_switch){
MessageOutput(mes = "\nStep 2/6: Start to import the spectrum! \nThis step will take a short time...",
ecol = "\n",
progress = 21.0)
} else {
MessageOutput(mes = "\nStep 1/6: Start to import the spectrum! \nThis step will take a short time...",
ecol = "\n",
progress = 10.0)
}
if (c1) {
raw_data <-
tryCatch(read.MSdata(
files = files,
pdata = new("NAnnotatedDataFrame", pd),
mode = mode,
msLevel. = 1
), error = function(e) {e})
if (!(is(raw_data,"OnDiskMSnExp") | is(raw_data,"MSnExp"))) {
MessageOutput(
mes = paste0(
"<font color=\"red\">",
"\nERROR:",
raw_data$message,
"</font>"
),
ecol = "\n",
progress = 1
)
stop("EXCEPTION POINT CODE: RMS2")
}
if(plan_switch){
cache.save(raw_data, funpartnm = "data_import_c1");
marker_record("data_import_c1");
}
} else {
raw_data <- cache.read ("data_import", "c1")
marker_record("data_import_1")
}
raw_data <- SanitySpectra(raw_data, cutoff = 0.05);
MessageOutput(NULL, NULL, 22)
if (c2) {
if (plotSettings$Plot == TRUE) {
if (is.null(plotSettings$plot.opts)) {
plot.opts <- "default"
} else {
plot.opts <- plotSettings$plot.opts
}
if (plot.opts == "default") {
#subset raw_data to first 50 samples
MessageOutput("To reduce memory usage BPIS and TICS plots will be created using only 10 samples per group.", SuppressWeb = TRUE)
grp_nms <- names(table(pd$sample_group))
files <- NA
for (i in seq_along(grp_nms)) {
numb2ext <- min(table(pd$sample_group)[i], 10)
filt_df <- pd[pd$sample_group == grp_nms[i], ]
files.inx <- sample(nrow(filt_df), numb2ext)
sel.samples <- filt_df$sample_name[files.inx]
files <-
c(files, which(pd$sample_name %in% sel.samples))
}
raw_data_filt <-
filterFile(raw_data, file = na.omit(files))
} else{
raw_data_filt <- raw_data
# just for plotting
}
if (plot.opts == "all") {
h <-
readline(prompt = "Using all samples to create BPIS and TICS plots may cause severe memory issues! Press [0] to continue, or [1] to cancel: ")
h <- as.integer(h)
if (h == 1) {
MessageOutput("ImportRawMSData function aborted!\n")
return(0)
}
}
MessageOutput("Plotting BPIS and TICS.")
# Plotting functions to see entire chromatogram
bpis <- chromatogram(raw_data_filt, aggregationFun = "max", BPPARAM = MulticoreParam(4))
tics <- chromatogram(raw_data_filt, aggregationFun = "sum", BPPARAM = MulticoreParam(4))
groupInfo <-as.factor(pd$sample_group);
groupNum <- nlevels(groupInfo)
if (groupNum > 9) {
col.fun <-
grDevices::colorRampPalette(RColorBrewer::brewer.pal(12, "Set3"))
group_colors <- col.fun(groupNum)
} else{
group_colors <-
paste0(RColorBrewer::brewer.pal(9, "Set1")[seq_len(groupNum)], "60")
}
names(group_colors) <- levels(groupInfo)
bpis_name <-
paste("BPIS_",
plotSettings$dpi,
".",
plotSettings$format,
sep = "")
tics_name <-
paste("TICS_",
plotSettings$dpi,
".",
plotSettings$format,
sep = "")
if(.on.public.web()){
save(raw_data_filt, file = "raw_data_filt.rda");
save(tics, file = "tics.rda");
save(bpis, file = "bpis.rda");
}
if (.on.public.web()) {
Cairo::Cairo(
file = bpis_name,
unit = "in",
dpi = plotSettings$dpi,
width = 9,
height = 6.75,
type = plotSettings$format,
bg = "white"
)
}
plot(bpis, col = group_colors[raw_data_filt$sample_group])
legend(
"topright",
legend = levels(groupInfo),
pch = 15,
col = group_colors
)
if (.on.public.web()) {
dev.off()
Cairo::Cairo(
file = tics_name,
unit = "in",
dpi = plotSettings$dpi,
width = 9,
height = 6.75,
type = plotSettings$format,
bg = "white"
)
}
plot(tics, col = group_colors[raw_data_filt$sample_group])
legend(
"topright",
legend = levels(groupInfo),
pch = 15,
col = group_colors
)
if (.on.public.web()) {
dev.off()
}
}
if (plan_switch) {
marker_record("data_import_c2")
}
}
if(.SwapEnv$.optimize_switch){
MessageOutput(
mes = paste0(
"Step 2/6: Successfully imported raw MS data! (",
Sys.time(),
") \nGoing to the next step..."
),
ecol = "\n",
progress = 24
)
} else {
MessageOutput(
mes = paste0(
"Step 1/6: Successfully imported raw MS data! (",
Sys.time(),
") \nGoing to the next step..."
),
ecol = "\n",
progress = 18
)
}
if(mode == "onDisk"){
mSet@rawOnDisk <- raw_data;
} else if(mode == "inMemory"){
mSet@rawInMemory <- raw_data;
}
.SwapEnv$.optimize_switch <- FALSE;
if(.on.public.web()){
save(mSet, file = "mSet.rda");
}
return(mSet)
}
read.MSdata <- function(files,
pdata = NULL,
msLevel. = NULL,
centroided. = NA,
smoothed. = NA,
cache. = 1L,
mode = c("inMemory", "onDisk")) {
mode <- match.arg(mode)
## o normalize the file path, i.e. replace relative path with absolute
## path. That fixes possible problems on Windows with SNOW parallel
## processing and also proteowizard problems on unis system with ~ paths.
files <- normalizePath(files)
suppressWarnings(.hasChroms <- MSnbase::hasChromatograms(files))
MessageOutput ("Raw file import begin...", "\n", NULL)
if (!length(files)) {
process <- new("MSnProcess",
processing = paste("No data loaded:", date()))
if (mode == "inMemory")
res <- new("MSnExp",
processingData = process)
else res <- new("OnDiskMSnExp",
processingData = process)
} else {
if (mode == "inMemory") {
if (is.null(msLevel.)) msLevel. <- 2L
res <- read.InMemMSd.data(files, pdata = pdata, msLevel. = msLevel.,
centroided. = centroided., smoothed. = smoothed., cache. = cache.)
} else { ## onDisk
res <- read.OnDiskMS.data(files = files, pdata = pdata,
msLevel. = msLevel., centroided. = centroided., smoothed. = smoothed.)
}
}
res
}
#' @import utils
read.InMemMSd.data <- function(files,
pdata,
msLevel.,
centroided.,
smoothed.,
cache. = 1) {
.testReadMSDataInput(environment())
if (isCdfFile(files)) {
if(missing(msLevel.)){
msLevel. <- 1;
}
isCDF <- TRUE;
} else {
isCDF <- FALSE;
}
if (msLevel. == 1) cache. <- 0
msLevel. <- as.integer(msLevel.)
## Creating environment with Spectra objects
assaydata <- new.env(parent = emptyenv())
ioncount <- c()
ioncounter <- 1
filenams <- filenums <- c()
fullhd2 <- fullhdorder <- c()
fullhdordercounter <- 1
.instrumentInfo <- list()
## List eventual limitations
## ## Idea:
## ## o initialize a featureData-data.frame,
## ## o for each file, extract header info and put that into
## featureData;
count.idx <- 0;
for (f in files) {
filen <- match(f, files)
filenums <- c(filenums, filen)
filenams <- c(filenams, f)
## issue #214: define backend based on file format.
msdata <- mzR::openMSfile(f,backend = NULL)
## Extract Instrument Information
instruInfo <- try(mzR::instrumentInfo(msdata), silent = TRUE);
if(is(instruInfo,"list")){
.instrumentInfo <- c(.instrumentInfo, list(instruInfo));
} else {
MessageOutput("Some instrument related information is missing. But the whole process is still running...")
.instrumentInfo <- c(.instrumentInfo, list());
}
fullhd <- mzR::header(msdata)
## Issue #325: get centroided information from file, but overwrite if
## specified with centroided. parameter.
if (!is.na(centroided.))
fullhd$centroided <- as.logical(centroided.)
spidx <- which(fullhd$msLevel == msLevel.)
## increase vectors as needed
ioncount <- c(ioncount, numeric(length(spidx)))
fullhdorder <- c(fullhdorder, numeric(length(spidx)))
if (msLevel. == 1) {
if (length(spidx) == 0)
stop("No MS1 spectra in file: ",basename(f))
if (.on.public.web()){
print_mes <- paste0("Importing ",basename(f),":");
write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = " ");
} else {
pb <- progress_bar$new(format = "Reading [:bar] :percent Time left: :eta",
total = length(spidx), clear = TRUE, width= 75)
}
k_count <- 0;
if(isCDF){
Allpeaks <- mzR::peaks(msdata);
for (i in seq_along(spidx)){
if (!.on.public.web()){
pb$tick();
} else {
if (round(i/length(spidx),digits = 4)*100 - k_count > -0.2){
print_mes <- paste0(k_count,"% | ");
write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = " ");
k_count <- k_count +20;
}
}
j <- spidx[i]
hd <- fullhd[j, ]
## Fix missing polarity from netCDF
pol <- hd$polarity
if (length(pol) == 0)
pol <- NA
sp <- new("Spectrum1",
rt = hd$retentionTime,
acquisitionNum = as.integer(hd$acquisitionNum),
scanIndex = as.integer(hd$seqNum),
tic = hd$totIonCurrent,
mz = Allpeaks[[j]][, 1],
intensity = Allpeaks[[j]][, 2],
fromFile = as.integer(filen),
centroided = as.logical(hd$centroided),
smoothed = as.logical(smoothed.),
polarity = as.integer(pol))
## peaksCount
ioncount[ioncounter] <- sum(Allpeaks[[j]][, 2])
ioncounter <- ioncounter + 1
.fname <- formatFileSpectrumNames(
fileIds = filen,
spectrumIds = i,
nSpectra = length(spidx),
nFiles = length(files)
)
assign(.fname, sp, assaydata)
fullhdorder[fullhdordercounter] <- .fname
fullhdordercounter <- fullhdordercounter + 1
}
rm(Allpeaks);
} else {
for (i in seq_along(spidx)) {
if (!.on.public.web()){
pb$tick();
} else {
if (round(i/length(spidx),digits = 4)*100 - k_count > -0.2){
print_mes <- paste0(k_count,"% | ");
write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = " ");
k_count <- k_count +20;
}
}
j <- spidx[i]
hd <- fullhd[j, ]
## Fix missing polarity from netCDF
pol <- hd$polarity
if (length(pol) == 0)
pol <- NA
.p <- mzR::peaks(msdata, j)
sp <- new("Spectrum1",
rt = hd$retentionTime,
acquisitionNum = as.integer(hd$acquisitionNum),
scanIndex = as.integer(hd$seqNum),
tic = hd$totIonCurrent,
mz = .p[, 1],
intensity = .p[, 2],
fromFile = as.integer(filen),
centroided = as.logical(hd$centroided),
smoothed = as.logical(smoothed.),
polarity = as.integer(pol))
## peaksCount
ioncount[ioncounter] <- sum(.p[, 2])
ioncounter <- ioncounter + 1
.fname <- formatFileSpectrumNames(
fileIds = filen,
spectrumIds = i,
nSpectra = length(spidx),
nFiles = length(files)
)
assign(.fname, sp, assaydata)
fullhdorder[fullhdordercounter] <- .fname
fullhdordercounter <- fullhdordercounter + 1
}
}
} else { ## .msLevel != 1
if (length(spidx) == 0)
stop("No MS(n>1) spectra in file", f)
MessageOutput(paste("Reading ", length(spidx), " MS", msLevel.,
" spectra from file ", basename(f),"\n"))
scanNums <- fullhd[fullhd$msLevel == msLevel., "precursorScanNum"]
if (length(scanNums) != length(spidx))
stop("Number of spectra and precursor scan number do not match!")
pb <- progress_bar$new(format = "Reading [:bar] :percent Time left: :eta",
total = length(spidx), clear = TRUE, width= 75)
for (i in seq_along(spidx)) {
pb$tick();
j <- spidx[i]
hd <- fullhd[j, ]
.p <- mzR::peaks(msdata, j)
sp <- new("Spectrum2",
msLevel = as.integer(hd$msLevel),
merged = as.numeric(hd$mergedScan),
precScanNum = as.integer(scanNums[i]),
precursorMz = hd$precursorMZ,
precursorIntensity = hd$precursorIntensity,
precursorCharge = as.integer(hd$precursorCharge),
collisionEnergy = hd$collisionEnergy,
rt = hd$retentionTime,
acquisitionNum = as.integer(hd$acquisitionNum),
scanIndex = as.integer(hd$seqNum),
tic = hd$totIonCurrent,
mz = .p[, 1],
intensity = .p[, 2],
fromFile = as.integer(filen),
centroided = as.logical(hd$centroided),
smoothed = as.logical(smoothed.),
polarity = as.integer(hd$polarity))
## peaksCount
ioncount[ioncounter] <- sum(.p[, 2])
ioncounter <- ioncounter + 1
.fname <- formatFileSpectrumNames(
fileIds = filen,
spectrumIds = i,
nSpectra = length(spidx),
nFiles = length(files)
)
assign(.fname, sp, assaydata)
fullhdorder[fullhdordercounter] <- .fname
fullhdordercounter <- fullhdordercounter + 1
}
}
if (cache. >= 1)
fullhd2 <- rbind(fullhd2, fullhd[spidx, ])
gc()
mzR::close(msdata)
rm(msdata);
if (.on.public.web()){
print_mes <- paste0("Done!");
write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "\n");
count.idx <- count.idx + 1;
write.table(1.0 + count.idx/length(files)*3, file = "log_progress.txt", row.names = FALSE,col.names = FALSE);
}
MessageOutput(paste0("Import data: ",basename(f)," finished!"), ecol = "\n")
}
## cache level 2 yet implemented
cache. <- testCacheArg(cache., maxCache = 2)
if (cache. >= 1) {
fl <- sapply(assaydata, function(x) x@fromFile)
featnms <- ls(assaydata) ## feature names in final MSnExp
fl <- fl[featnms] ## reorder file numbers
stopifnot(all(base::sort(featnms) == base::sort(fullhdorder)))
fullhdorder <- match(featnms, fullhdorder)
tmphd <- fullhd2[fullhdorder, ] ## reorder
ioncount <- ioncount[fullhdorder]
newhd <- data.frame(fileIdx = fl,
retention.time = tmphd$retentionTime,
precursor.mz = tmphd$precursorMZ,
precursor.intensity = tmphd$precursorIntensity,
charge = tmphd$precursorCharge,
peaks.count = tmphd$peaksCount,
tic = tmphd$totIonCurrent,
ionCount = ioncount,
ms.level = tmphd$msLevel,
acquisition.number = tmphd$acquisitionNum,
collision.energy = tmphd$collisionEnergy)
} else {
newhd <- NULL ## not used anyway
}
.cacheEnv <- setCacheEnv(list("assaydata" = assaydata,
"hd" = newhd),
cache., lock = TRUE)
## CACHING AS BEEN SUPERSEDED BY THE OnDiskMSnExp IMPLEMENTATION
## if cache==2, do not lock assign msdata in .cacheEnv then lock
## it and do not close(msdata) above; rm(msdata) is OK
## Create 'MSnProcess' object
process <- new("MSnProcess",
processing = paste("Data loaded:", date()),
files = files,
smoothed = smoothed.)
## Create 'fdata' and 'pdata' objects
nms <- ls(assaydata)
if (is.null(pdata)) {
.pd <- data.frame(sampleNames = basename(files))
rownames(.pd) <- .pd$sampleNames
pdata <- new("AnnotatedDataFrame",
data = .pd)
}
fdata <- new("AnnotatedDataFrame",
data = data.frame(
spectrum = seq_along(nms),
row.names = nms))
fdata <- fdata[ls(assaydata)] ## reorder features
## expriment data slot
if (length(.instrumentInfo) > 1) {
cmp <- length(unique(sapply(.instrumentInfo, "[[", 1)))
if (cmp > 1)
message("According to the instrument information in the files,\n",
"the data has been acquired on different instruments!")
for (nm in names(.instrumentInfo[[1]]))
.instrumentInfo[[1]][[nm]] <- sapply(.instrumentInfo, "[[", nm)
} else if (length(.instrumentInfo) == 0){
.instrumentInfo[[1]] <- list(NULL);
.instrumentInfo[[1]]$manufacturer <- .instrumentInfo[[1]]$model <-
.instrumentInfo[[1]]$ionisation <- .instrumentInfo[[1]]$analyzer <- .instrumentInfo[[1]]$detector <-
"Unknown";
}
expdata <- new("MIAPE",
instrumentManufacturer = .instrumentInfo[[1]]$manufacturer,
instrumentModel = .instrumentInfo[[1]]$model,
ionSource = .instrumentInfo[[1]]$ionisation,
analyser = as.character(.instrumentInfo[[1]]$analyzer),
detectorType = .instrumentInfo[[1]]$detector)
## Create and return 'MSnExp' object
toReturn <- new("MSnExp",
assayData = assaydata,
phenoData = pdata,
featureData = fdata,
processingData = process,
experimentData = expdata,
.cache = .cacheEnv)
return(toReturn)
}
read.OnDiskMS.data <- function(files,
pdata,
msLevel.,
centroided.,
smoothed.) {
.testReadMSDataInput(environment())
stopifnot(is.logical(centroided.))
## Creating environment with Spectra objects
assaydata <- new.env(parent = emptyenv())
filenams <- filenums <- c()
fullhd2 <- fullhdorder <- c()
fullhdordercounter <- 1
.instrumentInfo <- list()
## List eventual limitations
if (isCdfFile(files)) {
message("You are using CDF files, please make sure they are centroided !")
}
## Idea:
## o initialize a featureData-data.frame,
featureDataList <- list()
## o for each file, extract header info and put that into featureData
##pb <- progress_bar$new(format = "Reading [:bar] :percent Time left: :eta",
# total = length(spidx), clear = TRUE, width= 75);
count_mark <- 0;
for (f in files) {
if (.on.public.web()){
print_mes <- paste(basename(f),"import done!");
write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "\n");
} else {
#pb$tick();
}
filen <- match(f, files)
filenums <- c(filenums, filen)
filenams <- c(filenams, f)
## issue #214: define backend based on file format.
msdata <- mzR::openMSfile(f,backend = NULL)
## Extract Instrument Information
instruInfo <- try(mzR::instrumentInfo(msdata), silent = TRUE);
if(is(instruInfo,"list")){
.instrumentInfo <- c(.instrumentInfo, list(instruInfo));
} else {
MessageOutput("Some instrument related information is missing. But the whole process is still running...")
.instrumentInfo <- c(.instrumentInfo, list());
}
fullhd <- mzR::header(msdata)
spidx <- seq_len(nrow(fullhd))
## Don't read the individual spectra, just define the names of
## the spectra.
fullhdorder <- c(
fullhdorder,
formatFileSpectrumNames(
fileIds = filen,
spectrumIds = seq_along(spidx),
nSpectra = length(spidx),
nFiles = length(files)
)
)
## Extract all Spectrum info from the header and put it into the featureData
fdData <- fullhd[spidx, , drop = FALSE]
## rename totIonCurrent and peaksCount, as detailed in
## https://github.com/lgatto/MSnbase/issues/105#issuecomment-229503816
names(fdData) <- sub("peaksCount", "originalPeaksCount", names(fdData))
## Add also:
## o fileIdx -> links to fileNames property
## o spIdx -> the index of the spectrum in the file.
fdData <- cbind(fileIdx = rep(filen, nrow(fdData)),
spIdx = spidx,
smoothed = rep(as.logical(smoothed.), nrow(fdData)),
fdData, stringsAsFactors = FALSE)
if (isCdfFile(f)) {
## Add the polarity columns if missing in netCDF
if (!any(colnames(fdData) == "polarity"))
fdData <- cbind(fdData, polarity = rep(as.integer(NA),
nrow(fdData)))
}
## Order the fdData by acquisitionNum to force use of acquisitionNum
## as unique ID for the spectrum (issue #103). That way we can use
## the spIdx (is the index of the spectrum within the file) for
## subsetting and extracting.
if (!all(sort(fdData$acquisitionNum) == fdData$acquisitionNum))
warning(c("Unexpected acquisition number order detected. ",
"\n")) ## see issue #160
fdData <- fdData[order(fdData$acquisitionNum), ]
featureDataList <- c(featureDataList, list(fdData))
## Fix for #151; would be nice if we could remove that at some point.
gc()
mzR::close(msdata)
rm(msdata)
}
## new in version 1.9.8
lockEnvironment(assaydata, bindings = TRUE)
.cacheEnv <- setCacheEnv(list("assaydata" = assaydata,
"hd" = NULL),
level = 0,
lock = TRUE)
## Create 'MSnProcess' object
process <- new("MSnProcess",
processing = paste0("Data loaded [", date(), "]"),
files = files,
smoothed = NA)
## Create 'fdata' and 'pdata' objects
if (is.null(pdata)) {
.pd <- data.frame(sampleNames = basename(files))
rownames(.pd) <- .pd$sampleNames
pdata <- new("AnnotatedDataFrame",
data = .pd)
}
## If we've got a featureDataList, use it
if (length(featureDataList) > 0) {
fdata <- do.call(rbind, featureDataList)
fdata <- cbind(fdata, spectrum = seq_len(nrow(fdata)),
stringsAsFactors = FALSE)
## Setting rownames on the data.frame not on the AnnotatedDataFrame;
## did get strange errors otherwise.
rownames(fdata) <- fullhdorder
## Re-order them
fdata <- fdata[base::sort(fullhdorder), ]
fdata <- new("AnnotatedDataFrame", data = fdata)
## Re-order the features.
## fdata <- fdata[ls(assaydata), ]
} else fdata <- new("AnnotatedDataFrame")
## expriment data slot
if (length(.instrumentInfo) > 1) {
cmp <- length(unique(sapply(.instrumentInfo, "[[", 1)))
if (cmp > 1)
message("According to the instrument information in the files,\n",
"the data has been acquired on different instruments!")
for (nm in names(.instrumentInfo[[1]]))
.instrumentInfo[[1]][[nm]] <- sapply(.instrumentInfo, "[[", nm)
} else if (length(.instrumentInfo) == 0){
.instrumentInfo[[1]] <- list(NULL);
.instrumentInfo[[1]]$manufacturer <- .instrumentInfo[[1]]$model <-
.instrumentInfo[[1]]$ionisation <- .instrumentInfo[[1]]$analyzer <- .instrumentInfo[[1]]$detector <-
"Unknown";
}
expdata <- new("MIAPE",
instrumentManufacturer = .instrumentInfo[[1]]$manufacturer,
instrumentModel = .instrumentInfo[[1]]$model,
ionSource = .instrumentInfo[[1]]$ionisation,
analyser = as.character(.instrumentInfo[[1]]$analyzer),
detectorType = .instrumentInfo[[1]]$detector)
## Create ProcessingStep if needed.
## Create the OnDiskMSnExp object.
res <- new("OnDiskMSnExp",
assayData = assaydata,
phenoData = pdata,
featureData = fdata,
processingData = process,
experimentData = expdata,
.cache = .cacheEnv)
if (!is.null(msLevel.)) {
msLevel. <- as.integer(msLevel.)
res <- filterMsLevel(res, msLevel.)
}
if (any(!is.na(centroided.))) {
if (length(centroided.) == 1) {
centroided(res) <- centroided.
} else {
for (i in seq_along(centroided.))
centroided(res, msLevel. = i) <- centroided.[i]
}
}
if(nrow(res@featureData@data)==0){
stop(c("None of your spectra contains MS signal at MS level: ", msLevel., "! Please check your data !"))
}
if (.on.public.web()){
write.table("Raw file initialized Successfully!",file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "\n");
}
return(res)
}
#' @title UpdateRawfiles
#' @description Update the Raw spectra included for Processing.
#' All wrong format and uncentroided files will be filtered.
#' NOTE: this function is only effective before data import stage
#' AND can NOT be used for resuming pipeline.
#' @param mSet mSet objects generated
#' with \"mSet <- InitDataObjects(\"spec\", \"raw\", FALSE)\", or omitted.
#' @param filesIncluded filesIncluded is a vector containing the files'
#' paths for the following processing;
#' @author Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca} and
#' Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)
#' @return will return an mSet object with raw files updated
#' @export
#' @examples
#' ### Example 1 ---
#' data(mSet)
#' newfiles <- dir(system.file("mzData", package = "mtbls2"),
#' full.names = TRUE, recursive = TRUE)[c(14:16)]
#' mSet <- UpdateRawfiles(mSet, filesIncluded = newfiles)
#'
#' ### Example 2 ---
#' ## load googledrive package to download example data
#' # library("googledrive");
#' # data_folder_Sample <- "Raw_data_example"
#' # temp <- tempfile(fileext = ".zip");
#' ## Please authorize the package to download the data from web
#' # dl <- drive_download(as_id("1CjEPed1WZrwd5T3Ovuic1KVF-Uz13NjO"), path = temp, overwrite = TRUE);
#' # out <- unzip(temp, exdir = data_folder_Sample);
#' # out;
#' # mSet<-InitDataObjects("spec", "raw", FALSE);
#' ## include only two samples CD_SM-77FXR.mzML and CD_SM-6KUCT.mzML for data import.
#' # mSet<-UpdateRawfiles(mSet, c("Raw_data_example/CD/CD_SM-77FXR.mzML",
#' # "Raw_data_example/CD/CD_SM-6KUCT.mzML"))
UpdateRawfiles <- function(mSet = NULL, filesIncluded = NULL){
# TODO: to develope a shiny interface for user to select their files to include
# if (interactive()) {
#
# options(shiny.maxRequestSize=400*1024^2)
#
# ui <- fluidPage(
# titlePanel("Multiple Spectral file read"),
# sidebarLayout(
# sidebarPanel(
# fileInput("SpectralFiles", "Choose Spectra File", accept = c(".mzML",".mzXML","mzml","mzxml","mzData"),
# multiple = TRUE),
#
# ),
# mainPanel(
# textOutput("count")
# )
# )
# )
#
# server <- function(input, output) {
#
# output$contents <- renderTable({
# file <- input$SpectralFiles
# ext <- tools::file_ext(file$datapath)
#
# req(file)
# validate(need(ext %in% c(".mzML",".mzXML","mzml","mzxml","mzData"), "Please upload a spectral file !"))
#
# file;
# })
#
# return(output)
# }
#
# shinyApp(ui, server)
# }
if(is.null(mSet)){
mSet <- new("mSet");
}
if(.on.public.web()){
if(!dir.exists("upload/")){
filesList <- list.files("/home/glassfish/projects/MetaboDemoRawData/upload/", recursive = TRUE, full.names = TRUE);
} else {
filesList <- list.files("upload/", recursive = TRUE, full.names = TRUE);
}
filesListBaseNMs <- basename(filesList);
filesIncluded <- filesList[sapply(filesIncluded, function(x){which(x == filesListBaseNMs)})];
}
if(!is.null(filesIncluded)){
# file exits check
fileIdx <- file.exists(filesIncluded);
if(!any(fileIdx)){
stop("No valid files provided ! Please check your files or their path !")
}
filesIncluded_exited <- filesIncluded[fileIdx];
filesIncluded_full <- unname(sapply(filesIncluded_exited, tools::file_path_as_absolute));
# file format check
exts <- tools::file_ext(filesIncluded_full);
extsIdx <- exts %in% c("mzML","mzXML","mzml","mzxml","mzData", "mzdata", "CDF", "cdf");
if(!any(extsIdx)){
stop("No valid format files provided ! Only files with extension of \"mzML\", \"mzml\", \"mzXML\", \"mzxml\", \"mzData\" and \"mzdata\" are supported!")
}
filesIncluded_formated <- filesIncluded_full[extsIdx];
# file centroid check
Centroididx <- unname(sapply(filesIncluded_formated, CentroidCheck));
if(!any(Centroididx)){
stop("No centroided spectrum found ! Please Centroid them first !")
}
filesIncluded_centroided <- filesIncluded_formated[Centroididx];
message(c(basename(filesIncluded_centroided), " will be included for further processing !\n"))
# file size check
fileSizeInfo <- file.size(filesIncluded_centroided)/1024^2;
largeFileIdx <- fileSizeInfo > 200;
if(any(largeFileIdx)){
message(c(basename(filesIncluded_centroided)[largeFileIdx]), " is larger than 200MB, please note your memory !")
}
filesIncluded <- filesIncluded_centroided;
} else {
warning("No files will be included for mSet !")
}
if(is.null(filesIncluded)){
message("No files will be used to update the files inclusion for mSet!")
}
mSet@rawfiles <- filesIncluded;
if(.on.public.web()){
save(mSet, file = "mSet.rda");
}
return(mSet);
}
#' @title CentroidCheck
#' @description Verify the data is centroid or not
#' @param filename single file name, should contain the absolute path
#' @author Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca} and Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)
#' @importFrom stats quantile
#' @export
#' @return will output a logical value to indicate centroid (TRUE) or not (FALSE)
#' @examples
#' DataFiles <- dir(system.file("mzData", package = "mtbls2"), full.names = TRUE,
#' recursive = TRUE)[c(10:11)]
#' # sapply(DataFiles, CentroidCheck)
#'
CentroidCheck <- function(filename) {
# fileh <- MSnbase:::.openMSfile(filename)
fileh <- mzR::openMSfile(filename, backend = NULL)
allSpect <- mzR::peaks(fileh, seq_len(50))
nValues <- base::lengths(allSpect, use.names = FALSE) / 2
mzR::close(fileh)
rm(fileh)
res <- lapply(
allSpect,
FUN = function(z, APPLF, ...) {
pk <- as.data.frame(z)
k = 0.025
qtl = 0.9
.qtl <- quantile(pk[, 2], qtl)
x <- pk[pk[, 2] > .qtl, 1]
quantile(diff(x), 0.25) > k
}
)
res <- unlist(res[!is.na(res)]);
return(sum(res) > 0.6*length(res))
}
#' @title CentroidMSData
#' @description Convert the MS data as centroid
#' @param InFolder single file/folder name
#' @param OutFolder output folder name, if not exits, will create one
#' @param ncore the core number for parallel processing, default is 1
#' @author Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca} and Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)
#' @import MSnbase
#' @import BiocParallel
#' @export
#' @return will output a centroid mzML file into the input path
#' @examples
#' InFolder <- system.file("mzData", package = "mtbls2")
#' # CentroidMSData(InFolder) #remove the # befroe your testing
CentroidMSData <- function(InFolder, OutFolder = tempdir(), ncore = 1) {
if (.Platform$OS.type == "unix") {
register(bpstart(MulticoreParam(ceiling(ncore))))
} else if (.Platform$OS.type == "windows") {
register(bpstart(SnowParam(ceiling(ncore))))
}
#TODO: to convert the vendor formats with wine
files.list <-
dir(
InFolder,
pattern = ".mzML|.mzml|.cdf|.mzXML|.mzxml|.mzData|.CDF|.mzdata.xml|.mzData.xml",
recursive = TRUE,
full.names = TRUE
);
if(length(files.list) == 0){
stop("No MS spectra found, please input a folder containing mzML, mzXML, mzData or CDF file(s) !")
}
OutPath <- normalizePath(OutFolder);
if(!dir.exists(OutPath)){
dir.create(OutPath);
OutPath <- normalizePath(OutFolder);
}
if(ncore == 1){
res <- sapply(files.list, function(x){
if(!CentroidCheck(x)){
CMS <- .CentroidSpectra(x);
fileNM <- sub(pattern = "(.*)\\..*$", replacement = "\\1", basename(x));
files.mzml <- paste0(OutPath,"/", fileNM, ".mzML");
writeMSData(CMS, file = files.mzml);
}})
} else {
res <- bplapply(files.list, function(x){
if(!CentroidCheck(x)){
CMS <- .CentroidSpectra(x);
fileNM <- sub(pattern = "(.*)\\..*$", replacement = "\\1", basename(x));
files.mzml <- paste0(OutPath,"/", fileNM, ".mzML");
writeMSData(CMS, file = files.mzml);
}
},
BPPARAM = MulticoreParam(4))
}
register(bpstop());
}
.CentroidSpectra <- function(InputFile){
MS <- read.InMemMSd.data(InputFile, pdata = NULL, msLevel. = 1, centroided. = FALSE, smoothed. = FALSE);
return(MSnbase::pickPeaks(MS))
}
Path2Files <- function(path, centroid = TRUE){
Pathname <- normalizePath(path);
if (length(Pathname) > 1){
multipleMS <- TRUE;
} else {
multipleMS <- FALSE;
};
SingleFolder <- SingleFile <- MultiFolder <- MultiFile <- FALSE;
if(!multipleMS){
# one character provided!
if(dir.exists(Pathname)){
SingleFolder <- TRUE;
} else if(file.exists(Pathname)) {
SingleFile <- TRUE;
} else {
if(.on.public.web()){
SingleFolder <- TRUE;
Pathname <- "/home/glassfish/projects/MetaboDemoRawData/upload"
} else {
stop("Invalid path provided, please check!")
}
}
} else {
# one vector provided!
if(all(sapply(Pathname, dir.exists))){
MultiFolder <- TRUE;
} else if(all(sapply(Pathname, file.exists))){
MultiFile <- TRUE;
} else {
stop("Mixed or Invalid filepath type included, please check!")
}
}
pathnames <- unname(unlist(sapply(Pathname, tools::file_path_as_absolute)));
if(SingleFile | MultiFile){
files_tmp <- pathnames;
} else {
files_tmp <-
dir(
pathnames,
pattern = ".mzML|.mzml|.cdf|.mzXML|.mzxml|.mzData|.CDF",
recursive = TRUE,
full.names = TRUE
);
}
files_exts <- tools::file_ext(files_tmp);
if(length(unique(files_exts)) > 1){
stop("Multiple data formats are not allowed!")
};
formatRes <- files_exts %in% c("mzML","mzml","cdf","mzXML","mzxml","mzData","CDF");
if(!all(formatRes)){
warning("Wrong file format detected, Only mzML, mzXML, mzData or CDF will be included.")
}
files <- files_tmp[formatRes];
# Centroid check & filter
# if(!isCdfFile(files) && centroid){
#
# Centroididx <- unname(sapply(files, CentroidCheck));
#
# if(!all(Centroididx)){
# warning(paste0("Uncentroieded file: ", basename(files[!Centroididx]), "will be filtered!\n"));
# }
#
# files <- files[Centroididx];
# }
return(files)
}
#' @references Gatto L, Gibb S, Rainer J (2020). “MSnbase, efficient and elegant R-based processing and visualisation of raw mass spectrometry data.” bioRxiv.
.testReadMSDataInput <- function(e) {
if (is.numeric(e$msLevel) && !all(e$msLevel > 0))
stop("msLevel must be an integer > 0.")
if (length(e$files) < 1)
stop("At least one MS file is required.")
if (all(unique(e$files) != e$files))
stop("Non unique files provided as input. ")
extensions <- unique(toupper(sub("^.+\\.", "", e$files)))
if (length(extensions) > 1)
warning(c("Reading different file formats in. ",
"This is untested and you are welcome to try it out. ",
"Please report back!", "\n"))
invisible(TRUE)
}
testCacheArg <- function(cache, maxCache = 2) {
## Function used to test the value of a 'cache'
## parameter in a read*Data function
## Parameters:
## cache: value of the cache argument to test
## maxCache: max value allowed. Generally
## 3, but could be less, depending
## on the input data. maxCache is 1
## for readMgfData for instance.
## Value: valid (possibly updated) cache value
if (!is.numeric(cache))
stop("'cache' must be numeric.")
if (cache < 0 | cache > maxCache) {
warning("cache must be [0:", maxCache, "]!")
if (cache < 0) {
warning("Setting cache to 0.")
cache <- 0
} else {
warning("Setting cache to ", maxCache, ".")
cache <- maxCache
}
}
return(cache)
}
setCacheEnv <- function(toCache, level = 0, lock = TRUE) {
## Set the .cache slot of a pSet object.
## Parameters
## toCache a list with
## "assaydata": environment - pSet assaydata slot
## "hd": header dataframe
## level: numeric - cache level
## lock: logical - lock env and bindings (default is TRUE)
## Return:
## A new cache environment
level <- testCacheArg(level)
cacheEnv <- new.env(parent = emptyenv())
assaydata <- toCache[["assaydata"]]
hd <- toCache[["hd"]]
assign("level", level, cacheEnv)
if (level >= 1) { ## levels 2 and 3 not yet implemented
## precursor MZ
precMz <- unname(eapply(assaydata, precursorMz))
assign("rangePrecursorMz", range(precMz), cacheEnv)
assign("nPrecursorMz", length(precMz), cacheEnv)
assign("uPrecursorMz", length(unique(precMz)), cacheEnv)
## MS2 MS range
assign("rangeMz", range(unname(eapply(assaydata, mz))),
cacheEnv)
## MS2 retention time
Rtime <- unname(eapply(assaydata, rtime))
assign("rangeRtime", range(Rtime), cacheEnv)
assign("nRtime", length(Rtime), cacheEnv)
## MS levels
assign("msLevels", unique(unlist(eapply(assaydata, msLevel))),
cacheEnv)
## precursor scans
assign("nPrecursorScans",
length(unique(eapply(assaydata, precScanNum))), cacheEnv)
## assay data size
assign("size",
sum(unlist(unname(eapply(assaydata, object.size)))),
cacheEnv)
## full header
assign("hd", hd, cacheEnv)
}
if (lock)
lockEnvironment(cacheEnv, bindings = TRUE)
return(cacheEnv)
}
isCdfFile <- function(x) {
fileEnds <- c("cdf", "nc")
## check for endings and and ending followed by a . (e.g. cdf.gz)
patts <- paste0("\\.", fileEnds, "($|\\.)")
res <- sapply(patts, function(z) {
grep(z, x, ignore.case = TRUE)
})
return(any(unlist(res)))
}
formatFileSpectrumNames <- function(fileIds, spectrumIds,
nFiles=length(fileIds),
nSpectra=length(spectrumIds)) {
digits <- ceiling(log10(c(nFiles, nSpectra) + 1L))
if (length(fileIds) != 1L && length(spectrumIds) != length(fileIds)) {
stop("Length of 'fileIds' has to be one or equal to ",
"the length of 'spectrumIds'.")
}
sprintf(paste0("F%0", digits[1L], "d.S%0", digits[2L], "d"),
fileIds, spectrumIds)
}
SanitySpectra <- function(rawData, cutoff = 0.15){
if(length(rawData) != 0){
fileList <- unique(rawData@featureData@data[["fileIdx"]]);
exIdx = NULL
res <- sapply(fileList, function(x){
otherMean <- mean(rawData@featureData@data[["basePeakIntensity"]][rawData@featureData@data[["fileIdx"]] != x]);
thisFile <- mean(rawData@featureData@data[["basePeakIntensity"]][rawData@featureData@data[["fileIdx"]] == x]);
return(thisFile/otherMean)
})
removingFile <- rawData@phenoData@data[["sample_name"]][res < cutoff];
if(length(removingFile) > 0 && any(!is.na(removingFile))){
MessageOutput(paste0("Base Peak Intensity of ", removingFile, " is lower than ",
cutoff*100, "% of average, will be excluded!\n"));
}
exIdx <- which(res < cutoff);
SpectraClean(rawData, exIdx)
} else {
return(rawData)
}
}
SpectraClean <- function(rawData, exIdx = NULL){
if(!is.null(exIdx) & length(exIdx) > 0){
rawData@phenoData@data <- rawData@phenoData@data[-exIdx,];
fileExIdx <- (rawData@featureData@data[["fileIdx"]] %in% exIdx);
rawData@featureData@data <- rawData@featureData@data[!fileExIdx,];
filesArray <- unique(rawData@featureData@data[["fileIdx"]]);
for (i in seq(filesArray)){
rawData@featureData@data[["fileIdx"]][rawData@featureData@data[["fileIdx"]] == filesArray[i]] <- i
}
rawData@experimentData@instrumentModel <- rawData@experimentData@instrumentModel[-exIdx];
rawData@experimentData@instrumentManufacturer <- rawData@experimentData@instrumentManufacturer[-exIdx];
rawData@experimentData@ionSource <- rawData@experimentData@ionSource[-exIdx];
rawData@experimentData@analyser <- rawData@experimentData@analyser[-exIdx];
rawData@experimentData@detectorType <- rawData@experimentData@detectorType[-exIdx];
rawData@processingData@files <- rawData@processingData@files[-exIdx];
}
return(rawData)
}
DetectMS2Design <- function(msfile = "NA"){
if(msfile == "NA"){
stop("MS data file is missing!")
} else if(!file.exists(msfile)){
stop("MS data file does not exist!")
}
msdata <- mzR::openMSfile(msfile, backend = NULL)
fullhd <- mzR::header(msdata)
targetmzs <- fullhd$isolationWindowTargetMZ
mzsloffst <- fullhd$isolationWindowLowerOffset
mzsupffst <- fullhd$isolationWindowUpperOffset
if(all(is.na(mzsloffst)) | all(is.na(mzsupffst)) | all(is.na(targetmzs))){
ms1idx <- which(fullhd$msLevel == 1)
lgnms1 <- ms1idx[1:2]
lgnvec<- length(c(lgnms1[1]:lgnms1[2]))
design_df2 <- design_df <-
data.frame(mz_low = double(length = lgnvec-2),
mz_up = double(length = lgnvec-2))
gc()
mzR::close(msdata)
rm(msdata);
return(design_df)
}
idx_set <- !is.na(targetmzs)
targetmzs <- targetmzs[idx_set]
mzsloffst <- mzsloffst[idx_set]
mzsupffst <- mzsupffst[idx_set]
mz1st <- targetmzs[!is.na(targetmzs)][1]
idx_lst <- which(mz1st == targetmzs)[2]-1
design_df0 <- design_df2 <- design_df <-
data.frame(mz_low = double(length = idx_lst),
mz_up = double(length = idx_lst))
for(i in 1:idx_lst){
design_df[i, ]<- c((targetmzs[i] - mzsloffst[i]),
(targetmzs[i] + mzsupffst[i]))
}
for(j in (idx_lst+1):(idx_lst*2)){
design_df2[j-idx_lst, ]<- c((targetmzs[j] - mzsloffst[j]),
(targetmzs[j] + mzsupffst[j]))
}
gc()
mzR::close(msdata)
rm(msdata);
if(all(design_df == design_df2)){
return(design_df)
} else {
return(design_df0)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.