###Functions for ReSOLUTION package####
###EIC and MS/MS functions####
# Emma Schymanski, April 2017.
#' Plot EICs from mz and RT List
#'
#' @description This plots EICs of given \code{mz} and from the mzML (or mzXML),
#' with various options. Additional wrapper functions provide different ways of generating mz.
#'
#' @usage plotEICs(mzML_filePath, mz, rt, rt_in_sec=TRUE, names=NULL, smiles="", plot_title=NULL,
#' rt_window=NULL, mz_limit=0.001, ppm=10, isPos=TRUE, isLog=FALSE, isSwitching=FALSE,
#' greyscale=FALSE, colours = NULL, cpdID=NULL, ymin_log=3, switchShift=0,
#' kekulise=TRUE, addMSMS=TRUE, newRMB=TRUE, takePPM=TRUE)
#'
#' @param mzML_filePath Path to the mzML (or mzXML) file to extract the EICs.
#' @param mz Vector of mz values to process.
#' @param rt Vector of retention times to process. Set the unit with \code{rt_in_sec}.
#' @param rt_in_sec Default \code{TRUE} indicates that \code{rt} is in seconds. If
#' \code{FALSE}, \code{rt} is converted to seconds internally.
#' @param names Optional names to belong to each \code{mz}. If \code{NULL}, \code{mz} is used.
#' @param smiles SMILES code of the compound of interest.
#' @param plot_title Title for the plot. If \code{NULL}, \code{basename(mzML_filePath)} is used.
#' @param rt_window The retention time window (in seconds) for EIC extraction. This is added
#' to \code{rt} to form the extraction window. Should not be zero! If \code{NULL},
#' the whole EIC is extracted.
#' @param mz_limit Default 0.001. This controls the limits for EIC extraction. Choose between
#' using \code{mz_limit} and \code{ppm} with \code{takePPM}.
#' @param ppm Default 10 ppm is an alternative to \code{mz_limit} for EIC extraction.
#' @param isPos \code{TRUE} indicates positive mode; \code{FALSE} negative mode. Only
#' relevant for switching data.
#' @param isLog If \code{TRUE}, EICs are plotted with a log scale; \code{FALSE} with
#' absolute intensities. Zero intensities are lost with \code{TRUE}.
#' @param isSwitching If \code{TRUE}, will split switching files assuming that the
#' first scan is positive and that positive and negative alternate, unless \code{switchShift}
#' is set. USE WITH CAUTION.
#' @param greyscale If \code{TRUE}, greyscale plot will be produced. If \code{FALSE},
#' rainbow colours are assigned automatically.
#' @param colours Used to define EIC colours. If \code{NULL}, rainbow or greyscale colours are used,
#' depending on \code{greyscale}.
#' @param plot_title If \code{NULL}, title is autogenerated using \code{isPos}, \code{smiles} and
#' \code{cpdID} if given.
#' @param cpdID If given, this is used in the output. Otherwise a value of 1 is assigned.
#' @param ymin_log This defines the y axis intercept for log plots, for aesthetic reasons. Should be
#' just under the \code{log(noise_intensity)} threshold. \code{3} is appropriate for Orbitraps.
#' @param switchShift If \code{isSwitching} is \code{TRUE}, this can be used to shift the start scan between
#' positive and negative mode. A \code{"quick fix"}, USE WITH CAUTION.
#' @param kekulise Controls aromaticity detection in SMILES for plotting.
#' @param addMSMS Indicates whether MSMS scans should be added to plot with default settings \code{TRUE}.
#' \code{FALSE} does not attempt to add MSMS scans.
#' @param newRMB Indicates whether new RMassBank version is used for MS/MS retrieval (\code{TRUE}).
#' Set to \code{FALSE} for older versions (< 2.0.0) or if error is observed.
#' @param takePPM Indicates whether ppm or mz_limit value should be used to extract EICs.
#'
#' @return Returns a plot in the active device and a summary output.
#'
#' @author Emma Schymanski <emma.schymanski@@uni.lu>, Michael Stravs, Benedikt Lauper
#'
#' @seealso \code{\link{findEIC}}.
#'
#' @export
#'
#' @examples
#' library(RMassBank)
#' library(RMassBankData)
#' RmbDefaultSettings()
#' mzML_files <- list.files(system.file("spectra", package="RMassBankData"), ".mzML", full.names = TRUE)
#' mzML_file <- mzML_files[1]
#' cmpd_id <- "2818"
#' loadList(system.file("list/NarcoticsDataset.csv",package="RMassBankData"))
#' smiles <- findSmiles(cmpd_id)
#' mz <- as.numeric(findMz(cmpd_id, mode="pH")[3])
#' rt <- as.numeric(findRt(cmpd_id))
#' plotEICs(mzML_file, mz, rt, rt_in_sec=FALSE, smiles = smiles)
#' plotEICs(mzML_file, mz, rt, rt_in_sec=FALSE, smiles = smiles, addMSMS=TRUE)
#'
#'
plotEICs <- function(mzML_filePath, mz, rt, rt_in_sec=TRUE, names=NULL, smiles="", plot_title=NULL,
rt_window=NULL, mz_limit=0.001, ppm=10, isPos=TRUE, isLog=FALSE, isSwitching=FALSE,
greyscale=FALSE, colours = NULL, cpdID=NULL, ymin_log=3, switchShift=0,
kekulise=TRUE, addMSMS=TRUE, newRMB=TRUE, takePPM=TRUE) {
# contributions from Emma, Benedikt & Michele
# open mzML
f <- openMSfile(mzML_filePath)
# create input for extraction
mz <- as.vector(mz)
if (rt_in_sec) {
rt <- as.vector(rt) #assuming rt is in sec
} else {
rt <- 60*as.vector(rt)
}
mzrt <- cbind(mz, rt)
if (is.null(names)) {
names <- sprintf("%.4f",mz)
}
# retreive the EICs
if (is.null(rt_window)) {
# no window => extract over full window
if (takePPM) {
eics <- apply(mzrt, 1, function(row) {
findEIC(f,row[[1]], ppm(row[[1]],ppm,p=TRUE))
})
} else {
eics <- apply(mzrt, 1, function(row) {
findEIC(f,row[[1]], limit=mz_limit)
})
}
} else {
if (takePPM) {
eics <- apply(mzrt, 1, function(row) {
findEIC(f,row[[1]], ppm(row[[1]],ppm,p=TRUE), row[[2]] + rt_window)
})
} else {
eics <- apply(mzrt, 1, function(row) {
findEIC(f,row[[1]], limit=mz_limit, row[[2]] + rt_window)
})
}
}
# if this is switching data, need to seperate scans here already
eics_mode <- vector("list",length(mz))
if (isSwitching) {
for(n in 1:length(eics)) {
eic <- eics[[n]]
if (isPos) {
eics_mode[[n]] <- eic[seq(1+switchShift, length(eic$scan), 2), ]
} else {
eics_mode[[n]] <- eic[seq(2+switchShift, length(eic$scan), 2), ]
}
}
} else {
eics_mode <- eics
}
# set up the plotting
if (!greyscale && is.null(colours)) {
#colors <- rainbow(length(eics_mode), start=0/12, end=9/12)
colors <- rainbow(length(eics_mode), start=2/6, end=1)
} else if (is.null(colours)) {
colors <- grey.colors(length(eics_mode), start = 0, end = 0.9)
}
# define limits and set up plot
maxmin_rt <- range(unlist(lapply(eics_mode, function(eic) {range(eic$rt)})), finite = T)
max_int <- max(unlist(lapply(eics_mode, function(eic) {max(eic$intensity)})))
rt_at_maxI <- unlist(lapply(eics_mode, function(eic) {eic[which.max(eic$intensity),1]}))
RT_minutes <- as.numeric(rt_at_maxI)/60
I_at_maxI <- unlist(lapply(eics_mode, function(eic) {eic[which.max(eic$intensity),2]}))
if (is.null(cpdID)) {
screen_summary <- cbind("1", names, smiles, mz, rt, RT_minutes, I_at_maxI)
} else {
screen_summary <- cbind(cpdID, names, smiles, mz, rt, RT_minutes, I_at_maxI)
}
colnames(screen_summary) <- c("cpdID", "names", "smiles", "mz", "Input_RT_sec", "RT_minutes_at_maxI", "I_at_maxI")
# set up the plot title
if (is.null(plot_title)) {
plot_title <- basename(mzML_filePath)
}
plot.new()
if (isLog) {
#log:
if (log10(max_int) > ymin_log) {
plot.window(maxmin_rt, c(ymin_log,(log10(max_int)+2)))
} else if (log10(max_int) > 0) {
plot.window(maxmin_rt, c(0,(log10(max_int)+2)))
} else {
plot.window(maxmin_rt, c(0,2))
}
title(main=plot_title, xlab="RT (sec)", ylab="log10(Intensity)")
I_at_maxI_log <- log10(I_at_maxI)
I_at_maxI_log[which(I_at_maxI_log==-Inf)] <- 0
} else {
# non-log
plot.window(maxmin_rt, c(0,1.5*max_int))
title(main=plot_title, xlab="RT (sec)", ylab="Intensity")
}
box()
axis(1, at=seq(0,maxmin_rt[2], by=60))
axis(2)
legend("topright", legend=names, col=colors, lty=1, lwd=3)
# add SMILES if we have it ...
if (nchar(smiles)>=1) {
if (isLog) {
log_max_int <- log10(max_int)
if (log_max_int==-Inf || log_max_int <= ymin_log) {
log_max_int <- 2
}
renderSMILES.rcdk.default(smiles, kekulise=kekulise, coords=c(min(maxmin_rt),log_max_int,
min(maxmin_rt)+(max(maxmin_rt)-min(maxmin_rt))/3, log_max_int+2))
} else {
if (max_int==0) {
max_int <- 1
}
renderSMILES.rcdk.default(smiles, kekulise=kekulise, coords=c(min(maxmin_rt),max_int,
min(maxmin_rt)+(max(maxmin_rt)-min(maxmin_rt))/3, 1.5*max_int))
}
}
# plot the EICs
for(n in 1:length(eics_mode)) {
eic_mode <- eics_mode[[n]]
if (isLog) {
eic_mode_log <- eic_mode[which(eic_mode$intensity!=0), ]
eic_mode_log$intensity <- log10(eic_mode_log$intensity)
lines(intensity ~ rt ,data = eic_mode_log, col=colors[[n]],lwd=1.75)
text((rt_at_maxI[n]+10),I_at_maxI_log[n],labels=as.character(round(RT_minutes[n],digits=2)),pos=4)
if (addMSMS && newRMB) {
addMsMs(mzML_filePath, mz[n], rt[n], rt_window=rt_window, maxCount=10, colour=colors[[n]],
isLog = TRUE)
} else if (addMSMS && !newRMB) {
addMsMs.oldRMB(mzML_filePath, mz[n], rt[n], rt_window=rt_window, maxCount=10, colour=colors[[n]],
isLog = TRUE)
}
} else {
lines(intensity ~ rt ,data = eic_mode, col=colors[[n]],lwd=1.75)
text((rt_at_maxI[n]+10),I_at_maxI[n],labels=as.character(round(RT_minutes[n],digits=2)),pos=4)
if (addMSMS && newRMB) {
addMsMs(mzML_filePath, mz[n], rt[n], rt_window=rt_window, maxCount=10, colour=colors[[n]],
isLog = FALSE)
} else if (addMSMS && !newRMB) {
addMsMs.oldRMB(mzML_filePath, mz[n], rt[n], rt_window=rt_window, maxCount=10, colour=colors[[n]],
isLog = FALSE)
}
}
}
# clear memory
gc()
return(screen_summary)
}
# Note: build this up on a generic plotEICs function and just wrap additional functionality.
#' Plot EICs from Adduct List
#'
#' @description This plots EICs of adducts in \code{adduct_list} from the mzML (or mzXML)
#' and \code{smiles} given, with various options. Adduct masses are calculated from the
#' \code{smiles} using \code{\link{getSuspectMasses}} via \code{rcdk}. Labelling
#' will not be processed correctly. Adducts that cannot be calculated (especially in negative mode)
#' are automatically skipped. Uses \code{\link{plotEICs}} behind the scenes.
#'
#' @usage plotAdductEICs(mzML_filePath, smiles, adduct_list, rt=0, rt_in_sec=TRUE, plot_title=NULL,
#' rt_window=NULL, mz_limit=0.001, isPos=TRUE, isLog=FALSE, isSwitching=FALSE, greyscale=FALSE,
#' colours=NULL, cpdID=NULL, ymin_log=3, switchShift=0, kekulise=TRUE,addMSMS=FALSE,newRMB=TRUE)
#'
#' @param mzML_filePath Path to the mzML (or mzXML) file to extract the EICs.
#' @param smiles SMILES code of the compound of interest.
#' @param adduct_list Vector of adducts to process, matching entries in the \code{adducts}
#' table in \code{\link{enviPat}}. Used to fill \code{mz} and \code{names} in \code{plotEICs}.
#' @param rt The retention time (in seconds) to centre EIC extraction. If unknown, set
#' \code{rt_window} to \code{NULL}.
#' @param rt_in_sec Default \code{TRUE} indicates that \code{rt} is in seconds. If
#' \code{FALSE}, \code{rt} is converted to seconds internally.
#' @param plot_title Title for the plot. If \code{NULL}, \code{basename(mzML_filePath)} is used.
#' @param rt_window The retention time window (in seconds) for EIC extraction. This is added
#' to \code{rt} to form the extraction window. Should not be zero! If \code{NULL},
#' the whole EIC is extracted.
#' @param isPos \code{TRUE} indicates positive mode; \code{FALSE} negative mode. Only
#' relevant for switching data.
#' @param isLog If \code{TRUE}, EICs are plotted with a log scale; \code{FALSE} with
#' absolute intensities. Zero intensities are lost with \code{TRUE}. Acting strangely.
#' @param isSwitching If \code{TRUE}, will split switching files assuming that the
#' first scan is positive and that positive and negative alternate, unless \code{switchShift}
#' is set. USE WITH CAUTION.
#' @param greyscale If \code{TRUE}, greyscale plot will be produced. If \code{FALSE},
#' rainbow colours are assigned automatically.
#' @param colours Used to define EIC colours. If \code{NULL}, rainbow or greyscale colours are used,
#' depending on \code{greyscale}.
#' @param plot_title If \code{NULL}, title is autogenerated using \code{isPos}, \code{smiles} and
#' \code{cpdID} if given.
#' @param cpdID If given, this is used in the output. Otherwise a value of 1 is assigned.
#' @param ymin_log This defines the y axis intercept for log plots, for aesthetic reasons. Should be
#' just under the \code{log(noise_intensity)} threshold. \code{3} is appropriate for Orbitraps.
#' @param switchShift If \code{isSwitching} is \code{TRUE}, this can be used to shift the start scan between
#' positive and negative mode. A \code{"quick fix"}, USE WITH CAUTION.
#' @param kekulise Controls aromaticity detection in SMILES for plotting.
#' @param addMSMS Indicates whether MSMS scans should be added to plot with default settings \code{TRUE}.
#' \code{FALSE} does not attempt to add MSMS scans.
#' @param newRMB Indicates whether new RMassBank version is used for MS/MS retrieval (\code{TRUE}).
#' Set to \code{FALSE} for older versions (< 2.0.0) or if error is observed.
#'
#' @return Returns a plot in the active device and a summary output.
#'
#' @author Emma Schymanski <emma.schymanski@@uni.lu>, Michael Stravs, Benedikt Lauper
#'
#' @seealso \code{\link{getSuspectMasses}}, \code{\link{findEIC}}, \code{\link{plotEICs}}.
#'
#' @export
#'
#' @examples
#' pos_add <- c("M+H", "M+", "M+Na", "M+K", "M+NH4")
#' neg_add <- c("M-H", "M-", "M+FA-H", "M+Cl", "M-H2O-H")
#' library(RMassBank)
#' library(RMassBankData)
#' RmbDefaultSettings()
#' mzML_files <- list.files(system.file("spectra", package="RMassBankData"), ".mzML", full.names = TRUE)
#' mzML_file <- mzML_files[1]
#' cmpd_id <- "2818"
#' mzML_file <- mzML_files[5]
#' cmpd_id <- "2822"
#' loadList(system.file("list/NarcoticsDataset.csv",package="RMassBankData"))
#' smiles <- findSmiles(cmpd_id)
#' rt <- as.numeric(findRt(cmpd_id))
#' plotAdductEICs(mzML_file, pos_add, rt, rt_in_sec=FALSE, smiles = smiles)
#' plotAdductEICs(mzML_file, pos_add, rt, rt_in_sec=FALSE, smiles = smiles, addMSMS=TRUE)
#' # at this stage, log plots are not working properly...
#' plotAdductEICs(mzML_file, pos_add, rt, rt_in_sec=FALSE, smiles = smiles, isLog=TRUE)
#'
plotAdductEICs <- function(mzML_filePath, smiles, adduct_list, rt=60, rt_in_sec=TRUE, plot_title=NULL,
rt_window=NULL, mz_limit=0.001, isPos=TRUE, isLog=FALSE, isSwitching=FALSE,
greyscale=FALSE, colours=NULL, cpdID=NULL, ymin_log=3, switchShift=0,
kekulise=TRUE, addMSMS=FALSE, newRMB=TRUE) {
# contributions from Emma, Benedikt & Michele
# get adduct masses from SMILES
add_masses <- getSuspectMasses(smiles, adduct_list)
add_names <- colnames(add_masses)
# remove NAs if present - especially for negative mode
add_masses <- add_masses[ , colSums(is.na(add_masses)) == 0,drop=FALSE]
add_names <- colnames(add_masses)
# add retention time
rt <- matrix(rt,length(add_masses))
mz <- as.numeric(add_masses)
#send to plotEICs
screen_summary <- plotEICs(mzML_filePath, mz=mz, rt=rt, rt_in_sec=rt_in_sec, names=add_names,
smiles=smiles, plot_title=plot_title, rt_window=rt_window, mz_limit=mz_limit,
isPos=isPos, isLog=isLog, isSwitching=isSwitching, greyscale=greyscale,
colours = colours, cpdID=cpdID, ymin_log=ymin_log,
switchShift=switchShift, kekulise=kekulise, addMSMS=addMSMS, newRMB=newRMB)
return(screen_summary)
}
#' Add MS/MS Scans to Plot
#'
#' @description This adds MS/MS Scans to EIC plots (or others) using \code{findMsMsHR.mass} from
#' \code{RMassBank}.
#'
#' @usage addMsMs(mzML_filePath, mz, rt, rt_window=NULL, maxCount=10, colour="black", isLog = FALSE)
#'
#' @param mzML_filePath Path to the mzML (or mzXML) file to extract the EICs.
#' @param mz mz value for MS/MS extraction.
#' @param rt rt value for MS/MS extraction (seconds).
#' @param rt_window RT window for extraction, as in \code{plotEICs}. If \code{NULL}, values
#' are extracted over whole run.
#' @param maxCount Maximum number of scans to extract, for \code{findMsMsHR.mass}.
#' @param colour Colour of the lines to plot. Default \code{black}; ideally same colour as EIC.
#' @param isLog Set whether log values should be plotted. Default \code{FALSE} - the setting
#' \code{TRUE} is completely untested (as EIC log plotting also not working)!
#'
#' @return Returns lines for the plot in the active device. If this returns errors, try
#' \code{\link{addMsMs.oldRMB}}. If this returns warnings, it is likely no MS/MS scans are present
#' matching the given limits (the warnings should indicate if this is the case).
#'
#' @author Emma Schymanski <emma.schymanski@@uni.lu>, Michael Stravs <michael.stravs@@eawag.ch>
#'
#' @seealso \code{\link{findMsMsHR.mass}}, \code{\link{plotEICs}}, \code{\link{addMsMs.oldRMB}}.
#'
#' @examples
#' RmbDefaultSettings()
#' mzML_files <- list.files(system.file("spectra", package="RMassBankData"), ".mzML", full.names = TRUE)
#' mzML_file <- mzML_files[1]
#' cmpd_id <- "2818"
#' loadList(system.file("list/NarcoticsDataset.csv",package="RMassBankData"))
#' smiles <- findSmiles(cmpd_id)
#' mz <- as.numeric(findMz(cmpd_id, mode="pH")[3])
#' rt <- as.numeric(findRt(cmpd_id))
#' plotEICs(mzML_file, mz, rt, rt_in_sec=FALSE, smiles = smiles)
#' plotEICs(mzML_file, mz, rt, rt_in_sec=FALSE, smiles = smiles, addMSMS=TRUE)
#'
addMsMs <- function(mzML_filePath, mz, rt, rt_window=NULL, maxCount=10, colour="black",
isLog = FALSE) {
mz <- as.vector(mz)
rt <- as.vector(rt) #assuming rt is in sec
mzrt <- cbind(mz, rt)
#RmbDefaultSettings()
f <- openMSfile(mzML_filePath)
# get MS/MS
if (is.null(rt_window)) {
msms <- apply(mzrt, 1, function(row)
{
findMsMsHR.mass(f, row[[1]], 0.5, ppm(row[[1]],10,p=TRUE), maxCount = maxCount)
})
} else {
msms <- apply(mzrt, 1, function(row)
{
findMsMsHR.mass(f, row[[1]], 0.5, ppm(row[[1]],10,p=TRUE),
rtLimits = row[[2]] + rt_window, maxCount = maxCount)
})
}
# then add to the plot
for(specs in msms[[1]])
{
if(specs@found == TRUE) {
df <- do.call(rbind, lapply(specs@children, function(sp) c(sp@rt, intensity = max(sp@intensity))))
if (!isLog) {
lines(intensity ~ retentionTime, data=df, col=colour, type='h' )
} else {
lines(log10(intensity) ~ retentionTime, data=df, col=colour, type='h' )
}
}
}
}
#' Add MS/MS Scans to Plot - Backwards Compatible to Older RMassBank versions.
#'
#' @description This adds MS/MS Scans to EIC plots (or others) using \code{findMsMsHR.mass} from
#' \code{RMassBank}, compatible with pre 2.0.0 versions.
#'
#' @usage addMsMs.oldRMB(mzML_filePath, mz, rt, rt_window=NULL, maxCount=10, colour="black", isLog = FALSE)
#'
#' @param mzML_filePath Path to the mzML (or mzXML) file to extract the EICs.
#' @param mz mz value for MS/MS extraction.
#' @param rt rt value for MS/MS extraction (seconds).
#' @param rt_window RT window for extraction, as in \code{plotEICs}. If \code{NULL}, values
#' are extracted over whole run.
#' @param maxCount Maximum number of scans to extract, for \code{findMsMsHR.mass}.
#' @param colour Colour of the lines to plot. Default \code{black}; ideally same colour as EIC.
#' @param isLog Set whether log values should be plotted. Default \code{FALSE} - the setting
#' \code{TRUE} is completely untested (as EIC log plotting also not working)!
#'
#' @return Returns lines for the plot in the active device. If this returns errors, try
#' \code{\link{addMsMs}}. If this returns warnings, it is likely no MS/MS scans are present
#' matching the given limits (the warnings should indicate if this is the case).
#'
#' @author Emma Schymanski <emma.schymanski@@uni.lu>, Michael Stravs <michael.stravs@@eawag.ch>
#'
#' @seealso \code{\link{findMsMsHR.mass}}, \code{\link{plotEICs}}, \code{\link{addMsMs}}.
#'
#' @examples
#' library(RMassBank)
#' library(RMassBankData)
#' RmbDefaultSettings()
#' mzML_files <- list.files(system.file("spectra", package="RMassBankData"), ".mzML", full.names = TRUE)
#' mzML_file <- mzML_files[1]
#' cmpd_id <- "2818"
#' loadList(system.file("list/NarcoticsDataset.csv",package="RMassBankData"))
#' smiles <- findSmiles(cmpd_id)
#' mz <- as.numeric(findMz(cmpd_id, mode="pH")[3])
#' rt <- as.numeric(findRt(cmpd_id))
#' plotEICs(mzML_file, mz, rt, rt_in_sec=FALSE, smiles = smiles)
#' plotEICs(mzML_file, mz, rt, rt_in_sec=FALSE, smiles = smiles, addMSMS=TRUE, newRMB=FALSE)
#'
addMsMs.oldRMB <- function(mzML_filePath, mz, rt, rt_window=NULL, maxCount=10,
colour="black", isLog=FALSE) {
mz <- as.vector(mz)
rt <- as.vector(rt) #assuming rt is in sec
mzrt <- cbind(mz, rt)
#RmbDefaultSettings()
f <- openMSfile(mzML_filePath)
# get MS/MS
if (is.null(rt_window)) {
msms <- apply(mzrt, 1, function(row)
{
findMsMsHR.mass(f, row[[1]], 0.5, ppm(row[[1]],10,p=TRUE), maxCount = maxCount)
})
} else {
msms <- apply(mzrt, 1, function(row)
{
findMsMsHR.mass(f, row[[1]], 0.5, ppm(row[[1]],10,p=TRUE),
rtLimits = row[[2]] + rt_window, maxCount = maxCount)
})
}
# then add to the plot
for(specs in msms[[n]])
{
if(specs$foundOK == TRUE) {
if (!isLog) {
lines(basePeakIntensity ~retentionTime, data = specs$childHeaders, col=colour, type='h' )
} else {
lines(log10(basePeakIntensity) ~retentionTime, data = specs$childHeaders, col=colour, type='h' )
}
}
}
}
####functions below here need updating!!! ####
#almost superceded by the functions above - may need to rearrange some things so we can plot
# either individual EICs or multiple EICs on one plot. This function plots one by one.
plotEicMsMs <- function(file, file_desc, mz, rt, rt_window=c(-180,180)) {
mz <- as.vector(mz)
rt <- as.vector(rt) #assuming rt is in sec
mzrt <- cbind(mz, rt)
names <- paste("mz", mz, "_rt", rt, "_", file_desc,sep="")
names_nodots <- paste("mz", gsub(".","_",mz,fixed=TRUE), "_rt", gsub(".","_",round(rt/60,2),fixed=TRUE), "_", file_desc,sep="")
plot_title <- names_nodots
pdf_title <- paste(file_desc, ".pdf",sep="")
csv_name <- paste(file_desc, ".csv",sep="")
#retreive the EICs
RmbDefaultSettings()
f <- openMSfile(file)
h <- header(f)
c <- makePeaksCache(f,h)
eics <- apply(mzrt, 1,
function(row)
{
findEIC.cached(f,row[[1]],ppm(row[[1]],5,p=TRUE), row[[2]] + rt_window, headerCache=h, peaksCache=c)
#findEIC.cached(f,row[[1]],ppm(row[[1]],10,p=TRUE), headerCache=h, peaksCache=c)
})
#rm(c)
#gc()
## add MSMS scans to the plot
# find MS/MS for each mass
msms <- apply(mzrt, 1,
function(row)
{
# findMsMsHR.mass.cached(f, row[[1]], 0.5, ppm(row[[1]],10,p=TRUE),
# rtLimits = row[[2]] + c(-180,180), maxCount = 10, headerCache = h, peaksCache=c)
findMsMsHR.mass(f, row[[1]], 0.5, ppm(row[[1]],10,p=TRUE),
rtLimits = row[[2]] + c(-180,180), maxCount = 10)
})
#add plotting
colors <- rainbow(length(eics), start=2/6, end=1)
#retrieve the summary data of the EICs
maxmin_rt <- range(unlist(lapply(eics, function(eic) {range(eic$rt)})), finite = T)
max_int <- max(unlist(lapply(eics, function(eic) {max(eic$intensity)})))
rt_at_maxI <- unlist(lapply(eics, function(eic) {eic[which.max(eic$intensity),1]}))
RT_minutes <- as.numeric(rt_at_maxI)/60
I_at_maxI <- unlist(lapply(eics, function(eic) {eic[which.max(eic$intensity),2]}))
screen_summary <- cbind(mz, rt, names, RT_minutes, I_at_maxI)
# write outputs to CSV and PDF
write.csv(screen_summary, file=csv_name)
pdf(pdf_title)
for(n in 1:length(eics))
{
plot.new()
maxmin_rt_n <- range(eics[[n]]$rt)
max_int_n <- max(eics[[n]]$intensity)
plot.window(maxmin_rt_n, c(0,1.2*max_int_n))
#plot.window(rt_window, c(0,max(I_at_maxI)))
box()
lines(intensity ~ rt ,data = eics[[n]], col=colors[[n]],lwd=1.75)
for(specs in msms[[n]])
{
if(specs$foundOK == TRUE) {
lines(basePeakIntensity ~retentionTime, data = specs$childHeaders,
col=colors[[n]], type='h' )
}
}
title(main=plot_title[n], xlab="RT (sec)", ylab="Intensity")
#legend("topright", legend=names, col=colors, lty=1, lwd=3)
axis(1, at=seq(0,maxmin_rt[2], by=60))
#axis(1, at=seq(0,rt_window[2], by=60))
axis(2)
}
dev.off()
return(screen_summary)
}
plotEicsAlone <- function(file, file_desc, mz, rt, rt_window=c(-180,180)) {
mz <- as.vector(mz)
rt <- as.vector(rt) #assuming rt is in sec
mzrt <- cbind(mz, rt)
names <- paste("mz", mz, "_rt", rt, "_", file_desc,sep="")
names_nodots <- paste("mz", gsub(".","_",mz,fixed=TRUE), "_rt", gsub(".","_",round(rt/60,2),fixed=TRUE), "_", file_desc,sep="")
plot_title <- paste(names)
pdf_title <- paste(file_desc, ".pdf",sep="")
csv_name <- paste(file_desc, ".csv",sep="")
#retreive the EICs
RmbDefaultSettings()
f <- openMSfile(file)
h <- header(f)
c <- makePeaksCache(f,h)
eics <- apply(mzrt, 1,
function(row)
{
findEIC.cached(f,row[[1]],ppm(row[[1]],5,p=TRUE), row[[2]] + rt_window, headerCache=h, peaksCache=c)
#findEIC.cached(f,row[[1]],ppm(row[[1]],10,p=TRUE), headerCache=h, peaksCache=c)
})
#rm(c)
#gc()
colors <- rainbow(length(eics), start=2/6, end=1)
#retrieve the summary data of the EICs
maxmin_rt <- range(unlist(lapply(eics, function(eic) {range(eic$rt)})), finite = T)
max_int <- max(unlist(lapply(eics, function(eic) {max(eic$intensity)})))
rt_at_maxI <- unlist(lapply(eics, function(eic) {eic[which.max(eic$intensity),1]}))
RT_minutes <- as.numeric(rt_at_maxI)/60
I_at_maxI <- unlist(lapply(eics, function(eic) {eic[which.max(eic$intensity),2]}))
screen_summary <- cbind(mz, rt, names, RT_minutes, I_at_maxI)
# write outputs to CSV and PDF
write.csv(screen_summary, file=csv_name)
pdf(pdf_title)
for(n in 1:length(eics))
{
plot.new()
plot.window(maxmin_rt, c(0,max(I_at_maxI)))
#plot.window(rt_window, c(0,max(I_at_maxI)))
box()
lines(intensity ~ rt ,data = eics[[n]], col=colors[[n]],lwd=1.75)
title(main=plot_title[n], xlab="RT (sec)", ylab="Intensity")
#legend("topright", legend=names, col=colors, lty=1, lwd=3)
axis(1, at=seq(0,maxmin_rt[2], by=60))
#axis(1, at=seq(0,rt_window[2], by=60))
axis(2)
}
dev.off()
return(screen_summary)
}
# This is in plotMSMS now
# plotSpectrum <- function(mz, inten, main, labels=NULL) {
# plot(mz, inten, type="h",
# #if(length(mz)>1) {}
# xlim=c(0,max(mz)*1.1),
# ylim=c(0,max(inten)*1.5),
# main=main,
# cex=1, lwd=2, col="black",
# xlab="m/z", ylab="Intensity"
# )
# #minor.tick(nx=10, ny=10, tick.ratio=0.3)
# invisible(NULL)
# }
getMsms <- function(file, file_desc, mz, rt, dppm=10, rt_window=c(-180,180), mostIntense = TRUE,ms_msms_cut=c(1e5,5e3)) {
mz <- as.vector(mz)
rt <- as.vector(rt) #assuming rt is in sec
mzrt <- cbind(mz, rt)
names <- paste("mz", mz, "_rt", rt, "_", file_desc,sep="")
names_nodots <- paste("mz", gsub(".","_",round(mz,4),fixed=TRUE), "_rt", gsub(".","_",round(rt/60,2),fixed=TRUE), "_", file_desc,sep="")
#plot_title <- paste(names_nodots,"_MSMS",sep="")
pdf_title <- paste(file_desc, "_MSMS.pdf",sep="")
csv_name <- paste(file_desc, "_MSMS.csv",sep="")
msms_files <- vector(mode="character",length=length(mz))
ms_files <- vector(mode="character",length=length(mz))
rt_ms <- vector(mode="numeric",length=length(mz))
rt_msms <- vector(mode="numeric",length=length(mz))
#set up folders for results
current_dir <- getwd()
sample_dir <- paste(current_dir,"/",file_desc,sep="")
if (!dir.exists(sample_dir)) dir.create(sample_dir)
setwd(sample_dir) #reset this back to current_dir at end
msms_dir <- paste(sample_dir,"/MSMS",sep="")
if (!dir.exists(msms_dir)) dir.create(msms_dir)
ms_dir <- paste(sample_dir,"/MS",sep="")
if (!dir.exists(ms_dir)) dir.create(ms_dir)
#open MS file
RmbDefaultSettings()
f <- openMSfile(file)
h <- header(f)
c <- makePeaksCache(f,h)
# get the MS/MS
if (mostIntense) {
msms <- apply(mzrt, 1, function(row) {
spec <- findMsMsHR.mass.cached(f, row[[1]], 0.5, ppm(row[[1]],dppm,p=TRUE),
rtLimits = row[[2]] + rt_window, maxCount = 1, headerCache = h, peaksCache=c)
spec <- spec[[1]]
mzLimits <- list(
mzMin = row[1] - ppm(row[1],10,p=TRUE),
mzCenter = row[1],
mzMax = row[1] + ppm(row[1],10,p=TRUE))
spec$mz <- mzLimits
#spec$id <- row[3]
# spec$formula <- row[[3]]
return(spec)
})
} else {
msms <- apply(mzrt, 1, function(row) {
rtApex <- row[[2]]
spec <- findMsMsHR.mass.cached(f, row[[1]], 0.5, ppm(row[[1]],dppm,p=TRUE),
rtLimits = rtApex + rt_window, headerCache = h, peaksCache=c)
# find the one closest to rt apex
if(spec[[1]]$foundOK)
{
rtMsms <- unlist(lapply(spec, function(s) s$childHeaders$retentionTime))
best <- which.min(abs(rtMsms-rtApex))
spec<-spec[[best]]
}
else
spec <- spec[[1]]
mzLimits <- list(
mzMin = row[1] - ppm(row[1],10,p=TRUE),
mzCenter = row[1],
mzMax = row[1] + ppm(row[1],10,p=TRUE))
spec$mz <- mzLimits
return(spec)
})
}
pdf(file=pdf_title, width=11, height=6)
# write to files
for(n in 1:length(msms))
{
t <- msms[[n]]
if(t$foundOK == F)
next
# HCD:
cut <- ms_msms_cut[2]
cut_ratio <- 0
shot <- as.data.frame(t$peaks[[1]])
shot <- shot[(shot$int >= cut) & (shot$int > max(shot$int) *
cut_ratio), ]
# remove satellite peaks
shot <- filterPeakSatellites(shot)
shot$mz <- round(shot$mz, 4)
shot$int <- round(shot$int, 0)
rt_msms[n] <-t$childHeaders$retentionTime[[1]]/60 #we need the real RT from the MSMS in here to see if we got the right one
#define plot title
msmsplot_title <- paste("mz", gsub(".","_",round(mz[n],4),fixed=TRUE), "_rt",
gsub(".","_",round(rt_msms[n],2),fixed=TRUE), "_", file_desc,"_MSMS",
"_HCD", t$childHeaders$collisionEnergy[[1]], sep="")
# write to file
write.table(shot,file=paste(msms_dir,"/",msmsplot_title,".txt",sep=""), sep=" ", row.names=F, col.names=F)
msms_files[n] <- paste(msmsplot_title,".txt",sep="")
# plot to PDF
#plot.new()
#plotSpectrum(shot$mz, shot$int, main=msmsplot_title)
if(length(shot$mz)>1) {
plotSpectrum(shot$mz, shot$int, main=msmsplot_title)
}
# MS1:
#update 23.10: save in different dir and increase cut to 10000, not 5000
cut <- ms_msms_cut[1]
cut_ratio <- 0
shot <- as.data.frame(t$parentPeak)
shot <- shot[(shot$int >= cut) & (shot$int > max(shot$int) *
cut_ratio), ]
# do not filter satellites to preserve possible fine structure
# shot <- filterPeakSatellites(shot)
M <- floor(t$mz$mzCenter)
# cut to [M, M+10]
shot <- shot[(shot$mz >= M) & (shot$mz <= M+10),]
shot$mz <- round(shot$mz, 4)
shot$int <- round(shot$int, 0)
rt_ms[n] <-t$parentHeader$retentionTime/60
msplot_title <- paste("mz", gsub(".","_",round(mz[n],4),fixed=TRUE), "_rt",
gsub(".","_",round(rt_ms[n],2),fixed=TRUE), "_", file_desc,"_MS",sep="")
write.table(shot,file=paste(ms_dir,"/",msplot_title,".txt",sep=""), sep=" ", row.names=F, col.names=F)
ms_files[n] <- paste(msplot_title,".txt",sep="")
}
dev.off()
# write the table with the filenames out:
rt_min <- rt/60
summary_data <- cbind(mz, rt, rt_min, rt_ms, rt_msms, ms_files, msms_files)
write.csv(summary_data, csv_name)#, col.names=TRUE)
setwd(current_dir)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.