R/EICandMSMS.R

Defines functions getMsms plotEicsAlone plotEicMsMs addMsMs.oldRMB addMsMs plotAdductEICs plotEICs

Documented in addMsMs addMsMs.oldRMB plotAdductEICs plotEICs

###Functions for ReSOLUTION package####
###EIC and MS/MS functions####
# Emma Schymanski, April 2017.


#' Plot EICs from mz and RT List
#'
#' @description This plots EICs of given \code{mz} and from the mzML (or mzXML),
#' with various options. Additional wrapper functions provide different ways of generating mz.
#'
#' @usage plotEICs(mzML_filePath, mz, rt, rt_in_sec=TRUE, names=NULL, smiles="", plot_title=NULL,
#' rt_window=NULL, mz_limit=0.001, ppm=10, isPos=TRUE, isLog=FALSE, isSwitching=FALSE,
#' greyscale=FALSE, colours = NULL, cpdID=NULL, ymin_log=3, switchShift=0,
#' kekulise=TRUE, addMSMS=TRUE, newRMB=TRUE, takePPM=TRUE)
#'
#' @param mzML_filePath Path to the mzML (or mzXML) file to extract the EICs.
#' @param mz Vector of mz values to process.
#' @param rt Vector of retention times to process. Set the unit with \code{rt_in_sec}.
#' @param rt_in_sec Default \code{TRUE} indicates that \code{rt} is in seconds. If
#' \code{FALSE}, \code{rt} is converted to seconds internally.
#' @param names Optional names to belong to each \code{mz}. If \code{NULL}, \code{mz} is used.
#' @param smiles SMILES code of the compound of interest.
#' @param plot_title Title for the plot. If \code{NULL}, \code{basename(mzML_filePath)} is used.
#' @param rt_window The retention time window (in seconds) for EIC extraction. This is added
#' to \code{rt} to form the extraction window. Should not be zero! If \code{NULL},
#' the whole EIC is extracted.
#' @param mz_limit Default 0.001. This controls the limits for EIC extraction. Choose between
#' using \code{mz_limit} and \code{ppm} with \code{takePPM}.
#' @param ppm Default 10 ppm is an alternative to \code{mz_limit} for EIC extraction.
#' @param isPos \code{TRUE} indicates positive mode; \code{FALSE} negative mode. Only
#' relevant for switching data.
#' @param isLog If \code{TRUE}, EICs are plotted with a log scale; \code{FALSE} with
#' absolute intensities. Zero intensities are lost with \code{TRUE}.
#' @param isSwitching If \code{TRUE}, will split switching files assuming that the
#' first scan is positive and that positive and negative alternate, unless \code{switchShift}
#' is set. USE WITH CAUTION.
#' @param greyscale If \code{TRUE}, greyscale plot will be produced. If \code{FALSE},
#' rainbow colours are assigned automatically.
#' @param colours Used to define EIC colours. If \code{NULL}, rainbow or greyscale colours are used,
#' depending on \code{greyscale}.
#' @param plot_title If \code{NULL}, title is autogenerated using \code{isPos}, \code{smiles} and
#' \code{cpdID} if given.
#' @param cpdID If given, this is used in the output. Otherwise a value of 1 is assigned.
#' @param ymin_log This defines the y axis intercept for log plots, for aesthetic reasons. Should be
#' just under the \code{log(noise_intensity)} threshold. \code{3} is appropriate for Orbitraps.
#' @param switchShift If \code{isSwitching} is \code{TRUE}, this can be used to shift the start scan between
#' positive and negative mode. A \code{"quick fix"}, USE WITH CAUTION.
#' @param kekulise Controls aromaticity detection in SMILES for plotting.
#' @param addMSMS Indicates whether MSMS scans should be added to plot with default settings \code{TRUE}.
#' \code{FALSE} does not attempt to add MSMS scans.
#' @param newRMB Indicates whether new RMassBank version is used for MS/MS retrieval (\code{TRUE}).
#' Set to \code{FALSE} for older versions (< 2.0.0) or if error is observed.
#' @param takePPM Indicates whether ppm or mz_limit value should be used to extract EICs.
#'
#' @return Returns a plot in the active device and a summary output.
#'
#' @author Emma Schymanski <emma.schymanski@@uni.lu>, Michael Stravs, Benedikt Lauper
#'
#' @seealso \code{\link{findEIC}}.
#'
#' @export
#'
#' @examples
#' library(RMassBank)
#' library(RMassBankData)
#' RmbDefaultSettings()
#' mzML_files <- list.files(system.file("spectra", package="RMassBankData"), ".mzML", full.names = TRUE)
#' mzML_file <- mzML_files[1]
#' cmpd_id <- "2818"
#' loadList(system.file("list/NarcoticsDataset.csv",package="RMassBankData"))
#' smiles <- findSmiles(cmpd_id)
#' mz <- as.numeric(findMz(cmpd_id, mode="pH")[3])
#' rt <- as.numeric(findRt(cmpd_id))
#' plotEICs(mzML_file, mz, rt, rt_in_sec=FALSE, smiles = smiles)
#' plotEICs(mzML_file, mz, rt, rt_in_sec=FALSE, smiles = smiles, addMSMS=TRUE)
#'
#'
plotEICs <- function(mzML_filePath, mz, rt, rt_in_sec=TRUE, names=NULL, smiles="", plot_title=NULL,
                     rt_window=NULL, mz_limit=0.001, ppm=10, isPos=TRUE, isLog=FALSE, isSwitching=FALSE,
                     greyscale=FALSE, colours = NULL, cpdID=NULL, ymin_log=3, switchShift=0,
                     kekulise=TRUE, addMSMS=TRUE, newRMB=TRUE, takePPM=TRUE) {
  # contributions from Emma, Benedikt & Michele
  # open mzML
  f <- openMSfile(mzML_filePath)
  # create input for extraction
  mz <- as.vector(mz)
  if (rt_in_sec) {
    rt <- as.vector(rt) #assuming rt is in sec
  } else {
    rt <- 60*as.vector(rt)
  }

  mzrt <- cbind(mz, rt)
  if (is.null(names)) {
    names <- sprintf("%.4f",mz)
  }
  # retreive the EICs
  if (is.null(rt_window)) {
    # no window => extract over full window
    if (takePPM) {
      eics <- apply(mzrt, 1, function(row) {
        findEIC(f,row[[1]], ppm(row[[1]],ppm,p=TRUE))
      })
    } else {
      eics <- apply(mzrt, 1, function(row) {
        findEIC(f,row[[1]], limit=mz_limit)
      })
    }
  } else {
    if (takePPM) {
      eics <- apply(mzrt, 1, function(row) {
        findEIC(f,row[[1]], ppm(row[[1]],ppm,p=TRUE), row[[2]] + rt_window)
      })
    } else {
      eics <- apply(mzrt, 1, function(row) {
        findEIC(f,row[[1]], limit=mz_limit, row[[2]] + rt_window)
      })
    }
  }
  # if this is switching data, need to seperate scans here already
  eics_mode <- vector("list",length(mz))
  if (isSwitching) {
    for(n in 1:length(eics))  {
      eic <- eics[[n]]
      if (isPos) {
        eics_mode[[n]] <- eic[seq(1+switchShift, length(eic$scan), 2), ]
      } else {
        eics_mode[[n]] <- eic[seq(2+switchShift, length(eic$scan), 2), ]
      }
    }
  } else {
    eics_mode <- eics
  }
  # set up the plotting
  if (!greyscale && is.null(colours)) {
    #colors <- rainbow(length(eics_mode), start=0/12, end=9/12)
    colors <- rainbow(length(eics_mode), start=2/6, end=1)
  } else if (is.null(colours)) {
    colors <- grey.colors(length(eics_mode), start = 0, end = 0.9)
  }
  # define limits and set up plot
  maxmin_rt <- range(unlist(lapply(eics_mode, function(eic) {range(eic$rt)})), finite = T)
  max_int <- max(unlist(lapply(eics_mode, function(eic) {max(eic$intensity)})))
  rt_at_maxI <- unlist(lapply(eics_mode, function(eic) {eic[which.max(eic$intensity),1]}))
  RT_minutes <- as.numeric(rt_at_maxI)/60
  I_at_maxI <- unlist(lapply(eics_mode, function(eic) {eic[which.max(eic$intensity),2]}))
  if (is.null(cpdID)) {
    screen_summary <- cbind("1", names, smiles, mz, rt, RT_minutes, I_at_maxI)
  } else {
    screen_summary <- cbind(cpdID, names, smiles, mz, rt, RT_minutes, I_at_maxI)
  }
  colnames(screen_summary) <- c("cpdID", "names", "smiles", "mz", "Input_RT_sec", "RT_minutes_at_maxI", "I_at_maxI")

  # set up the plot title
  if (is.null(plot_title)) {
    plot_title <- basename(mzML_filePath)
  }
  plot.new()
  if (isLog) {
    #log:
    if (log10(max_int) > ymin_log) {
      plot.window(maxmin_rt, c(ymin_log,(log10(max_int)+2)))
    } else if (log10(max_int) > 0) {
      plot.window(maxmin_rt, c(0,(log10(max_int)+2)))
    } else {
      plot.window(maxmin_rt, c(0,2))
    }
    title(main=plot_title, xlab="RT (sec)", ylab="log10(Intensity)")
    I_at_maxI_log <- log10(I_at_maxI)
    I_at_maxI_log[which(I_at_maxI_log==-Inf)] <- 0
  } else {
    # non-log
    plot.window(maxmin_rt, c(0,1.5*max_int))
    title(main=plot_title, xlab="RT (sec)", ylab="Intensity")
  }
  box()
  axis(1, at=seq(0,maxmin_rt[2], by=60))
  axis(2)
  legend("topright", legend=names, col=colors, lty=1, lwd=3)

  # add SMILES if we have it ...
  if (nchar(smiles)>=1) {
    if (isLog) {
      log_max_int <- log10(max_int)
      if (log_max_int==-Inf || log_max_int <= ymin_log) {
        log_max_int <- 2
      }
      renderSMILES.rcdk.default(smiles, kekulise=kekulise, coords=c(min(maxmin_rt),log_max_int,
                                min(maxmin_rt)+(max(maxmin_rt)-min(maxmin_rt))/3, log_max_int+2))
    } else {
      if (max_int==0) {
        max_int <- 1
      }
      renderSMILES.rcdk.default(smiles, kekulise=kekulise, coords=c(min(maxmin_rt),max_int,
                                min(maxmin_rt)+(max(maxmin_rt)-min(maxmin_rt))/3, 1.5*max_int))
    }
  }

  # plot the EICs
  for(n in 1:length(eics_mode))  {
    eic_mode <- eics_mode[[n]]
    if (isLog) {
      eic_mode_log <- eic_mode[which(eic_mode$intensity!=0), ]
      eic_mode_log$intensity <- log10(eic_mode_log$intensity)
      lines(intensity ~ rt ,data = eic_mode_log, col=colors[[n]],lwd=1.75)
      text((rt_at_maxI[n]+10),I_at_maxI_log[n],labels=as.character(round(RT_minutes[n],digits=2)),pos=4)
      if (addMSMS && newRMB) {
        addMsMs(mzML_filePath, mz[n], rt[n], rt_window=rt_window, maxCount=10, colour=colors[[n]],
                            isLog = TRUE)
      } else if (addMSMS && !newRMB) {
        addMsMs.oldRMB(mzML_filePath, mz[n], rt[n], rt_window=rt_window, maxCount=10, colour=colors[[n]],
                       isLog = TRUE)
      }
    } else {
      lines(intensity ~ rt ,data = eic_mode, col=colors[[n]],lwd=1.75)
      text((rt_at_maxI[n]+10),I_at_maxI[n],labels=as.character(round(RT_minutes[n],digits=2)),pos=4)
      if (addMSMS && newRMB) {
        addMsMs(mzML_filePath, mz[n], rt[n], rt_window=rt_window, maxCount=10, colour=colors[[n]],
                isLog = FALSE)
      } else if (addMSMS && !newRMB) {
        addMsMs.oldRMB(mzML_filePath, mz[n], rt[n], rt_window=rt_window, maxCount=10, colour=colors[[n]],
                       isLog = FALSE)
      }

    }
  }
  # clear memory
  gc()
  return(screen_summary)
}


# Note: build this up on a generic plotEICs function and just wrap additional functionality.

#' Plot EICs from Adduct List
#'
#' @description This plots EICs of adducts in \code{adduct_list} from the mzML (or mzXML)
#' and \code{smiles} given, with various options. Adduct masses are calculated from the
#' \code{smiles} using \code{\link{getSuspectMasses}} via \code{rcdk}. Labelling
#' will not be processed correctly. Adducts that cannot be calculated (especially in negative mode)
#' are automatically skipped. Uses \code{\link{plotEICs}} behind the scenes.
#'
#' @usage plotAdductEICs(mzML_filePath, smiles, adduct_list, rt=0, rt_in_sec=TRUE, plot_title=NULL,
#' rt_window=NULL, mz_limit=0.001, isPos=TRUE, isLog=FALSE, isSwitching=FALSE, greyscale=FALSE,
#' colours=NULL, cpdID=NULL, ymin_log=3, switchShift=0, kekulise=TRUE,addMSMS=FALSE,newRMB=TRUE)
#'
#' @param mzML_filePath Path to the mzML (or mzXML) file to extract the EICs.
#' @param smiles SMILES code of the compound of interest.
#' @param adduct_list Vector of adducts to process, matching entries in the \code{adducts}
#' table in \code{\link{enviPat}}. Used to fill \code{mz} and \code{names} in \code{plotEICs}.
#' @param rt The retention time (in seconds) to centre EIC extraction. If unknown, set
#' \code{rt_window} to \code{NULL}.
#' @param rt_in_sec Default \code{TRUE} indicates that \code{rt} is in seconds. If
#' \code{FALSE}, \code{rt} is converted to seconds internally.
#' @param plot_title Title for the plot. If \code{NULL}, \code{basename(mzML_filePath)} is used.
#' @param rt_window The retention time window (in seconds) for EIC extraction. This is added
#' to \code{rt} to form the extraction window. Should not be zero! If \code{NULL},
#' the whole EIC is extracted.
#' @param isPos \code{TRUE} indicates positive mode; \code{FALSE} negative mode. Only
#' relevant for switching data.
#' @param isLog If \code{TRUE}, EICs are plotted with a log scale; \code{FALSE} with
#' absolute intensities. Zero intensities are lost with \code{TRUE}. Acting strangely.
#' @param isSwitching If \code{TRUE}, will split switching files assuming that the
#' first scan is positive and that positive and negative alternate, unless \code{switchShift}
#' is set. USE WITH CAUTION.
#' @param greyscale If \code{TRUE}, greyscale plot will be produced. If \code{FALSE},
#' rainbow colours are assigned automatically.
#' @param colours Used to define EIC colours. If \code{NULL}, rainbow or greyscale colours are used,
#' depending on \code{greyscale}.
#' @param plot_title If \code{NULL}, title is autogenerated using \code{isPos}, \code{smiles} and
#' \code{cpdID} if given.
#' @param cpdID If given, this is used in the output. Otherwise a value of 1 is assigned.
#' @param ymin_log This defines the y axis intercept for log plots, for aesthetic reasons. Should be
#' just under the \code{log(noise_intensity)} threshold. \code{3} is appropriate for Orbitraps.
#' @param switchShift If \code{isSwitching} is \code{TRUE}, this can be used to shift the start scan between
#' positive and negative mode. A \code{"quick fix"}, USE WITH CAUTION.
#' @param kekulise Controls aromaticity detection in SMILES for plotting.
#' @param addMSMS Indicates whether MSMS scans should be added to plot with default settings \code{TRUE}.
#' \code{FALSE} does not attempt to add MSMS scans.
#' @param newRMB Indicates whether new RMassBank version is used for MS/MS retrieval (\code{TRUE}).
#' Set to \code{FALSE} for older versions (< 2.0.0) or if error is observed.
#'
#' @return Returns a plot in the active device and a summary output.
#'
#' @author Emma Schymanski <emma.schymanski@@uni.lu>, Michael Stravs, Benedikt Lauper
#'
#' @seealso \code{\link{getSuspectMasses}}, \code{\link{findEIC}}, \code{\link{plotEICs}}.
#'
#' @export
#'
#' @examples
#' pos_add <- c("M+H", "M+", "M+Na", "M+K", "M+NH4")
#' neg_add <- c("M-H", "M-", "M+FA-H", "M+Cl", "M-H2O-H")
#' library(RMassBank)
#' library(RMassBankData)
#' RmbDefaultSettings()
#' mzML_files <- list.files(system.file("spectra", package="RMassBankData"), ".mzML", full.names = TRUE)
#' mzML_file <- mzML_files[1]
#' cmpd_id <- "2818"
#' mzML_file <- mzML_files[5]
#' cmpd_id <- "2822"
#' loadList(system.file("list/NarcoticsDataset.csv",package="RMassBankData"))
#' smiles <- findSmiles(cmpd_id)
#' rt <- as.numeric(findRt(cmpd_id))
#' plotAdductEICs(mzML_file, pos_add, rt, rt_in_sec=FALSE, smiles = smiles)
#' plotAdductEICs(mzML_file, pos_add, rt, rt_in_sec=FALSE, smiles = smiles, addMSMS=TRUE)
#' # at this stage, log plots are not working properly...
#' plotAdductEICs(mzML_file, pos_add, rt, rt_in_sec=FALSE, smiles = smiles, isLog=TRUE)
#'
plotAdductEICs <- function(mzML_filePath, smiles, adduct_list, rt=60, rt_in_sec=TRUE, plot_title=NULL,
                           rt_window=NULL, mz_limit=0.001, isPos=TRUE, isLog=FALSE, isSwitching=FALSE,
                           greyscale=FALSE, colours=NULL, cpdID=NULL, ymin_log=3, switchShift=0,
                           kekulise=TRUE, addMSMS=FALSE, newRMB=TRUE) {
  # contributions from Emma, Benedikt & Michele
  # get adduct masses from SMILES
  add_masses <- getSuspectMasses(smiles, adduct_list)
  add_names <- colnames(add_masses)
  # remove NAs if present - especially for negative mode
  add_masses <- add_masses[ , colSums(is.na(add_masses)) == 0,drop=FALSE]
  add_names <- colnames(add_masses)
  # add retention time
  rt <- matrix(rt,length(add_masses))
  mz <- as.numeric(add_masses)

  #send to plotEICs
  screen_summary <- plotEICs(mzML_filePath, mz=mz, rt=rt, rt_in_sec=rt_in_sec, names=add_names,
                             smiles=smiles, plot_title=plot_title, rt_window=rt_window, mz_limit=mz_limit,
                             isPos=isPos, isLog=isLog, isSwitching=isSwitching, greyscale=greyscale,
                             colours = colours, cpdID=cpdID, ymin_log=ymin_log,
                             switchShift=switchShift, kekulise=kekulise, addMSMS=addMSMS, newRMB=newRMB)

  return(screen_summary)
}


#' Add MS/MS Scans to Plot
#'
#' @description This adds MS/MS Scans to EIC plots (or others) using \code{findMsMsHR.mass} from
#' \code{RMassBank}.
#'
#' @usage addMsMs(mzML_filePath, mz, rt, rt_window=NULL, maxCount=10, colour="black", isLog = FALSE)
#'
#' @param mzML_filePath Path to the mzML (or mzXML) file to extract the EICs.
#' @param mz mz value for MS/MS extraction.
#' @param rt rt value for MS/MS extraction (seconds).
#' @param rt_window RT window for extraction, as in \code{plotEICs}. If \code{NULL}, values
#' are extracted over whole run.
#' @param maxCount Maximum number of scans to extract, for \code{findMsMsHR.mass}.
#' @param colour Colour of the lines to plot. Default \code{black}; ideally same colour as EIC.
#' @param isLog Set whether log values should be plotted. Default \code{FALSE} - the setting
#' \code{TRUE} is completely untested (as EIC log plotting also not working)!
#'
#' @return Returns lines for the plot in the active device. If this returns errors, try
#' \code{\link{addMsMs.oldRMB}}. If this returns warnings, it is likely no MS/MS scans are present
#' matching the given limits (the warnings should indicate if this is the case).
#'
#' @author Emma Schymanski <emma.schymanski@@uni.lu>, Michael Stravs <michael.stravs@@eawag.ch>
#'
#' @seealso \code{\link{findMsMsHR.mass}}, \code{\link{plotEICs}}, \code{\link{addMsMs.oldRMB}}.
#'
#' @examples
#' RmbDefaultSettings()
#' mzML_files <- list.files(system.file("spectra", package="RMassBankData"), ".mzML", full.names = TRUE)
#' mzML_file <- mzML_files[1]
#' cmpd_id <- "2818"
#' loadList(system.file("list/NarcoticsDataset.csv",package="RMassBankData"))
#' smiles <- findSmiles(cmpd_id)
#' mz <- as.numeric(findMz(cmpd_id, mode="pH")[3])
#' rt <- as.numeric(findRt(cmpd_id))
#' plotEICs(mzML_file, mz, rt, rt_in_sec=FALSE, smiles = smiles)
#' plotEICs(mzML_file, mz, rt, rt_in_sec=FALSE, smiles = smiles, addMSMS=TRUE)
#'
addMsMs <- function(mzML_filePath, mz, rt, rt_window=NULL, maxCount=10, colour="black",
                    isLog = FALSE) {
  mz <- as.vector(mz)
  rt <- as.vector(rt) #assuming rt is in sec
  mzrt <- cbind(mz, rt)
  #RmbDefaultSettings()
  f <- openMSfile(mzML_filePath)
  # get MS/MS
  if (is.null(rt_window)) {
    msms <- apply(mzrt, 1, function(row)
    {
      findMsMsHR.mass(f, row[[1]], 0.5, ppm(row[[1]],10,p=TRUE), maxCount = maxCount)
    })
  } else {
    msms <- apply(mzrt, 1, function(row)
    {
      findMsMsHR.mass(f, row[[1]], 0.5, ppm(row[[1]],10,p=TRUE),
                      rtLimits = row[[2]] + rt_window, maxCount = maxCount)
    })
  }
  # then add to the plot
  for(specs in msms[[1]])
  {
    if(specs@found == TRUE) {

      df <- do.call(rbind, lapply(specs@children, function(sp) c(sp@rt, intensity = max(sp@intensity))))
      if (!isLog) {
        lines(intensity ~ retentionTime, data=df, col=colour, type='h' )
      } else {
        lines(log10(intensity) ~ retentionTime, data=df, col=colour, type='h' )
      }

    }
  }
}

#' Add MS/MS Scans to Plot - Backwards Compatible to Older RMassBank versions.
#'
#' @description This adds MS/MS Scans to EIC plots (or others) using \code{findMsMsHR.mass} from
#' \code{RMassBank}, compatible with pre 2.0.0 versions.
#'
#' @usage addMsMs.oldRMB(mzML_filePath, mz, rt, rt_window=NULL, maxCount=10, colour="black", isLog = FALSE)
#'
#' @param mzML_filePath Path to the mzML (or mzXML) file to extract the EICs.
#' @param mz mz value for MS/MS extraction.
#' @param rt rt value for MS/MS extraction (seconds).
#' @param rt_window RT window for extraction, as in \code{plotEICs}. If \code{NULL}, values
#' are extracted over whole run.
#' @param maxCount Maximum number of scans to extract, for \code{findMsMsHR.mass}.
#' @param colour Colour of the lines to plot. Default \code{black}; ideally same colour as EIC.
#' @param isLog Set whether log values should be plotted. Default \code{FALSE} - the setting
#' \code{TRUE} is completely untested (as EIC log plotting also not working)!
#'
#' @return Returns lines for the plot in the active device. If this returns errors, try
#' \code{\link{addMsMs}}. If this returns warnings, it is likely no MS/MS scans are present
#' matching the given limits (the warnings should indicate if this is the case).
#'
#' @author Emma Schymanski <emma.schymanski@@uni.lu>, Michael Stravs <michael.stravs@@eawag.ch>
#'
#' @seealso \code{\link{findMsMsHR.mass}}, \code{\link{plotEICs}}, \code{\link{addMsMs}}.
#'
#' @examples
#' library(RMassBank)
#' library(RMassBankData)
#' RmbDefaultSettings()
#' mzML_files <- list.files(system.file("spectra", package="RMassBankData"), ".mzML", full.names = TRUE)
#' mzML_file <- mzML_files[1]
#' cmpd_id <- "2818"
#' loadList(system.file("list/NarcoticsDataset.csv",package="RMassBankData"))
#' smiles <- findSmiles(cmpd_id)
#' mz <- as.numeric(findMz(cmpd_id, mode="pH")[3])
#' rt <- as.numeric(findRt(cmpd_id))
#' plotEICs(mzML_file, mz, rt, rt_in_sec=FALSE, smiles = smiles)
#' plotEICs(mzML_file, mz, rt, rt_in_sec=FALSE, smiles = smiles, addMSMS=TRUE, newRMB=FALSE)
#'
addMsMs.oldRMB <- function(mzML_filePath, mz, rt, rt_window=NULL, maxCount=10,
                           colour="black", isLog=FALSE) {
  mz <- as.vector(mz)
  rt <- as.vector(rt) #assuming rt is in sec
  mzrt <- cbind(mz, rt)
  #RmbDefaultSettings()
  f <- openMSfile(mzML_filePath)
  # get MS/MS
  if (is.null(rt_window)) {
    msms <- apply(mzrt, 1, function(row)
    {
      findMsMsHR.mass(f, row[[1]], 0.5, ppm(row[[1]],10,p=TRUE), maxCount = maxCount)
    })
  } else {
    msms <- apply(mzrt, 1, function(row)
    {
      findMsMsHR.mass(f, row[[1]], 0.5, ppm(row[[1]],10,p=TRUE),
                      rtLimits = row[[2]] + rt_window, maxCount = maxCount)
    })
  }
  # then add to the plot
  for(specs in msms[[n]])
  {
    if(specs$foundOK == TRUE) {
      if (!isLog) {
        lines(basePeakIntensity ~retentionTime, data = specs$childHeaders, col=colour, type='h' )
      } else {
        lines(log10(basePeakIntensity) ~retentionTime, data = specs$childHeaders, col=colour, type='h' )
      }

    }
  }
}


####functions below here need updating!!! ####

#almost superceded by the functions above - may need to rearrange some things so we can plot
# either individual EICs or multiple EICs on one plot. This function plots one by one.
plotEicMsMs <- function(file, file_desc, mz, rt, rt_window=c(-180,180)) {
  mz <- as.vector(mz)
  rt <- as.vector(rt) #assuming rt is in sec
  mzrt <- cbind(mz, rt)
  names <- paste("mz", mz, "_rt", rt, "_", file_desc,sep="")
  names_nodots <- paste("mz", gsub(".","_",mz,fixed=TRUE), "_rt", gsub(".","_",round(rt/60,2),fixed=TRUE), "_", file_desc,sep="")
  plot_title <- names_nodots
  pdf_title <- paste(file_desc, ".pdf",sep="")
  csv_name <- paste(file_desc, ".csv",sep="")
  #retreive the EICs
  RmbDefaultSettings()
  f <- openMSfile(file)
  h <- header(f)
  c <- makePeaksCache(f,h)
  eics <- apply(mzrt, 1,
                function(row)
                {
                  findEIC.cached(f,row[[1]],ppm(row[[1]],5,p=TRUE), row[[2]] + rt_window, headerCache=h, peaksCache=c)
                  #findEIC.cached(f,row[[1]],ppm(row[[1]],10,p=TRUE), headerCache=h, peaksCache=c)
                })
  #rm(c)
  #gc()
  ## add MSMS scans to the plot
  # find MS/MS for each mass
  msms <- apply(mzrt, 1,
                function(row)
                {
                  # findMsMsHR.mass.cached(f, row[[1]], 0.5, ppm(row[[1]],10,p=TRUE),
                  #                        rtLimits = row[[2]] + c(-180,180), maxCount = 10, headerCache = h, peaksCache=c)
                  findMsMsHR.mass(f, row[[1]], 0.5, ppm(row[[1]],10,p=TRUE),
                                         rtLimits = row[[2]] + c(-180,180), maxCount = 10)
                })
  #add plotting
  colors <- rainbow(length(eics), start=2/6, end=1)
  #retrieve the summary data of the EICs
  maxmin_rt <- range(unlist(lapply(eics, function(eic) {range(eic$rt)})), finite = T)
  max_int <- max(unlist(lapply(eics, function(eic) {max(eic$intensity)})))
  rt_at_maxI <- unlist(lapply(eics, function(eic) {eic[which.max(eic$intensity),1]}))
  RT_minutes <- as.numeric(rt_at_maxI)/60
  I_at_maxI <- unlist(lapply(eics, function(eic) {eic[which.max(eic$intensity),2]}))
  screen_summary <- cbind(mz, rt, names, RT_minutes, I_at_maxI)
  # write outputs to CSV and PDF
  write.csv(screen_summary, file=csv_name)
  pdf(pdf_title)
  for(n in 1:length(eics))
  {
    plot.new()
    maxmin_rt_n <- range(eics[[n]]$rt)
    max_int_n <- max(eics[[n]]$intensity)
    plot.window(maxmin_rt_n, c(0,1.2*max_int_n))
    #plot.window(rt_window, c(0,max(I_at_maxI)))
    box()
    lines(intensity ~ rt ,data = eics[[n]], col=colors[[n]],lwd=1.75)
    for(specs in msms[[n]])
    {
      if(specs$foundOK == TRUE) {
        lines(basePeakIntensity ~retentionTime, data = specs$childHeaders,
              col=colors[[n]], type='h' )
      }
    }

    title(main=plot_title[n], xlab="RT (sec)", ylab="Intensity")
    #legend("topright", legend=names, col=colors, lty=1, lwd=3)
    axis(1, at=seq(0,maxmin_rt[2], by=60))
    #axis(1, at=seq(0,rt_window[2], by=60))
    axis(2)
  }
  dev.off()
  return(screen_summary)

}


plotEicsAlone <- function(file, file_desc, mz, rt, rt_window=c(-180,180)) {
  mz <- as.vector(mz)
  rt <- as.vector(rt) #assuming rt is in sec
  mzrt <- cbind(mz, rt)
  names <- paste("mz", mz, "_rt", rt, "_", file_desc,sep="")
  names_nodots <- paste("mz", gsub(".","_",mz,fixed=TRUE), "_rt", gsub(".","_",round(rt/60,2),fixed=TRUE), "_", file_desc,sep="")
  plot_title <- paste(names)
  pdf_title <- paste(file_desc, ".pdf",sep="")
  csv_name <- paste(file_desc, ".csv",sep="")
  #retreive the EICs
  RmbDefaultSettings()
  f <- openMSfile(file)
  h <- header(f)
  c <- makePeaksCache(f,h)
  eics <- apply(mzrt, 1,
                function(row)
                {
                  findEIC.cached(f,row[[1]],ppm(row[[1]],5,p=TRUE), row[[2]] + rt_window, headerCache=h, peaksCache=c)
                  #findEIC.cached(f,row[[1]],ppm(row[[1]],10,p=TRUE), headerCache=h, peaksCache=c)
                })
  #rm(c)
  #gc()
  colors <- rainbow(length(eics), start=2/6, end=1)
  #retrieve the summary data of the EICs
  maxmin_rt <- range(unlist(lapply(eics, function(eic) {range(eic$rt)})), finite = T)
  max_int <- max(unlist(lapply(eics, function(eic) {max(eic$intensity)})))
  rt_at_maxI <- unlist(lapply(eics, function(eic) {eic[which.max(eic$intensity),1]}))
  RT_minutes <- as.numeric(rt_at_maxI)/60
  I_at_maxI <- unlist(lapply(eics, function(eic) {eic[which.max(eic$intensity),2]}))
  screen_summary <- cbind(mz, rt, names, RT_minutes, I_at_maxI)
  # write outputs to CSV and PDF
  write.csv(screen_summary, file=csv_name)
  pdf(pdf_title)
  for(n in 1:length(eics))
  {
    plot.new()
    plot.window(maxmin_rt, c(0,max(I_at_maxI)))
    #plot.window(rt_window, c(0,max(I_at_maxI)))
    box()
    lines(intensity ~ rt ,data = eics[[n]], col=colors[[n]],lwd=1.75)
    title(main=plot_title[n], xlab="RT (sec)", ylab="Intensity")
    #legend("topright", legend=names, col=colors, lty=1, lwd=3)
    axis(1, at=seq(0,maxmin_rt[2], by=60))
    #axis(1, at=seq(0,rt_window[2], by=60))
    axis(2)
  }
  dev.off()
  return(screen_summary)

}

# This is in plotMSMS now
# plotSpectrum <- function(mz, inten, main, labels=NULL) {
#   plot(mz, inten, type="h",
#        #if(length(mz)>1) {}
#        xlim=c(0,max(mz)*1.1),
#        ylim=c(0,max(inten)*1.5),
#        main=main,
#        cex=1, lwd=2, col="black",
#        xlab="m/z", ylab="Intensity"
#   )
#   #minor.tick(nx=10, ny=10, tick.ratio=0.3)
#   invisible(NULL)
# }


getMsms <- function(file, file_desc, mz, rt, dppm=10, rt_window=c(-180,180), mostIntense = TRUE,ms_msms_cut=c(1e5,5e3)) {
  mz <- as.vector(mz)
  rt <- as.vector(rt) #assuming rt is in sec
  mzrt <- cbind(mz, rt)
  names <- paste("mz", mz, "_rt", rt, "_", file_desc,sep="")
  names_nodots <- paste("mz", gsub(".","_",round(mz,4),fixed=TRUE), "_rt", gsub(".","_",round(rt/60,2),fixed=TRUE), "_", file_desc,sep="")
  #plot_title <- paste(names_nodots,"_MSMS",sep="")
  pdf_title <- paste(file_desc, "_MSMS.pdf",sep="")
  csv_name <- paste(file_desc, "_MSMS.csv",sep="")
  msms_files <- vector(mode="character",length=length(mz))
  ms_files <- vector(mode="character",length=length(mz))
  rt_ms <- vector(mode="numeric",length=length(mz))
  rt_msms <- vector(mode="numeric",length=length(mz))
  #set up folders for results
  current_dir <- getwd()
  sample_dir <- paste(current_dir,"/",file_desc,sep="")
  if (!dir.exists(sample_dir)) dir.create(sample_dir)
  setwd(sample_dir) #reset this back to current_dir at end
  msms_dir <- paste(sample_dir,"/MSMS",sep="")
  if (!dir.exists(msms_dir)) dir.create(msms_dir)
  ms_dir <- paste(sample_dir,"/MS",sep="")
  if (!dir.exists(ms_dir)) dir.create(ms_dir)
  #open MS file
  RmbDefaultSettings()
  f <- openMSfile(file)
  h <- header(f)
  c <- makePeaksCache(f,h)
  # get the MS/MS
  if (mostIntense) {
    msms <- apply(mzrt, 1, function(row) {
      spec <- findMsMsHR.mass.cached(f, row[[1]], 0.5, ppm(row[[1]],dppm,p=TRUE),
                                     rtLimits = row[[2]] + rt_window, maxCount = 1, headerCache = h, peaksCache=c)
      spec <- spec[[1]]
      mzLimits <- list(
        mzMin = row[1] - ppm(row[1],10,p=TRUE),
        mzCenter = row[1],
        mzMax = row[1] + ppm(row[1],10,p=TRUE))
      spec$mz <- mzLimits
      #spec$id <- row[3]
      #  spec$formula <- row[[3]]
      return(spec)
    })
  } else {
    msms <- apply(mzrt, 1, function(row) {
      rtApex <- row[[2]]
      spec <- findMsMsHR.mass.cached(f, row[[1]], 0.5, ppm(row[[1]],dppm,p=TRUE),
                                     rtLimits = rtApex + rt_window, headerCache = h, peaksCache=c)
      # find the one closest to rt apex
      if(spec[[1]]$foundOK)
      {
        rtMsms <- unlist(lapply(spec, function(s) s$childHeaders$retentionTime))
        best <- which.min(abs(rtMsms-rtApex))
        spec<-spec[[best]]
      }
      else
        spec <- spec[[1]]

      mzLimits <- list(
        mzMin = row[1] - ppm(row[1],10,p=TRUE),
        mzCenter = row[1],
        mzMax = row[1] + ppm(row[1],10,p=TRUE))
      spec$mz <- mzLimits
      return(spec)
    })
  }
  pdf(file=pdf_title, width=11, height=6)
  # write to files
  for(n in 1:length(msms))
  {
    t <- msms[[n]]
    if(t$foundOK == F)
      next
    # HCD:
    cut <- ms_msms_cut[2]
    cut_ratio <- 0
    shot <- as.data.frame(t$peaks[[1]])
    shot <- shot[(shot$int >= cut) & (shot$int > max(shot$int) *
                                        cut_ratio), ]
    # remove satellite peaks
    shot <- filterPeakSatellites(shot)
    shot$mz <- round(shot$mz, 4)
    shot$int <- round(shot$int, 0)
    rt_msms[n] <-t$childHeaders$retentionTime[[1]]/60 #we need the real RT from the MSMS in here to see if we got the right one
    #define plot title
    msmsplot_title <- paste("mz", gsub(".","_",round(mz[n],4),fixed=TRUE), "_rt",
                            gsub(".","_",round(rt_msms[n],2),fixed=TRUE), "_", file_desc,"_MSMS",
                            "_HCD", t$childHeaders$collisionEnergy[[1]], sep="")
    # write to file
    write.table(shot,file=paste(msms_dir,"/",msmsplot_title,".txt",sep=""), sep=" ", row.names=F, col.names=F)
    msms_files[n] <- paste(msmsplot_title,".txt",sep="")

    # plot to PDF
    #plot.new()
    #plotSpectrum(shot$mz, shot$int, main=msmsplot_title)
    if(length(shot$mz)>1) {
      plotSpectrum(shot$mz, shot$int, main=msmsplot_title)
    }

    # MS1:
    #update 23.10: save in different dir and increase cut to 10000, not 5000
    cut <- ms_msms_cut[1]
    cut_ratio <- 0
    shot <- as.data.frame(t$parentPeak)
    shot <- shot[(shot$int >= cut) & (shot$int > max(shot$int) *
                                        cut_ratio), ]
    # do not filter satellites to preserve possible fine structure
    # shot <- filterPeakSatellites(shot)
    M <- floor(t$mz$mzCenter)
    # cut to [M, M+10]
    shot <- shot[(shot$mz >= M) & (shot$mz <= M+10),]

    shot$mz <- round(shot$mz, 4)
    shot$int <- round(shot$int, 0)
    rt_ms[n] <-t$parentHeader$retentionTime/60
    msplot_title <- paste("mz", gsub(".","_",round(mz[n],4),fixed=TRUE), "_rt",
                          gsub(".","_",round(rt_ms[n],2),fixed=TRUE), "_", file_desc,"_MS",sep="")
    write.table(shot,file=paste(ms_dir,"/",msplot_title,".txt",sep=""), sep=" ", row.names=F, col.names=F)
    ms_files[n] <- paste(msplot_title,".txt",sep="")
  }

  dev.off()
  # write the table with the filenames out:
  rt_min <- rt/60
  summary_data <- cbind(mz, rt, rt_min, rt_ms, rt_msms, ms_files, msms_files)
  write.csv(summary_data, csv_name)#, col.names=TRUE)
  setwd(current_dir)
}
schymane/ReSOLUTION documentation built on May 22, 2021, 3:41 a.m.