R/ms1_plots.R

Defines functions ms1_plots

Documented in ms1_plots

#' Make the Previous and Next MS1 Plots
#'
#' @description Generates a list with two plots: previous and next MS1 within an isolation window, and labeled with isotopes.
#'
#' @param ScanMetadata Object of the scan_metadata class from get_scan_metadata
#' @param ScanNumber Integer indicating which MS2 scan number should have its previous and next MS1 plots made.
#' @param Window Size of the MS1 plotting window from the Precursor M/Z in units of M/Z. Default is 5 M/Z.
#' @param Sequence Protein sequence for annotating experimental M/Z, if different from the ID data. Default is NULL.
#' @param IsotopicPercentageFilter Percentage written as a number between 0-100 to filter potential isotopes by relative abundance. Default is 10.
#' @param Interactive If True, an interactive plotly graphic will be returned. If False, a static ggplot graphic will be returned. Default is FALSE.
#' @param AlternativeGlossary Try a different glossary. See system.file("extdata", "Unimod_v20220602.csv", package = "pspecterlib)
#'     for formatting. 
#'
#' @details
#' Two plots will be returned in a list. The first is the previous MS1, and the second is the next MS1.
#'
#' @examples
#' \dontrun{
#'
#' # Test with bottom up data
#' MS1_Interactive <- ms1_plots(ScanMetadata = BU_ScanMetadata, ScanNumber = 31728, Window = 5,
#'                             Sequence = NULL, Interactive = TRUE, IsotopicPercentageFilter = 10)
#' MS1_Static <- ms1_plots(ScanMetadata = BU_ScanMetadata, ScanNumber = 31728, Window = 5,
#'                       Sequence = NULL, Interactive = FALSE, IsotopicPercentageFilter = 10)
#'
#' # See a previous MS1 with interactivity
#' MS1_Interactive[[1]]
#'
#' # See a next MS1 statically
#' MS1_Static[[2]]
#' 
#' # Try with a modified sequence 
#' MS1_Interactive2 <- ms1_plots(ScanMetadata = BU_ScanMetadata, ScanNumber = 31728, Window = 5,
#'                               Sequence = "IGA[Acetyl]VGGTENVSLTQSQMPAHNHLVAASTVSGTVKPLANDIIGAGLNK", 
#'                               Interactive = TRUE, IsotopicPercentageFilter = 10)
#'
#' }
#'
#' @export
ms1_plots <- function(ScanMetadata,
                     ScanNumber,
                     Window = 5,
                     Sequence = NULL,
                     IsotopicPercentageFilter = 10,
                     Interactive = TRUE,
                     AlternativeGlossary = NULL) {


  ##################
  ## CHECK INPUTS ##
  ##################

  # Asser that Scan Metadata is a scan metadata object
  if ("scan_metadata" %in% class(ScanMetadata) == FALSE) {
    stop("ScanMetadata must be a scan_metadata object generated by get_scan_metadata.")
  }

  # Assert the scan number is an integer
  if (!(is.numeric(ScanNumber))) {
    stop("ScanNumber must be an integer.")
  }

  # Round scan number
  ScanNumber <- round(ScanNumber)

  # Assert that scan number is for a MS2 scan
  MS2_check <- ScanMetadata[ScanMetadata$`Scan Number` == ScanNumber, "MS Level"] == 2
  if (length(MS2_check) == 0 || MS2_check == FALSE) {
    stop("ScanNumber must be for a scan with an MS Level of 2.")
  }

  # Assert that window is a number
  if (!is.numeric(Window) || length(Window) > 1) {
    stop("Window must be a single integer.")
  }

  # Check sequence
  if (!is.null(Sequence)) {
    
    if (!is.null(AlternativeGlossary)) {
      if (!is_sequence(Sequence, AlternativeGlossary = AlternativeGlossary)) {
        stop("Sequence is not an acceptable input. See is_sequence.")
      }
    } else {
      if (!is_sequence(Sequence)) {
        stop("Sequence is not an acceptable input. See is_sequence.")
      }
    }
  }

  # Assert that IsotopicPercentageFilter is a number between 0-100
  if (!is.numeric(IsotopicPercentageFilter) || length(IsotopicPercentageFilter) != 1 || IsotopicPercentageFilter < 0 || IsotopicPercentageFilter > 100) {
    stop("The IsotopicPercentageFilter must be a number between 0-100.")
  }

  # Assert that interactive is a logical and not NA
  if (!is.logical(Interactive)) {
    stop("Interactive parameter must be either TRUE or FALSE.")
  }
  if (is.na(Interactive)) {Interactive <- FALSE}

  ###############################################
  ## PULL MS1 SCAN NUMBERS, PEAK DATA, and M/Z ##
  ###############################################

  # Pull Previous Scan Number
  PreviousMS1ScanNum <- ScanMetadata[ScanMetadata$`Scan Number` == ScanNumber, "Precursor Scan"] %>%
    unlist() %>% utils::head(1)

  # Get the position of the Next MS1 Scan Number
  NextMS1Pos <- match(PreviousMS1ScanNum, attr(ScanMetadata, "pspecter")$MS1Scans %>% unlist()) + 1

  # If the position is beyond the vector, NextMS1ScanNum is NA.
  if (NextMS1Pos > length(attr(ScanMetadata, "pspecter")$MS1Scans %>% unlist())) {
    NextMS1ScanNum <- NA
  } else {
    NextMS1ScanNum <- attr(ScanMetadata, "pspecter")$MS1Scans[NextMS1Pos] %>% unlist() %>% utils::head(1)
  }

  # Pull Previous MS1 Peak Data and MZ
  PreviousPeaks <- get_peak_data(ScanMetadata, PreviousMS1ScanNum)
  if (!is.na(NextMS1ScanNum)) {
    NextPeaks <- get_peak_data(ScanMetadata, NextMS1ScanNum)
  }

  # Get the Precursor M/Z
  PrecursorMZ <- ScanMetadata[ScanMetadata$`Scan Number` == ScanNumber, "Precursor M/Z"] %>% unlist()

  # Subset down peaks to the isolation window
  peakSubset <- function(Peaks, PrecursorMZ, Window) {
    Peaks <- Peaks[Peaks$`M/Z` <= PrecursorMZ + Window & Peaks$`M/Z` >= PrecursorMZ - Window,]
    if (nrow(Peaks) < 1) {message("No MS1 peaks identified. Consider increasing the window size.")}
    return(Peaks)
  }
  PreviousPeaks <- peakSubset(PreviousPeaks, PrecursorMZ, Window)
  if (!is.na(NextMS1ScanNum)) {
    NextPeaks <- peakSubset(NextPeaks, PrecursorMZ, Window)
  }

  # Use a different sequence if sequence is NULL
  if (is.null(Sequence)) {

    # Determine sequence for isotoping
    Sequence <- ScanMetadata[ScanMetadata$`Scan Number` == ScanNumber, "Sequence"] %>% unlist()

    # If the number of sequences is greater than 1, use the first one
    if (length(Sequence) > 1) {
      message("More than one sequence found for this scan number. Using the first one.")
      Sequence <- Sequence[1]
    }

    # If Sequence is still NULL or length 0, make Sequence an empty string
    if (is.na(Sequence) || is.null(Sequence) || length(Sequence) == 0) {
      return(
        list(
          annotated_spectrum_plot(PreviousPeaks, Interactive = Interactive),
          annotated_spectrum_plot(NextPeaks, Interactive = Interactive)
        )
      )

    }

  }
  
  # Load Glossary
  if (is.null(AlternativeGlossary)) {
    Glossary <- data.table::fread(
      system.file("extdata", "Unimod_v20220602.csv", package = "pspecterlib")
    )
    Convert <- convert_proforma(Sequence)
  } else {
    Glossary <- AlternativeGlossary
    Convert <- convert_proforma(Sequence, AlternativeGlossary)
  }
  
  # Extract sequence
  if (is.character(Convert)) {
    Sequence <- Convert
  } else {
    Sequence <- attr(Convert, "pspecter")$cleaned_sequence
  }

  # Get sequence molecular formula 
  MolForm <- Sequence %>% get_aa_molform()
  
  # Adjust molform 
  if (!is.character(Convert)) {
    
    # Change class
    class(Convert) <- "data.frame"
    
    # Extract named modifications
    Named <- Glossary[Glossary$Modification %in% Convert$Name,]
    
    if (length(Named) != 0) {
      
      for (row in 1:nrow(Named)) {
        
        premolform <- Named[row, 4:ncol(Glossary)] %>% 
          dplyr::select(colnames(.)[!is.na(.)]) %>% 
          paste0(colnames(.), ., collapse = "")
        
        
        MolForm <- add_molforms(MolForm, as.molform(premolform))
      }
      
    }
    
  }
  
  # Calculate isotope profile 
  IsoDist <- calculate_iso_profile(MolForm) %>%
    dplyr::rename(`M/Z` = mass, Isotope = isolabel)

  # Get the Precursor Charge
  PrecursorCharge <- ScanMetadata[ScanMetadata$`Scan Number` == ScanNumber, "Precursor Charge"] %>% unlist()

  # Apply Isotope Distribution as a change to the M+0 peak
  IsoDist$`M/Z` <- (IsoDist$`M/Z` / PrecursorCharge) + PrecursorMZ

  # Add any mass changes 
  if (!is.character(Convert)) {
    
    # Extract mass modifications
    MassChange <- Glossary[Glossary$Modification %in% Convert$Name == FALSE,]
    
    if (nrow(MassChange) > 0) {
      IsoDist$`M/Z` <- IsoDist$`M/Z` + sum(Convert$`AMU Change`)
    }
    
  }
  
  # Filter peaks at the isotopic percentage
  IsoDist <- IsoDist[IsoDist$`Isotopic Percentage` >= IsotopicPercentageFilter / 100,]

  # Match based on two truncated decimal points and add zero fills
  mergeIsoDistandPeaks <- function(IsoDist, Peaks) {

    # Truncate to two decimal places and merge peaks and isotopic distribution
    trunc2 <-  function(x) {trunc(x * 100)/100}
    IsoDist$`M/Z` <- IsoDist$`M/Z` %>% trunc2()
    Peaks$`M/Z` <- Peaks$`M/Z` %>% trunc2()
    Peaks <- merge(Peaks, IsoDist, by = "M/Z", all = T)

    # Label NA isotopes as blank and convert to factor
    Peaks[is.na(Peaks$Isotope), "Isotope"] <- "Experimental"
    Peaks$Isotope <- factor(Peaks$Isotope, levels = unique(Peaks$Isotope))

    # Zero center
    PeaksPre0 <- PeaksNext0 <- Peaks
    PeaksPre0$Intensity <- PeaksNext0$Intensity <- 0
    PeaksPre0$`M/Z` <- PeaksPre0$`M/Z` - 1e-12
    PeaksNext0$`M/Z` <- PeaksNext0$`M/Z` + 1e-12

    # Bind and order
    Peaks <- rbind(PeaksPre0, Peaks, PeaksNext0)
    Peaks <- Peaks[order(Peaks$`M/Z`),]

    # Rename Isotope to Peak Description
    Peaks <- Peaks %>% dplyr::rename("Peak Description" = Isotope)

    return(Peaks)
  }

  # Merge values and zero fill
  PreviousPeaks <- mergeIsoDistandPeaks(IsoDist, PreviousPeaks)
  if (!is.na(NextMS1ScanNum)) {
    NextPeaks <- mergeIsoDistandPeaks(IsoDist, NextPeaks)
  }

  # Set color pallete
  precount <- length(unique(PreviousPeaks$`Peak Description`)) - 1
  precount <- ifelse(precount < 3, 3, precount)
  ColorListPre <- RColorBrewer::brewer.pal(precount, "Set1")
  names(ColorListPre) <- unique(PreviousPeaks$`Peak Description`)[grepl("M", unique(PreviousPeaks$`Peak Description`))]
  ColorListPre["Experimental"] <- "#000000"

  if (!is.na(NextMS1ScanNum)) {
    postcount <- length(unique(NextPeaks$`Peak Description`)) - 1
    postcount <- ifelse(postcount < 3, 3, postcount)
    ColorListNext <- RColorBrewer::brewer.pal(postcount, "Set1")
    names(ColorListNext) <- unique(NextPeaks$`Peak Description`)[grepl("M", unique(NextPeaks$`Peak Description`))]
    ColorListNext["Experimental"] <- "#000000"
  }

  # Make plots
  PreviousMS1 <- ggplot2::ggplot(PreviousPeaks, ggplot2::aes(x = `M/Z`, y = Intensity, color = `Peak Description`)) +
    ggplot2::theme_bw() + ggplot2::geom_line(size = 1) + ggplot2::scale_color_manual(values = ColorListPre) +
    ggplot2::theme(legend.title = ggplot2::element_blank(), plot.title = ggplot2::element_text(hjust = 0.5))
  if (!is.na(NextMS1ScanNum)) {
    NextMS1 <- ggplot2::ggplot(NextPeaks, ggplot2::aes(x = `M/Z`, y = Intensity, color = `Peak Description`)) +
      ggplot2::theme_bw() + ggplot2::geom_line(size = 1) + ggplot2::scale_color_manual(values = ColorListNext) +
      ggplot2::theme(legend.title = ggplot2::element_blank(), plot.title = ggplot2::element_text(hjust = 0.5))
  }


  # Return plots differently if interactive or not
  if (Interactive == FALSE) {
    PreviousMS1 <- PreviousMS1 + ggplot2::ggtitle(bquote("Previous MS1, Precursor"~italic(.("M/Z:"))~.(PrecursorMZ)))
    if (!is.na(NextMS1ScanNum)) {
      NextMS1 <- NextMS1 + ggplot2::ggtitle(bquote("Next MS1, Precursor"~italic(.("M/Z:"))~.(PrecursorMZ)))
    } else {
      NextMS1 <- NULL
    }
    return(list(PreviousMS1, NextMS1))
  } else {
    PreviousMS1 <- PreviousMS1 %>% plotly::ggplotly() %>%
      plotly::layout(title = htmltools::HTML(paste0("Previous MS1, Precursor <i>M/Z</i>:", PrecursorMZ)), margin = list(t = 50))
    if (!is.na(NextMS1ScanNum)) {
      NextMS1 <- NextMS1 %>% plotly::ggplotly() %>%
        plotly::layout(title = htmltools::HTML(paste0("Next MS1, Precursor <i>M/Z</i>:", PrecursorMZ)), margin = list(t = 50))
    } else {
      NextMS1 <- NULL
    }
    return(list(PreviousMS1, NextMS1))
  }

}
EMSL-Computing/pspecterlib documentation built on Jan. 28, 2024, 8:13 p.m.