R/sequence_plot.R

Defines functions sequence_plot

Documented in sequence_plot

#' Plot the Sequence with Matched Fragments Annotations, split by a,b,c and x,y,z annotations
#'
#' @description A static plot with the sequence annotated with the lowest ppm error fragment
#'    per N-terminus (a, b, c) and C-terminus (x, y, z) fragment
#'
#' @param MatchedPeaks A matched_peaks object generated by get_matched_peaks. Required.
#' @param IncludeIsotopes A logical to indicate whether isotopes should be included. Default is FALSE.
#' @param RemoveChargeAnnotation A logical to indicate whether the annotation for charge states should be included
#'    in the plot. This does not remove charge states; rather, the annotation for charge states. Default is FALSE.
#' @param RemoveModification A logical to indicate whether the modification on the residue
#'    should be removed in the plot. Default is FALSE. 
#' @param WrapLength An integer to index how many letters should be printed before wrapping.
#'    Default is 10, but the value will vary depending on the size of your visualization.
#' @param LabelSize The size of the PTM labels. Default is 3. 
#'
#' @examples
#' \dontrun{
#'
#' # Test bottom up data
#' BU_Peak <- get_peak_data(ScanMetadata = BU_ScanMetadata, ScanNumber = 31728)
#' BU_Match <- get_matched_peaks(ScanMetadata = BU_ScanMetadata, PeakData = BU_Peak)
#' BU_Match2 <- get_matched_peaks(ScanMetadata = BU_ScanMetadata, PeakData = BU_Peak, AlternativeSequence = "IGA[Acetyl]VGGTENVSLTQSQMPAHNHLVAASTVSGTVKPLANDIIGAGLNK")
#'
#' # Make plots
#' sequence_plot(MatchedPeaks = BU_Match)
#' sequence_plot(MatchedPeaks = BU_Match, RemoveChargeAnnotation = TRUE)
#' sequence_plot(MatchedPeaks = BU_Match2)
#' 
#' 
#' }
#'
#' @export
sequence_plot <- function(MatchedPeaks,
                          IncludeIsotopes = FALSE,
                          RemoveChargeAnnotation = FALSE,
                          RemoveModification = FALSE, 
                          WrapLength = 8,
                          LabelSize = 3) {

  ##################
  ## CHECK INPUTS ##
  ##################

  # Assert that Matched Peaks is an object of the matched_peaks
  if ("matched_peaks" %in% class(MatchedPeaks) == FALSE) {
    stop("MatchedPeaks is not an object of the matched_peaks class generated by get_matched_peaks.")
  }

  # Assert that Include Isotopes is a single logical
  if (is.na(IncludeIsotopes) || is.logical(IncludeIsotopes) == FALSE || length(IncludeIsotopes) > 1) {
    stop("IncludeIsotopes needs to be a single logical: a TRUE or FALSE.")
  }

  # Assert that Remove Charge Annotation is a single logical
  if (is.na(RemoveChargeAnnotation) || is.logical(RemoveChargeAnnotation) == FALSE || length(RemoveChargeAnnotation) > 1) {
    stop("RemoveChargeAnnotation needs to be a single logical: a TRUE or FALSE.")
  }
  
  # Assert that Remove Modification is a single logical
  if (is.na(RemoveModification) || is.logical(RemoveModification) == FALSE || length(RemoveModification) > 1) {
    stop("RemoveModification needs to be a single logical: a TRUE or FALSE")
  }

  # Assert that WrapLength is a numeric, and then convert to integer
  if (is.numeric(WrapLength) == FALSE || length(WrapLength) > 1) {
    stop("WrapLength must be a single integer.")
  }

  WrapLength <- abs(round(WrapLength))

  ##############################
  ## MAKE SEQUENCE DATA FRAME ##
  ##############################

  # Split the sequence and number results
  SplitSeq <- attr(MatchedPeaks, "pspecter")$Sequence %>% strsplit("") %>% unlist()
  Residues <- paste0(SplitSeq, 1:length(SplitSeq))

  # Pull out fragment table from fragments
  FragmentTable <- MatchedPeaks

  # Remove isotopes if indicated
  if (IncludeIsotopes == FALSE) {
    FragmentTable <- FragmentTable[FragmentTable$Isotope == "M",]
  }

  # Split data by abc and xyz
  FragmentTableABC <- FragmentTable[FragmentTable$`General Type` %in% c("a", "b", "c"),]
  FragmentTableXYZ <- FragmentTable[FragmentTable$`General Type` %in% c("x", "y", "z"),]

  # Sort by best match per residue
  BestMatchABC <- FragmentTableABC %>% dplyr::group_by(Residue) %>% dplyr::slice_min(`PPM Error`, n = 1)
  BestMatchXYZ <- FragmentTableXYZ %>% dplyr::group_by(Residue) %>% dplyr::slice_min(`PPM Error`, n = 1)

  # Create a data table for the sequence where each letter has an x y position
  Seq <- attr(MatchedPeaks, "pspecter")$Sequence
  SeqDF <- data.table::data.table(
    "Amino Acid" = strsplit(Seq, "") %>% unlist(),
    "x" = rep_len(1:WrapLength, nchar(Seq)),
    "y" = (1:nchar(Seq) / -WrapLength) %>% floor()
  )

  # Add segments to identified ions: ABC Ions
  SeqDF$xbottom <- SeqDF$x + 0.45
  SeqDF$xbottomend <- SeqDF$x - 0.45
  SeqDF$ybottom <- SeqDF$y - 0.45
  SeqDF$ybottomend <- SeqDF$y - 0.45
  SeqDF$xright <- SeqDF$x + 0.45
  SeqDF$xrightend <- SeqDF$x + 0.45
  SeqDF$yright <- SeqDF$y - 0.45
  SeqDF$yrightend <- SeqDF$y - 0.1
  SeqDF$bottomcolor <- lapply(1:nchar(Seq), function(num) {
    Query <- BestMatchABC[BestMatchABC$`N Position` == num,]
    ifelse(nrow(Query) != 0, Query$`General Type`, NA)}) %>% unlist()

  # Add Segment to identified ions: XYZ Ions
  SeqDF$xtop <- SeqDF$x - 0.45
  SeqDF$xtopend <- SeqDF$x + 0.45
  SeqDF$ytop <- SeqDF$y + 0.45
  SeqDF$ytopend <- SeqDF$y + 0.45
  SeqDF$xleft <- SeqDF$x - 0.45
  SeqDF$xleftend <- SeqDF$x - 0.45
  SeqDF$yleft <- SeqDF$y + 0.45
  SeqDF$yleftend <- SeqDF$y + 0.1
  SeqDF$topcolor <- lapply(1:nchar(Seq), function(num) {
    Query <- BestMatchXYZ[BestMatchXYZ$`N Position` == num,]
    ifelse(nrow(Query) != 0, Query$`General Type`, NA)}) %>% unlist()

  # Add annotation
  SeqDF$abcAnnotationY <- SeqDF$y - 0.25
  SeqDF$abcAnnotation <- lapply(1:nchar(Seq), function(num) {
    Query <- BestMatchABC[BestMatchABC$`N Position` == num,]
    if (nrow(Query) != 0) {
      charge <- ifelse(Query$Z == 1, "", paste0(Query$Z, "+"))
      if (RemoveChargeAnnotation) {charge <- ""}
      paste(Query$Ion, charge)
    } else {paste("")}
  })
  SeqDF$xyzAnnotationY <- SeqDF$y + 0.25
  SeqDF$xyzAnnotation <- lapply(1:nchar(Seq), function(num) {
    Query <- BestMatchXYZ[BestMatchXYZ$`N Position` == num,]
    if (nrow(Query) != 0) {
      charge <- ifelse(Query$Z == 1, "", paste0(Query$Z, "+"))
      if (RemoveChargeAnnotation) {charge <- ""}
      paste(Query$Ion, charge)
    } else {paste("")}
  })

  # Set color list
  ColorList <- list("a" = "forestgreen", "b" = "steelblue", "c" = "darkviolet",
                    "x" = "pink3", "y" = "red", "z" = "darkorange")

  # Create base ggplot
  SeqPlot <- ggplot2::ggplot() +
    ggplot2::geom_text(data = SeqDF, ggplot2::aes(x = x, y = y, label = `Amino Acid`), size = 6) + 
    ggplot2::theme_void() + 
    ggplot2::ylim(c(min(SeqDF$y - 1), 0)) +
    ggplot2::geom_segment(data = SeqDF, ggplot2::aes(x = xbottom, xend = xbottomend, y = ybottom, yend = ybottomend, color = bottomcolor)) +
    ggplot2::geom_segment(data = SeqDF, ggplot2::aes(x = xright, xend = xrightend, y = yright, yend = yrightend, color = bottomcolor)) +
    ggplot2::geom_segment(data = SeqDF, ggplot2::aes(x = xtop, xend = xtopend, y = ytop, yend = ytopend, color = topcolor)) +
    ggplot2::geom_segment(data = SeqDF, ggplot2::aes(x = xleft, xend = xleftend, y = yleft, yend = yleftend, color = topcolor)) +
    ggplot2::geom_text(data = SeqDF, ggplot2::aes(x = x, y = abcAnnotationY, label = abcAnnotation, color = bottomcolor, size = 1)) +
    ggplot2::geom_text(data = SeqDF, ggplot2::aes(x = x, y = xyzAnnotationY, label = xyzAnnotation, color = topcolor, size = 1)) +
    ggplot2::scale_color_manual(values = ColorList, na.value = NA) + ggplot2::theme(legend.position = "none")
  
  # Add modifications
  if (!RemoveModification & !is.null(attributes(MatchedPeaks)$pspecter$PTMs)) {
    
    # Extract positions
    PTM_Anno <- SeqDF[attributes(MatchedPeaks)$pspecter$PTMs$`N Position`, c("x", "y")]
    
    # Shift x name right by 0.5
    PTM_Anno$x <- PTM_Anno$x + 0.25
    
    # Change name to modification
    PTM_Anno$PTM <- substr(attributes(MatchedPeaks)$pspecter$PTMs$Name, 1, 6)
    
    SeqPlot <- SeqPlot + ggplot2::geom_label(data = PTM_Anno, ggplot2::aes(x = x, y = y, label = PTM), size = LabelSize)
    
  }

  return(SeqPlot)

}
EMSL-Computing/pspecterlib documentation built on Jan. 28, 2024, 8:13 p.m.