R/visualizeResults.R

Defines functions plotNumMappingsPerPeak plotPeakLocations createMatrices plotOverlapDistribution plotPeakOrientations getBackgroundStyle

Documented in createMatrices getBackgroundStyle plotNumMappingsPerPeak plotOverlapDistribution plotPeakLocations plotPeakOrientations

# This script contains functions for visualizing the mapping results returned by
# mapPeaks
# Author: Daniel Fusca


#' Visualize number of closest features to each peak
#'
#' Given a set of peaks and the closest feature(s) to each peak (as returned by
#' mapPeaks), determines how many features are closest to each peak (i.e. how
#' many features each peak maps to), since a peak may have multiple closest
#' features (e.g. in cases of multiple overlaps). The distribution of features
#' per peak is plotted as a bar graph using ggplot2, and a dataframe is returned
#' that gives the number of closest features for each peak.
#'
#' @param peakFrame A dataframe containing a set of peaks, in the format
#'   returned by importBED (i.e. each row is a chromosome, start position, end
#'   position, name, score, and strand). This should be the same set of peaks
#'   used to perform the peak mapping that created mappingResult.
#' @param mappingResult A dataframe returned by mapPeaks giving the nearest
#'   feature(s) to each peak. Format of the dataframe is peak information,
#'   followed by feature information, followed by peak position and distance
#'   relative to feature, and percent of feature overlapped by peak (see
#'   documentation for mapPeaks for more details).
#' @param plotColor The color of the histogram bars on the resulting plot
#' @param mainTitle The main title of the resulting plot
#' @param xTitle The x-axis title of the resulting plot
#' @param yTitle The y-axis title of the resulting plot
#' @param backgroundStyle The style of the plot background. This must be
#'    one of either "grey" (for a grey-panelled background), "blackAndWhite"
#'    (for a white-panelled background), or "minimal" (for a background with
#'    no panelling).
#'
#' @return A dataframe giving the number of closest features to each peak in
#'   peakFrame. The first column (Peak) gives the name of the peak, and the
#'   second column (Closest_Features) gives the number of closest features to
#'   that peak. Additionally, a histogram is plotted showing the distribution of
#'   closest features per peak.
#'
#' @examples
#' \dontrun{
#'   mappingResult <- mapPeaks(H3K27me3Peaks, WS263Genes)
#'   closestFeatureCounts <- plotNumMappingsPerPeak(H3K27me3Peaks, mappingResult)
#'   closestFeatureCounts$Peak
#'   closestFeatureCounts$Closest_Features
#' }
#'
#' @export
#' @import ggplot2
#' @import graphics
plotNumMappingsPerPeak <- function(peakFrame, mappingResult,
                          plotColor = "red",
                          mainTitle = "Number of features closest to each peak",
                          xTitle = "Number of features per peak",
                          yTitle = "Number of peaks",
                          backgroundStyle = "blackAndWhite") {

  # Check that the dataframes of peak coordinates and mapping results match the
  # expected formats, as returned by importBED and mapPeaks, respectively. This
  # checking is done by helper functions in checkInput.R, and raises an error if
  # issues are detected with these dataframes. To simplify error messages, any
  # additional warnings produced by R related to these errors are suppressed.
  suppressWarnings({
    checkBEDInput(peakFrame)
    checkMappingResultInput(mappingResult)
  })

  peakList <- peakFrame$Name

  # Count how many rows of mappingResult belong to each peak in a single pass,
  # rather than subsetting the whole mapping dataframe once per peak.
  #  - Counting is done per *unique* peak name and then spread back over
  #    peakList, so a name that appears several times in peakList receives the
  #    same count at every position.
  #  - incomparables = NA keeps a missing Peak_Name from being matched to a
  #    missing peak name, so such peaks are reported with 0 features.
  #  - tabulate() ignores the NA indices produced by mappings whose peak is not
  #    in peakList, and yields 0 for peaks that have no mappings at all.
  uniquePeakNames <- unique(peakList)
  mappedPeakIndex <- match(mappingResult$Peak_Name, uniquePeakNames,
                           incomparables = NA)
  countsPerName <- tabulate(mappedPeakIndex, nbins = length(uniquePeakNames))
  featuresPerPeak <- as.numeric(countsPerName[match(peakList,
                                                    uniquePeakNames)])

  # Plot the distribution of mappings per peak
  background <- getBackgroundStyle(backgroundStyle)

  resultPlot <- ggplot2::ggplot() + background +
    ggplot2::geom_bar(ggplot2::aes(x = featuresPerPeak), fill = plotColor) +
    ggplot2::theme(axis.text = ggplot2::element_text(size = 14),
                   axis.title = ggplot2::element_text(size = 16),
                   title = ggplot2::element_text(size = 16)) +
    ggplot2::labs(title = mainTitle,
                  x = xTitle, y = yTitle)

  # Draw the plot as a side effect; the function's value is the count table
  graphics::plot(resultPlot)

  peaksAndFeatureCounts <- data.frame(Peak = peakList,
                                      Closest_Features = featuresPerPeak)

  return(peaksAndFeatureCounts)
}


#' Plot locations of mapped peaks relative to start of closest feature
#'
#' Given a dataframe of mapped peaks as returned by mapPeaks, plots the
#' distribution of peaks relative to the start position of their closest
#' feature. On the resulting plot, the position of each dot gives how many peaks
#' overlap with the interval found that distance upstream or downstream of their
#' closest feature's start point. By default, 2500 bases upstream and 2500 bases
#' downstream of feature start positions are shown, and the user can specify how
#' many bases upstream and downstream of feature start positions should be
#' plotted. This function uses createMatrices to create the matrices of relative
#' peak locations used for plotting (for more details on these matrices, see the
#' documentation for createMatrices). The matrices used for plotting can
#' optionally be returned.
#'
#' The idea for this function, its parameters, and its helper createMatrices
#' (creating a matrix of bin scores relative to the start of genomic features,
#' in order to plot scores relative to feature starts) is based on the
#' computeMatrix and plotHeatmap functions from the deepTools software suite
#' (see References), but all code within
#' this function is my own unless otherwise noted; no code was actually taken
#' from deepTools. Plots are made using ggplot2, with the use of ggplot's
#' expand_limits function to control y-axis limits inspired by a StackOverflow
#' post by Brian Diggs (see References), and use of
#' ggplot's geom_point and geom_vline functions based on an online tutorial for
#' ggplot2
#' (see References)
#'
#' @param mappingResult A dataframe returned by mapPeaks giving the nearest
#'   feature(s) to each peak. Format of the dataframe is peak information,
#'   followed by feature information, followed by peak position and distance
#'   relative to feature, and percent of feature overlapped by peak (see
#'   documentation for mapPeaks for more details).
#' @param upstreamBins The number of bins (plotted points) upstream of each
#'   feature start position to be used for plotting. The total distance plotted
#'   upstream of feature starts is (upstreamBins * basesPerBin) base pairs
#' @param downstreamBins The number of bins (plotted points) downstream of each
#'   feature start position to be used for plotting. The total distance plotted
#'   downstream of feature starts is (downstreamBins * basesPerBin) base pairs
#' @param basesPerBin The width of each bin in the computed matrices, in base
#'   pairs. Combined with upstreamBins and downstreamBins, this parameter
#'   controls how far upstream and downstream of feature starts the matrices
#'   cover.
#' @param returnMatrix If True, the list of matrices calculated by
#'   createMatrices will be returned to the user.
#' @param plotColor The color of the points on the resulting plot
#' @param mainTitle The main title of the resulting plot
#' @param xTitle The x-axis title of the resulting plot
#' @param yTitle The y-axis title of the resulting plot
#' @param backgroundStyle The style of the plot background. This must be
#'    one of either "grey" (for a grey-panelled background), "blackAndWhite"
#'    (for a white-panelled background), or "minimal" (for a background with
#'    no panelling).
#'
#' @return If returnMatrix is True, the list of matrices calculated by
#'   createMatrices will be returned (see documentation of createMatrices for
#'   more details); otherwise, NULL is returned. In either case, a graph showing
#'   the distribution of peak locations relative to the start position of their
#'   mapped feature is shown, with dots on the graph representing peaks
#'   overlapping the interval found that many bases up/downstream of the start
#'   of their closest feature.
#'
#' @examples
#' \dontrun{
#'   mappingResult <- mapPeaks(H3K27me3Peaks, WS263Genes)
#'   plotPeakLocations(mappingResult)
#'   matrixList <- plotPeakLocations(mappingResult, 500, 500, returnMatrix = TRUE)
#'   featureOneUpstreamScores <- matrixList[[1]][1, ]
#'   featureTwoDownstreamScores <- matrixList[[2]][2, ]
#' }
#'
#' @references
#' Brian Diggs. "set only lower bound of a limit for ggplot". 26 June 2012.
#' Accessed 29 September 2019. https://stackoverflow.com/a/11214406
#'
#' Fidel Ramirez, Devon P. Ryan, Bjorn Gruning, Vivek Bhardwaj, Fabian Kilpert,
#' Andreas S. Richter, Steffen Heyne, Friederike Dundar, and Thomas Manke.
#' deepTools2: A next Generation Web Server for Deep-Sequencing Data Analysis.
#' Nucleic Acids Research (2016). doi:10.1093/nar/gkw257.
#'
#' "ggplot2 line plot : Quick start guide - R software and data visualization"
#' Accessed 29 September 2019.
#' http://www.sthda.com/english/wiki/ggplot2-line-plot-quick-start-guide-r-software-and-data-visualization
#'
#' @export
#' @import ggplot2
#' @import graphics
plotPeakLocations <- function(mappingResult, upstreamBins = 250,
                        downstreamBins = 250, basesPerBin = 10,
                        returnMatrix = FALSE,
                        plotColor = "red",
                        mainTitle = "Peak positions relative to feature start",
                        xTitle = "Position relative to feature start (bp)",
                        yTitle = "Number of peaks",
                        backgroundStyle = "blackAndWhite") {

  # Check that the dataframe of mapping results matches the expected format, as
  # returned by mapPeaks. This checking is done by a helper function in
  # checkInput.R, and raises an error if issues are detected with this
  # dataframe. To simplify error messages, any additional warnings produced by R
  # related to these errors are suppressed.
  suppressWarnings({
    checkMappingResultInput(mappingResult)
  })

  # Check that the parameters for bin number and width are positive whole
  # numbers, and raise an error if they aren't. Each parameter must be a single
  # non-missing numeric value; checking this first means NA or vector inputs
  # produce the error below instead of a cryptic failure inside if().
  isSingleNumber <- vapply(
    list(upstreamBins, downstreamBins, basesPerBin),
    function(param) is.numeric(param) && length(param) == 1 && !is.na(param),
    logical(1)
  )
  if (!all(isSingleNumber)) {
    stop("Input parameters for bin number and width must be numbers.")
  }

  binParams <- c(upstreamBins, downstreamBins, basesPerBin)

  if (any(binParams <= 0)) {
    stop("Input parameters for bin number and width must be positive numbers.")
  }

  if (any(binParams != round(binParams))) {
    stop("Input parameters for bin number and width must be whole numbers.")
  }

  matrices <- createMatrices(mappingResult, upstreamBins, downstreamBins,
                             basesPerBin)
  combinedMatrix <- cbind(matrices[[1]], matrices[[2]])

  # Column i of the combined matrix is plotted at basesPerBin * (i -
  # upstreamBins), so the last upstream bin sits at x = 0 and the first
  # downstream bin at x = basesPerBin. The y value is the number of peaks
  # overlapping that bin, summed over all features.
  binPositions <- basesPerBin * (seq_len(ncol(combinedMatrix)) - upstreamBins)
  peaksPerBin <- colSums(combinedMatrix)

  # Use of expand_limits to control y-axis limits inspired by StackOverflow post
  # by Brian Diggs (https://stackoverflow.com/a/11214406).
  # Usage of geom_point and geom_vline based on online tutorial for ggplot2
  # (http://www.sthda.com/english/wiki/ggplot2-line-plot-quick-start-guide-r-software-and-data-visualization)
  background <- getBackgroundStyle(backgroundStyle)

  # NOTE(review): `size` for line width is kept as in the original code; newer
  # ggplot2 releases prefer `linewidth` for lines - confirm the minimum
  # supported ggplot2 version before switching.
  peakPlot <- ggplot2::ggplot() + background +
    ggplot2::geom_point(ggplot2::aes(x = binPositions, y = peaksPerBin),
                        color = plotColor) +
    ggplot2::geom_vline(xintercept = 0, linetype = "dashed", size = 1.25) +
    ggplot2::expand_limits(y = 0) +
    ggplot2::theme(axis.text = ggplot2::element_text(size = 14),
                   axis.title = ggplot2::element_text(size = 16),
                   title = ggplot2::element_text(size = 16)) +
    ggplot2::labs(title = mainTitle,
                  x = xTitle,
                  y = yTitle)

  graphics::plot(peakPlot)

  # The plot is always drawn; the matrices are only handed back on request
  if (returnMatrix) {
    return(matrices)
  } else {
    return(invisible(NULL))
  }
}


#' Create matrices of peak locations before and after feature start
#'
#' Given a dataframe of mapped peaks as returned by mapPeaks, returns 2 matrices
#' to be used by plotPeakLocations to plot peak distributions relative to the
#' start of closest features. Every matrix row corresponds to a different
#' feature, and every column is a different "bin" - a genomic interval which is
#' a fixed distance upstream or downstream of a feature's start position. Each
#' matrix entry gives the number of peaks mapped to that feature which overlap
#' with that bin (i.e. the number of peaks that overlap an interval a fixed
#' distance up/downstream of the feature's start position, accounting for
#' feature strandedness). One matrix contains bins found upstream of the feature
#' start, and one matrix contains bins downstream of the feature start. The user
#' can specify how many bins each matrix will use and how wide each bin should
#' be, allowing the user to customize how far upstream or downstream of the
#' feature start will be used for plotting. Note that this is a helper function
#' for plotPeakLocations and should NOT be called directly by the user.
#'
#' The idea for this function, its parameters and the structure of its output
#' (creating a matrix of bin scores relative to the start of genomic features,
#' in order to plot scores relative to feature starts) is based on the
#' computeMatrix and plotHeatmap functions from the deepTools software suite
#' (see References), but the output is
#' slightly different and all code within this function is my own; no code was
#' actually taken from deepTools.
#'
#' @param mappingResult A dataframe returned by mapPeaks giving the nearest
#'   feature(s) to each peak. Format of the dataframe is peak information,
#'   followed by feature information, followed by peak position and distance
#'   relative to feature, and percent of feature overlapped by peak (see
#'   documentation for mapPeaks for more details).
#' @param upstreamBins The number of bins upstream of the feature start to be
#'   used in the returned matrices. The resulting plot will include all closest
#'   peaks within (upstreamBins * basesPerBin) base pairs upstream of the feature
#'   start
#' @param downstreamBins The number of bins downstream of the feature start to
#'   be used in the returned matrices. The resulting plot will include all
#'   closest peaks within (downstreamBins * basesPerBin) base pairs downstream
#'   of the feature start
#' @param basesPerBin The width of each bin in the returned matrices, in base
#'   pairs. Combined with upstreamBins and downstreamBins, this parameter
#'   controls how far upstream and downstream of feature starts the matrices
#'   cover.
#'
#' @return A list of two matrices. The first matrix (upstreamMatrix) counts
#'   peaks in the region upstream of feature starts, and the second matrix
#'   (downstreamMatrix) counts peaks in the region downstream of feature starts.
#'   Each matrix row is a different feature in mapPeaks, and each column is a
#'   bin - an interval that is a fixed distance away from the start of that
#'   feature (e.g. with default settings, column 1 of upstreamMatrix represents
#'   the interval 2500-2491 bases upstream of the start of that row's feature).
#'   Each matrix entry gives the number of peaks which overlap a given bin for a
#'   given feature. (Note that only peaks mapped to this feature are considered
#'   for this calculation). Bins in the rightmost column of upstreamMatrix are
#'   closest to the feature start, and move further upstream of the feature from
#'   right to left in the matrix. Similarly, bins in the leftmost column of
#'   downstreamMatrix are closest to the feature start, and move further
#'   downstream from left to right in the matrix. The width of each bin is
#'   determined by basesPerBin, and the number of bins in upstreamMatrix and
#'   downstreamMatrix are determined by upstreamBins and downstreamBins,
#'   respectively.
#'
#' @examples
#' \dontrun{
#'   mappingResult <- mapPeaks(H3K27me3Peaks, WS263Genes)
#'   matrices <- createMatrices(mappingResult)
#'   featureOneUpstreamScores <- matrices[[1]][1, ]
#'   featureTwoDownstreamScores <- matrices[[2]][2, ]
#'   widerMatrices <- createMatrices(mappingResult, 500, 500)
#' }
#'
#' @references
#' Fidel Ramirez, Devon P. Ryan, Bjorn Gruning, Vivek Bhardwaj, Fabian Kilpert,
#' Andreas S. Richter, Steffen Heyne, Friederike Dundar, and Thomas Manke.
#' deepTools2: A next Generation Web Server for Deep-Sequencing Data Analysis.
#' Nucleic Acids Research (2016). doi:10.1093/nar/gkw257.
#'
createMatrices <- function(mappingResult, upstreamBins = 250,
                           downstreamBins = 250, basesPerBin = 10) {

  features <- unique(mappingResult$Feature_Name)

  # One row per feature, one column per bin; every entry starts at 0 peaks
  upstreamMatrix <- matrix(0, nrow = length(features), ncol = upstreamBins)
  downstreamMatrix <- matrix(0, nrow = length(features), ncol = downstreamBins)

  # Distance (in bases) of each bin from the feature start. These depend only
  # on the bin parameters, so they are computed once for all features.
  # Upstream offsets run from the farthest bin to the nearest one, so that the
  # rightmost upstream column is the bin adjacent to the feature start.
  # Downstream offsets start at 0, i.e. the first downstream bin begins at the
  # feature start itself.
  upstreamOffsets <- rev(seq_len(upstreamBins)) * basesPerBin
  downstreamOffsets <- (seq_len(downstreamBins) - 1) * basesPerBin

  # Helper: for each bin, count the peaks whose interval overlaps it. Rows of
  # the intermediate logical matrix are peaks, columns are bins.
  countPeaksPerBin <- function(peakStarts, peakEnds, binStarts, binEnds) {
    isOverlapped <- outer(peakStarts, binEnds, "<=") &
      outer(peakEnds, binStarts, ">=")
    colSums(isOverlapped)
  }

  # Iterate over every different feature in mappingResult
  for (featureIndex in seq_along(features)) {

    feature <- features[featureIndex]

    # Get every peak that mapped to this feature
    peaksMappedToFeature <- mappingResult[which(mappingResult$Feature_Name ==
                                                  feature), ]

    # All rows for one feature are taken to share the same coordinates, so the
    # first row is used as the representative.
    featureStart <- peaksMappedToFeature$Feature_Start[1]
    featureEnd <- peaksMappedToFeature$Feature_End[1]
    featureStrand <- peaksMappedToFeature$Feature_Strand[1]

    # A missing (NA) strand counts as "no strand information" and is handled
    # like the forward strand; isTRUE() keeps the NA comparison from raising an
    # error inside if().
    isReverseStrand <- isTRUE(as.character(featureStrand) == "-")

    # Determine the coordinates of the bins upstream and downstream of the
    # feature start point, accounting for some features being on the reverse
    # strand. Each bin spans [binStart, binEnd] inclusive.
    if (isReverseStrand) {
      # Gene is on reverse strand, so its start is Feature_End, upstream is to
      # the right and downstream is to the left
      upstreamBinEnds <- featureEnd + upstreamOffsets
      upstreamBinStarts <- upstreamBinEnds - basesPerBin + 1

      downstreamBinEnds <- featureEnd - downstreamOffsets
      downstreamBinStarts <- downstreamBinEnds - basesPerBin + 1
    } else {
      # Gene is either on forward strand or has no strand information, so
      # upstream is to the left and downstream is to the right
      upstreamBinStarts <- featureStart - upstreamOffsets
      upstreamBinEnds <- upstreamBinStarts + basesPerBin - 1

      downstreamBinStarts <- featureStart + downstreamOffsets
      downstreamBinEnds <- downstreamBinStarts + basesPerBin - 1
    }

    # Columns 2 and 3 of mappingResult hold the left and right endpoint of each
    # peak that had this feature as its closest feature
    peakLeftPoints <- as.numeric(peaksMappedToFeature[[2]])
    peakRightPoints <- as.numeric(peaksMappedToFeature[[3]])

    # Add the bin scores for this feature to the matrices of bin scores for all
    # features to be returned
    upstreamMatrix[featureIndex, ] <- countPeaksPerBin(peakLeftPoints,
                                                       peakRightPoints,
                                                       upstreamBinStarts,
                                                       upstreamBinEnds)
    downstreamMatrix[featureIndex, ] <- countPeaksPerBin(peakLeftPoints,
                                                         peakRightPoints,
                                                         downstreamBinStarts,
                                                         downstreamBinEnds)
  }

  return(list(upstreamMatrix, downstreamMatrix))

}

#' Plot distribution of overlap proportions
#'
#' Given a mapping of peaks returned by mapPeaks, plots the distribution of
#' overlap percentages (the proportion of each feature overlapped by each mapped
#' peak) as a histogram using ggplot2. Note that if a peak has multiple closest
#' features (e.g. in cases of overlaps), the overlaps with every closest feature
#' will each be plotted.
#'
#' @param mappingResult A dataframe returned by mapPeaks giving the nearest
#'   feature(s) to each peak. Format of the dataframe is peak information,
#'   followed by feature information, followed by peak position and distance
#'   relative to feature, and percent of feature overlapped by peak (see
#'   documentation for mapPeaks for more details).
#' @param plotColor The color of the histogram bars on the resulting plot
#' @param mainTitle The main title of the resulting plot
#' @param xTitle The x-axis title of the resulting plot
#' @param yTitle The y-axis title of the resulting plot
#' @param backgroundStyle The style of the plot background. This must be
#'    one of either "grey" (for a grey-panelled background), "blackAndWhite"
#'    (for a white-panelled background), or "minimal" (for a background with
#'    no panelling).
#'
#' @return A histogram giving the distribution of peak overlaps for every mapped
#'   peak-feature pair given in mappingResult. Each overlap is the percentage of
#'   a feature that is overlapped by a peak mapped to that feature.
#'
#' @examples
#' \dontrun{
#'   mappingResult <- mapPeaks(H3K27me3Peaks, WS263Genes)
#'   plotOverlapDistribution(mappingResult)
#' }
#'
#' @export
#' @import ggplot2
plotOverlapDistribution <- function(mappingResult,
                plotColor = "red",
                mainTitle = "Distribution of feature overlaps by mapped peaks",
                xTitle = "Proportion of feature overlapped by mapped peak",
                yTitle = "Number of peak-feature pairs",
                backgroundStyle = "blackAndWhite") {

  # Check that the dataframe of mapping results matches the expected format, as
  # returned by mapPeaks. This checking is done by a helper function in
  # checkInput.R, and raises an error if issues are detected with this
  # dataframe. To simplify error messages, any additional warnings produced by R
  # related to these errors are suppressed.
  suppressWarnings({
    checkMappingResultInput(mappingResult)
  })

  background <- getBackgroundStyle(backgroundStyle)

  # One value per peak-feature pair (i.e. per row of mappingResult)
  overlapPercents <- mappingResult$Percent_Overlap

  # All ggplot2 functions are namespace-qualified so this function does not
  # depend on ggplot2 being attached or imported wholesale.
  resultPlot <- ggplot2::ggplot() + background +
    ggplot2::geom_histogram(ggplot2::aes(x = overlapPercents), bins = 100,
                            fill = plotColor) +
    ggplot2::theme(axis.text = ggplot2::element_text(size = 14),
                   axis.title = ggplot2::element_text(size = 16),
                   title = ggplot2::element_text(size = 16)) +
    ggplot2::labs(title = mainTitle,
                  x = xTitle,
                  y = yTitle)

  # The plot object is returned (not drawn); it is displayed when printed
  return(resultPlot)
}



#' Plot distribution of qualitative peak positions relative to closest features
#'
#' Given a mapping of peaks returned by mapPeaks, plots the qualitative
#' distribution of relative peak locations to their closest feature(s), as
#' classified by mapPeaks. These locations are categorical classifications based
#' on how peaks are located upstream or downstream to their closest feature.
#' Note that if a peak has multiple closest features (e.g. in the case of
#' overlaps), the relative position to each feature will be included in the
#' plot.
#'
#' @param mappingResult A dataframe returned by mapPeaks giving the nearest
#'   feature(s) to each peak. Format of the dataframe is peak information,
#'   followed by feature information, followed by peak position and distance
#'   relative to feature, and percent of feature overlapped by peak (see
#'   documentation for mapPeaks for more details).
#' @param plotColor The color of the bars on the resulting plot
#' @param mainTitle The main title of the resulting plot
#' @param xTitle The x-axis title of the resulting plot
#' @param yTitle The y-axis title of the resulting plot
#' @param backgroundStyle The style of the plot background. This must be
#'    one of either "grey" (for a grey-panelled background), "blackAndWhite"
#'    (for a white-panelled background), or "minimal" (for a background with
#'    no panelling).
#'
#' @return A bar plot giving the distribution of relative peak positions for
#'   every mapped peak-feature pair given in mappingResult. The 6 possible
#'   classifications for relative peak position are:
#'   \itemize{
#'      \item "Downstream (no overlap)" - the peak is downstream of the
#'            feature but does not overlap it
#'      \item "Overlap (downstream)" - the peak overlaps with the feature
#'            and also extends downstream of the feature, but not upstream
#'      \item "Overlap (upstream and downstream)" - the feature is contained
#'            entirely within the peak
#'      \item "Overlap (upstream)" - the peak overlaps with the feature and
#'            also extends upstream of the feature, but not downstream
#'      \item "Overlap (within feature only)" - the peak is contained
#'            entirely within the feature
#'      \item "Upstream (no overlap)" - the peak is upstream of the feature
#'            but does not overlap it
#'   }
#'
#' @examples
#' \dontrun{
#'   mappingResult <- mapPeaks(H3K27me3Peaks, WS263Genes)
#'   plotPeakOrientations(mappingResult)
#' }
#'
#' @export
#' @import ggplot2
plotPeakOrientations <- function(mappingResult,
                  plotColor = "red",
                  mainTitle = "Position of peaks relative to closest features",
                  xTitle = "Relative peak position",
                  yTitle = "Number of peak-feature pairs",
                  backgroundStyle = "blackAndWhite"){

  # Check that the dataframe of mapping results matches the expected format, as
  # returned by mapPeaks. This checking is done by a helper function in
  # checkInput.R, and raises an error if issues are detected with this
  # dataframe. To simplify error messages, any additional warnings produced by R
  # related to these errors are suppressed.
  suppressWarnings({
    checkMappingResultInput(mappingResult)
  })

  background <- getBackgroundStyle(backgroundStyle)

  # Treat the position labels as categories so each gets its own bar; one
  # value per peak-feature pair (i.e. per row of mappingResult)
  peakPositions <- as.factor(mappingResult$Peak_Position)

  # All ggplot2 functions are namespace-qualified so this function does not
  # depend on ggplot2 being attached or imported wholesale. The x-axis labels
  # are rotated because the category names are long.
  resultPlot <- ggplot2::ggplot() + background +
    ggplot2::geom_bar(ggplot2::aes(x = peakPositions),
                      fill = plotColor) +
    ggplot2::theme(axis.text = ggplot2::element_text(size = 12),
                   axis.text.x = ggplot2::element_text(angle = 45, hjust = 1),
                   axis.title = ggplot2::element_text(size = 16),
                   title = ggplot2::element_text(size = 16)) +
    ggplot2::labs(title = mainTitle,
                  x = xTitle, y = yTitle)

  # The plot object is returned (not drawn); it is displayed when printed
  return(resultPlot)
}


#' Return a background style for plotting functions
#'
#' Given a string corresponding to a background style, returns a matching
#' ggplot2 function that can be used by PeakMapper's plotting functions to
#' create plots with the specified background style. If the supplied string is
#' not one of either "grey", "blackAndWhite", or "minimal", the ggplot2 function
#' corresponding to the "blackAndWhite" style will be returned. Note that this
#' is a helper function for PeakMapper's various plotting functions and should
#' NOT be called directly by the user.
#'
#' @param background The style of the plot background. This must be
#'    one of either "grey" (for a grey-panelled background), "blackAndWhite"
#'    (for a white-panelled background), or "minimal" (for a background with
#'    no panelling). Otherwise, the background style corresponding to
#'    "blackAndWhite" will be returned
#'
#' @return A ggplot2 theme function to be used by one of PeakMapper's plotting
#'    functions to determine the background layout. This function is either
#'    theme_grey() (if "grey" is input), theme_bw() (if "blackAndWhite" is
#'    input, or the user does not provide a valid input), or theme_classic()
#'    (if "minimal" is input)
#'
#' @examples
#' \dontrun{
#'   mappingResult <- mapPeaks(H3K27me3Peaks, WS263Genes)
#'   backgroundStyle <- getBackgroundStyle("grey")
#'   ggplot2::ggplot() + backgroundStyle +
#'        ggplot2::geom_bar(aes(as.factor(mappingResult$Peak_Position)))
#' }
#'
#' @import ggplot2
getBackgroundStyle <- function(background) {

  # Supported style names and the ggplot2 theme constructor each one selects
  themeConstructors <- list(grey = ggplot2::theme_gray,
                            blackAndWhite = ggplot2::theme_bw,
                            minimal = ggplot2::theme_classic)

  # Only a single, non-missing value can name a style. Checking this first
  # means NULL, NA or vector inputs fall through to the warning below instead
  # of raising an error inside if().
  isSingleValue <- is.atomic(background) && length(background) == 1 &&
    !is.na(background)

  if (isSingleValue &&
      as.character(background) %in% names(themeConstructors)) {
    return(themeConstructors[[as.character(background)]]())
  }

  # The user did not provide a valid option, so give them a warning and return
  # the default background style
  warning(paste(toString(background),
                "is not a valid background style. Background styles must be",
                "one of either 'grey', 'blackAndWhite', or 'minimal'."))
  return(ggplot2::theme_bw())

}



# [END]
fuscada2/PeakMapper documentation built on Dec. 8, 2019, 12:35 p.m.