# R/plotting.R
#
# Defines functions plotMIRAScores xAxisForRegionPlots plotMIRAProfiles
#
# Documented in plotMIRAProfiles plotMIRAScores

# Plotting functions to visualize outputs of MIRA
# Visualize MIRA profiles and MIRA scores




#' Plot summary methylation profile
#' 
#' Plot one or multiple methylation profiles. Displays each region set
#' in a different subplot.
#' If you only want to plot certain region sets,
#' subset with the `featID` parameter.
#' The input table is left untouched: it is filtered into a new table
#' rather than being re-keyed/reordered by reference.
#' 
#' @param binnedRegDT A data.table with specific column names containing:
#' bin numbers (bin column), 
#' aggregated methylation values (methylProp column), 
#' name of the region set (featureID column), 
#' case/control column (sampleType column by default, optional; 
#' see `sampleTypeColName`), 
#' sample name (sampleName column, used to group lines).
#' @param featID Region set names in a single string or vector of strings.
#' Names not present in the featureID column are dropped with a warning;
#' an error is raised if none of them are present.
#' @param plotType "line" or "jitter" (ggplot2 geoms). 
#' @param colBlindOption If TRUE, function will plot with a color blind
#' friendly palette which could be helpful when plotting multiple colors.
#' @param sampleTypeColName character object. The name of the column that
#' contains sample type or condition information (eg case vs control).
#' Line/point color is assigned based on this column whenever it is 
#' present in `binnedRegDT`; if it is absent a warning is given and 
#' no color is mapped.
#' 
#' @return A plot of class "gg"/ "ggplot" that shows MIRA profiles
#' @examples
#' data("exampleBins", package = "MIRA")
#' MIRAplot <- plotMIRAProfiles(binnedRegDT = exampleBins)
#' 
#' @export
plotMIRAProfiles <- function(binnedRegDT, 
                            featID = unique(binnedRegDT[, featureID]), 
                            plotType = "line",
                            colBlindOption = FALSE,
                            sampleTypeColName="sampleType"){
    # fail early on an unsupported plotType, before any other work is done
    if (!(length(plotType) == 1 && plotType %in% c("line", "jitter"))) {
        stop('The only supported values for plotType are "line" and "jitter"')
    }
    
    # Select the requested region sets with a logical filter instead of
    # setkey() + keyed join: setkey() would reorder the caller's table
    # by reference, and the keyed join would add all-NA rows for unknown IDs.
    keepRows <- binnedRegDT[["featureID"]] %in% featID
    if (!any(keepRows)) {
        stop("None of the featID values were found in the featureID column.")
    }
    missingID <- setdiff(featID, binnedRegDT[["featureID"]])
    if (length(missingID) > 0) {
        warning("These featID values were not found in the featureID column: ",
                paste(missingID, collapse = ", "))
    }
    plotDT <- binnedRegDT[keepRows]
    
    # number of bins among the region sets that are actually plotted, 
    # so that the axis labels line up with the plotted bins
    binNum <- max(plotDT[["bin"]])
    
    brewerPalette <- if (colBlindOption) "Dark2" else "Set1"
    
    binPlot <- ggplot(data = plotDT, 
                      mapping = aes(x = factor(bin), 
                                    y = methylProp * 100)) +
        theme_classic() + ylim(c(0, 100)) +
        geom_hline(yintercept=c(0), alpha=.2) +
        ylab("DNA Methylation (%)") + 
        xlab("Genome Regions Surrounding Sites") +
        scale_x_discrete(labels=xAxisForRegionPlots(binNum)) + 
        scale_color_brewer(name = "Sample Type", 
                           palette = brewerPalette) +
        facet_wrap(~featureID)
    
    hasSampleType <- sampleTypeColName %in% names(binnedRegDT)
    if (!hasSampleType) {
        # if no sampleType column then all lines/points will be black
        warning(cleanws("If you want to split up sample types by 
                        color use sampleTypeColName parameter."))
    }
    
    if (plotType == "line") {
        # one line per sample; colored by sample type when available
        if (hasSampleType) {
            binPlot <- binPlot + 
                geom_line(aes(col = get(sampleTypeColName), 
                              group = sampleName))
        } else {
            binPlot <- binPlot + geom_line(aes(group = sampleName))
        }
    } else {
        # plotType == "jitter" (validated above)
        if (hasSampleType) {
            binPlot <- binPlot + 
                geom_jitter(aes(col = get(sampleTypeColName)), alpha = .4)
        } else {
            binPlot <- binPlot + geom_jitter(alpha = .4)
        }
    }
    
    return(binPlot)
}

# A function to get right x axis numbers on the plotMIRAProfiles() plots.
# Returns a character vector of length binNum to be used as axis labels:
# the first and last bins are labelled with their signed distance from the
# center, the center is labelled "0" (odd binNum) or "-1", "1" 
# (even binNum, which has no zero bin), and every other label is blank.
# binNum must be a single positive whole number; small values (1 and 2),
# which leave no room for blank labels, are handled as well.
xAxisForRegionPlots <- function(binNum) {
    if (!is.numeric(binNum) || length(binNum) != 1 || 
        !is.finite(binNum) || binNum < 1 || (binNum %% 1) != 0) {
        stop("binNum must be a single positive whole number")
    }
    
    # integer so that labels are never written in scientific notation
    halfWidth <- as.integer(binNum %/% 2)
    
    # start with all labels blank, then fill in center and ends by position
    xAxis <- rep("", binNum)
    if ((binNum %% 2) == 0) { # even binNum
        xAxis[c(halfWidth, halfWidth + 1L)] <- c("-1", "1") # no zero
    } else { # odd binNum
        xAxis[halfWidth + 1L] <- "0"
    }
    # ends are assigned last; for binNum of 1 or 2 they coincide with
    # the center labels and have the same values
    xAxis[1] <- as.character(-halfWidth)
    xAxis[binNum] <- as.character(halfWidth)
    
    return(xAxis)
}

#' Plot MIRA scores and compare different conditions
#' 
#' Splits up samples by sample type. Displays each region set
#' in a different subplot.
#' If you only want to plot certain region sets,
#' subset with the `featID` parameter. 
#' The input table is left untouched: it is filtered into a new table
#' rather than being re-keyed/reordered by reference.
#' 
#' Due to the limited number of colors in the brewer palettes
#' ("Set1" by default, "Dark2" if `colBlindOption` is TRUE), 
#' a warning is expected from the palette code if there are more 
#' fill groups than colors (more than 9 for "Set1", 8 for "Dark2"). 
#' The fill groups are the sample types when a 'sampleType' column 
#' is present, otherwise the region sets ('featureID's). 
#' This function does not perform that check itself.
#' 
#' @param scoreDT A data.table with the following columns: 
#' score, featureID (names of region sets), ideally include 'sampleType'.
#' @param featID Region set name/names in a single string or vector of strings.
#' Names not present in the featureID column are dropped with a warning;
#' an error is raised if none of them are present.
#' @param colBlindOption If TRUE, function will plot with a color blind
#' friendly palette which could be helpful when plotting multiple colors. 
#' @return a plot of class "gg"/"ggplot" that shows MIRA scores 
#' with geom_boxplot and geom_jitter (or geom_violin instead
#' of boxplot if no sampleType column is given).
#' @export
#' @examples
#' data(bigBinDT2)
#' exScores <- calcMIRAScore(bigBinDT2)
#' # adding annotation
#' sampleType <- rep(c("Ewing", "Muscle-related"), each = 24)
#' exScores <- cbind(exScores, sampleType)
#' exScorePlot <- plotMIRAScores(exScores)         
plotMIRAScores <- function(scoreDT, 
                           featID = unique(scoreDT[, featureID]),
                           colBlindOption = FALSE){
    # Select the requested region sets with a logical filter instead of
    # setkey() + keyed join: setkey() would reorder the caller's table
    # by reference, and the keyed join would add all-NA rows for unknown IDs.
    keepRows <- scoreDT[["featureID"]] %in% featID
    if (!any(keepRows)) {
        stop("None of the featID values were found in the featureID column.")
    }
    missingID <- setdiff(featID, scoreDT[["featureID"]])
    if (length(missingID) > 0) {
        warning("These featID values were not found in the featureID column: ",
                paste(missingID, collapse = ", "))
    }
    plotDT <- scoreDT[keepRows]
    
    brewerPalette <- if (colBlindOption) "Dark2" else "Set1"
    
    if ("sampleType" %in% colnames(scoreDT)) {
        # the preferred option when 'sampleType' is a column
        # counted on the full table: an upper bound on plotted sample types
        sampleTypeNum <- length(unique(scoreDT[["sampleType"]]))
        scorePlot <- ggplot(data = plotDT, 
                            mapping = aes(x = sampleType, 
                                          y = score, 
                                          col = sampleType)) + 
            theme_classic() +
            ylab("MIRA Score") + xlab("Sample Type") +
            geom_boxplot(aes(fill = sampleType), alpha = 0.75) + 
            # inherits data and x/y/col mapping from the ggplot() call
            geom_jitter() + 
            # outlines and points are all black; only the fill varies.
            # guide = "none" replaces the deprecated guide = FALSE
            scale_color_manual(guide = "none", 
                               values = rep("black", sampleTypeNum)) +
            scale_fill_brewer(name = "Sample Type", 
                              palette = brewerPalette) +
            facet_wrap(~featureID)
    } else {
        # a less fancy plot when sampleType column is not present
        scorePlot <- ggplot(data = plotDT, 
                            mapping = aes(x = "", y = score, 
                                          col = featureID)) + 
            theme_classic() +
            theme(axis.title.x = element_blank(),
                  axis.ticks.x = element_blank()) +
            ylab("MIRA Score") + 
            geom_violin(aes(fill = featureID), alpha = 0.75) +
            geom_jitter() +
            # outlines and points are all black; only the fill varies.
            # guide = "none" replaces the deprecated guide = FALSE
            scale_color_manual(guide = "none", 
                               values = rep("black", length(featID))) +
            scale_fill_brewer(name = "Region Set", 
                              palette = brewerPalette) +
            facet_wrap(~featureID)
    }
        
    return(scorePlot)
}
# databio/MIRA documentation built on April 16, 2020, 9:53 p.m.