R/plotting.R

Defines functions plotRegion .plotSmoothData .bsGetGr .bsPlotTitle .bsGetCol .bsHighlightRegions .bsPlotPoints .bsPlotLines plotManyRegions plotGeneTrack plotAnnoTrack

Documented in plotManyRegions plotRegion

plotAnnoTrack <- function(gr, annoTrack, cex) {
    ## check may need to be modified
    if(!all(sapply(annoTrack, function(xx) is(xx, "GRanges"))))
        stop("all elements in 'annoTrack' needs to be 'GRanges'")
    plot(start(gr), 1, type = "n", xaxt = "n", yaxt = "n", bty = "n",
         ylim = c(0.5, length(annoTrack) + 0.5), xlim = c(start(gr), end(gr)), xlab = "", ylab = "")
    lapply(seq(along = annoTrack), function(ii) {
        jj <- length(annoTrack) + 1- ii
        ir <- subsetByOverlaps(annoTrack[[ii]], gr)
        if(length(ir) > 0)
            rect(start(ir)-0.5, jj - 0.15, end(ir), jj + 0.15, col = "grey60", border = NA)
        mtext(names(annoTrack)[ii], side = 2, at = jj, las = 1, line = 1,
              cex = cex, adj = 0.3)
        })
}

# Based on plotAnnoTrack() and
# https://jhu-genomics.slack.com/archives/hansen_gtex/p1461760583000142
# TODO: Use standard Bioconductor object for storing gene information rather
#       than this ad hoc data.frame (e.g., based on TxDb.* packages)
plotGeneTrack <- function(gr, geneTrack, cex) {
    geneTrack_gr <- makeGRangesFromDataFrame(geneTrack)
    ol <- findOverlaps(geneTrack_gr, gr)
    genes <- geneTrack[queryHits(ol), ]
    plot(start(gr), 1, type = "n", xaxt = "n", yaxt = "n", bty = "n",
         ylim = c(-1.5, 1.5), xlim = c(start(gr), end(gr)),
         xlab = "", ylab = "", cex.lab = 4, lheight = 2, cex.axis = 1)
    if (nrow(genes) > 0) {
        for (g in 1:nrow(genes)) {
            geneind2 = which(geneTrack$gene_name == genes$gene_name[g])
            geneind2 = geneind2[which(geneTrack$isoforms[geneind2] == 1)]
            direction = unique(geneTrack$strand[geneind2])
            ES = geneTrack$start[geneind2]
            EE = geneTrack$end[geneind2]
            Exons = cbind(ES, EE)
            if (direction == "+") {
                lines(x = c(min(ES), max(EE)),
                      y = c(0.65, 0.65))
                apply(Exons, 1, function(x) {
                    polygon(c(x[1], x[2], x[2], x[1]),
                            c(0.45, 0.45, 0.85, 0.85), col = "darkgrey")
                    })
                text((max(start(gr), min(ES)) +
                          min(end(gr), max(EE))) / 2, 1.2,
                     genes$gene_name[g], cex = cex)
            } else {
                lines(x = c(min(ES), max(EE)),
                      y = c(-0.65, -0.65))
                apply(Exons, 1, function(x)
                    polygon(c(x[1], x[2], x[2], x[1]),
                            c(-0.45, -0.45, -0.85, -0.85),
                            col = "darkgrey"))
                text((max(start(gr), min(ES)
                ) + min(end(gr), max(EE)
                )) / 2, -1.2, genes$gene_name[g], cex = cex)
            }

        }
    }
}

plotManyRegions <- function(BSseq, regions = NULL, extend = 0, main = "", addRegions = NULL,
                            annoTrack = NULL, cex.anno = 1,
                            geneTrack = NULL, cex.gene = 1.5,
                            col = NULL, lty = NULL, lwd = NULL,
                            BSseqStat = NULL, stat = "tstat.corrected", stat.col = "black",
                            stat.lwd = 1, stat.lty = 1, stat.ylim = c(-8,8),
                            mainWithWidth = TRUE, regionCol = alpha("red", 0.1), addTicks = TRUE,
                            addPoints = FALSE, pointsMinCov = 5, highlightMain = FALSE, verbose = TRUE) {
    cat("[plotManyRegions] preprocessing ...")
    if(!is.null(regions)) {
        if(is(regions, "data.frame"))
            gr <- data.frame2GRanges(regions, keepColumns = FALSE)
        else
            gr <- regions
        if(!is(gr, "GRanges"))
            stop("'regions' needs to be either a 'data.frame' (with a single row) or a 'GRanges' (with a single element)")
    } else {
        gr <- granges(BSseq)
    }
    gr <- resize(gr, width = 2*extend + width(gr), fix = "center")
    BSseq <- subsetByOverlaps(BSseq, gr)
    if(!is.null(BSseqStat))
        BSseqStat <- subsetByOverlaps(BSseqStat, gr)

    if(length(start(BSseq)) == 0)
        stop("No overlap between BSseq data and regions")
    if(!is.null(main) && length(main) != length(gr))
        main <- rep(main, length = length(gr))
    cat("done\n")
    for(ii in seq(along = gr)) {
        if(verbose) cat(sprintf("[plotManyRegions]   plotting region %d (out of %d)\n", ii, nrow(regions)))
        plotRegion(BSseq = BSseq, region = regions[ii,], extend = extend,
                   col = col, lty = lty, lwd = lwd, main = main[ii], BSseqStat = BSseqStat,
                   stat = stat, stat.col = stat.col, stat.lwd = stat.lwd,
                   stat.lty = stat.lty, stat.ylim = stat.ylim,
                   addRegions = addRegions, regionCol = regionCol, mainWithWidth = mainWithWidth,
                   annoTrack = annoTrack, cex.anno = cex.anno,
                   geneTrack = geneTrack, cex.gene = cex.gene,
                   addTicks = addTicks, addPoints = addPoints,
                   pointsMinCov = pointsMinCov, highlightMain = highlightMain)
    }
}

.bsPlotLines <- function(x, y, col, lty, lwd, plotRange) {
    if(sum(!is.na(y)) <= 1)
        return(NULL)
    xx <- seq(from = plotRange[1], to = plotRange[2], length.out = 500)
    yy <- approxfun(x, y)(xx)
    lines(xx, yy, col = col, lty = lty, lwd = lwd)
}

.bsPlotPoints <- function(x, y, z, col, pointsMinCov) {
    points(x[z>pointsMinCov], y[z>pointsMinCov], col = col, pch = 16, cex = 0.5)
}

.bsHighlightRegions <- function(regions, gr, ylim, regionCol, highlightMain) {
    if(is.data.frame(regions))
        regions <- data.frame2GRanges(regions)
    if(highlightMain)
        regions <- c(regions, gr)
    if(is.null(regions)) return(NULL)
    ## regions <- pintersect(region, rep(gr, length(regions)))
    ## regions <- regions[width(regions) == 0]
    regions <- subsetByOverlaps(regions, gr)
    regions <- pintersect(regions, rep(gr, length(regions)))
    if(length(regions) == 0)
        return(NULL)
    rect(xleft = start(regions), xright = end(regions), ybottom = ylim[1],
         ytop = ylim[2], col = regionCol, border = NA)
}

.bsGetCol <- function(object, col, lty, lwd) {
    ## Assumes that object has pData and sampleNames methods
    if(is.null(col)) {
        if("col" %in% names(pData(object)))
            col <- pData(object)[["col"]]
        else
            col <- rep("black", nrow(pData(object)))
    }
    if(length(col) != ncol(object))
        col <- rep(col, length.out = ncol(object))
    if(is.null(names(col)))
        names(col) <- sampleNames(object)

    if(is.null(lty)) {
        if("lty" %in% names(pData(object)))
            lty <- pData(object)[["lty"]]
        else
            lty <- rep(1, ncol(object))
    }
    if(length(lty) != ncol(object))
        lty <- rep(lty, length.out = ncol(object))
    if(is.null(names(lty)))
        names(lty) <- sampleNames(object)

    if(is.null(lwd)) {
        if("lwd" %in% names(pData(object)))
            lwd <- pData(object)[["lwd"]]
        else
            lwd <- rep(1, nrow(pData(object)))
    }
    if(length(lwd) != ncol(object))
        lwd <- rep(lwd, length.out = ncol(object))
    if(is.null(names(lwd)))
        names(lwd) <- sampleNames(object)

    return(list(col = col, lty = lty, lwd = lwd))
}

.bsPlotTitle <- function(gr, extend, main, mainWithWidth) {
    if(is.data.frame(gr))
        gr <- data.frame2GRanges(gr)
    if(length(gr) > 1) {
        warning("plotTitle: gr has more than one element")
        gr <- gr[1]
    }
    plotChr <- as.character(seqnames(gr))
    plotRange <- c(start(gr), end(gr))
    regionCoord <- sprintf("%s: %s - %s", plotChr,
                           format(plotRange[1], big.mark = ",", scientific = FALSE),
                           format(plotRange[2], big.mark = ",", scientific = FALSE))
    if(mainWithWidth) {
        regionWidth <- sprintf("width = %s, extended = %s",
                               format(width(gr), big.mark = ",", scientific = FALSE),
                               format(extend, big.mark = ",", scientific = FALSE))
        regionCoord <- sprintf("%s (%s)", regionCoord, regionWidth)
    }
    if(main != "") {
        main <- sprintf("%s\n%s", main, regionCoord)
    } else {
        main <- regionCoord
    }
    main
}

.bsGetGr <- function(object, region, extend) {
    if(is.null(region)) {
        gr <- GRanges(seqnames = seqnames(object)[1],
                      ranges = IRanges(start = min(start(object)),
                      end = max(start(object))))
    } else {
        if(is(region, "data.frame"))
            gr <- data.frame2GRanges(region, keepColumns = FALSE)
        else
            gr <- region
        if(!is(gr, "GRanges") || length(gr) != 1)
            stop("'region' needs to be either a 'data.frame' (with a single row) or a 'GRanges' (with a single element)")
        gr <- resize(gr, width = 2*extend + width(gr), fix = "center")
    }
    gr
}


.plotSmoothData <- function(BSseq, region, extend, addRegions, col, lty, lwd, regionCol,
                            addTicks, addPoints, pointsMinCov, highlightMain) {
    gr <- .bsGetGr(BSseq, region, extend)
    BSseq <- subsetByOverlaps(BSseq, gr)

    ## Extract basic information
    sampleNames <- sampleNames(BSseq)
    names(sampleNames) <- sampleNames
    positions <- start(BSseq)
    smoothPs <- getMeth(BSseq, type = "smooth")
    rawPs <- getMeth(BSseq, type = "raw")
    coverage <- getCoverage(BSseq)

    # Realise in memory data that are to be plotted
    if (addPoints) {
        rawPs <- as.array(rawPs)
        coverage <- as.array(coverage)
    }
    smoothPs <- as.array(smoothPs)

    ## get col, lwd, lty
    colEtc <- .bsGetCol(object = BSseq, col = col, lty = lty, lwd = lwd)

    ## The actual plotting
    plot(positions[1], 0.5, type = "n", xaxt = "n", yaxt = "n",
         ylim = c(0,1), xlim = c(start(gr), end(gr)), xlab = "", ylab = "Methylation")
    axis(side = 2, at = c(0.2, 0.5, 0.8))
    if(addTicks)
        rug(positions)

    .bsHighlightRegions(regions = addRegions, gr = gr, ylim = c(0,1),
                        regionCol = regionCol, highlightMain = highlightMain)

    if(addPoints) {
        sapply(1:ncol(BSseq), function(sampIdx) {
            abline(v = positions[rawPs[, sampIdx] > 0.1], col = "grey80", lty = 1)
        })
    } # This adds vertical grey lines so we can see where points are plotted

    sapply(1:ncol(BSseq), function(sampIdx) {
        .bsPlotLines(positions, smoothPs[, sampIdx], col = colEtc$col[sampIdx],
                     lty = colEtc$lty[sampIdx], lwd = colEtc$lwd[sampIdx],
                     plotRange = c(start(gr), end(gr)))
    })

    if(addPoints) {
        sapply(1:ncol(BSseq), function(sampIdx) {
            .bsPlotPoints(positions, rawPs[, sampIdx], coverage[, sampIdx],
                          col = colEtc$col[sampIdx], pointsMinCov = pointsMinCov)
        })
    }
}


plotRegion <- function(BSseq, region = NULL, extend = 0, main = "", addRegions = NULL,
                       annoTrack = NULL, cex.anno = 1,
                       geneTrack = NULL, cex.gene = 1.5,
                       col = NULL, lty = NULL, lwd = NULL,
                       BSseqStat = NULL, stat = "tstat.corrected", stat.col = "black",
                       stat.lwd = 1, stat.lty = 1, stat.ylim = c(-8,8),
                       mainWithWidth = TRUE,
                       regionCol = alpha("red", 0.1), addTicks = TRUE, addPoints = FALSE,
                       pointsMinCov = 5, highlightMain = FALSE) {

    opar <- par(mar = c(0,4.1,0,0), oma = c(5,0,4,2), mfrow = c(1,1))
    on.exit(par(opar))
    if(is.null(BSseqStat) && is.null(geneTrack)) {
        layout(matrix(1:2, ncol = 1), heights = c(2,1))
    } else if (is.null(geneTrack)) {
        layout(matrix(1:3, ncol = 1), heights = c(2,2,1))
    } else {
        layout(matrix(1:4, ncol = 1), heights = c(2,2,1,0.3))
    }

    .plotSmoothData(BSseq = BSseq, region = region, extend = extend, addRegions = addRegions,
                    col = col, lty = lty, lwd = lwd, regionCol = regionCol,
                    addTicks = addTicks, addPoints = addPoints,
                    pointsMinCov = pointsMinCov, highlightMain = highlightMain)
    gr <- .bsGetGr(BSseq, region, extend)

    if(!is.null(BSseqStat)) {
        BSseqStat <- subsetByOverlaps(BSseqStat, gr)
        if(is(BSseqStat, "BSseqTstat")) {
            stat.values <- as.array(getStats(BSseqStat)[, "tstat.corrected"])
            stat.values <- as.array(stat.values)
            stat.type <- "tstat"
        }
        if(is(BSseqStat, "BSseqStat")) {
            stat.type <- getStats(BSseqStat, what = "stat.type")
            if(stat.type == "tstat") {
                stat.values <- getStats(BSseqStat, what = "stat")
                stat.values <- as.array(stat.values)
            }
            if(stat.type == "fstat") {
                stat.values <- sqrt(getStats(BSseqStat, what = "stat"))
                stat.values <- as.array(stat.values)
            }
        }
        plot(start(gr), 0.5, type = "n", xaxt = "n", yaxt = "n",
             ylim = stat.ylim, xlim = c(start(gr), end(gr)), xlab = "", ylab = stat.type)
        axis(side = 2, at = c(-5,0,5))
        abline(h = 0, col = "grey60")
        .bsPlotLines(start(BSseqStat), stat.values, lty = stat.lty, col = stat.col, lwd = stat.lwd,
                     plotRange = c(start(gr), end(gr)))
    }

    if(!is.null(annoTrack))
        plotAnnoTrack(gr, annoTrack, cex.anno)

    if (!is.null(geneTrack))
        plotGeneTrack(gr, geneTrack, cex.gene)

    if(!is.null(main)) {
        main <- .bsPlotTitle(gr = region, extend = extend, main = main,
                             mainWithWidth = mainWithWidth)
        mtext(side = 3, text = main, outer = TRUE, cex = 1)
    }
    return(invisible(NULL))
}

##     plotP <- function(sample, label = "", col) {
##         plot(positions[1], 0.5, type = "n", xaxt = "n", yaxt = "n",
##              ylim = c(0,1), xlim = plotRange, xlab = "", ylab = "")
##         plotRects(c(0,1))
##         rawp <- getP(BSseq, sample = sample, type = "raw", addPositions = TRUE, addConfint = TRUE)
##         cols <- rep(alpha("black", 0.5), nrow(rawp))
##         segments(x0 = rawp$pos, y0 = rawp$lower, y1 = rawp$upper, col = cols)
##         points(positions, rawp$p, col = cols)
##         if(nrow(BSseq$coef) > 0) {
##             fitp <- getP(BSseq, sample = sample, type = "fit", addPosition = TRUE, addConfint = FALSE)
##             lines(fitp$pos, fitp$p, col = col,, lty = 1)
##             ## lines(fitp$pos, fitp$lower, col = col, lty = 2)
##             ## lines(fitp$pos, fitp$upper, col = col, lty = 2)
##             text(plotRange[1], 0.1, labels = label)
##         }
##     }

##     plotD <- function(sample1, sample2, col) {
##         plot(positions[1], 0.5, type = "n", xaxt = "n", yaxt = "n",
##              ylim = c(-1,1), xlim = plotRange, xlab = "", ylab = "")
##         plotRects(c(-1,1))
##         abline(h = 0, col = alpha("black", 0.6), lty = 2)
##         rawd <- getD(BSseq, sample1 = sample1, sample2 = sample2, type = "raw",
##                      addPositions = TRUE)
##         cols <- rep(alpha("black", 0.5), nrow(rawd))
##         segments(x0 = rawd$pos, rawd$lower, y1 = rawd$upper, col = cols)
##         points(rawd$pos, rawd$d, col = cols)
##         if(nrow(BSseq$coef) > 0) {
##             fitd <- getD(BSseq, sample1 = sample1, sample2 = sample2, type = "fit", addPositions = TRUE)
##             lines(fitd$pos, fitd$d, col = col, lty = 1)
##             ## lines(fitd$pos, fitd$lower, col = "blue", lty = 2)
##             ## lines(fitd$pos, fitd$upper, col = "blue", lty = 2)
##         }
##     }

Try the bsseq package in your browser

Any scripts or data that you put into this service are public.

bsseq documentation built on Nov. 8, 2020, 7:53 p.m.