R/plots.R

Defines functions plotCoverage restrictFeatures plotImage getPadding getLayoutParameters extractFeaturesFromVariants plotVariants plotFeatures addAlpha setFeatureColors xcoordinates getGraphInfo plotRanges plotTrackRanges scorePerExon plotTrackScore getExonCoordinates plotExonGraph plotSpliceGraph exonGraphEdges addDummyNodes exonGraphNodes exonGraph

Documented in plotCoverage plotFeatures plotSpliceGraph plotVariants

exonGraph <- function(features, tx_view)
{

    if (tx_view) {

        tx_name <- txName(features)
        i <- which(elementNROWS(tx_name) == 0)
        tx_name[i] <- as(feature2name(features[i]), "CharacterList")
        features <- features[togroup0(tx_name)]
        mcols(features)$tx_name <- unlist(tx_name)

    }

    E <- features[type(features) == "E"]
    J <- features[type(features) == "J"]

    v <- exonGraphNodes(E, J, tx_view)
    d <- exonGraphEdges(v, J, tx_view)

    g <- graph.data.frame(d = d, directed = TRUE, vertices = v)

    invisible(g)

}

exonGraphNodes <- function(E, J, tx_view)
{

    v <- data.frame(
        name = seq_along(E),
        coordinates = gr2co(E),
        type = type(E),
        featureID = featureID(E),
        stringsAsFactors = FALSE)

    v$color <- mcols(E)$color
    v$label <- mcols(E)$label

    if (tx_view) {

        v$tx_name <- mcols(E)$tx_name

    }

    v <- addDummyNodes(v, E, J, tx_view)

    return(v)

}

addDummyNodes <- function(v, E, J, tx_view)
{

    E_A <- gr2co(flank(E, -1, TRUE))
    E_D <- gr2co(flank(E, -1, FALSE))
    J_D <- gr2co(flank(J, -1, TRUE))
    J_A <- gr2co(flank(J, -1, FALSE))

    if (tx_view) {

        E_A <- paste0(E_A, "_", mcols(E)$tx_name)
        E_D <- paste0(E_D, "_", mcols(E)$tx_name)
        J_D <- paste0(J_D, "_", mcols(J)$tx_name)
        J_A <- paste0(J_A, "_", mcols(J)$tx_name)

    }

    dummy_nodes <- union(setdiff(J_D, E_D), setdiff(J_A, E_A))

    if (length(dummy_nodes) > 0) {

        if (tx_view) {

            coordinates <- vapply(strsplit(dummy_nodes, "_"), "[",
                character(1), 1)

        } else {

            coordinates <- dummy_nodes

        }

        v_dummy <- data.frame(
            name = nrow(v) + seq_along(dummy_nodes),
            coordinates = coordinates,
            type = NA,
            featureID = NA_integer_,
            stringsAsFactors = FALSE)

        if (!is.null(v$color)) {

            v_dummy$color <- NA

        }
        if (!is.null(v$label)) {

            v_dummy$label <- NA

        }
        if (tx_view) {

            v_dummy$tx_name <- vapply(strsplit(dummy_nodes, "_"), "[",
                character(1), 2)

        }

        v <- rbindDfsWithoutRowNames(v, v_dummy)

    }

    v <- v[order(co2gr(v$coordinates)), ]

    return(v)

}

exonGraphEdges <- function(v, J, tx_view)
{

    if (length(J) == 0) {

        d <- data.frame(
            from = character(),
            to = character(),
            stringsAsFactors = FALSE)

        return(d)

    }

    E_A <- flank(co2gr(v$coordinates), -1, TRUE)
    E_D <- flank(co2gr(v$coordinates), -1, FALSE)

    if (tx_view) {

        mcols(E_A)$tx_name <- v$tx_name
        mcols(E_D)$tx_name <- v$tx_name

    }

    J_D <- flank(J, -1, TRUE)
    J_A <- flank(J, -1, FALSE)

    ol <- findOverlaps(E_D, J_D)

    if (tx_view) {

        qH <- queryHits(ol)
        sH <- subjectHits(ol)
        ol <- ol[mcols(E_D)$tx_name[qH] == mcols(J_D)$tx_name[sH]]

    }

    i_from <- queryHits(ol)
    i_junc <- subjectHits(ol)

    ol <- findOverlaps(J_A[i_junc], E_A)

    if (tx_view) {

        qH <- queryHits(ol)
        sH <- subjectHits(ol)
        ol <- ol[mcols(J_A)$tx_name[i_junc][qH] == mcols(E_A)$tx_name[sH]]

    }

    i_from <- i_from[queryHits(ol)]
    i_junc <- i_junc[queryHits(ol)]
    i_to <- subjectHits(ol)

    d <- data.frame(
        from = v$name[i_from],
        to = v$name[i_to],
        coordinates = gr2co(J)[i_junc],
        type = "J",
        featureID = featureID(J)[i_junc],
        stringsAsFactors = FALSE)

    d$color <- mcols(J)$color[i_junc]
    d$label <- mcols(J)$label[i_junc]

    if (tx_view) {

        d$tx_name <- v$tx_name[i_from]

    }

    return(d)

}

##' Plot the splice graph implied by splice junctions and exon bins.
##' Invisibly returns a \code{data.frame} with details of plotted
##' features, including genomic coordinates.
##'
##' By default, the color of features in the splice graph is
##' determined by annotation status (see arguments \code{color},
##' \code{color_novel}) and feature labels are generated automatically
##' (see argument \code{label}). Alternatively, colors and labels can
##' be specified via metadata columns \dQuote{color} and
##' \dQuote{label}, respectively.
##'
##' @title Plot splice graph
##' @param x \code{SGFeatures} or \code{SGVariants} object
##' @param geneID Single gene identifier used to subset \code{x}
##' @param geneName Single gene name used to subset \code{x}
##' @param eventID Single event identifier used to subset \code{x}
##' @param which \code{GRanges} used to subset \code{x}
##' @param toscale Controls which parts of the splice graph are drawn to
##'   scale. Possible values are \dQuote{none} (exonic and intronic regions
##'   have constant length), \dQuote{exon} (exonic regions are drawn to scale)
##'   and \dQuote{gene} (both exonic and intronic regions are drawn to scale).
##' @param label Format of exon/splice junction labels,
##'   possible values are \dQuote{id} (format E1,... J1,...), \dQuote{name}
##'   (format type:chromosome:start-end:strand), \dQuote{label} for labels
##'   specified in metadata column \dQuote{label}, or \dQuote{none}
##'   for no labels.
##' @param color Color used for plotting the splice graph. Ignored if features
##'   metadata column \dQuote{color} is not \code{NULL}.
##' @param color_novel Features with missing annotation are
##'   highlighted in \code{color_novel}. Ignored if features
##'   metadata column \dQuote{color} is not \code{NULL}.
##' @param color_alpha Controls color transparency
##' @param color_labels Logical indicating whether label colors should
##'   be the same as feature colors
##' @param border Determines the color of exon borders, can be \dQuote{fill}
##'   (same as exon color), \dQuote{none} (no border), or a valid color name
##' @param curvature Numeric determining curvature of plotted splice junctions.
##' @param ypos Numeric vector of length two, indicating the vertical
##'   position and height of the exon bins in the splice graph,
##'   specificed as fraction of the height of the plotting region
##'   (not supported for \code{tx_view = TRUE})
##' @param score \code{RLeList} containing nucleotide-level scores
##'   to be plotted with the splice graph
##' @param score_color Color used for plotting scores
##' @param score_ylim Numeric vector of length two, determining y-axis range
##'   for plotting scores
##' @param score_ypos Numeric vector of length two, indicating the vertical
##'   position and height of the score panel, specificed as fraction of the
##'   height of the plotting region
##' @param score_nbin Number of bins for plotting scores
##' @param score_summary Function used to calculate per-bin score summaries
##' @param score_label Label used to annotate score panel
##' @param ranges \code{GRangesList} to be plotted with the splice graph
##' @param ranges_color Color used for plotting ranges
##' @param ranges_ypos Numeric vector of length two, indicating the vertical
##'   position and height of the ranges panel, specificed as fraction of the
##'   height of the plotting region
##' @param main Plot title
##' @param tx_view Plot transcripts instead of splice graph (experimental)
##' @param tx_dist Vertical distance between transcripts as fraction of height
##'   of plotting region
##' @param short_output Logical indicating whether the returned data frame
##'   should only include information that is likely useful to the user
##' @return \code{data.frame} with information on exon bins and
##'   splice junctions included in the splice graph
##' @examples
##' \dontrun{
##' sgf_annotated <- annotate(sgf_pred, txf_ann)
##' plotSpliceGraph(sgf_annotated)
##' }
##' \dontrun{
##' sgv_annotated <- annotate(sgv_pred, txf_ann)
##' plotSpliceGraph(sgv_annotated)
##' }
##' NULL
##' @author Leonard Goldstein

plotSpliceGraph <- function(x, geneID = NULL, geneName = NULL,
    eventID = NULL, which = NULL, toscale = c("exon", "none", "gene"),
    label = c("id", "name", "label", "none"), color = "gray",
    color_novel = color, color_alpha = 0.8, color_labels = FALSE,
    border = "fill", curvature = NULL, ypos = c(0.5, 0.1),
    score = NULL, score_color = "darkblue",
    score_ylim = NULL, score_ypos = c(0.3, 0.1), score_nbin = 200,
    score_summary = mean, score_label = NULL, ranges = NULL,
    ranges_color = "darkblue", ranges_ypos = c(0.1, 0.1),
    main = NULL, tx_view = FALSE, tx_dist = 0.2, short_output = TRUE)
{

    toscale <- match.arg(toscale)
    label <- match.arg(label)

    if (!is(x, "SGFeatures") && !is(x, "SGVariants")) {

        stop("x must be an SGFeatures or SGVariants object")

    }

    if (is(x, "SGVariants")) x <- updateObject(x, verbose = TRUE)

    if (!is.null(score) && !is(score, "RleList")) {

        stop("score must be an RleList or NULL")

    }

    if (!is.null(ranges) && !is(ranges, "GRangesList")) {

        stop("ranges must be a GRangesList or NULL")

    }

    x <- restrictFeatures(x, geneID, eventID, which, geneName)

    if (length(x) == 0) { return() }

    if (is(x, "SGVariants")) {

        x <- extractFeaturesFromVariants(x)

    }

    x <- setFeatureColors(x, color, color_novel, color_alpha)

    g <- exonGraph(x, tx_view)

    exon_coordinates <- getExonCoordinates(g, toscale)

    plot(NA, xlim = c(-1, 1), ylim = c(-1, 1), xaxs = "i", yaxs = "i",
        axes = FALSE, xlab = NA, ylab = NA)

    df <- plotExonGraph(g, exon_coordinates, ypos, "both", border,
        curvature, label, color_labels, 1, 3, tx_view, tx_dist)

    if (!is.null(score)) {

        plotTrackScore(exon_coordinates, score, score_color, score_ylim,
            score_ypos, score_nbin, score_summary, score_label)

    }

    if (!is.null(ranges)) {

        plotTrackRanges(exon_coordinates, toscale, ranges, ranges_color,
            ranges_ypos)

    }

    text(x = 0, y = 0.95, labels = main, pos = 1, offset = 0, font = 2)

    if (short_output) { df <- df[c("id", "name", "type", "featureID")] }

    invisible(df)

}

plotExonGraph <- function(g, exon_coordinates, ypos, include, border,
    curvature, label, color_labels, label_pos_exon, label_pos_junction,
    tx_view, tx_dist)
{

    ## data frames of nodes and edges
    gv <- nodes(g)
    gd <- edges(g)

    i_from <- match(gd$from, gv$name)
    i_to <- match(gd$to, gv$name)

    if (tx_view) {

        vertex_y <- as.integer(as.factor(as.character(gv$tx_name)))
        vertex_y <- vertex_y - 1
        vertex_y <- vertex_y * 2 * tx_dist
        vertex_y <- vertex_y - mean(unique(vertex_y))
        edge_short <- rep(TRUE, nrow(gd))

    } else {

        vertex_y <- rep(-1 + 2 * ypos[1], nrow(gv))
        edge_short <- rank(exon_coordinates$vertex_x)[i_to] ==
            rank(exon_coordinates$vertex_x)[i_from] + 1

    }

    vertex_pos <- rep(label_pos_exon, nrow(gv))

    if (is.null(label_pos_junction)) {

        edge_pos <- ifelse(edge_short, 1, 3)

    } else {

        edge_pos <- rep(label_pos_junction, nrow(gd))

    }

    if (is.null(curvature)) {

        edge_curved <- as.numeric(!edge_short)

    } else {

        edge_curved <- rep(curvature, nrow(gd))

    }

    pin <- par()$pin
    asp <- pin[2] / pin[1]
    edge_curved <- edge_curved / asp

    vertex_frame_color <- switch(border, fill = gv$color, none = NA, border)
    vertex_shape <- ifelse(!is.na(gv$type), "rectangle", "none")
    edge_lty <- rep(1, nrow(gd))

    if (include == "exons") {

        edge_lty <- rep(0, nrow(gd))

    } else if (include == "junctions") {

        vertex_shape <- rep("none", nrow(gv))

    }

    plot(g,
        add = TRUE,
        rescale = FALSE,
        layout = cbind(exon_coordinates$vertex_x, vertex_y),
        vertex.size = exon_coordinates$vertex_width * 100,
        vertex.size2 = 2 * ypos[2] * 100,
        vertex.color = gv$color,
        vertex.frame.color = vertex_frame_color,
        vertex.shape = vertex_shape,
        vertex.label = NA,
        edge.color = gd$color,
        edge.width = 2 * par()$cex,
        edge.lty = edge_lty,
        edge.curved = edge_curved,
        edge.label = NA,
        edge.arrow.mode = rep(0, nrow(gd)))

    df <- getGraphInfo(g, exon_coordinates$vertex_x, vertex_y,
        exon_coordinates$vertex_width, vertex_pos, edge_curved,
        edge_pos, color_labels, tx_view)

    if (label != "none") {

        i <- seq_len(nrow(df))

        if (include == "exons") {

            i <- which(df$type == "E")

        } else if (include == "junctions") {

            i <- which(df$type == "J")

        }

        if (length(i) > 0) {

            df_tmp <- df[i, ]

            text(x = df_tmp$x,
                y = df_tmp$y + c(E = -1, J = 0)[df_tmp$type] * ypos[2],
                labels = df_tmp[, label],
                pos = df_tmp$pos,
                offset = c(E = 0.5, J = 0.5)[df_tmp$type],
                col = df_tmp$color)

        }

    }

    if (tx_view) {

        df_collapsed <- unique(df[, c("y", "tx_name")])
        mtext(side = 4, at = df_collapsed$y, text = df_collapsed$tx_name,
            cex = par()$cex, line = 0.5, las = 1)

    }

    cols <- c("id", "name", "type", "featureID", "label", "color")
    cols <- cols[cols %in% names(df)]
    df <- df[, cols]

    return(df)

}

getExonCoordinates <- function(g, toscale)
{

    ## data frames of nodes
    gv <- nodes(g)

    ## exon and gene coordinates
    gr_exon <- co2gr(gv$coordinates)
    gr_gene <- range(gr_exon)

    if (length(gr_gene) > 1) {

        stop("features must be on the same chromosome and strand")

    }

    ## exon coordinates relative to gene locus
    ir_exon <- ranges(mapToTranscripts(gr_exon, split(gr_gene, 1),
        ignore.strand = FALSE))

    ## exonic regions
    ir_exonic <- reduce(ir_exon)

    ## exon x-coordinates and widths
    exon_coordinates <- xcoordinates(ir_exon, ir_exonic, toscale)
    exon_width <- exon_coordinates$w
    exon_x <- exon_coordinates$x

    coordinates <- list(
        gene_chrom = as.character(seqnames(gr_gene)),
        gene_strand = as.character(strand(gr_gene)),
        exon_start = start(gr_exon),
        exon_end = end(gr_exon),
        vertex_x = exon_x,
        vertex_width = exon_width
    )

    return(coordinates)

}

plotTrackScore <- function(exon_coordinates, score, color, ylim, ypos,
    nbin, summary, label)
{

    if (is(score, "RleList")) score <- score[[exon_coordinates$gene_chrom]]

    tmp <- lapply(seq_along(exon_coordinates$vertex_x),
        scorePerExon, exon_coordinates, score)

    bp_x_start <- do.call(c, lapply(tmp, "[[", "x_start"))
    bp_x_end <- do.call(c, lapply(tmp, "[[", "x_end"))
    bp_y <- do.call(c, lapply(tmp, "[[", "y"))

    excl <- which(bp_x_start == bp_x_end)

    if (length(excl) > 0) {

        bp_x_start <- bp_x_start[-excl]
        bp_x_end <- bp_x_end[-excl]
        bp_y <- bp_y[-excl]

    }

    bin_size <- 2 / nbin
    bin_breaks <- seq(-1, 1, length.out = nbin + 1)

    bp_bin <- IRanges(
        as.integer(cut(bp_x_start, bin_breaks, right = FALSE)),
        as.integer(cut(bp_x_end, bin_breaks, right = TRUE)))

    bin_y <- tapply(bp_y[rep(seq_along(bp_bin), width(bp_bin))],
        unlist(as(bp_bin, "IntegerList")), summary)

    if (is.null(ylim)) {

        ymin <- floor(min(bin_y, na.rm = TRUE, finite = TRUE))
        ymax <- ceiling(max(bin_y, na.rm = TRUE, finite = TRUE))

        if (ymin >= 0) {

            ylim <- c(0, ymax)

        } else {

            ylim <- c(ymin, ymax)

        }

    }

    bin_y[bin_y < ylim[1]] <- ylim[1]
    bin_y[bin_y > ylim[2]] <- ylim[2]

    ypos_2 <- c(-1 + 2 * ypos[1] - ypos[2], 2 * ypos[2])

    bin_y <- bin_y - ylim[1]
    bin_y <- bin_y / diff(range(ylim)) * ypos_2[2]
    bin_y <- bin_y + ypos_2[1]

    px <- rep(bin_breaks, rep(2, length(bin_breaks)))
    py <- c(ypos_2[1], rep(bin_y, rep(2, length(bin_y))), ypos_2[1])
    polygon(px, py, border = NA, col = color)

    axis(side = 2, at = c(ypos_2[1], ypos_2[1] + ypos_2[2]), labels = ylim,
        mgp = c(3, 0.5, 0.5), tcl = -0.25, las = 1)

    mtext(side = 2, at = ypos_2[1] + 0.5 * ypos_2[2],
        cex = par()$cex, text = label, line = 3, las = 1)

}

scorePerExon <- function(i, co, score)
{

    ir_exon <- IRanges(co$exon_start[i], co$exon_end[i])
    y <- as.numeric(score[ir_exon])
    if (co$gene_strand == "-") { y <- rev(y) }

    ex_start <- co$vertex_x[i] - 0.5 * co$vertex_width[i]
    ex_end <- co$vertex_x[i] + 0.5 * co$vertex_width[i]

    d <- (ex_end - ex_start) / length(y)

    ex_bp_x_start <- seq(ex_start, ex_end - d, length.out = length(y))
    ex_bp_x_end <- seq(ex_start + d, ex_end, length.out = length(y))
    ex_bp_y <- y

    if (i < length(co$vertex_x) && co$exon_start[i + 1] - co$exon_end[i] > 1) {

        ir_intron <- IRanges(co$exon_end[i], co$exon_start[i + 1]) - 1
        y <- as.numeric(score[ir_intron])

        if (co$gene_strand == "-") {

          y <- rev(y)

        }

        if (co$gene_strand == "+") {

            in_start <- co$vertex_x[i] + 0.5 * co$vertex_width[i]
            in_end <- co$vertex_x[i + 1] - 0.5 * co$vertex_width[i + 1]

        } else {

            in_start <- co$vertex_x[i + 1] + 0.5 * co$vertex_width[i + 1]
            in_end <- co$vertex_x[i] - 0.5 * co$vertex_width[i]

        }

        d <- (in_end - in_start) / length(y)

        in_bp_x_start <- seq(in_start, in_end - d, length.out = length(y))
        in_bp_x_end <- seq(in_start + d, in_end, length.out = length(y))
        in_bp_y <- y

        out_bp_x_start <- c(ex_bp_x_start, in_bp_x_start)
        out_bp_x_end <- c(ex_bp_x_end, in_bp_x_end)
        out_bp_y <- c(ex_bp_y, in_bp_y)

    } else {

        out_bp_x_start <- ex_bp_x_start
        out_bp_x_end <- ex_bp_x_end
        out_bp_y <- ex_bp_y

    }

    out_bp_x_start[which(out_bp_x_start < -1)] <- -1
    out_bp_x_start[which(out_bp_x_start > 1)] <- 1
    out_bp_x_end[which(out_bp_x_end < -1)] <- -1
    out_bp_x_end[which(out_bp_x_end > 1)] <- 1

    out <- list(x_start = out_bp_x_start, x_end = out_bp_x_end, y = out_bp_y)

    return(out)

}

plotTrackRanges <- function(exon_coordinates, toscale, score, color, ypos)
{

    gr_exon <- GRanges(exon_coordinates$gene_chrom,
        IRanges(exon_coordinates$exon_start,
        exon_coordinates$exon_end), exon_coordinates$gene_strand)
    gr_gene <- range(gr_exon)

    score <- score[which(unlist(range(score)) %over% gr_gene)]
    score <- endoapply(score, restrict, start(gr_gene), end(gr_gene))

    ypos_2 <- c(-1 + 2 * ypos[1] - ypos[2], 2 * ypos[2])

    n <- length(score)
    h <- ypos_2[2] / (2 * n - 1)
    y <- seq(ypos_2[1] + ypos_2[2] - 0.5 * h, ypos_2[1] + 0.5 * h,
        length.out = n)

    x <- mapply(plotRanges, score, y,
        MoreArgs = list(exon_coordinates = exon_coordinates,
            toscale = toscale, h = h, color = color))

    if (!is.null(names(score))) text(x, y, names(score), pos = 1)

}

plotRanges <- function(exon_coordinates, toscale, score, color, y, h)
{

    gr_exon <- GRanges(exon_coordinates$gene_chrom,
        IRanges(exon_coordinates$exon_start,
        exon_coordinates$exon_end), exon_coordinates$gene_strand)
    gr_gene <- range(gr_exon)

    ir_score <- ranges(mapToTranscripts(score, split(gr_gene, 1),
        ignore.strand = FALSE))
    ir_exon <- ranges(mapToTranscripts(gr_exon, split(gr_gene, 1),
        ignore.strand = FALSE))
    ir_exonic <- reduce(ir_exon)

    coords <- xcoordinates(ir_score, ir_exonic, toscale)

    xleft <- coords$x - 0.5 * coords$w
    xright <- coords$x + 0.5 * coords$w
    ybottom <- y - 0.5 * h
    ytop <- y + 0.5 * h

    mapply(rect, xleft = xleft, xright = xright,
        MoreArgs = list(ybottom = ybottom, ytop = ytop,
            col = color, border = NA))

    x <- 0.5 * (min(xleft) + max(xright))

    return(x)

}

getGraphInfo <- function(g, x_exon, y_exon, w_exon, vertex_pos,
    edge_curved, edge_pos, color_labels, tx_view)
{

    ## data frames of nodes and edges
    gv <- nodes(g)
    gd <- edges(g)

    i_from <- match(gd$from, gv$name)
    i_to <- match(gd$to, gv$name)

    ## coordinates for junction labels
    x_junc <- 0.5 * (x_exon[i_from] + 0.5 * w_exon[i_from] +
        x_exon[i_to] - 0.5 * w_exon[i_to])
    w_junc <- x_exon[i_to] - 0.5 * w_exon[i_to] -
        (x_exon[i_from] + 0.5 * w_exon[i_from])
    y_junc <- 0.5 * (y_exon[i_from] + y_exon[i_to]) +
        edge_curved * w_junc * 1/3

    ## output
    i_exon <- which(!is.na(gv$type))
    i_exon <- i_exon[order(x_exon[i_exon])]
    i_junc <- order(x_exon[i_from], w_junc)

    co_E <- gv$coordinates[i_exon]
    co_J <- gd$coordinates[i_junc]

    df <- data.frame(
        id = c(
            paste0(rep("E", length(co_E)),
                as.integer(factor(co_E, levels = unique(co_E)))),
            paste0(rep("J", length(co_J)),
                as.integer(factor(co_J, levels = unique(co_J))))),
        name = paste0(c(gv$type[i_exon], gd$type[i_junc]), ":",
            c(gv$coordinates[i_exon], gd$coordinates[i_junc])),
        type = c(rep("E", length(co_E)), rep("J", length(co_J))),
        featureID = c(gv$featureID[i_exon], gd$featureID[i_junc]),
        x = c(x_exon[i_exon], x_junc[i_junc]),
        y = c(y_exon[i_exon], y_junc[i_junc]),
        pos = c(vertex_pos[i_exon], edge_pos[i_junc]))

    if (color_labels) {

        df$color <- addAlpha(c(gv$color[i_exon], gd$color[i_junc]), 1)

    } else {

        df$color <- "black"

    }

    if (!is.null(gv$label) && !is.null(gd$label)) {

        df$label <- c(gv$label[i_exon], gd$label[i_junc])

    }

    if (tx_view) {

        df$tx_name <- c(gv$tx_name[i_exon], gd$tx_name[i_junc])

    }

    return(df)

}

xcoordinates <- function(ir, ir_exonic, toscale)
{

    n_exonic <- length(ir_exonic)

    ## intronic regions
    ir_intronic <- IRanges(end(ir_exonic)[-n_exonic],
        start(ir_exonic)[-1]) - 1
    n_intronic <- length(ir_intronic)

    ## set widths for exonic/intronic regions
    ## if toscale is 'none' or 'exon', half of the x-axis range is used
    ## for plotting exonic regions and half for intronic regions
    if (toscale == "none") {

        w_exonic <- rep(1 / n_exonic, n_exonic)
        w_intronic <- rep(1 / n_intronic, n_intronic)

    } else if (toscale == "exon") {

        w_exonic <- width(ir_exonic) / sum(width(ir_exonic))
        w_intronic <- rep(1 / n_intronic, n_intronic)

    } else if (toscale == "gene") {

        w_exonic <- 2 * width(ir_exonic) / width(range(ir_exonic))
        w_intronic <- 2 * width(ir_intronic) / width(range(ir_exonic))

    }

    ## if there are no intronic regions, use full x-axis range
    ## for exonic regions
    if (toscale %in% c("none", "exon") && n_intronic == 0) {

        w_exonic <- 2

    }

    o <- order(c(ir_exonic, ir_intronic))
    ir_block <- c(ir_exonic, ir_intronic)[o]
    w_block <- c(w_exonic, w_intronic)[o]
    x_block <- c(0, cumsum(w_block[-length(w_block)])) - 1

    ol <- findOverlaps(ir, ir_block)
    ol_min <- as.integer(tapply(subjectHits(ol), queryHits(ol), min))
    ol_max <- as.integer(tapply(subjectHits(ol), queryHits(ol), max))

    x_1 <- x_block[ol_min] + (start(ir) - start(ir_block)[ol_min]) /
        width(ir_block)[ol_min] * w_block[ol_min]
    x_2 <- x_block[ol_max] + (end(ir) - start(ir_block)[ol_max] + 1) /
        width(ir_block)[ol_max] * w_block[ol_max]

    out <- list("x" = x_1 + 0.5 * (x_2 - x_1), "w" = x_2 - x_1)

    return(out)

}

setFeatureColors <- function(features, color, color_novel, alpha)
{

    if (is.null(mcols(features)$color)) {

        features_color <- rep(color, length(features))

        if (!is.null(color_novel) && !is.null(txName(features))) {

            txName <- txName(features)
            i_novel <- which(elementNROWS(txName) == 0)
            features_color[i_novel] <- color_novel

        }

    } else {

        features_color <- mcols(features)$color

    }

    if (!is.null(alpha)) {

        features_color <- addAlpha(features_color, alpha)

    }

    mcols(features)$color <- features_color

    return(features)

}

addAlpha <- function(col, alpha)
{

    col_rgb <- col2rgb(col)/255
    rgb(col_rgb[1, ], col_rgb[2, ], col_rgb[3, ], alpha)

}

##' Plot splice graph and heatmap of expression values.
##'
##' @title Plot splice graph and heatmap of expression values
##' @inheritParams plotSpliceGraph
##' @param x \code{SGFeatureCounts} object
##' @param cex Scale parameter for feature labels and annotation
##' @param assay Name of assay to be plotted in the heatmap
##' @param include Include \dQuote{exons}, \dQuote{junctions} or
##'   \dQuote{both} in the heatmap
##' @param transform Transformation applied to assay data
##' @param Rowv Determines order of rows. Either a vector of values used to
##'   reorder rows, or \code{NA} to suppress reordering, or \code{NULL} for
##'   hierarchical clustering.
##' @param distfun Distance function used for hierarchical clustering
##'   of rows (samples)
##' @param hclustfun Clustering function used for hierarchical clustering
##'   of rows (samples)
##' @param margin Width of right-hand margin as fraction of width of the
##'   graphics device. Ignored if \code{square} is \code{TRUE}.
##' @param RowSideColors Character vector (or list of character vectors)
##'   with length(s) equal to \code{ncol(x)} containing color names for
##'   horizontal side bars for sample annotation
##' @param square Logical, if \code{TRUE} margins are set such that
##'   cells in the heatmap are square
##' @param cexRow Scale factor for row (sample) labels
##' @param cexCol Scale factor for column (feature) labels
##' @param labRow Character vector of row (sample) labels
##' @param col Heatmap colors
##' @param zlim Range of values for which colors should be plotted,
##'   if \code{NULL} range of finite values
##' @param heightPanels Numeric vector of length two indicating height of
##'   the top and bottom panels.
##' @param ... further arguments passed to \code{plotSpliceGraph}
##' @return \code{data.frame} with information on exon bins and
##'   splice junctions included in the splice graph
##' @examples
##' \dontrun{
##' sgfc_annotated <- annotate(sgfc_pred, txf_ann)
##' plotFeatures(sgfc_annotated)
##' }
##' NULL
##' @author Leonard Goldstein

plotFeatures <- function(x, geneID = NULL, geneName = NULL,
    which = NULL, tx_view = FALSE, cex = 1,
    assay = "FPKM", include = c("junctions", "exons", "both"),
    transform = function(x) { log2(x + 1) }, Rowv = NULL,
    distfun = dist, hclustfun = hclust, margin = 0.2,
    RowSideColors = NULL, square = FALSE, cexRow = 1, cexCol = 1,
    labRow = colnames(x), col = colorRampPalette(c("black", "gold"))(256),
    zlim = NULL, heightPanels = c(1, 2), ...)
{

    include <- match.arg(include)

    if (!is(x, "SGFeatureCounts")) {

        stop("x must be an SGFeatureCounts object")

    }

    x <- restrictFeatures(x, geneID = geneID, which = which,
        geneName = geneName)

    if (nrow(x) == 0) { return() }

    features <- rowRanges(x)

    if (!is.null(RowSideColors) && !is.list(RowSideColors)) {

        RowSideColors <- list(RowSideColors)

    }

    n_sample <- ncol(x)
    include_type <- switch(include, junctions = "J", exons = "E",
        both = c("E", "J"))
    n_feature <- length(which(type(features) %in% include_type))

    pars <- getLayoutParameters(n_sample, n_feature, margin, heightPanels,
        RowSideColors, square, tx_view)
    layout(pars$mat, heights = pars$hei, widths = pars$wid)
    par(cex = cex, mai = c(pars$mai[1], 0, pars$mai[3], 0))

    df <- plotSpliceGraph(x = features, label = "id", tx_view = tx_view,
        short_output = FALSE, ...)

    i_df <- switch(include,
        exons = which(df$type == "E"),
        junctions = which(df$type == "J"),
        both = seq_len(nrow(df)))
    i_df <- i_df[!duplicated(df$name[i_df])]
    i <- match(df$name[i_df], feature2name(features))

    X <- transform(assay(x[i, ], assay))
    labCol <- df$id[i_df]
    colLabCol <- df$color[i_df]

    plotImage(X, Rowv = Rowv, distfun = distfun,
        hclustfun = hclustfun, RowSideColors = RowSideColors,
        cexRow = cexRow, cexCol = cexCol, labRow = labRow,
        labCol = labCol, colLabCol = colLabCol,
        col = col, zlim = zlim)

    frame()
    
    df <- df[c("id", "name", "type", "featureID")]

    invisible(df)

}

##' Plot splice graph and heatmap of splice variant frequencies.
##'
##' @title Plot splice graph and heatmap of splice variant frequencies
##' @inheritParams plotSpliceGraph
##' @inheritParams plotFeatures
##' @param x \code{SGVariantCounts} object
##' @param cex Scale parameter for feature labels and annotation
##' @param transform Transformation applied to splice variant frequencies
##' @param expand_variants Experimental option - leave set to \code{FALSE}
##' @param ... further arguments passed to \code{plotSpliceGraph}
##' @return \code{data.frame} with information on exon bins and
##'   splice junctions included in the splice graph
##' @examples
##' \dontrun{
##' sgvc_annotated <- annotate(sgvc_pred, txf_ann)
##' plotVariants(sgvc_annotated)
##' }
##' NULL
##' @author Leonard Goldstein

plotVariants <- function(x, eventID = NULL, tx_view = FALSE,
    cex = 1, transform = function(x) { x }, Rowv = NULL,
    distfun = dist, hclustfun = hclust, margin = 0.2,
    RowSideColors = NULL, square = FALSE, cexRow = 1, cexCol = 1,
    labRow = colnames(x), col = colorRampPalette(c("black", "gold"))(256),
    zlim = c(0, 1), heightPanels = c(1, 2), expand_variants = FALSE, ...)
{

    if (!is(x, "SGVariantCounts")) {

        stop("x must be an SGVariantCounts object")

    }

    x <- updateObject(x, verbose = TRUE)

    x <- restrictFeatures(x, eventID = eventID,
        expand_variants = expand_variants)

    if (nrow(x) == 0) { return() }

    variant_not_quantifiable <- elementNROWS(featureID5p(x)) == 0 &
        elementNROWS(featureID3p(x)) == 0
    event_not_quantifiable <- tapply(variant_not_quantifiable, eventID(x),
        any)

    if (all(event_not_quantifiable)) {

        warning("none of the selected splice events could be quantified")

    } else if (any(event_not_quantifiable)) {

        warning("one or more splice events could not be quantified")

    }

    if (expand_variants) {

        x <- expandSGVariantCounts(x, eventID)

    }

    X <- transform(variantFreq(x))

    if (!is.null(RowSideColors) && !is.list(RowSideColors)) {

        RowSideColors <- list(RowSideColors)

    }

    exclude <- which(colSums(is.na(X)) == nrow(X))
    n_exclude <- length(exclude)

    if (n_exclude > 0) {

        labRow <- labRow[-exclude]
        x <- x[, -exclude]
        X <- X[, -exclude]
        RowSideColors <- lapply(RowSideColors, "[", -exclude)
        warning(paste("excluded", n_exclude, "samples with all values NA"))

    }

    n_sample <- ncol(x)
    n_feature <- nrow(x)

    pars <- getLayoutParameters(n_sample, n_feature, margin, heightPanels,
        RowSideColors, square, tx_view)
    layout(pars$mat, heights = pars$hei, widths = pars$wid)
    par(cex = cex, mai = c(pars$mai[1], 0, pars$mai[3], 0))

    df <- plotSpliceGraph(x = rowRanges(x), label = "label",
        tx_view = tx_view, ...)

    if (all(event_not_quantifiable) || n_sample == 0) {

        invisible(df)

    }

    labCol <- seq_len(nrow(X))
    colLabCol <- "black"

    plotImage(X, Rowv = Rowv, distfun = distfun,
        hclustfun = hclustfun, RowSideColors = RowSideColors,
        cexRow = cexRow, cexCol = cexCol, labRow = labRow,
        labCol = labCol, colLabCol = colLabCol,
        col = col, zlim = zlim)

    frame()
    
    invisible(df)

}

extractFeaturesFromVariants <- function(variants)
{

    id2variant <- split(togroup0(variants), featureID(unlist(variants)))
    id2variant <- unstrsplit(sort(unique(CharacterList(id2variant))), ",")
    features <- uniqueFeatures(unlist(variants))
    mcols(features)$label <- id2variant[match(
        featureID(features), names(id2variant))]

    return(features)

}

getLayoutParameters <- function(n_sample, n_feature, margin, heightPanels,
    RowSideColors, square, tx_view)
{

    n_RSC <- length(RowSideColors)

    mat <- rbind(
        c(0, 1, 0),
        c(0, 0, 0),
        c(0, 2, 0),
        c(0, 0, 0))

    for (i in seq_len(n_RSC)) {

        n <- max(mat) + 1
        mat <- cbind(c(0, 0, 0, 0), c(0, 0, n, 0), mat)

    }

    heightTopPanel <- heightPanels[1] / sum(heightPanels)
    heightBottomPanel <- heightPanels[2] / sum(heightPanels)

    mar_wid <- c(0.05, margin)
    mar_hei <- c(0.05, 0.2) * heightBottomPanel

    img_wid <- 1 - sum(mar_wid) - 0.025 * n_RSC * 2
    img_hei <- heightBottomPanel - sum(mar_hei)

    wid <- c(mar_wid[1], rep(0.025, n_RSC * 2), img_wid, mar_wid[2])
    hei <- c(heightTopPanel, mar_hei[1], img_hei, mar_hei[2])

    if (square) {

        padding <- getPadding(n_sample, n_feature, wid, hei)

        i_wid_mar <- c(1, length(wid))
        i_wid_img <- length(wid) - 1

        wid[i_wid_mar] <- wid[i_wid_mar] + 0.5 * padding[1]
        wid[i_wid_img] <- wid[i_wid_img] - padding[1]

        i_hei_mar <- c(2, 4)
        i_hei_img <- 3

        hei[i_hei_mar] <- hei[i_hei_mar] + 0.5 * padding[2]
        hei[i_hei_img] <- hei[i_hei_img] - padding[2]

    }

    ## add color key

    k <- ncol(mat)
    r <- nrow(mat)
    mat <- cbind(mat[, seq_len(k - 2)], mat[, k - 1], mat[, k - 1], mat[, k])
    wid <- c(wid[seq_len(k - 2)], wid[k - 1] * 2/3, wid[k - 1] * 1/3, wid[k])
    mat <- rbind(mat, 0, 0)
    mat[nrow(mat) - 1, ncol(mat) - 1] <- max(mat) + 1
    hei <- c(hei[seq_len(r - 1)], rep(0.05, 2) * heightBottomPanel,
        hei[r] - 0.1 * heightBottomPanel)

    ## reserve top right panel for user-defined plots

    mat[1, ncol(mat)] <- max(mat) + 1
    
    ## splice graph margins and aspect ratio

    ds <- dev.size()
    dev_wid <- ds[1]
    dev_hei <- ds[2]

    mai_hei <- c(0.05, 0.05) * dev_hei * heightTopPanel

    if (tx_view) {

        mai_wid <- c(0.05, margin) * dev_wid

    } else {

        mai_wid <- c(0.05, 0.05) * dev_wid

    }

    mai <- c(mai_hei[1], mai_wid[1], mai_hei[2], mai_wid[2])

    list(mat = mat, wid = wid, hei = hei, mai = mai)

}

getPadding <- function(n_sample, n_feature, wid, hei)
{

    ds <- dev.size()
    dev_wid <- ds[1]
    dev_hei <- ds[2]

    img_wid <- wid[length(wid) - 1]
    img_hei <- hei[3]

    img_wid_real <- img_wid / sum(wid) * dev_wid
    img_hei_real <- img_hei / sum(hei) * dev_hei

    cel_wid_real <- img_wid_real / n_feature
    cel_hei_real <- img_hei_real / n_sample

    if (cel_wid_real > cel_hei_real) {

        pad_wid <- (1 - cel_hei_real / cel_wid_real) * img_wid
        pad_hei <- 0

    } else {

        pad_wid <- 0
        pad_hei <- (1 - cel_wid_real / cel_hei_real) * img_hei

    }

    pad <- c(pad_wid, pad_hei)

    return(pad)

}

plotImage <- function(x, Rowv = NA, distfun, hclustfun, RowSideColors,
    cexRow, cexCol, labRow = NULL, labCol = NULL, colLabCol, col, zlim)
{

    if (is.null(Rowv)) {

        if (ncol(x) > 1) {

            j <- hclustfun(distfun(t(x)))$order

        } else {

            j <- 1

        }

    } else if (is.na(Rowv)) {

        j <- rev(seq_len(ncol(x)))

    } else {

        j <- Rowv

    }

    x <- x[, j]
    RowSideColors <- lapply(RowSideColors, "[", j)

    if (!is.matrix(x)) { x <- matrix(x, ncol = length(j)) }
    if (is.null(zlim)) { zlim <- range(x[is.finite(x)]) }

    par(mai = c(0, 0, 0, 0))
    image(x, col = col, zlim = zlim, axes = FALSE)

    if (!is.null(labCol)) {

        mtext(side = 3, at = seq(from = 0, to = 1,
            length.out = length(labCol)), text = labCol,
            cex = par()$cex * cexCol, col = colLabCol,
            line = 0.25, las = 3)

    }

    if (!is.null(labRow)) {

        mtext(side = 4, at = seq(from = 0, to = 1, length.out = ncol(x)),
            text = labRow[j], cex = par()$cex * cexRow,
            line = 0.5, las = 1)

    }

    for (r in seq_along(RowSideColors)) {

        par(mai = c(0, 0, 0, 0))
        image(matrix(seq_len(ncol(x)), nrow = 1),
            col = RowSideColors[[r]], axes = FALSE)
        mtext(side = 3, text = names(RowSideColors)[r],
            cex = par()$cex * cexCol, line = 0.25, las = 3)

    }

    par(mai = c(0, 0, 0, 0))
    image(matrix(seq_along(col), ncol = 1), col = col, axes = FALSE)
    mtext(side = 1, at = c(0, 1), text = format(zlim, digits = 2),
        cex = par()$cex, line = 0.25, las = 1)

}

restrictFeatures <- function(x, geneID = NULL, eventID = NULL, which = NULL,
    geneName = NULL, expand_variants = FALSE)
{

    if (is(x, "Counts")) {

        y <- rowRanges(x)

    } else {

        y <- x

    }

    if (!is.null(geneID) || !is.null(eventID) || !is.null(geneName)) {

        if (!is.null(geneID)) {

            index <- which(geneID(y) %in% geneID)

        } else if (!is.null(eventID)) {

            index <- which(eventID(y) %in% eventID)

            if (length(eventID) > 1) {

                featureIDs <- featureID(y)
                featureIDs <- gsub("(", "", featureIDs, fixed = TRUE)
                featureIDs <- gsub(")", "", featureIDs, fixed = TRUE)
                featureIDs <- gsub("|", ",", featureIDs, fixed = TRUE)
                featureIDs <- unlist(strsplit(featureIDs, ",", fixed = TRUE))

                if (any(duplicated(featureIDs))) {

                    stop("cannot plot overlapping events")

                }

            }

            if (expand_variants) {

                featureIDs <- unique(unlist(strsplit(
                    unlist(expandString(featureID(y)[index])), ",",
                        fixed = TRUE)))
                index <- unlist(lapply(featureIDs,
                    function (id) { grep(paste0("(^|,)", id, "(,|$)"),
                        featureID(y)) }))
                eventIDs <- unique(eventID(y)[index])
                index <- which(eventID(y) %in% eventIDs)

            }

        } else if (!is.null(geneName)) {

            if (is.null(geneName(y))) {

                stop("missing geneName")

            }

            if (length(geneName) > 1) {

                stop("geneName must be of length 1")

            }

            index <- which(any(geneName(y) == geneName))

            if (all(type(y)[index] %in% c("D", "A"))) index <- vector()
            
        }

        y <- y[index]

    } else if (!is.null(which)) {

        ol <- findOverlaps(y, which, type = "within")

        index <- which(
            type(y) == "E" & (y %over% which) |
            type(y) == "J" & (seq_along(y) %in% queryHits(ol)))

        y <- restrict(y[index], start = start(which), end = end(which))


    } else {

        return(x)

    }

    if (is(x, "Counts")) {

        x <- x[index, ]
        rowRanges(x) <- y
        return(x)

    } else {

        return(y)

    }

}

##' Plot read coverage and splice junction read counts for an individual
##' sample or averaged across samples.
##'
##' @title Plot read coverage and splice junction read counts
##' @inheritParams plotSpliceGraph
##' @param x \code{SGFeatureCounts} or \code{SGFeatures} object.
##'   If \code{x} is an \code{SGFeatureCounts} object that includes
##'   multiple samples, average coverage and splice junction counts
##'   are obtained.
##' @param sample_info Data frame with sample information.
##'   If \code{x} is an \code{SGFeatureCounts} object, sample information
##'   is obtained from \code{colData(x)}. If \code{sample_info} includes
##'   multiple samples, average coverage and splice junction counts
##'   are obtained.
##' @param sizefactor Numeric vector with length equal to the number of
##'   samples in \code{sample_info}. Used to scale coverages and splice
##'   junction counts before plotting, or before averaging across samples.
##'   Set to \code{NA} to disable scaling. If \code{NULL}, size factors
##'   are calculated as the number of bases sequenced (the product of library
##'   size and average number of bases sequenced per read or fragment),
##'   plotted coverages and splice junction counts are per 1 billion
##'   sequenced bases.
##' @param color Color used for plotting coverages
##' @param ylim Numeric vector of length two, determining y-axis range used
##'   for plotting coverages.
##' @param nbin Number of bins for plotting coverages
##' @param summary Function used to calculate per-bin coverage summaries
##' @param label Optional y-axis label
##' @param min_anchor Integer specifiying minimum anchor length
##' @param cores Number of cores available for parallel processing.
##' @return \code{data.frame} with information on splice junctions included
##'   in the splice graph
##' @examples
##' \dontrun{
##' par(mfrow = c(4, 1))
##' for (j in seq_len(4)) plotCoverage(sgfc_pred[, j])
##' }
##' NULL
##' @author Leonard Goldstein

plotCoverage <- function(x, geneID = NULL, geneName = NULL,
    eventID = NULL, which = NULL, sample_info = NULL, sizefactor = NA,
    toscale = c("exon", "none", "gene"), color = "darkblue",
    ylim = NULL, label = NULL, nbin = 200, summary = mean,
    curvature = 1, main = NULL, min_anchor = 1, cores = 1)
{

    toscale <- match.arg(toscale)

    x <- restrictFeatures(x, geneID, eventID, which, geneName)

    if (is(x, "SGFeatures") && !is.null(sample_info)) {

        sgf <- x
        sgfc <- getSGFeatureCounts(sample_info, sgf,
            min_anchor = min_anchor, cores = cores)

    } else if (is(x, "SGFeatureCounts")) {

        sample_info <- colData(x)
        sgf <- rowRanges(x)
        sgfc <- x

    } else {

        stop("either x must be an SGFeatureCounts object,
            or x must be an SGFeatures object and sample_info not NULL")

    }

    if (is.null(sizefactor)) {

        sizefactor <- calculateSizeFactor(sample_info)

    } else if (length(sizefactor) == 1 && is.na(sizefactor)) {

        sizefactor <- rep(1, nrow(sample_info))

    } else if (length(sizefactor) != nrow(sample_info)) {

        stop("sizefactor must have length equal to the number of samples")

    }

    scaled_cov <- getCoverage(sample_info, range(sgf), sizefactor, cores)
    average_cov <- Reduce("+", scaled_cov) / length(scaled_cov)

    scaled_counts <- sweep(counts(sgfc), 2, sizefactor, FUN = "/")
    average_counts <- rowSums(scaled_counts) / ncol(scaled_counts)

    sgf <- setFeatureColors(sgf, color, color, 1)
    sgf$label <- round(average_counts)
    g <- exonGraph(sgf, FALSE)
    exon_coordinates <- getExonCoordinates(g, toscale)

    plot(NA, xlim = c(-1, 1), ylim = c(-1, 1), xaxs = "i", yaxs = "i",
        axes = FALSE, xlab = NA, ylab = NA)
    plotTrackScore(exon_coordinates, average_cov, color, ylim, c(0.5, 1),
        nbin, summary, label)
    df <- plotExonGraph(g, exon_coordinates, c(0, 1), "junctions", "none",
        curvature, "label", FALSE, 1, NULL, FALSE, NA)
    text(x = 0, y = 0.95, labels = main, pos = 1, offset = 0, font = 2)

    invisible(df)

}
ldg21/SGSeq documentation built on Oct. 14, 2020, 9:51 p.m.