R/plotPairwise.R

Defines functions .makeOLaps .addColourCol .addYSide .addXSide .addRho .addLabels plotPairwise

Documented in plotPairwise

#' @title Plot Pairwise Values from a GRangeList
#'
#' @description
#' Plot Pairwise Values from a GRangeList by overlapping GRanges
#'
#' @details
#' This function enables pairwise plotting of two elements within a GRangesList.
#' All elements of the GRangesList will contain the same columns, so a set of
#' consensus ranges are first formed, before then taking all values from each
#' GRangesList element which overlap the range and producing a piarwise plot.
#'
#' Given that not all ranges will have values in both elements, side panels are
#' produced which can show the distribution of plotted values, along with those
#' which are only found in one of the foundational GRanges. These can take the
#' form of density, violin or boxplots.
#'
#' Addition columns, such as Differential Signal status can also be used to
#' form pairwise groups and colour the points.
#'
#' If a column in the GRangesList is suitable for labelling points, such as a
#' column with genes mapped to each range, this can be specified using the
#' argument `label = "col_to_label"`.
#' Only the furthest point from the origin will be labelled within each group
#' used to colour the points.
#' Labels will only be added if they lie beyond a circle of radius `min_d` from
#' the origin.
#' If multiple genes are mapped to the range, these will be separated by the
#' string provided in the `label_sep` argument.
#'
#' A regression line and correlation co-efficient are added to the plot by
#' default, but can be hidden easily if preferred
#'
#' @param x A GRangesList
#' @param var The colunm to compare between list elements
#' @param index Which list elements to compare
#' @param colour Optional column to use for combining across elements and
#' setting point colour
#' @param label Optional column to use for labelling ranges with the most
#' extreme changes
#' @param p,method,ignore.strand,min_width Passed to [makeConsensus()]
#' @param name_sep Character string to separate names of the GRangesList and
#' the selected column. Will appear as axis-labels
#' @param xside,yside Will call geom_(x/y)side* from the package ggside and
#' show additional panels on the top and right of the plot respectively
#' @param side_panel_width Set the relative widths of the side panels
#' @param side_alpha Set fill transparency in side panels
#' @param xside_axis_pos Position for axis_labels in the top panel when using
#' a discrete axis
#' @param yside_axis_label Wrapping for axis labels on the right-side panel when
#' using a discrete axis. Set to waiver() to turn off wrapping
#' @param smooth logical(1). If TRUE a regression line will be drawn using
#' \link[ggplot2]{geom_smooth}. To add this manually, set to FALSE and call
#' this geom with any custom parameters after creating the plot
#' @param rho_geom Used to add correlation coefficients for the two values
#' @param rho_pos Place the correlation coefficient within the plotting region
#' @param rho_col,rho_size,rho_alpha Parameters for displaying the correlation
#' @param label_geom Used to add labels from the column specified in label
#' @param label_width Label text will be truncated to this length
#' @param label_sep If multiple values (e.g. genes) are mapped to a range,
#' separate values using this string
#' @param label_size,label_alpha Passed to the geom used for adding labels
#' @param min_d Labels will only be added if the points lie circle beyond a
#' sircle of this radius
#' @param group_sep Text separator used to separate categories when specifying colour
#' @param simplify_equal logical(1) When combining columns from both elements
#' for the colour categories, should shared values be annotated as 'Both ...'
#' instead of having longer, more difficult to read annotations.
#' @param plot_theme Sets the initial theme by using the default theme for the
#' current R session via get_theme()
#' @param ... Passed to `geom_point()` for the main panel
#'
#' @return A `ggside` or `ggplot2` object
#'
#' @examples
#' theme_set(theme_bw())
#' set.seed(100)
#' gr1 <- GRanges(paste0("chr1:", seq(10, 150, by = 10)))
#' width(gr1) <- 5
#' gr1$logFC <- rnorm(length(gr1))
#' gr1$FDR <- rbeta(length(gr1), 1, 8)
#'
#' gr2 <-  GRanges(paste0("chr1:", seq(51, 250, by = 15)))
#' width(gr2) <- 4
#' gr2$logFC <- rnorm(length(gr2))
#' gr2$FDR <- rbeta(length(gr2), 1, 8)
#' grl <- GRangesList(TF1 = gr1, TF2 = gr2)
#' grl <- addDiffStatus(grl)
#'
#' # Using the defaults
#' plotPairwise(grl, var = "logFC")
#'
#' # Density plots on the side panels
#' plotPairwise(
#'   grl, var = "logFC", xside = "density", yside = "density",
#'   side_alpha = 0.5
#' )
#'
#' # Turning off side panels, regression line and correlations
#' plotPairwise(
#'   grl, var = "logFC", xside = "none", yside = "none",
#'   rho_geom = "none", smooth = FALSE
#' )
#'
#' # Add colours using the status column
#' plotPairwise(grl, var = "logFC", colour = "status") +
#'   scale_fill_manual(values = rep_len(c("blue", "red", "white", "grey"), 8)) +
#'   guides(fill = "none")
#'
#' @importFrom S4Vectors mcols
#' @importFrom rlang !! sym ensym
#' @export
plotPairwise <- function(
        x, var, colour, label, index = c(1, 2),
        p = 0, method = "union", ignore.strand = TRUE, min_width = 0,
        xside = c("boxplot", "density", "violin", "none"),
        yside = c("boxplot", "density", "violin", "none"),
        side_panel_width = c(0.3, 0.4), side_alpha = 1,
        xside_axis_pos = "right", yside_axis_label = scales::label_wrap(10),
        smooth = TRUE, rho_geom = c("text", "label", "none"), rho_col = "black",
        rho_size = 4, rho_pos = c(0.05, 0.95),  rho_alpha = 1,
        label_geom = c("label_repel", "label", "text_repel", "text", "none"),
        label_width = 20, label_sep = "; ", label_size = 3.5, label_alpha = 0.7,
        min_d = 1, group_sep = " - ", simplify_equal = TRUE, name_sep = " ",
        plot_theme = theme_get(), ...
) {
    ## Basic checks
    stopifnot(is(x, "GRangesList"))
    stopifnot(max(index) <= length(x))
    stopifnot(length(x) == 2)
    x <- tryCatch(x[index])
    nm <- names(x)
    stopifnot(length(nm) == 2)
    stopifnot(is(plot_theme, "theme"))

    mc_names <- .mcolnames(x[[1]])
    var <- as.character(ensym(var))
    var <- match.arg(var, mc_names)
    stopifnot(is.numeric(mcols(x[[1]])[[var]]))
    x_lab <- paste(nm[[1]], var, sep = name_sep)
    y_lab <- paste(nm[[2]], var, sep = name_sep)

    if (missing(colour)) {
        colour <- NULL
    } else {
        colour <- as.character(ensym(colour))
        colour <- sym(match.arg(colour, mc_names))
    }
    if (missing(label)) {
        label <- NULL
    } else {
        label <- as.character(ensym(label))
        label <- sym(match.arg(label, mc_names))
    }

    xside <- match.arg(xside)
    yside <- match.arg(yside)
    side_panel_width <- rep_len(side_panel_width, 2)
    rho_geom <- match.arg(rho_geom)
    label_geom <- match.arg(label_geom)

    ## The basic df for plotting
    ol <- .makeOLaps(x, var, name_sep, p, method, ignore.strand, min_width)
    if (!is.null(colour)) {
        ol <- .addColourCol(
            x, ol, colour, x_lab, y_lab, group_sep, simplify_equal
        )
    }

    df <- dplyr::filter(ol, !!sym("detected") == "Both Detected")
    p <- ggplot(df, aes(!!sym(x_lab), !!sym(y_lab))) +
        geom_point(aes(colour = !!colour), ...) +
        plot_theme

    if (xside != "none")
        p <- .addXSide(p, ol, x, xside, colour, xside_axis_pos, side_alpha)

    if (yside != "none")
        p <- .addYSide(p, ol, x, yside, colour, yside_axis_label, side_alpha)

    if (xside != "none" | yside != "none") {
        w_x <- side_panel_width[[1]]
        w_y <- side_panel_width[[2]]
        p <- p + theme(
            ggside.panel.scale.x = w_x, ggside.panel.scale.y = w_y,
            axis.title.x = element_text(hjust = 0.5 * (1 - w_y)),
            axis.title.y = element_text(hjust = 0.5 * (1 - w_x))
        )
    }
    if (smooth)
        p <- p + geom_smooth(method = "lm", se = FALSE, formula = "y ~ x")

    if (rho_geom != "none")
        p <- .addRho(
            p, ol[[x_lab]], ol[[y_lab]], rho_geom, rho_pos, rho_col, rho_size,
            rho_alpha
        )
    if (!is.null(label) & label_geom != "none") {
        p <- .addLabels(
            x, p, ol, label, colour, x_lab, y_lab, min_d, label_alpha,
            label_size, label_sep, label_geom, label_width
        )
    }
    p + labs(fill = c())

}

#' @importFrom dplyr bind_rows
#' @importFrom S4Vectors mcols
#' @importFrom stringr str_trunc
#' @importFrom ggrepel geom_text_repel geom_label_repel
#' @importFrom rlang !!
#' @keywords internal
.addLabels <- function(
        x, p, ol, label, colour, x_lab, y_lab, min_d, .alpha, .size, .sep, .geom,
        .width
) {
    grp_col <- "detected"
    if (!is.null(colour)) grp_col <- as.character(colour)
    nm <- names(x)
    ol$d <- ol[[x_lab]]^2 + ol[[y_lab]]^2
    split_ol <- split(ol, f = ol[[as.character(grp_col)]])
    split_ol <- lapply(split_ol, dplyr::filter, d == max(d), d > min_d^2)
    split_ol <- split_ol[vapply(split_ol, nrow, integer(1)) == 1]
    ## Now there should be a list of single-row tibbles without NA values
    split_ol <- lapply(
        split_ol,
        function(df) {
            ind <- unlist(df[nm])
            lb <- c(
                unlist(mcols(x[[1]])[[as.character(label)]][ind[[1]]]),
                unlist(mcols(x[[2]])[[as.character(label)]][ind[[2]]])
            )
            lb <- paste(unique(lb), collapse = .sep)
            if (length(lb) == 0) return(NULL)
            df[[as.character(label)]] <- str_trunc(lb, width = .width)
            df
        }
    )
    ol <- bind_rows(split_ol)
    if (nrow(ol) == 0) return(p)
    f <- match.fun(paste0("geom_", .geom))
    p + f(
        aes(label = !!label, colour = {{ colour }}),
        data = ol, alpha = .alpha, size = .size, show.legend = FALSE
    )

}

#' @importFrom stats cor
#' @keywords internal
.addRho <- function(p, xvals, yvals, geom, pos, colour, size, .alpha) {
    pos <- rep_len(pos, 2)
    stopifnot(is.numeric(pos))
    rng_x <- range(xvals, na.rm = TRUE)
    rng_y <- range(yvals, na.rm = TRUE)
    cor_x <- min(rng_x) + pos[[1]] * diff(rng_x)
    cor_y <- min(rng_y) + pos[[2]] * diff(rng_y)
    rho <- round(cor(xvals, yvals, use = "pairwise.complete.obs"), 3)
    lab <- paste("rho ==", rho)
    p + annotate(
        geom, x = cor_x, y = cor_y, label = lab, parse = TRUE,
        colour = colour, size = size, alpha = .alpha
    )
}

#' @import ggside
#' @importFrom S4Vectors mcols
#' @importFrom rlang !! sym
#' @importFrom forcats fct_relabel fct_na_value_to_level
#' @importFrom stringr str_replace_na
#' @keywords internal
.addXSide <- function(p, ol, x, xside, xside_var, label_side, alpha) {
    ## NB: This will be drawn above the plot
    nm <- names(x)
    if (is.null(xside_var)) {
        col <- "detected"
    } else{
        mcol <- as.character(xside_var)
        col <- paste(nm[[2]], mcol, sep = "_")
        vals <- mcols(x[[2]])[[mcol]][ol[[nm[[2]]]]]
        if (is.factor(vals)) {
            vals <- fct_na_value_to_level(vals, "Undetected")
            vals <- fct_relabel(vals, function(x) paste(nm[[2]], x))
        } else {
            vals <- str_replace_na(vals, "Undetected")
            vals <- paste(nm[[2]], vals)
        }
        ol[[col]] <- vals
    }
    stopifnot(col %in% colnames(ol))
    ol <- droplevels(dplyr::filter(ol, !is.na(!!sym(nm[[1]]))))
    if (xside == "density") {
        x_lab <- p$labels$x
        p <- p + geom_xsidedensity(
            aes(x = !!sym(x_lab), y = after_stat(density), fill = !!sym(col)),
            alpha = alpha,
            data = ol, colour = NA
        )
    }
    if (xside == "boxplot") {
        p <- p + geom_xsideboxplot(
            aes(y = !!sym(col), fill = !!sym(col)), data = ol,
            alpha = alpha,
            orientation = "y"
        ) +
            scale_xsidey_discrete(position = label_side)
    }
    if (xside == "violin") {
        p <- p + geom_xsideviolin(
            aes(y = !!sym(col), fill = !!sym(col)), data = ol,
            alpha = alpha,
            orientation = "y", draw_quantiles = 0.5, trim = FALSE
        ) +
            scale_xsidey_discrete(position = label_side)
    }

    p

}

#' @import ggside
#' @importFrom S4Vectors mcols
#' @importFrom rlang !! sym
#' @importFrom forcats fct_relabel fct_na_value_to_level
#' @importFrom stringr str_replace_na
#' @keywords internal
.addYSide <- function(p, ol, x, yside, yside_var, lab, alpha) {
    ## NB: This will be drawn to the right of the plot
    nm <- names(x)
    if (is.null(yside_var)) {
        col <- "detected"
    } else{
        mcol <- as.character(yside_var)
        col <- paste(nm[[1]], mcol, sep = "_")
        vals <- mcols(x[[1]])[[mcol]][ol[[nm[[1]]]]]
        if (is.factor(vals)) {
            vals <- fct_na_value_to_level(vals, "Undetected")
            vals <- fct_relabel(vals, function(x) paste(nm[[1]], x))
        } else {
            vals <- str_replace_na(vals, "Undetected")
            vals <- paste(nm[[1]], vals)
        }
        ol[[col]] <- vals

    }
    stopifnot(col %in% colnames(ol))
    ol <- droplevels(dplyr::filter(ol, !is.na(!!sym(nm[[2]]))))
    if (yside == "density") {
        y_lab <- p$labels$y
        p <- p + geom_ysidedensity(
            aes(y = !!sym(y_lab), x = after_stat(density), fill = !!sym(col)),
            alpha = alpha,
            data = ol, orientation = "y", colour = NA
        )
    }
    if (yside == "boxplot") {
        p <- p + geom_ysideboxplot(
            aes(x = !!sym(col), fill = !!sym(col)), data = ol,
            alpha = alpha,
            orientation = "x"
        ) +
            scale_ysidex_discrete(labels = lab)
    }
    if (yside == "violin") {
        p <- p + geom_ysideviolin(
            aes(x = !!sym(col), fill = !!sym(col)), data = ol,
            orientation = "x", draw_quantiles = 0.5,
            alpha = alpha,
            trim = FALSE
        ) +
            scale_ysidex_discrete(labels = lab)
    }

    p

}


#' @importFrom S4Vectors mcols
#' @importFrom tidyr unite
#' @importFrom rlang !! !!! syms
#' @importFrom methods is
#' @importFrom forcats fct_relabel fct_cross fct_na_value_to_level
#' @importFrom stringr str_replace_na
#' @importFrom glue glue
#' @keywords internal
.addColourCol <- function(x, ol, colour, x_lab, y_lab, .sep, simplify = TRUE) {
    nm <- names(x)
    col <- as.character(colour)
    ## These columns must be character/factor. They'll be the same type as a GRL
    is_char <- is(mcols(x[[1]])[[col]], "character")
    is_fact <- is(mcols(x[[1]])[[col]], "factor")
    stopifnot(is_char | is_fact)
    ## Now merge as required
    temp <- lapply(nm, function(i) mcols(x[[i]])[[col]][ol[[i]]])
    which_equal <- which(temp[[1]] == temp[[2]])
    pat <- "({nm[[1]]}|{nm[[2]]}).+{.sep}({nm[[1]]}|{nm[[2]]})"
    if (is_char) {
        temp <- lapply(temp, str_replace_na, "Undetected")
        temp <- lapply(seq_along(temp), function(i) paste(nm[[i]], temp[[i]]))
        names(temp) <- nm
        temp_df <- as_tibble(temp)
        temp_df <- unite(
            temp_df, col = !!colour, !!!syms(nm), sep = .sep, remove = FALSE
        )
        if (simplify) {
            temp_df[[col]][which_equal] <- str_replace_all(
                temp_df[[col]][which_equal], glue(pat), "Both"
            )
        }
    } else {
        temp <- lapply(temp, fct_na_value_to_level, "Undetected")
        temp <- lapply(
            seq_along(temp),
            function(i) fct_relabel(temp[[i]], function(x) paste(nm[[i]], x))
        )
        names(temp) <- nm
        temp_df <- as_tibble(temp)
        temp_df[[col]] <- fct_cross(
            temp_df[[nm[[2]]]], temp_df[[nm[[1]]]], sep = .sep
        )
        if (simplify) {
            lv_same <- levels(droplevels(temp_df[[col]][which_equal]))
            lv_same <- str_replace_all(lv_same, glue(pat), "Both")
            levels(temp_df[[col]]) <- c(levels(temp_df[[col]]), lv_same)
            temp_df[[col]][which_equal] <-  str_replace_all(
                temp_df[[col]][which_equal], glue(pat), "Both"
            )
        }
        temp_df <- droplevels(temp_df)
    }
    ol[[col]] <- temp_df[[col]]
    ol
}

#' @importFrom dplyr case_when
#' @keywords internal
.makeOLaps <- function(x, var, name_sep, .p, .method, .ignore.str, .min_width) {
    nm <- names(x)
    cons <- makeConsensus(x, .p, NULL, .method, .ignore.str, FALSE, .min_width)
    cons <- granges(cons)
    ol <- .mapHits(x, cons)
    ol <- .addCols(x, ol, var, NULL, NULL, name_sep)

    ol$range <- as.character(cons)[ol$consensus_peak]
    ol$detected <- case_when(
        !is.na(ol[[nm[[1]]]]) & !is.na(ol[[nm[[2]]]]) ~ "Both Detected",
        is.na(ol[[nm[[1]]]]) & !is.na(ol[[nm[[2]]]]) ~ paste(nm[[2]], "Only"),
        !is.na(ol[[nm[[1]]]]) & is.na(ol[[nm[[2]]]]) ~ paste(nm[[1]], "Only"),
        TRUE ~ NA_character_
    )
    lv <- c("Both Detected", paste(nm, "Only"))
    ol$detected <- factor(ol$detected, levels = lv)
    ol
}
steveped/extraChIPs documentation built on May 12, 2024, 2:55 p.m.