R/plots.R

Defines functions plotTargetingDrugsVSsimilarPerturbations mergeDatasetsBy findIntersectingCompounds compareQuantile plot.referenceComparison plotComparison prepareLabel plotSingleCorr plotGSEA plotMetricDistribution plotESplot performGSEA

Documented in compareQuantile findIntersectingCompounds performGSEA plotESplot plotGSEA plotMetricDistribution plot.referenceComparison plotSingleCorr plotTargetingDrugsVSsimilarPerturbations

# Internal GSEA plot -----------------------------------------------------------

#' Perform GSEA
#'
#' @importFrom fgsea calcGseaStat
#'
#' @keywords internal
#' @return List with results of running GSEA
performGSEA <- function(pathways, stats) {
    pathway <- unname(as.vector(na.omit(match(pathways, names(stats)))))
    pathway <- sort(pathway)
    gseaStat <- calcGseaStat(stats, selectedStats=pathway,
                             returnAllExtremes=TRUE)
    enrichment <- data.frame(
        rank =c(0, c(pathway - 1, pathway), length(stats) + 1),
        score=c(0, c(gseaStat$bottoms, gseaStat$tops), 0))
    res <- list(pathway=pathway, enrichmentScore=enrichment, stat=gseaStat)
    return(res)
}

#' Render GSEA enrichment plot
#'
#' @importFrom ggplot2 ggplot aes geom_point geom_hline geom_line annotate
#' scale_x_continuous scale_y_continuous theme theme_bw ggtitle labs
#' element_text element_blank element_rect expansion
#'
#' @keywords internal
#' @return GSEA enrichment plot
plotESplot <- function(enrichmentScore, gseaStat, compact=FALSE) {
    score <- NULL
    amp <- range(enrichmentScore$score)
    ES  <- amp[which.max(abs(amp))]
    if (compact) {
        rugLength <- 20
        rugAlpha  <- 0.05
    } else {
        rugLength <- 0.1
        rugAlpha  <- 0.2
    }
    enrichmentPlot <- ggplot(enrichmentScore, aes(x=rank, y=score)) +
        geom_rug(alpha=rugAlpha, sides="b", length=unit(rugLength, "npc")) +
        geom_line(colour="orange", na.rm=TRUE, size=0.7) +
        geom_hline(yintercept=0, colour="darkgrey", linetype="longdash") +
        geom_hline(yintercept=ES, colour="#3895D3") +
        scale_x_continuous(expand=c(0,0)) +
        labs(y="Enrichment score") +
        theme_bw() +
        theme(axis.title.x=element_blank(),
              axis.text.x=element_blank(),
              axis.ticks.x=element_blank(),
              panel.grid.major.x=element_blank(),
              panel.grid.minor=element_blank(),
              axis.text=element_text(size=10),
              axis.title=element_text(size=12))
    if (compact) {
        enrichmentPlot <- enrichmentPlot +
            theme(axis.title.y=element_blank(),
                  axis.text.y=element_blank(),
                  axis.ticks.y=element_blank())
    } else {
        enrichmentPlot <- enrichmentPlot +
            scale_y_continuous(expand=expansion(mult=c(0.2, 0.1)))
                               # breaks=c(round(ES, 2), seq(-20, 20, .1)))
    }

    if (ES > 0) {
        label_x     <- max(enrichmentScore$rank)
        label_hjust <- 1.2
        label_vjust <- 1.5
    } else {
        label_x     <- 0
        label_hjust <- -0.2
        label_vjust <- -0.5
    }
    enrichmentPlot <- enrichmentPlot +
        annotate("text", y=ES, colour="#3895D3", label=round(ES, 3),
                 x=label_x, hjust=label_hjust, vjust=label_vjust)
    return(enrichmentPlot)
}

#' Plot metric distribution
#'
#' @importFrom ggplot2 ggplot aes scale_x_continuous scale_y_continuous theme
#' theme_bw labs guides element_text geom_area scale_fill_gradient2 geom_raster
#' @importFrom scales extended_breaks
#'
#' @keywords internal
#' @return Metric distribution plot
plotMetricDistribution <- function(stat, compact=FALSE) {
    # Scale number of breaks according to number of ranked elements
    breaks       <- max(round(-120 + 55 * log10(length(stat))), 10)
    quantile     <- cut(stat, breaks=breaks, labels=FALSE)
    quantile     <- seq(min(stat), max(stat), length.out=breaks)[quantile]
    rankedMetric <- data.frame(sort=seq(stat), stat=stat, quantile=quantile)

    if (compact) {
        aes        <- aes_string("sort", 0, fill="stat")
        metricPlot <- ggplot(rankedMetric, aes) +
            geom_raster()
    } else {
        aes        <- aes_string("sort", "stat")
        metricPlot <- ggplot(rankedMetric, aes) +
            geom_area(aes_string(group="quantile", fill="quantile"), na.rm=TRUE,
                      position="identity")
    }
    xBreaks <- c(min(rankedMetric$sort),
                 extended_breaks()(rankedMetric$sort),
                 max(rankedMetric$sort))
    xBreaks <- xBreaks[xBreaks != 0]
    # Check if last tick is too close to potentially overlap
    if (xBreaks[length(xBreaks)] - xBreaks[length(xBreaks) - 1] < 2000) {
        xBreaks <- xBreaks[-c(length(xBreaks) - 1)]
    }
    xBreaksAdj <- c(ifelse(compact, 0, 0.5), rep(0.5, length(xBreaks) - 2), 1)

    metricPlot <- metricPlot +
        scale_x_continuous(expand=c(0, 0), breaks=xBreaks) +
        scale_y_continuous(expand=c(0, 0)) +
        labs(x="Rank", y="Ranked metric") +
        guides(colour="none", fill="none") +
        scale_fill_gradient2(low="dodgerblue", mid="grey95", high="orangered",
                             midpoint=0) +
        theme_bw() +
        theme(plot.margin=unit(c(0, 0, 5.5, 0), "pt"),
              panel.grid.major.x=element_blank(),
              panel.grid.minor=element_blank())
    # May potentially break with a future ggplot2 update...
    metricPlot <- suppressWarnings(
        metricPlot + theme(axis.text.x=element_text(hjust=xBreaksAdj)))
    if (compact) {
        metricPlot <- metricPlot +
            theme(axis.text=element_blank(),
                  axis.title=element_blank(),
                  axis.ticks=element_blank())
    } else {
        metricPlot <- metricPlot +
            theme(axis.text=element_text(size=10),
                  axis.title=element_text(size=12))
    }
    return(metricPlot)
}

#' Plot gene set enrichment analysis (GSEA)
#'
#' @param stats Named numeric vector: statistics
#' @inheritParams plot.perturbationChanges
#' @inheritParams plot.referenceComparison
#' @param gseaParam Numeric: GSEA-like parameter
#' @param compact Boolean: render a compact version of the GSEA plot?
#'
#' @importFrom ggplot2 ggtitle theme unit
#' @importFrom cowplot plot_grid ggdraw draw_label
#' @importFrom stats na.omit
#'
#' @return Grid of plots illustrating a GSEA plot
#' @keywords internal
plotGSEA <- function(stats, geneset, genes=c("both", "top", "bottom"),
                     title="GSEA plot", gseaParam=1, compact=FALSE) {
    genes <- match.arg(genes)
    if (is.list(geneset)) {
        topGenes    <- c(geneset[["top"]], geneset[["custom"]])
        bottomGenes <- geneset[["bottom"]]
    } else {
        topGenes    <- geneset
        bottomGenes <- NULL
    }
    if (!any(genes %in% c("top", "both")))    topGenes    <- NULL
    if (!any(genes %in% c("bottom", "both"))) bottomGenes <- NULL

    statsOrd <- sort(stats, decreasing=TRUE)
    statsAdj <- abs(statsOrd ^ gseaParam)
    statsAdj <- sign(statsOrd) * statsAdj / max(statsAdj)

    plotOneESplot <- function(end, genes, stats, compact=FALSE) {
        if (is.null(genes)) return(NULL)
        gseaRes        <- suppressWarnings(performGSEA(genes, stats))
        enrichmentPlot <- plotESplot(gseaRes$enrichmentScore, gseaRes$stat,
                                     compact=compact)
        return(enrichmentPlot)
    }
    titleSize    <- ifelse(compact, 10, 14)
    title        <- ggdraw() + draw_label(title, size=titleSize)
    topESplot    <- plotOneESplot("top",    topGenes,    statsAdj, compact)
    bottomESplot <- plotOneESplot("bottom", bottomGenes, statsAdj, compact)
    metricPlot   <- plotMetricDistribution(statsOrd, compact)

    plots <- list(topESplot, bottomESplot, metricPlot)
    plots <- Filter(length, plots)

    metricPlotHeight <- ifelse(compact, 3, 6)
    plotHeights <- c(1, 6,
                     if (!is.null(topGenes))    6,
                     if (!is.null(bottomGenes)) 6)
    plotHeights[length(plotHeights)] <- metricPlotHeight
    grid <- plot_grid(title, plotlist=plots, align="v", ncol=1,
                      rel_heights=plotHeights)
    return(grid)
}

# Internal scatter plot for correlations ---------------------------------------

#' Render scatter plot to show a single relationship
#'
#' @param perturbation List of named numeric vectors containing the differential
#' expression profile score per gene for a perturbation; each perturbation of
#' the list will be rendered with a different colour
#' @param ylabel Character: Y axis label
#' @param diffExprGenes Named numeric vector
#' @inheritParams plot.perturbationChanges
#'
#' @importFrom ggplot2 ggplot geom_point geom_rug geom_abline xlab ylab
#' geom_density_2d theme guides guide_legend theme_bw ggtitle
#'
#' @keywords internal
#' @return Scatter plot
plotSingleCorr <- function(perturbation, ylabel, diffExprGenes, title=NULL) {
    prepareDFperPert <- function(perturbation, diffExprGenes) {
        # Intersect common genes
        genes         <- intersect(names(perturbation), names(diffExprGenes))
        perturbation  <- perturbation[genes]
        diffExprGenes <- diffExprGenes[genes]

        df <- data.frame(diffExprGenes, zscores=perturbation)
        return(df)
    }
    dfPerPert  <- lapply(perturbation, prepareDFperPert, diffExprGenes)
    dfAllPerts <- bind_rows(dfPerPert)
    dfAllPerts <- cbind(dfAllPerts, perturbation=rep(names(dfPerPert),
                                                     lapply(dfPerPert, nrow)))
    dfAllPerts$perturbation <- parseCMapID(dfAllPerts$perturbation,
                                           cellLine=TRUE)

    multipleCellLines <- length(perturbation) > 1
    if (multipleCellLines) {
        aesMap <- aes_string("diffExprGenes", "zscores", colour="perturbation")
        guide  <- guides(colour=guide_legend(title="Cell line"))
    } else {
        aesMap <- aes_string("diffExprGenes", "zscores")
        guide  <- NULL
    }

    plot <- ggplot(dfAllPerts, aesMap) +
        geom_point(alpha=0.1) +
        geom_rug(alpha=0.1) +
        geom_density_2d() +
        xlab("Differentially expressed genes (input)") +
        ylab(ylabel) +
        theme_bw() +
        theme(legend.position="bottom") +
        guide
    if (!is.null(title)) {
        plot <- plot +
            ggtitle(title) +
            theme(plot.title = element_text(hjust = 0.5))
    }
    return(plot)
}

# Exported plot functions ------------------------------------------------------

prepareLabel <- function(data) {
    prepareLabel_similarPerturbations <- function(k, pert) {
        info <- print(pert, pert[[1]][[k]])
        collapse <- function(var) paste(unique(var), collapse="/")

        name <- collapse(info$metadata$pert_iname)
        type <- collapse(info$metadata$pert_type)

        isOverexpression <- startsWith(type, "trt_oe")
        isLossOfFunction <- startsWith(type, "trt_sh") ||
            startsWith(type, "trt_xpr")

        if (isOverexpression) {
            name <- paste(name, "OE")
        } else if (isLossOfFunction) {
            name <- paste(name, "KD")
        }

        cellLine <- unique(info$metadata$cell_id)
        if (length(unique(cellLine)) > 1) cellLine <- "mean"

        dose       <- collapse(info$metadata$pert_idose)
        timepoint  <- collapse(info$metadata$pert_itime)
        res <- sprintf("%s (%s, %s at %s)", name, cellLine, dose, timepoint)
        return(res)
    }

    prepareLabel_targetingDrugs <- function(k, data) {
        compoundInfo    <- attr(data, "compoundInfo")
        data$compound   <- as.character(data$compound)
        compoundInfo$id <- as.character(compoundInfo$id)

        merged          <- merge(data, compoundInfo, by.x="compound", by.y="id")
        compound        <- merged[k]
        name            <- compound[["name"]]
        if (is.null(name) || is.na(name) || name == "") {
            name <- compound[["compound"]]
            if (is.na(name)) name <- "NA"
        }
        target <- compound[["target"]]
        target <- gsub(", |;", "/", target)
        if (is.null(target) || target == "") target <- "?"

        targetPathway <- compound[["target pathway"]]
        if (is.null(targetPathway)) {
            targetPathway <- NA
            res <- sprintf("%s (%s)", name, target)
        } else {
            res <- sprintf("%s (%s: %s)", name, target, targetPathway)
        }
        return(res)
    }

    if (is(data, "similarPerturbations")) {
        FUN <- prepareLabel_similarPerturbations
    } else if (is(data, "targetingDrugs")) {
        FUN <- prepareLabel_targetingDrugs
    }

    res <- sapply(seq(nrow(data)), FUN, data)
    return(res)
}

plotComparison <- function(x, method, n, showMetadata,
                           plotNonRankedPerturbations, alpha, title=NULL) {
    if (method == "gsea") {
        stat     <- "GSEA"
        statRank <- "GSEA_rank"
        yLabel   <- "Weighted Connectivity Score (WTCS)"
    } else if (method == "rankProduct") {
        stat     <- "rankProduct_rank"
        statRank <- stat
        yLabel   <- "Rank product's rank"
    } else {
        stat     <- paste0(method, "_coef")
        statRank <- paste0(method, "_rank")
        yLabel   <- sprintf("%s's correlation coefficient", capitalize(method))
    }
    if (!stat %in% colnames(x)) {
        stop(sprintf("'%s' was not run with method '%s'",
                     deparse(substitute(x)), method))
    }

    # Label perturbations
    if (!plotNonRankedPerturbations) {
        x <- x[order(x[[statRank]], na.last=NA)]
        x$ranked <- "Ranked"
    } else {
        x$ranked <- ifelse(!is.na(x[[statRank]]), "Ranked", "Non-ranked")
    }
    sortedPert <- order(x[[stat]], decreasing=TRUE)

    top <- bottom <- n
    if (length(n) == 2) {
        top    <- n[[1]]
        bottom <- n[[2]]
    }
    index   <- unique(c(head(sortedPert, top), tail(sortedPert, bottom)))
    x$label <- ""

    if (showMetadata && is(x, "referenceComparison")){
        x$label[index] <- prepareLabel(x[index])
    } else {
        x$label[index] <- x[[1]][index]
    }

    # Correlation coefficient with labels for top and bottom perturbations
    vars <- aes_string(x=1, y=stat, label="label", colour="ranked")
    rug  <- geom_rug(alpha=alpha, sides="l")

    plot <- ggplot(x, vars) +
        rug +
        scale_colour_manual(values=c("Ranked"="#56B4E9",
                                     "Non-ranked"="#999999")) +
        geom_text_repel(nudge_x=0.15, direction="y", hjust=0, segment.size=0.2,
                        show.legend=FALSE, size=6) +
        xlim(1, 1.375) +
        ylab(yLabel) +
        theme_classic(base_size=18) +
        theme(legend.title=element_blank(), legend.position="bottom",
              axis.line.x=element_blank(), axis.ticks.x=element_blank(),
              axis.text.x=element_blank(), axis.title.x=element_blank())

    if (length(unique(x$ranked)) == 1) plot <- plot + guides(colour="none")
    if (method == "rankProduct") plot <- plot + scale_y_reverse()

    if (!is.null(title)) {
        plot <- plot +
            ggtitle(title) +
            theme(plot.title = element_text(hjust = 0.5))
    }

    return(plot)
}

#' Plot data comparison
#'
#' If \code{element = NULL}, comparison is plotted based on all elements.
#' Otherwise, show scatter or GSEA plots for a single element compared with
#' previously given differential expression results.
#'
#' @param x \code{referenceComparison} object: obtained after running
#'   \code{\link{rankSimilarPerturbations}()} or
#'   \code{\link{predictTargetingDrugs}()}
#' @param element Character: identifier in the first column of \code{x}
#' @param method Character: method to plot results; choose between
#'   \code{spearman}, \code{pearson}, \code{gsea} or \code{rankProduct} (the
#'   last one is only available if \code{element = NULL})
#' @param n Numeric: number of top and bottom genes to label (if a vector of two
#'   numbers is given, the first and second numbers will be used as the number
#'   of top and bottom genes to label, respectively); only used if
#'   \code{element = NULL}
#' @param showMetadata Boolean: show available metadata information instead of
#'   identifiers (if available)? Only used if \code{element = NULL}
#' @param alpha Numeric: transparency; only used if \code{element = NULL}
#' @param plotNonRankedPerturbations Boolean: plot non-ranked data in grey? Only
#'   used if \code{element = NULL}
#' @param genes Character: when plotting gene set enrichment analysis (GSEA),
#'   plot most up-regulated genes (\code{genes = "top"}), most down-regulated
#'   genes (\code{genes = "bottom"}) or both (\code{genes = "both"}); only used
#'   if \code{method = "gsea"} and \code{geneset = NULL}
#' @inheritParams prepareCMapPerturbations
#' @inheritParams compareAgainstReferencePerMethod
#' @inheritParams plot.perturbationChanges
#'
#' @param ... Extra arguments currently not used
#'
#' @importFrom graphics plot
#' @importFrom R.utils capitalize
#' @importFrom ggplot2 ggplot aes_string geom_point geom_hline ylab theme
#'   element_blank scale_colour_manual xlim theme_classic guides scale_y_reverse
#' @importFrom ggrepel geom_text_repel
#'
#' @family functions related with the ranking of CMap perturbations
#' @family functions related with the prediction of targeting drugs
#' @return Plot illustrating the reference comparison
#' @export
#'
#' @examples
#' # Example of a differential expression profile
#' data("diffExprStat")
#'
#' \dontrun{
#' # Download and load CMap perturbations to compare with
#' cellLine <- "HepG2"
#' cmapMetadataKD <- filterCMapMetadata(
#'   "cmapMetadata.txt", cellLine=cellLine,
#'   perturbationType="Consensus signature from shRNAs targeting the same gene")
#'
#' cmapPerturbationsKD <- prepareCMapPerturbations(
#'   cmapMetadataKD, "cmapZscores.gctx", "cmapGeneInfo.txt", loadZscores=TRUE)
#' }
#'
#' # Rank similar CMap perturbations
#' compareKD <- rankSimilarPerturbations(diffExprStat, cmapPerturbationsKD)
#'
#' # Plot ranked list of CMap perturbations
#' plot(compareKD, method="spearman")
#' plot(compareKD, method="spearman", n=c(7, 3))
#' plot(compareKD, method="pearson")
#' plot(compareKD, method="gsea")
#'
#' # Plot results for a single perturbation
#' pert <- compareKD[[1, 1]]
#' plot(compareKD, pert, method="spearman", zscores=cmapPerturbationsKD)
#' plot(compareKD, pert, method="pearson", zscores=cmapPerturbationsKD)
#' plot(compareKD, pert, method="gsea", zscores=cmapPerturbationsKD)
#'
#' # Predict targeting drugs based on a given differential expression profile
#' gdsc <- loadExpressionDrugSensitivityAssociation("GDSC 7")
#' predicted <- predictTargetingDrugs(diffExprStat, gdsc)
#'
#' # Plot ranked list of targeting drugs
#' plot(predicted, method="spearman")
#' plot(predicted, method="spearman", n=c(7, 3))
#' plot(predicted, method="pearson")
#' plot(predicted, method="gsea")
#'
#' # Plot results for a single targeting drug
#' drug <- predicted$compound[[4]]
#' plot(predicted, drug, method="spearman")
#' plot(predicted, drug, method="pearson")
#' plot(predicted, drug, method="gsea")
plot.referenceComparison <- function(x, element=NULL,
                                     method=c("spearman", "pearson", "gsea",
                                              "rankProduct"),
                                     n=c(3, 3), showMetadata=TRUE,
                                     plotNonRankedPerturbations=FALSE,
                                     alpha=0.3,
                                     genes=c("both", "top", "bottom"), ...,
                                     zscores=NULL, title=NULL) {
    if (!is.null(element)) {
        if (length(element) > 1) {
            stop("argument 'element' should be a character of length one")
        } else if (element %in% method) {
            # Legacy: 'method' as the second argument
            method  <- element
            element <- NULL
        }
    }
    cols   <- tolower(colnames(x))
    method <- method[tolower(method) %in% gsub("_rank", "", cols)]
    if (length(method) == 0) stop("no supported methods selected")
    method <- method[[1]]
    method <- match.arg(method)

    if (is.null(element)) {
        plot <- plotComparison(
            x, method=method, n=n, showMetadata=showMetadata, title=title,
            plotNonRankedPerturbations=plotNonRankedPerturbations, alpha=alpha)
    } else if (is(x, "similarPerturbations")) {
        metadata     <- attr(x, "metadata")
        # Filter out average cell line perturbations (not found in zscores file)
        metadata     <- metadata[!is.na(parseCMapID(metadata[[1]],
                                                    cellLine=TRUE))]
        geneInfo     <- attr(x, "geneInfo")
        compoundInfo <- attr(x, "compoundInfo")
        if (is.null(zscores)) zscores <- attr(x, "zscoresFilename")
        cmapPerturbations <- suppressMessages(
            prepareCMapPerturbations(metadata, zscores, geneInfo, compoundInfo))
        input   <- c(attr(x, "diffExprGenes"), # legacy
                     attr(x, "input"))
        geneset <- c(attr(x, "pathways"), # legacy
                     attr(x, "geneset"))
        plot  <- plotPerturbationChanges(cmapPerturbations, element,
                                         input=input, method=method,
                                         geneset=geneset, genes=genes, ...,
                                         title=title)
    } else if (is(x, "targetingDrugs")) {
        plot <- plotTargetingDrug(x, element, method=method, genes=genes, ...,
                                  title=title)
    }
    return(plot)
}

#' Compare vector against its quantile
#'
#' Check which elements of the vector are lower/greater than or equal to the
#' quantile of a given vector.
#'
#' @param vec Numeric vector
#' @param prob Numeric: probability value between [0,1] to produce sample
#'   quantiles
#' @param lte Boolean: check if values are <= quantile? If \code{FALSE}, checks
#'   if values are >= quantile
#'
#' @importFrom stats quantile
#'
#' @return Boolean vector regarding compared elements
#' @keywords internal
compareQuantile <- function(vec, prob, lte=FALSE) {
    if (lte) {
        cmp  <- `<=`
    } else {
        cmp  <- `>=`
        prob <- 1 - prob
    }
    res <- cmp(vec, quantile(vec, prob))
    return(res)
}

#' Check for intersecting compounds across specific columns on both datasets
#'
#' @return List containing three elements: matching compounds
#'   \code{commonCompounds} between column \code{key 1} and \code{key 2} from
#'   the first and second datasets, respectively
#' @keywords internal
findIntersectingCompounds <- function(data1, data2, keys1=NULL, keys2=NULL) {
    showSelectedCols <- is.null(keys1) || is.null(keys2)

    keys <- list()
    keys$cmap  <- c("compound_perturbation", "pert_iname", "pert_id", "smiles",
                    "InChIKey", "pubchem_cid")
    keys$nci60 <- c("compound", "PubChem SID", "PubChem CID", "SMILES")
    keys$ctrp  <- c("compound", "name", "broad id", "SMILES")
    keys$gdsc  <- c("compound", "name")
    keys       <- unique(unlist(keys))

    # Filter keys based on dataset columns
    if (is.null(keys1)) keys1 <- keys[keys %in% colnames(data1)]
    names(keys1) <- keys1
    if (is.null(keys2)) keys2 <- keys[keys %in% colnames(data2)]
    names(keys2) <- keys2

    compareDatasetIds <- function(key1, key2, data1, data2) {
        values1 <- stripStr(data1[[key1]])
        values2 <- stripStr(data2[[key2]])
        matches <- which(values1 %in% na.omit(values2)) # Avoid matching NAs
        return(data1[[key1]][matches])
    }

    res <- list(key1=NULL, key2=NULL, commonCompounds=NULL)
    for (col1 in keys1) {
        for (col2 in keys2) {
            cmp <- compareDatasetIds(col1, col2, data1, data2)
            if (length(cmp) >= length(res$commonCompounds)) {
                # Save params if number of matching compounds is same or larger
                res$key1 <- col1
                res$key2 <- col2
                res$commonCompounds <- cmp
            }
        }
    }

    if (showSelectedCols) {
        message(sprintf(paste(
            "Columns '%s' and '%s' were matched based on %s common values; to",
            "manually select columns to compare, please set the arguments",
            "'keyColTargetingDrugs' and 'keyColSimilarPerturbations'"),
            res$key1, res$key2, length(res$commonCompounds)))
    }
    return(res)
}

mergeDatasetsBy <- function(column, data1, data2, showAllScores=FALSE,
                            key1=NULL, key2=NULL, suffixes=paste0(".", 1:2)) {
    if (!column %in% colnames(data1) && !column %in% colnames(data2)) {
        error <- "Selected column ('%s') not available in provided datasets"
        stop(sprintf(error, column))
    }

    # Find intersecting compounds between datasets
    data1table <- as.table(data2, clean=FALSE)
    data2table <- as.table(data1, clean=FALSE)

    res  <- findIntersectingCompounds(data1table, data2table, key1, key2)
    key1 <- res$key1
    key2 <- res$key2

    # Convert key columns to same class if needed
    key1val <- data1table[[key1]]
    key2val <- data2table[[key2]]
    areSameClass <- function(key1val, key2val, cmp) cmp(key1val) && cmp(key2val)

    FUN <- NULL
    if (length(res$commonCompounds) > 0) {
        if (!areSameClass(key1val, key2val, is.character)) {
            FUN <- as.character
        } else if (!areSameClass(key1val, key2val, is.integer)) {
            FUN <- as.integer
        } else if (!areSameClass(key1val, key2val, is.numeric)) {
            FUN <- as.numeric
        } else if (!areSameClass(key1val, key2val, is.factor)) {
            FUN <- as.factor
        } else if (!areSameClass(key1val, key2val, is.logical)) {
            FUN <- as.logical
        }
    }
    if (!is.null(FUN)) data2table[[key2]] <- FUN(data2table[[key2]])

    # Merge data based on intersecting compounds
    df <- merge(data2table, data1table, by.x=key2, by.y=key1,
                suffixes=suffixes, allow.cartesian=TRUE)

    # Discard missing values
    cols <- paste0(column, suffixes)
    df   <- df[!is.na(df[[cols[[1]]]]) & !is.na(df[[cols[[2]]]]), ]

    # Only show the best (instead of all) scores
    isRank <- endsWith(column, "rank")
    if (!showAllScores) {
        df <- df[order(df[[cols[[2]]]], decreasing=isRank)]
        df <- df[unique(match(df[[1]], df[[1]]))]
    }
    attr(df, "cols")   <- cols
    attr(df, "isRank") <- isRank
    return(df)
}

#' Plot similar perturbations against predicted targeting drugs
#'
#' @param targetingDrugs \code{targetingDrugs} object
#' @param similarPerturbations \code{similarPerturbations} object
#' @param column Character: column to plot (must be available in both databases)
#' @param labelBy Character: column in \code{as.table(similarPerturbations)} or
#'   \code{as.table(targetingDrugs)} to be used for labelling
#' @param showAllScores Boolean: show all scores? If \code{FALSE}, only the best
#'   score per compound will be plotted
#' @param quantileThreshold Numeric: quantile (between 0 and 1) to highlight
#'   values of interest
#' @param keyColTargetingDrugs Character: column from \code{targetingDrugs} to
#'   match against \code{keyColSimilarPerturbations}; if not set, it will be
#'   automatically determined
#' @param keyColSimilarPerturbations Character: column from
#'   \code{similarPerturbations} to match against \code{keyColTargetingDrugs};
#'   if not set, it will be automatically determined
#'
#' @importFrom ggplot2 ggplot geom_point xlab ylab theme_bw
#' @importFrom ggrepel geom_text_repel
#' @importFrom data.table data.table
#'
#' @family functions related with the ranking of CMap perturbations
#' @family functions related with the prediction of targeting drugs
#' @return \code{ggplot2} plot
#' @export
#'
#' @examples
#' # Rank similarity against CMap compound perturbations
#' similarPerts <- rankSimilarPerturbations(diffExprStat,
#'                                          cmapPerturbationsCompounds)
#'
#' # Predict targeting drugs
#' gdsc <- loadExpressionDrugSensitivityAssociation("GDSC 7")
#' predicted <- predictTargetingDrugs(diffExprStat, gdsc)
#'
#' plotTargetingDrugsVSsimilarPerturbations(predicted, similarPerts,
#'                                          "spearman_rank")
plotTargetingDrugsVSsimilarPerturbations <- function(
    targetingDrugs, similarPerturbations, column, labelBy="pert_iname",
    quantileThreshold=0.25, showAllScores=FALSE,
    keyColTargetingDrugs=NULL, keyColSimilarPerturbations=NULL) {

    suffixes <- c(".targetingDrugs", ".similarPerturbations")
    df <- mergeDatasetsBy(
        column, targetingDrugs, similarPerturbations, showAllScores,
        suffixes=suffixes,
        key1=keyColTargetingDrugs, key2=keyColSimilarPerturbations)

    cols   <- attr(df, "cols")
    isRank <- attr(df, "isRank")
    highlight <- compareQuantile(
        df[[cols[[1]]]], quantileThreshold, lte=TRUE) &
        compareQuantile(df[[cols[[2]]]], quantileThreshold, lte=!isRank)

    xlabel <- "Gene expression and drug sensitivity association"
    source <- attr(targetingDrugs, "expressionDrugSensitivityCor")$source
    if (!is.null(source)) xlabel <- sprintf("%s (%s)", xlabel, source)
    xlabel <- sprintf("%s: %s", xlabel, column)
    ylabel <- paste("CMap comparison:", column)

    plot <- ggplot(df, aes_string(cols[[1]], cols[[2]])) +
        geom_point(data=df[highlight],  colour="orange", show.legend=FALSE) +
        geom_point(data=df[!highlight], colour="grey",   show.legend=FALSE,
                   alpha=0.7) +
        xlab(xlabel) +
        ylab(ylabel) +
        theme_bw()

    if (!is.null(labelBy) && labelBy %in% colnames(df)) {
        label             <- df[[labelBy]]
        label[!highlight] <- NA
        plot              <- plot + geom_text_repel(label=label, na.rm=TRUE)
    }
    attr(plot, "data")     <- df
    attr(plot, "suffixes") <- suffixes
    return(plot)
}

Try the cTRAP package in your browser

Any scripts or data that you put into this service are public.

cTRAP documentation built on Nov. 8, 2020, 10:58 p.m.