R/heatmapOnOffMarkers.R

Defines functions heatmapOnOffMarkers

Documented in heatmapOnOffMarkers

# heatmapOnOffMarkers.R

# from CSReport_v1.4.3.R

#' heatmapOnOffMarkers
#'
#' This function is called by CellScoreReport to make a heatmap of the standards
#' (donor and target) marker genes and the test samples for the defined
#' transition, as generated by the OnOff() function. Gene symbols are not
#' plotted as this is only intended as an overview of marker expression in
#' test samples.
#' @param test.data a data.frame of CellScore values as calculated by
#'   CellScore(), for only a group of test samples.
#' @param markergenes a data.frame of marker genes, as calculated by OnOff().
#' @param pdata a data.frame containing the phenotype of the expression dataset.
#' @param calls a matrix containing the present/absent calls where genes are
#' in rows and samples in columns.
#' @return This function returns invisibly the visualised binary matrix of
#'   the absence/presence of the cell type markers (rows) across the samples
#'   (columns) in the given study.
#' @keywords heatmap on/off markers standards donor target
#' @export
#' @seealso \code{\link[CellScore]{CellScore}} for details on CellScore, and
#'   \code{\link[hgu133plus2CellScore]{hgu133plus2CellScore}} for details on the
#'   specific ExpressionSet object that shoud be provided as an input.
#' @importFrom gplots heatmap.2
#' @importFrom grDevices cm.colors
#' @examples
#' ## Load the expression set for the standard cell types
#' library(Biobase)
#' library(hgu133plus2CellScore) # eset.std
#'
#' ## Locate the external data files in the CellScore package
#' rdata.path <- system.file("extdata", "eset48.RData", package = "CellScore")
#' tsvdata.path <- system.file("extdata", "cell_change_test.tsv",
#'                              package = "CellScore")
#'
#' if (file.exists(rdata.path) && file.exists(tsvdata.path)) {
#'
#'    ## Load the expression set with normalized expressions of 48 test samples
#'    load(rdata.path)
#'
#'    ## Import the cell change info for the loaded test samples
#'    cell.change <- read.delim(file= tsvdata.path, sep="\t",
#'                              header=TRUE, stringsAsFactors=FALSE)
#'
#'    ## Combine the standards and the test data
#'    eset <- combine(eset.std, eset48)
#'
#'    ## Generate cosine similarity for the combined data
#'    ## NOTE: May take 1-2 minutes on the full eset object
#'    ## so we subset it for 4 cell types
#'    pdata <- pData(eset)
#'    sel.samples <- pdata$general_cell_type %in% c("ESC", "EC", "FIB", "KER", 
#'                  "ASC", "NPC", "MSC", "iPS", "piPS")
#'    eset.sub <- eset[, sel.samples]
#'    cs <- CosineSimScore(eset.sub, cell.change, iqr.cutoff=0.1)
#'
#'    ## Generate the on/off scores for the combined data
#'    individ.OnOff <- OnOff(eset.sub, cell.change, out.put="individual")
#'
#'    ## Generate the CellScore values for all samples
#'    cellscore <- CellScore(data=eset.sub, transitions=cell.change, scores.onoff=individ.OnOff$scores,
#'                           scores.cosine=cs$cosine.samples)
#'    ## Get the CellScore fvalues rom valid transitions defined by cell.change
#'    ## table
#'    plot.data <- extractTransitions(cellscore, cell.change)
#'
#'    ## Define a plot group variable
#'    plot.data$plot_group <- paste(plot.data$experiment_id,
#'                                 plot.data$cxkey.subcelltype, sep="_")
#'    ## Sort the scores 1) by target 2) by donor 3) by study
#'    plot.data.ordered <- plot.data[order(plot.data$target,
#'                                         plot.data$donor_tissue,
#'                                         plot.data$experiment_id), ]
#'	 ## How many plot_groups are there?
#'	 table(plot.data$plot_group)
#'
#'    ## pick one plot_group to plot
#'    group <- unique(plot.data$plot_group)[4]
#'
#'    ## Select scores for only one plot group
#'    test.data <- plot.data.ordered[plot.data.ordered$plot_group %in% group, ]
#'
#'    ## Generate the group on/off scores for the combined data
#'    group.OnOff <- OnOff(eset.sub, cell.change, out.put="marker.list")
#'
#'    calls <- assayDataElement(eset.sub, "calls")
#'    rownames(calls) <- if("feature_id" %in% names(fData(eset.sub))) { fData(eset.sub)[, "feature_id"] } else { fData(eset.sub)[, "probe_id"] }
#'
#'    ## Plot
#'    heatmapOnOffMarkers(test.data, group.OnOff$markers, pData(eset.sub),
#'                       calls)
#' }

heatmapOnOffMarkers <- function(test.data, markergenes, pdata, calls) {

    ############################################################################
    ## PART 0. Check function arguments
    ############################################################################
    fun.main <- deparse(match.call()[[1]])
    .stopIfNotDataFrame(test.data, 'test.data', fun.main)
    .stopIfNotDataFrame(markergenes, 'markergenes', fun.main)
    .stopIfNotDataFrame(pdata, 'pdata', fun.main)
    .stopIfNotBinaryMatrix(calls, 'calls', fun.main)

    ############################################################################
    ## PART I. Data preparation
    ############################################################################
    ## Get the standard data and marker list for the given transition
    celltype <- list(donor=test.data$start[1],
                     target=test.data$target[1],
                     test=test.data$sub_cell_type1[1])
    transition <- paste0(celltype$donor, "->",celltype$target)
    marker.list <- list(target="target", donor="donor")

    marker.list <- lapply(marker.list,
                          function(group){
                              sel <- markergenes$comparison == transition &
                                  markergenes$group == celltype[[group]]
                              markergenes[sel, ]
                          })

    samples.list <-
        lapply(names(celltype),
               function(group){
                   if (group == "test"){
                       sel <- pdata$sample_id %in% test.data$sample_id
                   }else{
                       sel <- !is.na(pdata$category) &
                           pdata$general_cell_type %in% celltype[[group]]
                   }
                   intersect(rownames(pdata)[sel], colnames(calls))
               })
    names(samples.list) <- names(celltype)
    samples.vector <- unlist(samples.list[c("donor", "test", "target")])

    calls.list <- lapply(marker.list,
                         function(mat){
                             sel <- rownames(calls) %in% mat$feature_id
                             calls[sel, samples.vector]
                         })

    ############################################################################
    ## PART II. Heatmap
    ############################################################################
    ## Combine data into matrix
    plot.me <- as.matrix(do.call("rbind", calls.list))

    ## Set title
    main.title <- paste0(test.data$experiment_id[1], ": ", celltype$test, "\n",
                         "Transition from ", transition)

    ## Set colours
    col.list <- .getMainColours("all", FALSE)

    col.columns <- rep("black", ncol(plot.me))
    for (group in names(col.list)){
        sel <- colnames(plot.me) %in% samples.list[[group]]
        col.columns[sel] <- col.list[[group]]
    }

    heatmap.2(plot.me, col=cm.colors(8),
              cexRow=0.7,
              cexCol=1,
              main=main.title,
              dendrogram="none",
              Colv=FALSE,
              Rowv=FALSE,
              labRow=NA,
              labCol=NA,
              ColSideColors=col.columns,
              trace="none",
              scale="none",
              key=FALSE,
              symkey=FALSE,
              symbreaks=FALSE,
              #      margins = c(15, 10),
              offsetRow=0.1,
              offsetCol=0.1,
              adjCol = c(NA, 0.5),
              adjRow = c(0,NA),
              #      lwid=c(1,2),
              #      lhei=c(1,10),
              density.info="none")

    invisible(plot.me)
}
nmah/CellScore documentation built on May 4, 2023, 2:52 p.m.