R/ggForestPlot.R
In MetaIntegrator: Meta-Analysis of Gene Expression Data

Documented in ggForestPlot

#' Compare effect sizes of a gene across all datasets in meta-analysis
#' 
#' @description A forest plot can be used to compare the expression values of a gene across different datasets. 
#' The area of the blue boxes is proportional to the number of samples in the study and black lines indicate the standard error of the effect sizes for each study (by default the 95\% confidence interval). 
#' The summary effect size for all studies is indicated as an orange diamond below and the width of the diamond indicates the summary standard error.
#' @param metaObject a filtered metaObject, i.e. it needs to include a \code{filterObject} generated by the function \code{filterGenes()} 
#' @param genes character vector containing the genes for which the forest plot should be generated
#' @param confLevel confidence level
#' @param facetCols integer that specifies how many columns are going to be used for the plot
#' @param facetScales same as ggplot's facet_wrap: should Scales be fixed ("fixed", the default), free ("free"), or free in one dimension ("free_x", "free_y")
#' @param boxScales a numeric vector of length 2 providing scaling factors for the plot. Specifies minimum and maximum size.
#' @return ggplot2 Plot comparing effect sizes of a gene across datasets
#' @author Winston A. Haynes, Jiaying Toh, Michele Donato
#' @seealso	\code{\link{filterGenes}},  \code{\link{runMetaAnalysis}},  \code{\link{violinPlot}}
#' @examples
#' # compare effect sizes of the Gene1 for all discovery datasets in tinyMetaObject 
#' ggForestPlot(tinyMetaObject, genes="Gene1")
#' @keywords 
#' hplot 
#' graphs
#' @export
#' @import data.table ggplot2 magrittr 
#' @importFrom stats qnorm relevel
#' @importFrom dplyr %>%

ggForestPlot = function(metaObject, 
                        genes, 
                        confLevel = 0.95,
                        facetCols = NULL, 
                        facetScales = 'free_x',
                        boxScales = c(6,16)){
  
  if(is.null(facetCols)){
    facetCols = ifelse(length(genes) == 1, 1, 4)
  }
  
  ci.value <- -stats::qnorm((1 - confLevel)/2)
  
  studyDataMeans <- 
    as.data.table(
      metaObject$metaAnalysis$datasetEffectSizes, 
      keep.rownames = T)[rn %in% genes]
  
  studyDataSEs <- 
    as.data.table(
      metaObject$metaAnalysis$datasetEffectSizeStandardErrors, 
      keep.rownames = T)[rn %in% genes] 
  
  pooledMean <- metaObject$metaAnalysis$pooledResults[genes, 
                                                      "effectSize"]
  pooledSE <- metaObject$metaAnalysis$pooledResults[genes, 
                                                    "effectSizeStandardError"]
  
  studyDataMeans$Summary = pooledMean
  
  studyDataMeans = studyDataMeans %>% 
    data.table::melt(id.vars = 'rn') %>% 
    data.table::setnames('value', 'means')
  
  studyDataSEs$Summary = pooledSE
  studyDataSEs = studyDataSEs %>%
    data.table::melt(id.vars = 'rn') %>%
    data.table::setnames('value', 'SEs')
  
  studyData = merge(studyDataMeans, studyDataSEs, by = c('rn', 'variable')) %>% 
    data.table::setnames(c('rn', 'variable'), c('gene', 'study'))
  
  pooled = data.table(gene = genes, means = pooledMean, SEs = pooledSE)
  pooled$study = 'Summary'
  pooled[, cilb := means - ci.value*SEs]
  pooled[, ciub := means + ci.value*SEs]
  pooled[, RelConf:=1/(ciub-cilb)]
  
  studyData[, ok:=is.finite(means + SEs)]
  
  if (is.null(xlim)) {
    studyData[, xlim.lb := min(means[ok] - ci.value * SEs[ok], na.rm = TRUE)] 
    studyData[, xlim.ub := max(means[ok] + ci.value * SEs[ok], na.rm = TRUE)]
  }
  
  studyData[, cilb:=means - ci.value*SEs]
  studyData[, ciub:=means + ci.value*SEs]
  studyData[, RelConf:=1/((ciub-cilb))]
  
  studyData$study = stats::relevel(studyData$study, ref = 'Summary')
  
  # this is so that I don't plot the box for the summary
  studyData[study == 'Summary', `:=`(means = NA, SEs = NA, ok = FALSE, cilb = NA, ciub = NA, RelConf = NA)]
  
  nStudies = length(unique(studyData$study))
  
  labelstext = c('Summary', as.character(unique(studyData$study)))
  labelspos = seq_along(labelstext)-1
  
  # pooled polygons details
  pooled_poly = cbind(
    # X VALUES FOR SUMMARY
    pooled[, list(gene, cilb, means, ciub, means)] %>%
      data.table::melt(id.vars = 'gene') %>%
      data.table::setnames('value', 'x'), 
    # Y VALUES FOR SUMMARY
    pooled[, list(gene, 1.02, 0.85, 1.02, 1.15)] %>%
      data.table::melt(id.vars = 'gene') %>%
      data.table::setnames('value', 'y'))[,.(gene,x,y)]
  
  ggplot(studyData, aes_string('means', 'study')) + 
    # x = 0 vline
    geom_vline(xintercept = 0.0, linetype=2, alpha=0.3) + 
    # error bars
    geom_errorbarh(alpha=0.75, color="black", height = 0.1, aes_string(xmax = 'ciub', xmin = 'cilb')) + 
    # actual gene boxes
    geom_point(aes_string(size = 'RelConf'), shape = 15, color = 'cornflowerblue') + 
    # change relative scale of boxes
    scale_size(range = boxScales, guide=FALSE) +
    # can play with themes here
    theme_bw() + 
    theme(text = element_text(size=16),
          axis.title.y = element_blank()) +
    xlab('Standardized Mean Difference (log2 scale)')+
    facet_wrap(~gene, ncol = facetCols, scales = facetScales) +
    # This is the summary polygon
    geom_polygon(data = pooled_poly, aes_string(x = 'x', y = 'y'), fill = 'coral2') +
    # play with y lims
    coord_cartesian(ylim = c(1.2, nStudies))
}


#declare global variables for variables in data.table/with notation to avoid R CMD CHECK notes
utils::globalVariables(c("rn","cilb","means","SEs","ciub","RelConf","ok","xlim.lb","xlim.ub","study","gene","x","y","%>%"))