R/discoversomaticInteractions.R

#' discoversomaticInteractions
#' 
#' Function adapted to maftools where given a .maf file, it graphs the somatic interactions between a group of genes, i.e., the combination of gene expression and mutation data to detect mutually exclusive or co-ocurring events.
#' 
#' @param maf maf object generated by read.maf
#' @param top check for interactions among top 'n' number of genes. Defaults to top 25. genes
#' @param genes List of genes among which interactions should be tested. If not provided, test will be performed between top 25 genes.
#' @param pvalue Default c(0.05, 0.01) p-value threshold. You can provide two values for upper and lower threshold.
#' @param getMutexMethod Method for the `getMutex` function (by default "ShiftedBinomial")
#' @param getMutexMixed Mixed parameter for the `getMutex` function (by default TRUE)
#' @param returnAll If TRUE returns test statistics for all pair of tested genes. Default FALSE, returns for only genes below pvalue threshold.
#' @param geneOrder Plot the results in given order. Default NULL.
#' @param fontSize 	cex for gene names. Default 0.8
#' @param showSigSymbols Default TRUE. Heighlight significant pairs
#' @param showCounts Default FALSE. Include number of events in the plot
#' @param countStats Default 'all'. Can be 'all' or 'sig'
#' @param countType Default 'all'. Can be 'all', 'cooccur', 'mutexcl'
#' @param countsFontSize Default 0.8
#' @param countsFontColor Default 'black'
#' @param colPal colPalBrewer palettes. Default 'BrBG'. See RColorBrewer::display.brewer.all() for details
#' @param showSum show [sum] with gene names in plot, Default TRUE
#' @param colNC Number of different colors in the palette, minimum 3, default 9
#' @param nShiftSymbols shift if positive shift SigSymbols by n to the left, default 5
#' @param sigSymbolsSize size of symbols in the matrix and in legend. Default 2
#' @param sigSymbolsFontSize size of font in legends. Default 0.9
#' @param pvSymbols vector of pch numbers for symbols of p-value for upper and lower thresholds c(upper, lower). Default c(46, 42) 
#' @param limitColorBreaks limit color to extreme values. Default TRUE
#' 
#' @details Internally, this function run the getMutex function. With the 'getMutexMethod' parameter user might select
#' the 'method' parameter of the getMutex function. For more details run '?getMutex'
#' 
#' #' @return A list of data.tables and it will print a heatmap with the results.
#' 
#' @examples 
#' \donttest{
#' \dontrun{
#' 
#'   #An example of how to perform the function,
#'   #using data from TCGA, Colon Adenocarcinoma in this case. 
#'   
#'   #coad.maf <- GDCquery_Maf("COAD", pipelines = "muse") %>% read.maf
#'   coad.maf <- read.maf(GDCquery_Maf("COAD", pipelines = "muse"))
#'   discoversomaticInteractions(maf = coad.maf, top = 35, pvalue = c(1e-2, 2e-3))
#'   }
#' 
#' }
#' 
#' 
#' @references Mayakonda A, Lin DC, Assenov Y, Plass C, Koeffler HP. 2018. Maftools:
#'    efficient and comprehensive analysis of somatic variants in cancer.
#'    Genome Research. http://dx.doi.org/10.1101/gr.239244.118
#' 
#' @import maftools
#' @import data.table
#' @import RColorBrewer
#' @import methods
#' @importFrom utils getFromNamespace
#' @export

discoversomaticInteractions <- function (maf, top = 25, genes = NULL, pvalue = c(0.05, 0.01),
                                         getMutexMethod = "ShiftedBinomial",getMutexMixed = TRUE,
                                         returnAll = TRUE, geneOrder = NULL, fontSize = 0.8, 
                                         showSigSymbols = TRUE, showCounts = FALSE, countStats = "all", 
                                         countType = "all", countsFontSize = 0.8, 
                                         countsFontColor = "black", colPal = "BrBG", showSum = TRUE,
                                         colNC = 9, nShiftSymbols = 5, sigSymbolsSize = 2, 
                                         sigSymbolsFontSize = 0.9, pvSymbols = c(46, 42), 
                                         limitColorBreaks = TRUE){
  
  
  oldpar <- par(no.readonly = TRUE)
  on.exit(par(oldpar))
  
  Hugo_Symbol <- NULL
  Tumor_Sample_Barcode <- NULL
  gene1 <- NULL
  gene2 <- NULL
  `.` <- NULL
  event_ratio <- NULL
  `01` <- NULL
  `10` <- NULL
  `11` <- NULL
  pValue <- NULL
  pair <- NULL
  AlteredSamples <- NULL
  par <- NULL
  abline <- NULL
  mtext <- NULL
  Event <- NULL
  text <- NULL
  points <- NULL
  axis <- NULL
  
  if(is.null(maf)){
    stop("not input maf file")
  }
  
  if (is.null(genes)) {
    genes = maf@data$Hugo_Symbol
    topgenes = maftools::getGeneSummary(x = maf)[1:top, Hugo_Symbol]
  }
  if (length(genes) < 2) {
    stop("Minimum two genes required!")
  }

  # om = maftools:::createOncoMatrix(m = maf, g = genes)
  createOncoMatrix_2 <- getFromNamespace("createOncoMatrix","maftools")
  om = createOncoMatrix_2(m = maf, g = genes)
  all.tsbs = levels(getSampleSummary(x = maf)[, Tumor_Sample_Barcode])
  mutMat = t(om$numericMatrix)
  missing.tsbs = all.tsbs[!all.tsbs %in% rownames(mutMat)]
  
  if (nrow(mutMat) < 2) {
    stop("Minimum two genes required!")
  }
  
  mutMat[mutMat > 0] = 1
  if (length(missing.tsbs) > 0) {
    missing.tsbs = as.data.frame(matrix(data = 0, nrow = length(missing.tsbs), 
                                        ncol = ncol(mutMat)), row.names = missing.tsbs)
    colnames(missing.tsbs) = colnames(mutMat)
    mutMat = rbind(mutMat, missing.tsbs)
  }
  
  PM_new <- getPM(t(as.matrix(mutMat)))
  # PM_new <- as.matrix(PM_new)
  # rownames(PM_new) <- colnames(mutMat)
  ffx <- match(topgenes, colnames(mutMat))
  miPM <- PM_new[ffx,]
  interactions <- getMutex(A = t(as.matrix(mutMat[,topgenes])), PM = miPM,
                           lower.tail = T,method = getMutexMethod,
                           mixed = getMutexMixed)
  
  rownames(interactions) <- colnames(interactions) <- topgenes
  
  interactions <- log10(interactions * .5) *(interactions <.5) - 
    log10((1-interactions) * .5) *(interactions >=.5)
  interactions <- as.matrix(interactions)

  # TODO: does it make any sense the odds ratio for the Poisson-binomial??
  # oddsRatio <- oddsGenes <- sapply(1:ncol(mutMat), function(i) sapply(1:ncol(mutMat), 
  #                                                                     function(j) {
  #                                                                       f <- try(fisher.test(mutMat[, i], mutMat[, j]), 
  #                                                                                silent = TRUE)
  #                                                                       if (class(f) == "try-error") 
  #                                                                         f = NA
  #                                                                       else f$estimate
  #                                                                     }))
  # rownames(interactions) = colnames(interactions) = rownames(oddsRatio) = colnames(oddsRatio) = colnames(mutMat)

  sigPairs = which(x = 10^-abs(interactions) < 1, arr.ind = TRUE)
  sigPairs2 = which(x = 10^-abs(interactions) >= 1, arr.ind = TRUE)
  
  if (nrow(sigPairs) < 1) {
    stop("No meaningful interactions found.")
  }
  sigPairs = rbind(sigPairs, sigPairs2)
  sigPairsTbl = data.table::rbindlist(lapply(X = seq_along(1:nrow(sigPairs)), 
                                             function(i) {
                                               x = sigPairs[i, ]
                                               g1 = rownames(interactions[x[1], x[2], drop = FALSE])
                                               g2 = colnames(interactions[x[1], x[2], drop = FALSE])
                                               tbl = as.data.frame(table(apply(X = mutMat[, c(g1, 
                                                                                              g2), drop = FALSE], 1, paste, collapse = "")))
                                               combn = data.frame(t(tbl$Freq))
                                               colnames(combn) = tbl$Var1
                                               pval = 10^-abs(interactions[x[1], x[2]])
                                               fest = interactions[x[1], x[2]]
                                               d = data.table::data.table(gene1 = g1, gene2 = g2, 
                                                                          pValue = pval, OddsRatio = fest)
                                               d = cbind(d, combn)
                                               d
                                             }), fill = TRUE)
  sigPairsTbl = sigPairsTbl[!gene1 == gene2]
  sigPairsTbl[is.na(sigPairsTbl)] = 0
  sigPairsTbl$Event = ifelse(test = sigPairsTbl$oddsRatio > 
                               1, yes = "Co_Occurence", no = "Mutually_Exclusive")
  sigPairsTbl$pair = apply(X = sigPairsTbl[, .(gene1, gene2)], 
                           MARGIN = 1, FUN = function(x) paste(sort(unique(x)), 
                                                               collapse = ", "))
  sigPairsTbl[, `:=`(event_ratio, `01` + `10`)]
  sigPairsTbl[, `:=`(event_ratio, paste0(`11`, "/", event_ratio))]
  sigPairsTblSig = sigPairsTbl[order(as.numeric(pValue))][!duplicated(pair)]
  
  if (nrow(interactions) >= 5) {
    diag(interactions) <- 0
    m <- nrow(interactions)
    n <- ncol(interactions)
    col_pal = RColorBrewer::brewer.pal(9, colPal)
    col_pal = grDevices::colorRampPalette(colors = col_pal)
    col_pal = col_pal(m * n - 1)
    if (!is.null(geneOrder)) {
      if (!all(rownames(interactions) %in% geneOrder)) {
        stop("Genes in geneOrder does not match the genes used for analysis.")
      }
      interactions = interactions[geneOrder, geneOrder]
    }
    interactions[lower.tri(x = interactions, diag = TRUE)] = NA
    gene_sum = getGeneSummary(x = maf)[Hugo_Symbol %in% 
                                         rownames(interactions), .(Hugo_Symbol, AlteredSamples)]
    data.table::setDF(gene_sum, rownames = gene_sum$Hugo_Symbol)
    gene_sum = gene_sum[rownames(interactions), ]
    
    if (!all(rownames(gene_sum) == rownames(interactions))) {
      stop(paste0("Row mismatches!"))
    }
    
    if (!all(rownames(gene_sum) == colnames(interactions))) {
      stop(paste0("Column mismatches!"))
    }
    
    if (showSum) {
      rownames(gene_sum) = paste0(apply(gene_sum, 1, paste, 
                                        collapse = " ["), "]")
    }
    
    par(bty = "n", mar = c(1, 4, 4, 2) + 0.1, las = 2, fig = c(0, 
                                                               1, 0, 1))
    breaks = NA
    if (limitColorBreaks) {
      minLog10pval = 3
      breaks <- seq(-minLog10pval, minLog10pval, length.out = m * 
                      n + 1)
      interactions4plot = interactions
      interactions4plot[interactions4plot < (-minLog10pval)] = -minLog10pval
      interactions4plot[interactions4plot > minLog10pval] = minLog10pval
      interactions = interactions4plot
    }
    
    image(x = 1:n, y = 1:m, interactions, col = col_pal, 
          xaxt = "n", yaxt = "n", xlab = "", ylab = "", xlim = c(0, 
                                                                 n + 1), ylim = c(0, n + 1), breaks = seq(-3, 
                                                                                                          3, length.out = (nrow(interactions) * ncol(interactions))))
    abline(h = 0:n + 0.5, col = "white", lwd = 0.5)
    abline(v = 0:n + 0.5, col = "white", lwd = 0.5)
    mtext(side = 2, at = 1:m, text = rownames(gene_sum), 
          cex = fontSize, font = 3)
    mtext(side = 3, at = 1:n, text = rownames(gene_sum), 
          cex = fontSize, font = 3)
    
    if (showCounts) {
      countStats = match.arg(arg = countStats, choices = c("all", 
                                                           "sig"))
      countType = match.arg(arg = countType, choices = c("all", 
                                                         "cooccur", "mutexcl"))
      if (countStats == "sig") {
        w = arrayInd(which(10^-abs(interactions) < max(pvalue)), 
                     rep(m, 2))
        for (i in 1:nrow(w)) {
          g1 = rownames(interactions)[w[i, 1]]
          g2 = colnames(interactions)[w[i, 2]]
          g12 = paste(sort(c(g1, g2)), collapse = ", ")
          if (countType == "all") {
            e = sigPairsTblSig[pValue < max(pvalue)][pair %in% 
                                                       g12, event_ratio]
          }
          else if (countType == "cooccur") {
            e = sigPairsTblSig[pValue < max(pvalue)][Event %in% 
                                                       "Co_Occurence"][pair %in% g12, `11`]
          }
          else if (countType == "mutexcl") {
            e = sigPairsTblSig[pValue < max(pvalue)][Event %in% 
                                                       "Mutually_Exclusive"][pair %in% g12, `11`]
          }
          if (length(e) == 0) {
            e = 0
          }
          text(w[i, 1], w[i, 2], labels = e, font = 3, 
               col = countsFontColor, cex = countsFontSize)
        }
      }
      else if (countStats == "all") {
        w = arrayInd(which(10^-abs(interactions) < max(pvalue)), 
                     rep(m, 2))
        w2 = arrayInd(which(10^-abs(interactions) >= 
                              max(pvalue)), rep(m, 2))
        w = rbind(w, w2)
        for (i in 1:nrow(w)) {
          g1 = rownames(interactions)[w[i, 1]]
          g2 = colnames(interactions)[w[i, 2]]
          g12 = paste(sort(c(g1, g2)), collapse = ", ")
          if (countType == "all") {
            e = sigPairsTblSig[pair %in% g12, event_ratio]
          }
          else if (countType == "cooccur") {
            e = sigPairsTblSig[pair %in% g12, `11`]
          }
          else if (countType == "mutexcl") {
            e = sigPairsTblSig[pair %in% g12, `01` + 
                                 `10`]
          }
          if (length(e) == 0) {
            e = 0
          }
          text(w[i, 1], w[i, 2], labels = e, font = 3, 
               col = countsFontColor, cex = countsFontSize)
        }
      }
    }
    if (showSigSymbols) {
      w = arrayInd(which(10^-abs(interactions) < min(pvalue)), 
                   rep(m, 2))
      points(w, pch = pvSymbols[2], col = "black", cex = sigSymbolsSize)
      w = arrayInd(which((10^-abs(interactions) < max(pvalue)) & 
                           (10^-abs(interactions) > min(pvalue))), rep(m, 
                                                                       2))
      points(w, pch = pvSymbols[1], col = "black", cex = sigSymbolsSize)
    }
    if (showSigSymbols) {
      points(x = n - nShiftSymbols, y = 0.7 * n, pch = pvSymbols[2], 
             cex = sigSymbolsSize)
      text(x = n - nShiftSymbols, y = 0.7 * n, paste0(" P < ", 
                                                      min(pvalue)), pos = 4, cex = sigSymbolsFontSize, 
           adj = 0)
      points(x = n - nShiftSymbols, y = 0.65 * n, pch = pvSymbols[1], 
             cex = sigSymbolsSize)
      text(x = n - nShiftSymbols, y = 0.65 * n, paste0(" P < ", 
                                                       max(pvalue)), pos = 4, cex = sigSymbolsFontSize)
    }
    par(fig = c(0.4, 0.7, 0, 0.4), new = TRUE)
    image(x = c(0.8, 1), y = seq(0, 1, length.out = 200), 
          z = matrix(seq(0, 1, length.out = 200), nrow = 1), 
          col = col_pal, xlim = c(0, 1), ylim = c(0, 1), axes = FALSE, 
          xlab = NA, ylab = NA)
    atLims = seq(0, 1, length.out = 7)
    axis(side = 4, at = atLims, tcl = -0.15, labels = c("> 3 (Mutually exclusive)", 
                                                        2, 1, 0, 1, 2, ">3 (Co-occurence)"), lwd = 0.5, 
         cex.axis = sigSymbolsFontSize, line = 0.2)
    text(x = 0.4, y = 0.5, labels = "-log10(P-value)", srt = 90, 
         cex = sigSymbolsFontSize, xpd = TRUE)
  }
  if (!returnAll) {
    sigPairsTblSig = sigPairsTblSig[pValue < min(pvalue)]
  }
  return(sigPairsTblSig)
}

Try the Rediscover package in your browser

Any scripts or data that you put into this service are public.

Rediscover documentation built on April 14, 2023, 5:14 p.m.