R/coocMutEx.R
In PrecisionTrialDrawer: A Tool to Analyze and Design NGS Based Custom Gene Panels

# Multiple ggplot in one figure
.multiplot <- function(plotlist=NULL, plot_layout=NULL) {
    # Modified from R cookbook
    # require(grid)

    # Make a list from the ... arguments and plotlist
    plots <- plotlist

    numPlots = length(plots)

    # Make the panel
    plotCols = plot_layout[2] # Number of columns of plots
    plotRows = plot_layout[1] # Number of rows needed, calculated from # of cols

    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(plotRows, plotCols)))
    vplayout <- function(x, y)
        viewport(layout.pos.row = x, layout.pos.col = y)

    # Make each plot, in the correct location
    for(i in seq_len(numPlots)) {
        curRow = ceiling(i/plotCols)
        curCol = (i-1) %% plotCols + 1
        print(plots[[i]], vp = vplayout(curRow,  curCol))
    }
}

# Given a casted matrix, calculated pvalue for cooccurence
# and pvalue for mutual exclusivity with montercarlo or fisher test
# mutuated from package cooccur
.coocMutEx <- function (mat , thresh = FALSE , prob = c("hyper" , "firth")) {
    prob <- prob[1]
    spp_site_mat <- t(mat)
    spp_key <- data.frame(num = seq_len(nrow(spp_site_mat))
      , spp = row.names(spp_site_mat))
    spp_site_mat[spp_site_mat > 0] <- 1
    nsite <- ncol(spp_site_mat)
    nspp <- nrow(spp_site_mat)
    spp_pairs <- choose(nspp, 2)
    obs_cooccur <- prob_cooccur <- exp_cooccur <- 
        matrix(nrow = spp_pairs, ncol = 3)
    prob_occur <- incidence <- cbind(seq_len(nspp)
      , rowSums(spp_site_mat, na.rm = TRUE))
    prob_occur[ , 2] <- prob_occur[ , 2]/nsite
    spp_pairs_mat <- t(combn(nspp,2))
    pairs_observations <- apply(spp_pairs_mat, 1, function(x) {
        spp <- x[1]
        spp_next <- x[2]
        pairs <- sum( spp_site_mat[spp, ] * spp_site_mat[spp_next, ] )
        pairs_prob <- sum( prob_occur[spp, 2] * prob_occur[spp_next, 2] )
        pairs_exp <- pairs_prob * nsite
        return(c(pairs, pairs_prob, pairs_exp))
        })

    obs_cooccur <- cbind(spp_pairs_mat, pairs_observations[1,])
    prob_cooccur <- cbind(spp_pairs_mat, pairs_observations[2,])
    exp_cooccur <- cbind(spp_pairs_mat, pairs_observations[3,])
    if (thresh) {
        # n_pairs <- nrow(prob_cooccur)
        prob_cooccur <- prob_cooccur[exp_cooccur[, 3] >= 1, ,drop=FALSE]
        obs_cooccur <- obs_cooccur[exp_cooccur[, 3] >= 1, ,drop=FALSE]
        exp_cooccur <- exp_cooccur[exp_cooccur[, 3] >= 1, ,drop=FALSE]
        # n_omitted <- n_pairs - nrow(prob_cooccur)
    }
    output <- lapply(seq_len(nrow(obs_cooccur)) , function(row) {
        # browser()
        sp1 <- obs_cooccur[row, 1]
        sp2 <- obs_cooccur[row, 2]
        # sp1_inc <- incidence[incidence[, 1] == sp1, 2]
        # sp2_inc <- incidence[incidence[, 1] == sp2, 2]
        # max_inc <- max(sp1_inc, sp2_inc)
        # min_inc <- min(sp1_inc, sp2_inc)
        prob_share_site <- rep(0, (nsite + 1))
        if (prob == "hyper") {
            # all.probs <- phyper(0:min_inc, min_inc, nsite - min_inc, max_inc)
            # prob_share_site[1] <- all.probs[1]
            # prob_share_site[2:length(all.probs)] <- diff(all.probs)
            p_lt <- table(mat[ , sp1] , mat[, sp2]) %>% 
                    fisher.test(. , alternative = "less")
            p_lt <- p_lt$p.value                    
            p_gt <- table(mat[ , sp1] , mat[, sp2]) %>% 
                    fisher.test(. , alternative = "greater")
            p_gt <- p_gt$p.value
        }
        if( prob == "firth"){
            firth <- tryCatch({
            brglm::brglm( mat[ , sp1] ~ mat[, sp2] 
                , family = binomial("logit") 
                , method="brglm.fit")
            } , warning=function(w) {
            warning("No convergence, using regular glm")
            firth <- brglm::brglm( mat[ , sp1] ~ mat[, sp2] 
                , family = binomial("logit") 
                , method="glm.fit")
            return(firth)
            })
          firthSum <- brglm::summary.brglm(firth)
          firthP <- firthSum$coefficients[2 , "Pr(>|z|)"]
          if(firthSum$coefficients[2 , "z value"]>=0){
            p_gt <- firthP
            p_lt <- 1
          } else {
            p_lt <- firthP
            p_gt <- 1
          }
        }
        return(c(sp1, sp2, p_lt, p_gt))
        }) %>% do.call("rbind" , .)
    output <- as.data.frame(output)
    colnames(output) <- c("sp1", "sp2", "pVal.MutEx", "pVal.Cooc")
    # Substitue NaNs and NA with 1
    output$pVal.Cooc <- ifelse(is.nan(output$pVal.Cooc) , 1 
      , output$pVal.Cooc)
    output$pVal.MutEx <- ifelse(is.nan(output$pVal.MutEx) , 1 
      , output$pVal.MutEx)
    # Substitute 0s with a very very small pvalue
    output$pVal.Cooc <- ifelse( output$pVal.Cooc==0 , 10^-100 
      , output$pVal.Cooc)
    output$pVal.MutEx <- ifelse( output$pVal.MutEx==0 , 10^-100 
      , output$pVal.MutEx)
    sp1_name <- merge(x = data.frame(order = seq_len(length(output$sp1)), 
            sp1 = output$sp1), y = spp_key, by.x = "sp1", by.y = "num", 
            all.x = TRUE, sort = FALSE)
    sp2_name <- merge(x = data.frame(order = seq_len(length(output$sp2)), 
            sp2 = output$sp2), y = spp_key, by.x = "sp2", by.y = "num", 
            all.x = TRUE, sort = FALSE)
    output$sp1_name <- sp1_name[with(sp1_name, order(order)), 
            "spp"]
    output$sp2_name <- sp2_name[with(sp2_name, order(order)), 
            "spp"]
    n_samp <- nrow(mat)
    TableOR_corr <- function(x , y){
          xtab <- table(x , y)+.5
          n00 <- xtab[1,1]
          n01 <- xtab[1,2]
          n10 <- xtab[2,1]
          n11 <- xtab[2,2]
          OR <- (n00 * n11)/(n01 * n10)
          return(OR)
        }
    stats <- t(apply(output , 1 , function(x) {
                    # browser()
                    pair <- c(x["sp1_name"] , x["sp2_name"])
                    mat_pair <- mat[ , pair]
                    freq_1 <- sum(mat_pair[ , 1])/n_samp
                    freq_2 <- sum(mat_pair[ , 2])/n_samp
                    combFreq <- mat_pair[ , 1] + mat_pair[ , 2]
                    combFreq[combFreq>1] <- 1
                    combFreq <- sum(combFreq)/n_samp
                    or <- TableOR_corr(mat_pair[ , 1] , mat_pair[ , 2])
                    c(or , freq_1 , freq_2 , combFreq)
                }))
    colnames(stats) <- c("OR" 
                        , "freq_1" 
                        , "freq_2" 
                        , "combFreq")
    finalOutput <- cbind(output , stats)
    # finalOutput <- finalOutput[ , c("sp1_name" , "sp2_name" 
    #                               , "pVal.MutEx", "pVal.Cooc"
    #                               ,"OR" , "freq_1" , "freq_2" , "combFreq")]
    finalOutput <- finalOutput[ , c("sp1_name" , "sp2_name" 
                                  , "pVal.MutEx", "pVal.Cooc"
                                  ,"OR")]
    finalOutput <- .changeFactor(finalOutput)
    return(finalOutput)
}

# When data are all not significant or there are no data at all,
# Plot a cool empty plot with title and text
.emptyGGplot <- function(label , title){
  df <- data.frame()
  empty <- ggplot(df) + 
      geom_point() + 
      xlim(0, 10) + 
      ylim(0, 10) + 
      xlab("") + 
      ylab("") + 
      theme(axis.line=element_blank(),
        axis.text.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks=element_blank(),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        legend.position="none",
        panel.background=element_blank(),
        panel.border=element_blank(),
        panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),
        plot.background=element_blank()) +
      annotate("text"
              , label = label
              , x = 5, y = 5, size = 4, colour = "black") +
      ggtitle(title) +
      theme(plot.margin=unit(c(1,1,1,1), "cm")
        ,legend.justification=c(1,0), legend.position=c(1,0))
  return(empty)
}

# cool upper matrix plot, mutuated from package cooccur
.coocMutEx_melter <- function(co_tab, plotrandom , pvalthr){
  if(is.null(co_tab)){
    return(NULL)
  }
  all_elements <- unique(c(co_tab$sp1_name , co_tab$sp2_name))
  mat <- matrix(0 , nrow=length(all_elements) , ncol=length(all_elements))
  dimnames(mat) <- list(all_elements, all_elements)
  co_tab$pVal.MutEx <- -co_tab$pVal.MutEx
  for(i in seq_len(nrow(co_tab))){
    x <- co_tab[i , ,drop=TRUE]
    sp1 <- x[["sp1_name"]]
    sp2 <- x[["sp2_name"]]
    if(abs(x[["pVal.Cooc"]])<abs(x[["pVal.MutEx"]])) {
      if(x[["pVal.Cooc"]]<=pvalthr){
          mat[sp1,sp2] <- -log10(x[["pVal.Cooc"]])
          mat[sp2,sp1] <- -log10(x[["pVal.Cooc"]])
      }
    } else {
      if(abs(x[["pVal.MutEx"]])<=pvalthr){
          mat[sp1,sp2] <- log10(abs(x[["pVal.MutEx"]]))
          mat[sp2,sp1] <- log10(abs(x[["pVal.MutEx"]]))
      }
    }
  }
  rmrandom <- function(mat){
    rowidx <- matrixStats::rowAlls(mat, value=0)
    colidx <- matrixStats::colAlls(mat, value=0)
    idx <- rowidx & colidx
    return(mat[!idx , !idx])
  }
  if(plotrandom){
    cormat <- mat
  } else {
    cormat <- rmrandom(mat)
  }
  get_upper_tri <- function(cormat){
    cormat[lower.tri(cormat)]<- NA
    return(cormat)
  }
  upper_tri <- get_upper_tri(cormat)
  # Melt the correlation matrix
  melted_cormat <- reshape2::melt(upper_tri, na.rm = TRUE)
  #if(all(melted_cormat$value==0)){
  #    return(NULL
        # .emptyGGplot(label= "No Significant pair"
        #                       , title=title
        #                       )
  #    )
  #}
  melted_cormat$value <- ifelse(melted_cormat$value==0 , NA 
    , melted_cormat$value)
  return(melted_cormat)
}

.plot_coocMutEx <- function(melted_cormat 
  , title , minMelt , maxMelt , pvalthr){
  # minMelt <- min(melted_cormat$value , na.rm=TRUE)
  # maxMelt <- max(melted_cormat$value , na.rm=TRUE)
  log10pvalthr <- - log10(pvalthr)
  # fakedf <- data.frame(Random=NA)
  # Avoid complaints from check
  Var1=Var2=value=NULL
  # melted_cormat$value <- ifelse(is.na(melted_cormat$value),0,melted_cormat$value)
  ggheatmap <- ggplot(melted_cormat, aes(Var1, Var2)) + 
    geom_tile(aes(fill = value), colour ="white" ) +
    scale_fill_gradient2(space="Lab"
        # NOTE 02/12/2020 - Using default colors to avoid approx() error
        # ,low = "navy blue"
        # ,na.value="dark gray"
        # ,high="#FFCC66"
        # ,midpoint=0
        # ,mid="dark gray"
        ,name="p-value\nblue cooc\nred mutex"
        ,breaks=c(minMelt
                  # ,-log10pvalthr
                  , 0
                  # ,log10pvalthr
                  , maxMelt
                  )
        ,labels=c(paste("Lower mutex p:" ,
          if(minMelt<=-log10pvalthr) {
            prettyNum(10^minMelt , digits=3)
          } else {
            "no significance"
          })
          # , "MutEx Threshold"
          , "Random"
          # , "Cooc Threshold"
          , paste("Lower cooc p:" ,
            if(maxMelt>=log10pvalthr) {
              prettyNum(10^-maxMelt , digits=3)
            } else {
              "no significance"
            })
          )
        ,limits=c(minMelt,maxMelt)) +
    theme_minimal() + # minimal theme
    xlab("") + ylab("") + 
    theme(axis.text.x = element_text(angle = 45, vjust = 1, 
                    size = 12, hjust = 1)) +
    coord_fixed() +
    ggtitle(title)
    return(ggheatmap)
}



setGeneric('coocMutexPlot', function(object 
    , var=c("drug","group","gene_symbol")
    , alterationType=c("copynumber" , "expression" , "mutations" , "fusions")
    , grouping=c(NA , "drug" , "group" , "alteration_id" , "tumor_type")
    , tumor_type=NULL
    , collapseMutationByGene=FALSE
    , collapseByGene=FALSE
    , tumor.weights=NULL
    , style=c("cooc" , "dendro")
    , prob = c("hyper" , "firth")
    , drop = FALSE
    , noPlot=FALSE
    , pvalthr=0.05
    , plotrandom=TRUE
    , ncolPlot =FALSE
    , ...) {
  standardGeneric('coocMutexPlot')
})
setMethod('coocMutexPlot', 'CancerPanel', function(object 
    , var=c("drug","group","gene_symbol")
    , alterationType=c("copynumber" , "expression" , "mutations" , "fusions")
    , grouping=c(NA , "drug" , "group" , "alteration_id" , "tumor_type")
    , tumor_type=NULL
    , collapseMutationByGene=FALSE
    , collapseByGene=FALSE
    , tumor.weights=NULL
    , style=c("cooc" , "dendro")
    , prob = c("hyper" , "firth")
    , drop = FALSE
    , noPlot=FALSE
    , pvalthr=0.05
    , plotrandom=TRUE
    , ncolPlot =FALSE
    , ...){
  #-------------------------------
  # CHECK PARAMETER CONSISTENCY
  #-------------------------------

  possibleAlterations <- c("copynumber" , "expression" 
    , "mutations" , "fusions")
  possibleGrouping <- c(NA , "drug" , "group" 
    , "alteration_id" , "tumor_type")
  possibleProb <- c("firth" , "hyper")
  possiblestyle <- c("cooc" , "dendro")
  prob <- prob[1]
  style <- style[1]
  if(style %notin% possiblestyle){
    stop(paste("style can only be one of the following" , 
      paste(possiblestyle , collapse=", ")))
  }
  if( prob %notin% possibleProb){
    stop(paste("prob can only be one of the following" ,
      paste(possibleProb , collapse=", ")))
  }
  if(any(alterationType %notin% possibleAlterations)){
    stop(paste("alterationType can only be one or more of the following" ,
      paste(possibleAlterations , collapse=", ")))
  }
  if( length(grouping)>1 & any(is.na(grouping)) ){
    grouping <- NA
  }
  if(!any(is.na(grouping))){
    if(any(grouping %notin% possibleGrouping))
      stop(paste("grouping can only be one of the following:" ,
        paste(possibleGrouping , collapse=", ")))
  }
  if(("alteration_id" %in% grouping) & length(alterationType)<2){
    stop(paste("If you select 'alteration_id' as grouping variable",
      ", select more than one alterationType"))
  }
  if(!is.numeric(pvalthr)){
    stop("pvalthr must be a numeric value")
  } else {
    if(pvalthr>1 | pvalthr<=0){
      stop("pvalthr must be a number between 0 and 1")
    }
  }
  if(var[1] %eq% grouping[1]){
    stop("var and grouping cannot be to the same value")
  }
  if(!is.logical(plotrandom)){
    stop("plotrandom can be either TRUE or FALSE")
  }

  #----------------------------
  # GRAB DATA AND SAMPLES
  #----------------------------

  de <- dataExtractor(object=object , alterationType=alterationType 
                      , tumor_type=tumor_type 
                      , collapseMutationByGene=collapseMutationByGene 
                      , collapseByGene=collapseByGene 
                      , tumor.weights=tumor.weights)
  mydata <- de$data
  mysamples <- de$Samples
  tum_type_diff <- de$tumor_not_present
  rm(de)
  
  #----------------------------
  # RESHAPE DATA
  #----------------------------
  
  if(is.na(grouping)){
    mydata_split <- list("NA"=mydata)
  } else {
    mydata_split <- split(mydata , mydata[[grouping]])
  }
  mydata_split <- lapply(mydata_split , function(x) {
            # browser()
            tums <- unique(x$tumor_type)
            if(grouping %in% "tumor_type"){
              x$case_id <- factor(x$case_id 
                , levels=unique(unlist(mysamples[tums])))
            } else {
              x$case_id <- factor(x$case_id 
                , levels=unique(unlist(mysamples["all_tumors"])))
            }
            return(x)
    })
  # Create a matrix on the basis of the var value
  mat_split <- lapply(mydata_split , function(x) {
              # x$case_id <- factor(x$case_id , levels=)
              if("alteration_id" %in% colnames(x)){
                mat <- suppressMessages(
                  reshape2::acast(x
                        , as.formula( paste("case_id~" , var))
                        , fun.aggregate=length
                        , value.var="alteration_id"
                        , drop=drop))
              } else {
                mat <- suppressMessages(
                  reshape2::acast(x
                          , as.formula( paste("case_id~" , var))
                          , fun.aggregate=length
                          , drop=drop))
              }
              mat[ mat> 1] <- 1
              return(mat)
    })
  # return(mat_split)
  if(style=="dendro"){
      mat_dist <- lapply(mat_split , function(x) dist(t(x) , method="binary") )
      if(!noPlot){
      opar <- par("mfrow" , "mar")
      on.exit(par(opar))
          return({
              n_plots <- length(mat_dist)
              par(mfrow=.mfrow(n_plots, ncolPlot))
              for(i in seq_len(length(mat_dist))){
          title <- paste0(grouping , ": " , names(mat_dist)[i])
          if(attributes(mat_dist[[i]])$Size>2){
                    plot(hclust( mat_dist[[i]]  , ...)
                        , xlab="" 
                        , main=title)
          } else {
            plot(c(0, 1), c(0, 1)
                , ann = FALSE, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')
            text(x = 0.5, y = 0.5
                , "Not enough elements\nto display\n(minimum 3)"
                ,cex = 1, col = "black" , main=names(mydata_split)[i])
          }
              }
          })
      } else {
      return(mat_split)
    }
  }
  # Cooccurence 
  cooc <- lapply(seq_len(length(mat_split)) , function(i) {
            x <- mat_split[[i]]
            if(ncol(x)>1){
              out <- .coocMutEx(x , thresh=FALSE , prob=prob)
              out$grouping <- names(mydata_split)[i]
            } else {
              out <- NULL
            }
            return(out)
    })
  if(noPlot){
    return(do.call("rbind" , cooc))
  } else {
    n_plots <- length(cooc)
    # Automatic detection of plot best layout
    plot_layout <- .mfrow(n_plots, ncolPlot)
    all_melted <- lapply(seq_len(length(cooc)), function(i){
      .coocMutEx_melter(cooc[[i]] , plotrandom=plotrandom , pvalthr=pvalthr)
    })
    minMax <- lapply(all_melted , function(melted_cormat) {
      if(is.null(melted_cormat)){
        return(NULL)
      }
      if(all(is.na(melted_cormat$value ))){
        return(NULL)
      } else {
        return( c(min(melted_cormat$value , na.rm=TRUE)[1] 
          , max(melted_cormat$value , na.rm=TRUE)[1]) )
      }
    })

    minMelt <- tryCatch( min(unlist(lapply(minMax , '[' , 1)) , na.rm=TRUE) 
      , error=function(e) return(NULL))
    maxMelt <- tryCatch( max(unlist(lapply(minMax , '[' , 2)) , na.rm=TRUE) 
      , error=function(e) return(NULL))
    whichIsMax <- which.max(abs(c(minMelt, maxMelt)))
    if(whichIsMax==1){
      maxMelt <- - minMelt
    } else {
      minMelt <- - maxMelt
    }
    all_plots <- lapply(seq_len(length(all_melted)) , function(i){
        if( is.na(grouping) && toupper(names(mydata_split)[i])=="NA" ){
            title <- ""
        } else {
            title <- paste0(toupper(grouping) , ": " , 
                    toupper(names(mydata_split)[i]))
        }
        if(!is.null(cooc[[i]])){
          if(all(is.na(all_melted[[i]]$value))){
            return(.emptyGGplot(label= "No Significant pairs", title=title))
          }
          out <- .plot_coocMutEx(all_melted[[i]] 
            , title=title , minMelt=minMelt , maxMelt=maxMelt , pvalthr=pvalthr)
        } else {
          out <- .emptyGGplot(
            label= paste("MutEx and Cooc Analysis require at least 2 elements",
                          "No possible Mutex or Cooc Analysis" , sep= "\n")
                    , title=title)
        }
        })
    return(.multiplot(all_plots , plot_layout=plot_layout))
  }
})