R/computePeturbEnrichment.R

Defines functions computePeturbEnrichment

Documented in computePeturbEnrichment

#' Compute drug Enrichment, drugs activity or drugs sensitivity based on database and annotation provided as input parameter
#'
#' @param mFC = A matrix of differential gene expression fold-change of own experiment, the rows name of the matrix must be the genes names
#' @param mDrugEnrich = a large matrix represents the gene expression fold change in the presence of different perturbations, either various drugs concentrations or other genetic engineering techniques as gene-knock-in etc..
#' @param DrugsAnnot  =is  matrix represents the drugs annotation that contains the targets, mechanism of action , clinical phase, disease area, and indication
#' @param methods the computation methods of Enrichment, either using GSEA algorithm or the rank correlation.
#' @param nmin the minimum number of drugs
#' @param nprune  takes only the (nprune) top matching drugs for each comparison to reduce the size of the matrix. default is 0 to take the full matrix
#' @param contrast is a character string represent the two compared conditions(contrast) as it is provided from the fold-change matrix
#' @return drugs a list of drugs enrichment stats and annotations
#' @export
#' @import stats
#' @importFrom Matrix sparse.model.matrix
#' @importFrom Matrix head
#' @importFrom qlcMatrix  corSparse
#' @importFrom fgsea  fgseaSimple
#' @importFrom uwot umap
#'
#' @examples # from the data-sets provided as examples within the package load the .rda files
#' # DrugsAnnot, mDrugEnrich, mFC
#' drugs <- computePeturbEnrichment(
#'   mFC = mFC, mDrugEnrich = mDrugEnrich, DrugsAnnot = DrugsAnnot,
#'   methods = "GSEA", nprune = 250, contrast = NULL
#' )
computePeturbEnrichment <- function(mFC, mDrugEnrich, DrugsAnnot, methods = c("GSEA", "cor"),
                                    nmin = 15, nprune = 0, contrast = NULL) {
  if (is.null(contrast)) {
    contrast <- colnames(mFC)
  }
  contrast <- intersect(contrast, colnames(mFC))

  mFC <- mFC[, contrast, drop = FALSE]

  xdrugs <- gsub("[@_].*$", "", colnames(mDrugEnrich))
  ndrugs <- length(table(xdrugs))

  message("number of profiles: ", ncol(mDrugEnrich))
  message("number of drugs: ", ndrugs)

  ## create drug meta sets
  meta.gmt <- tapply(colnames(mDrugEnrich), xdrugs, list)
  meta.gmt <- meta.gmt[which(sapply(meta.gmt, length) >= nmin)]
  length(meta.gmt)
  if (length(meta.gmt) == 0) {
    message("WARNING::: computeDrugEnrichment : no valid genesets!!")
    return(NULL)
  }

  ## first level (rank) correlation
  message("Calculating first level rank correlation ...")
  gg <- intersect(rownames(mDrugEnrich), rownames(mFC))
  length(gg)
  if (length(gg) < 20) {
    message("WARNING::: computeDrugEnrichment : not enough common genes!!")
    return(NULL)
  }
  rnk1 <- apply(mDrugEnrich[gg, , drop = FALSE], 2, rank, na.last = "keep")
  rnk2 <- apply(mFC[gg, , drop = FALSE], 2, rank, na.last = "keep")
  R1 <- stats::cor(rnk1, rnk2, use = "pairwise")
  dim(R1)

  R1 <- R1 + 1e-8 * matrix(stats::rnorm(length(R1)), nrow(R1), ncol(R1))
  colnames(R1) <- colnames(mFC)
  rownames(R1) <- colnames(mDrugEnrich)
  dim(R1)

  ## experiment to drug
  results <- list()
  if ("cor" %in% methods) {
    message("Calculating drug enrichment using rank correlation ...")

    D <- Matrix::sparse.model.matrix(~ 0 + xdrugs)
    dim(D)
    colnames(D) <- sub("^xdrugs", "", colnames(D))
    rownames(D) <- colnames(mDrugEnrich)
    rho2 <- qlcMatrix::corSparse(D, R1)
    rownames(rho2) <- colnames(D)
    colnames(rho2) <- colnames(R1)
    rho2 <- rho2[order(-rowMeans(rho2**2)), , drop = FALSE]
    cor.pvalue <- function(x, n) stats::pnorm(-abs(x / ((1 - x**2) / (n - 2))**0.5))
    P <- apply(rho2, 2, cor.pvalue, n = nrow(D))
    Q <- apply(P, 2, stats::p.adjust, method = "fdr")
    results[["cor"]] <- list(X = rho2, Q = Q, P = P)
  }

  if ("GSEA" %in% methods) {
    message("Calculating drug enrichment using GSEA ...")
    res0 <- list()
    i <- 1
    for (i in 1:ncol(R1)) {
      suppressWarnings(res0[[i]] <- fgsea::fgseaSimple(meta.gmt, stats = R1[, i], nperm = 1000))
    }
    names(res0) <- colnames(R1)
    length(res0)

    mNES <- sapply(res0, function(x) x$NES)
    mQ <- sapply(res0, function(x) x$padj)
    mP <- sapply(res0, function(x) x$pval)
    if (length(res0) == 1) {
      mNES <- cbind(mNES)
      mP <- cbind(mP)
      mQ <- cbind(mQ)
    }

    pw <- res0[[1]]$pathway
    rownames(mNES) <- rownames(mQ) <- rownames(mP) <- pw
    colnames(mNES) <- colnames(mQ) <- colnames(mP) <- colnames(mFC)
    msize <- res0[[1]]$size
    dim(R1)
    results[["GSEA"]] <- list(X = mNES, Q = mQ, P = mP, size = msize)
  }
  names(results)

  ## this takes only the top matching drugs for each comparison to
  ## reduce the size of the matrices

  if (nprune > 0) {
    message(" pruning : nprune = ", nprune)
    k <- 1
    for (k in 1:length(results)) {
      res <- results[[k]]
      ## reduce solution set with top-N of each comparison
      mtop <- apply(abs(res$X), 2, function(x) Matrix::head(order(-x), nprune)) ## absolute NES!!
      mtop <- unique(as.vector(mtop))
      length(mtop)
      top.idx <- unique(unlist(meta.gmt[mtop]))
      length(top.idx)
      results[[k]]$X <- res$X[mtop, , drop = FALSE]
      results[[k]]$P <- res$P[mtop, , drop = FALSE]
      results[[k]]$Q <- res$Q[mtop, , drop = FALSE]
      results[[k]]$size <- res$size[mtop]
    }
  }

  ## UMAP clustering
  message("[computePerturbEnrichment] UMAP clustering...")
  top.drugs <- unique(unlist(sapply(results, function(r) rownames(r$X))))
  sel <- which(xdrugs %in% top.drugs)

  cX <- scale(mDrugEnrich[, sel], center = FALSE)
  cX <- cX - rowMeans(cX)
  cX <- scale(cX, center = TRUE)
  pos <- uwot::umap(t(cX), fast_sgd = TRUE)
  dim(pos)
  rownames(pos) <- colnames(cX)
  results$clust <- pos
  rp <- as.array(R1[rownames(pos), ])
  results$stats <- rp

  drugs <- results

  ## --------------- attach annotation
  if (!is.null(DrugsAnnot)) {
    DrugsAnnot$drug <- DrugsAnnot$pert_iname
    DrugsAnnot <- DrugsAnnot[match(rownames(results[["GSEA"]]$X), rownames(DrugsAnnot)), ]
    rownames(DrugsAnnot) <- rownames(results[["GSEA"]]$X)
    Matrix::head(DrugsAnnot)
    drugs[["annot"]] <- DrugsAnnot[, c("drug", "moa", "target")]
  }

  message("[computePerturbEnrichment] done!")

  return(drugs)
}
bigomics/SPACEconnect documentation built on March 19, 2022, 3:39 a.m.