#' Compute drug Enrichment, drugs activity or drugs sensitivity based on database and annotation provided as input parameter
#'
#' @param mFC = A matrix of differential gene expression fold-change of own experiment, the rows name of the matrix must be the genes names
#' @param mDrugEnrich = a large matrix represents the gene expression fold change in the presence of different perturbations, either various drugs concentrations or other genetic engineering techniques as gene-knock-in etc..
#' @param DrugsAnnot =is matrix represents the drugs annotation that contains the targets, mechanism of action , clinical phase, disease area, and indication
#' @param methods the computation methods of Enrichment, either using GSEA algorithm or the rank correlation.
#' @param nmin the minimum number of drugs
#' @param nprune takes only the (nprune) top matching drugs for each comparison to reduce the size of the matrix. default is 0 to take the full matrix
#' @param contrast is a character string represent the two compared conditions(contrast) as it is provided from the fold-change matrix
#' @return drugs a list of drugs enrichment stats and annotations
#' @export
#' @import stats
#' @importFrom Matrix sparse.model.matrix
#' @importFrom Matrix head
#' @importFrom qlcMatrix corSparse
#' @importFrom fgsea fgseaSimple
#' @importFrom uwot umap
#'
#' @examples # from the data-sets provided as examples within the package load the .rda files
#' # DrugsAnnot, mDrugEnrich, mFC
#' drugs <- computePeturbEnrichment(
#' mFC = mFC, mDrugEnrich = mDrugEnrich, DrugsAnnot = DrugsAnnot,
#' methods = "GSEA", nprune = 250, contrast = NULL
#' )
computePeturbEnrichment <- function(mFC, mDrugEnrich, DrugsAnnot, methods = c("GSEA", "cor"),
nmin = 15, nprune = 0, contrast = NULL) {
if (is.null(contrast)) {
contrast <- colnames(mFC)
}
contrast <- intersect(contrast, colnames(mFC))
mFC <- mFC[, contrast, drop = FALSE]
xdrugs <- gsub("[@_].*$", "", colnames(mDrugEnrich))
ndrugs <- length(table(xdrugs))
message("number of profiles: ", ncol(mDrugEnrich))
message("number of drugs: ", ndrugs)
## create drug meta sets
meta.gmt <- tapply(colnames(mDrugEnrich), xdrugs, list)
meta.gmt <- meta.gmt[which(sapply(meta.gmt, length) >= nmin)]
length(meta.gmt)
if (length(meta.gmt) == 0) {
message("WARNING::: computeDrugEnrichment : no valid genesets!!")
return(NULL)
}
## first level (rank) correlation
message("Calculating first level rank correlation ...")
gg <- intersect(rownames(mDrugEnrich), rownames(mFC))
length(gg)
if (length(gg) < 20) {
message("WARNING::: computeDrugEnrichment : not enough common genes!!")
return(NULL)
}
rnk1 <- apply(mDrugEnrich[gg, , drop = FALSE], 2, rank, na.last = "keep")
rnk2 <- apply(mFC[gg, , drop = FALSE], 2, rank, na.last = "keep")
R1 <- stats::cor(rnk1, rnk2, use = "pairwise")
dim(R1)
R1 <- R1 + 1e-8 * matrix(stats::rnorm(length(R1)), nrow(R1), ncol(R1))
colnames(R1) <- colnames(mFC)
rownames(R1) <- colnames(mDrugEnrich)
dim(R1)
## experiment to drug
results <- list()
if ("cor" %in% methods) {
message("Calculating drug enrichment using rank correlation ...")
D <- Matrix::sparse.model.matrix(~ 0 + xdrugs)
dim(D)
colnames(D) <- sub("^xdrugs", "", colnames(D))
rownames(D) <- colnames(mDrugEnrich)
rho2 <- qlcMatrix::corSparse(D, R1)
rownames(rho2) <- colnames(D)
colnames(rho2) <- colnames(R1)
rho2 <- rho2[order(-rowMeans(rho2**2)), , drop = FALSE]
cor.pvalue <- function(x, n) stats::pnorm(-abs(x / ((1 - x**2) / (n - 2))**0.5))
P <- apply(rho2, 2, cor.pvalue, n = nrow(D))
Q <- apply(P, 2, stats::p.adjust, method = "fdr")
results[["cor"]] <- list(X = rho2, Q = Q, P = P)
}
if ("GSEA" %in% methods) {
message("Calculating drug enrichment using GSEA ...")
res0 <- list()
i <- 1
for (i in 1:ncol(R1)) {
suppressWarnings(res0[[i]] <- fgsea::fgseaSimple(meta.gmt, stats = R1[, i], nperm = 1000))
}
names(res0) <- colnames(R1)
length(res0)
mNES <- sapply(res0, function(x) x$NES)
mQ <- sapply(res0, function(x) x$padj)
mP <- sapply(res0, function(x) x$pval)
if (length(res0) == 1) {
mNES <- cbind(mNES)
mP <- cbind(mP)
mQ <- cbind(mQ)
}
pw <- res0[[1]]$pathway
rownames(mNES) <- rownames(mQ) <- rownames(mP) <- pw
colnames(mNES) <- colnames(mQ) <- colnames(mP) <- colnames(mFC)
msize <- res0[[1]]$size
dim(R1)
results[["GSEA"]] <- list(X = mNES, Q = mQ, P = mP, size = msize)
}
names(results)
## this takes only the top matching drugs for each comparison to
## reduce the size of the matrices
if (nprune > 0) {
message(" pruning : nprune = ", nprune)
k <- 1
for (k in 1:length(results)) {
res <- results[[k]]
## reduce solution set with top-N of each comparison
mtop <- apply(abs(res$X), 2, function(x) Matrix::head(order(-x), nprune)) ## absolute NES!!
mtop <- unique(as.vector(mtop))
length(mtop)
top.idx <- unique(unlist(meta.gmt[mtop]))
length(top.idx)
results[[k]]$X <- res$X[mtop, , drop = FALSE]
results[[k]]$P <- res$P[mtop, , drop = FALSE]
results[[k]]$Q <- res$Q[mtop, , drop = FALSE]
results[[k]]$size <- res$size[mtop]
}
}
## UMAP clustering
message("[computePerturbEnrichment] UMAP clustering...")
top.drugs <- unique(unlist(sapply(results, function(r) rownames(r$X))))
sel <- which(xdrugs %in% top.drugs)
cX <- scale(mDrugEnrich[, sel], center = FALSE)
cX <- cX - rowMeans(cX)
cX <- scale(cX, center = TRUE)
pos <- uwot::umap(t(cX), fast_sgd = TRUE)
dim(pos)
rownames(pos) <- colnames(cX)
results$clust <- pos
rp <- as.array(R1[rownames(pos), ])
results$stats <- rp
drugs <- results
## --------------- attach annotation
if (!is.null(DrugsAnnot)) {
DrugsAnnot$drug <- DrugsAnnot$pert_iname
DrugsAnnot <- DrugsAnnot[match(rownames(results[["GSEA"]]$X), rownames(DrugsAnnot)), ]
rownames(DrugsAnnot) <- rownames(results[["GSEA"]]$X)
Matrix::head(DrugsAnnot)
drugs[["annot"]] <- DrugsAnnot[, c("drug", "moa", "target")]
}
message("[computePerturbEnrichment] done!")
return(drugs)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.