enrichTest <- function(x, clust.col, noise.clust.id=NULL, cat.col, equal.props=FALSE, alternative="greater")
{
#' @title Enrichment Testing
#'
#' @author Julian Spagnuolo
#'
#' @param x data.frame/matrix. Data to be tested against, should contain column of cluster IDs and categorical/annotation column.
#' @param clust.col character vector. Column name in x containing the cluster ID's.
#' @param noise.clust.id character. ID of noise clusters if present (i.e DBscan ID's noise as "0"). If NULL this will not be used. Default is NULL.
#' @param cat.col character vector. Column name in x with the categorical (factor) data to be tested for enrichment.
#' @param equal.props logical/boolean. If TRUE binomial test will use an equal hypothesised probability of success in the binomial test, if FALSE binomial test uses the global proportion (including the noise points if present). Default is FALSE. Note: The hypergeometric test will always use the global proportion.
#' @param alternative character. Indicates alternative hypothesis of binomial test. Must be one of "two.sided", "greater", or "less". You can specify just the initial letter.Default is "greater"
#'
#' @details For the binomial test,
#'
#' @export
# get vector of clusters to test for enrichment.
unique.clusts <- unique(x[,clust.col])
# remove the noise points if requested.
if(!is.null(noise.clust.id))
{
unique.clusts <- unique.clusts[!(unique.clusts == noise.clust.id)]
}
unique.cats <- unique(x[,cat.col])
if(isTRUE(equal.props))
{
prob <- data.frame(Var1=unique.cats, Freq=1/length(unique.cats))
}
else
{
prob <- table(x[,cat.col])/nrow(x)
prob <- as.data.frame(prob) # cat == Var1, prob == Freq
}
# initialise results output object
results <- data.frame(cluster=rep(unique.clusts, times=length(unique.cats)),
category=rep(unique.cats, each=length(unique.clusts)), cluster.size=NA,
proportion=NA, binomial.pval=NA, binomial.FDR=NA, hg.pval=NA, hg.FDR=NA,
stringsAsFactors = TRUE, row.names = NULL)
results <- results[order(results$cluster),]
rownames(results) <- 1:nrow(results)
# begin testing
for(i in unique.clusts)
{
for(n in unique.cats)
{
# perform binomial test
res <- binom.test(x=nrow(x[which(x[,clust.col] == i & x[,cat.col] == n),]),
n=nrow(x[which(x[,clust.col] == i),]),
p=prob[which(prob$Var1 == n),"Freq"],
conf.level = 0.95, alternative = alternative)[c("estimate","p.value")]
results[which(results[,"cluster"] == i & results[,"category"] == n),c("proportion","binomial.pval")] <- res
results[which(results[,"cluster"] == i),"cluster.size"] <- nrow(x[which(x[,clust.col] == i),])
# perform hypergeometric test
res <- phyper(q=nrow(x[which(x[,clust.col] == i & x[,cat.col] == n),]),
m=nrow(x[which(x[,cat.col] == n),]),
n=nrow(x)-nrow(x[which(x[,cat.col] == n),]),
k=nrow(x[which(x[,clust.col] == i),]),
lower.tail=FALSE, log.p=FALSE)
results[which(results[,"cluster"] == i & results[,"category"] == n),"hg.pval"] <- res
}
}
results$binomial.FDR <- p.adjust(p=results$binomial.pval, method="fdr")
results$hg.FDR <- p.adjust(p=results$hg.pval, method="fdr")
results$bin.FDR <- signif(results$binomial.FDR, digits=3)
results$bin.pval <- signif(results$binomial.pval, digits=3)
results$prop <- signif(results$proportion, digits=3)
return(results)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.