Nothing
#' Cluster enrichment test
#'
#' Takes a clustering object generted by cmeans or kmeans algorithm and determine the enrichment of each cluster
#' and then the overall enrichment of this clustering object based on an annotation file.
#'
#' @param clustObj the clustering object generated by cmeans or kmeans.
#' @param annotation a list with names correspond to kinases and elements correspond to substrates belonging to each kinase.
#' @param effectiveSize the size of kinase-substrate groups to be considered for calculating enrichment. Groups that are too small
#' or too large will be removed from calculating overall enrichment of the clustering.
#' @param pvalueCutoff a pvalue cutoff for determining which kinase-substrate groups to be included in calculating overall enrichment of the clustering.
#' @param universe the universe of genes/proteins/phosphosites etc. that the enrichment is calculated against.
#' @return a list that contains both the p-value indicating the overall enrichment and a sublist that details the enrichment of each individual cluster.
#' @export
#'
#' @examples
#' # simulate a time-series data with six distinctive profile groups and each group with
#' # a size of 500 phosphorylation sites.
#' simuData <- temporalSimu(seed=1, groupSize=500, sdd=1, numGroups=4)
#'
#' # create an artificial annotation database. Generate 100 kinase-substrate groups each
#' # comprising 50 substrates assigned to a kinase.
#' # among them, create 5 groups each contains phosphorylation sites defined to have the
#' # same temporal profile.
#' kinaseAnno <- list()
#' groupSize <- 500
#' for (i in 1:5) {
#' kinaseAnno[[i]] <- paste("p", (groupSize*(i-1)+1):(groupSize*(i-1)+50), sep="_")
#' }
#'
#' for (i in 6:100) {
#' set.seed(i)
#' kinaseAnno[[i]] <- paste("p", sample.int(nrow(simuData), size = 50), sep="_")
#' }
#' names(kinaseAnno) <- paste("KS", 1:100, sep="_")
#'
#' # testing enrichment of clustering results by partition the data into six clusters
#' # using cmeans algorithm.
#' clustObj <- e1071::cmeans(simuData, centers=6, iter.max=50, m=1.25)
#' clustEnrichment(clustObj, annotation=kinaseAnno, effectiveSize=c(5, 100), pvalueCutoff=0.05)
#'
#' @import stats
clustEnrichment <- function(clustObj, annotation, effectiveSize, pvalueCutoff=0.05, universe=NULL) {
numOfcluster <- nrow(clustObj$centers)
enrich.list <- list()
counter <- 0;
listName <- c()
for (j in 1:numOfcluster) {
if (is.null(universe)) {
enrich.stats <- enrichmentTest(clust=names(which(clustObj$cluster == j)), annotation=annotation, universe=names(clustObj$cluster))
} else {
enrich.stats <- enrichmentTest(clust=names(which(clustObj$cluster == j)), annotation=annotation, universe)
}
# effective cluster filtering
sizes <- as.numeric(enrich.stats[,3])
pvalues <- as.numeric(enrich.stats[,2])
idx <- which((sizes >= effectiveSize[1]) & (sizes <= effectiveSize[2]) & (pvalues < pvalueCutoff))
if (length(idx) != 0) {
listName <- c(listName, paste("cluster", j))
counter <- counter + 1
enrich.list[[counter]] <- matrix(enrich.stats[idx,], ncol=4)
colnames(enrich.list[[counter]]) <- c("kinase", "pvalue", "size", "substrates")
}
}
names(enrich.list) <- listName
# integrating the enrichment scores
fisher.pvalue <- 1
if(length(enrich.list) != 0) {
# fitness
cluster.pvalue <- sapply(enrich.list, function(x){
# minimum p for a cluster
min(as.numeric(x[,2]))
})
# fisher for combining overall clustering
fisher.pvalue <- stats::pchisq(-2*sum(log(cluster.pvalue)), 2*length(cluster.pvalue), lower.tail = FALSE)
#cluster.pvalue.sort <- sort(cluster.pvalue)
#if (length(cluster.pvalue.sort) > 5) {
# fisher.pvalue <- pchisq(-2*sum(log(cluster.pvalue.sort[1:5])), 2*5, lower.tail = FALSE)
#} else {
# fisher.pvalue <- pchisq(-2*sum(log(cluster.pvalue)), 2*length(cluster.pvalue), lower.tail = FALSE)
#}
if(fisher.pvalue == 0) {
print("Fisher's combined pvalue is too small. The estimation maybe inaccurate!")
}
}
results <- list()
results$fisher.pvalue <- fisher.pvalue
results$enrich.list <- enrich.list
return(results)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.