#' Calculate PAC
#'
#' This function calculates the proportion of ambiguous clustering (PAC) for
#' each value of \emph{k} tested via consensus clustering.
#'
#' @param consensus_mats A list of consensus matrices, as created by a call to
#' \code{\link{consensus}}, \code{\link{M3C}}, or \code{
#' \link[ConsensusClusterPlus]{ConsensusClusterPlus}}.
#' @param pac_window Lower and upper bounds for the consensus index sub-interval
#' over which to calculate the PAC. Must be on (0, 1).
#' @param plot Return plot of PAC scores by \emph{k}?
#'
#' @details
#' Consensus clustering is a method for testing the stability of cluster
#' membership under resampling (Monti et al., 2003). Senbabaoglu et al. (2014)
#' demonstrated that traditional methods for estimating optimal cluster number
#' fail when probes are not independent, which they rarely are in omic data. The
#' authors propose a new measure, the proportion of ambiguous clustering (PAC),
#' which represents the increase in the empirical CDF curve for each potential
#' cluster number \emph{k} over a user-defined sub-interval of the consensus
#' index generated by the consensus cluster algorithm. The default settings of
#' \code{pac_window = c(0.1, 0.9)} are taken from the original PAC paper, and
#' generally lead to stable results.
#'
#' @return A data frame with PAC scores for each value of \emph{k} in \code{
#' consensus_mats}. If \code{plot = TRUE}, then PAC scores by \emph{k} are
#' plotted.
#'
#' @references
#' Monti, S., Tamayo, P., Mesirov, J., & Golub, T. (2003).
#' \href{http://link.springer.com/article/10.1023/A:1023949509487}{Consensus
#' Clustering: A Resampling-Based Method for Class Discovery and Visualization
#' of Gene Expression Microarray Data}. \emph{Machine Learning}, \emph{52}:
#' 91-118.
#'
#' Senbabaoglu, Y., Michailidis, G. & Li, J.Z. (2014).
#' \href{http://www.nature.com/articles/srep06207}{Critical limitations of
#' consensus clustering in class discovery}. \emph{Scientific Reports}, \emph{
#' 4}:6207.
#'
#' @examples
#' mat <- matrix(rnorm(1000 * 12), nrow = 1000, ncol = 12)
#' cc <- consensus(mat)
#' pac <- PAC(cc, plot = TRUE)
#'
#' @export
#' @importFrom purrr keep
#' @import dplyr
#' @import ggplot2
#'
PAC <- function(consensus_mats,
pac_window = c(0.1, 0.9),
plot = FALSE) {
# Preliminaries
max_k <- length(consensus_mats)
if (!is.list(consensus_mats)) {
stop('consensus_mats must be a list object containing consensus matrices.')
}
if (any(names(consensus_mats) == 'consensusMatrix')) {
consensus_mats <- lapply(seq_len(max_k), function(k) {
if (k == 1L) {
return(NULL)
} else {
return(consensus_mats[[k]]$consensusMatrix)
}
})
}
if (length(pac_window) != 2) {
stop('pac_window must be a vector of length 2.')
}
if (min(pac_window) <= 0 || max(pac_window) >= 1) {
stop('Both values of pac_window must be on (0, 1).')
}
# Calculate PAC
suppressWarnings(
pacs <- expand.grid(k = 2:max_k,
Idx = pac_window) %>%
rowwise(.) %>%
mutate(CDF = ecdf(consensus_mats[[k]] %>% keep(lower.tri(.)))(Idx),
k = as.factor(k)) %>%
group_by(k) %>%
mutate(PAC = diff(CDF)) %>%
distinct(k, PAC) %>%
ungroup(.) %>%
mutate(k = as.integer(k))
)
# Plot output
if (plot) {
pacs <- pacs %>%
mutate(Optimal = PAC == min(PAC))
p <- ggplot(pacs, aes(k, PAC)) +
geom_point(aes(color = Optimal), size = 3) +
geom_line() +
scale_x_continuous(breaks = seq(0, length(consensus_mats), 1)) +
scale_color_manual(values = c('black', 'red')) +
guides(color = guide_legend(reverse = TRUE)) +
labs(x = expression('Cluster Number'~italic(k))) +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5))
print(p)
}
# Export
return(pacs)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.