Nothing
#' Graphical Displays
#'
#' Graph cumulative distribution function (CDF) graphs, relative change in area
#' under CDF curves, heatmaps, and cluster assignment tracking plots.
#'
#' `graph_cdf` plots the CDF for consensus matrices from different algorithms.
#' `graph_delta_area` calculates the relative change in area under CDF curve
#' between algorithms. `graph_heatmap` generates consensus matrix heatmaps for
#' each algorithm in `x`. `graph_tracking` tracks how cluster assignments change
#' between algorithms. `graph_all` is a wrapper that runs all graphing
#' functions.
#'
#' @param x an object from [consensus_cluster()]
#' @param mat same as `x`, or a list of consensus matrices computed from `x` for
#' faster results
#' @param cl same as `x`, or a matrix of consensus classes computed from `x` for
#' faster results
#' @return Various plots from \code{graph_*{}} functions. All plots are
#' generated using `ggplot`, except for `graph_heatmap`, which uses
#' [NMF::aheatmap()]. Colours used in `graph_heatmap` and `graph_tracking`
#' utilize [RColorBrewer::brewer.pal()] palettes.
#' @name graphs
#' @author Derek Chiu
#' @export
#' @examples
#' # Consensus clustering for 3 algorithms
#' library(ggplot2)
#' set.seed(911)
#' x <- matrix(rnorm(80), ncol = 10)
#' CC1 <- consensus_cluster(x, nk = 2:4, reps = 3,
#' algorithms = c("hc", "pam", "km"), progress = FALSE)
#'
#' # Plot CDF
#' p <- graph_cdf(CC1)
#'
#' # Change y label and add colours
#' p + labs(y = "Probability") + stat_ecdf(aes(colour = k)) +
#' scale_color_brewer(palette = "Set2")
#'
#' # Delta Area
#' p <- graph_delta_area(CC1)
#'
#' # Heatmaps with column side colours corresponding to clusters
#' CC2 <- consensus_cluster(x, nk = 3, reps = 3, algorithms = "hc", progress =
#' FALSE)
#' graph_heatmap(CC2)
#'
#' # Track how cluster assignments change between algorithms
#' p <- graph_tracking(CC1)
graph_cdf <- function(mat) {
dat <- get_cdf(mat)
p <- ggplot(dat, aes(x = !!sym("CDF"), colour = !!sym("k"))) +
stat_ecdf() +
facet_wrap(~Method) +
labs(x = "Consensus Index",
y = "CDF",
title = "Consensus Cumulative Distribution Functions")
print(p)
return(p)
}
#' @rdname graphs
#' @references https://stackoverflow.com/questions/4954507/calculate-the-area-under-a-curve
#' @export
graph_delta_area <- function(mat) {
dat <- get_cdf(mat) %>%
dplyr::group_by(.data$Method, .data$k) %>%
dplyr::summarize(AUC = sum(diff(seq(0, 1, length.out = length(.data$k))) *
(utils::head(.data$CDF, -1) + utils::tail(.data$CDF, -1))) / 2) %>%
dplyr::mutate(da = c(.data$AUC[1], diff(.data$AUC) / .data$AUC[-length(.data$AUC)]))
if (length(unique(dat$k)) > 1) {
p <- ggplot(dat, aes(x = !!sym("k"), y = !!sym("da"))) +
geom_line(group = 1) +
geom_point() +
facet_wrap(~Method) +
labs(y = "Relative change in Area under CDF curve",
title = "Delta Area")
print(p)
return(p)
}
}
#' Calculate CDF for each clustering algorithm at each k
#' @noRd
get_cdf <- function(mat) {
if (inherits(mat, "array")) {
mat <- consensus_combine(mat, element = "matrix")
}
mat %>%
purrr::modify_depth(2, ~ .x[lower.tri(.x, diag = TRUE)]) %>%
purrr::imap(~ purrr::set_names(.x, paste(.y, names(.x), sep = "."))) %>%
purrr::flatten_dfc() %>%
tidyr::gather("Group", "CDF", names(.)) %>%
tidyr::separate("Group", c("k", "Method"), sep = "\\.") %>%
dplyr::mutate(k = factor(.data$k, levels = as.integer(unique(.data$k))))
}
#' @param main heatmap title. If `NULL` (default), the titles will be taken from
#' names in `mat`
#'
#' @rdname graphs
#' @export
graph_heatmap <- function(mat, main = NULL) {
if (inherits(mat, "array")) {
mat <- consensus_combine(mat, element = "matrix")
}
dat <- mat %>%
purrr::flatten() %>%
magrittr::set_names(list(purrr::map(mat, names)[[1]], names(mat)) %>%
purrr::cross() %>%
purrr::map_chr(paste, collapse = " k="))
main <- paste(main %||% names(dat), "Consensus Matrix")
assertthat::assert_that(length(main) == length(purrr::flatten(mat)))
annCol <- purrr::map2(dat, rep(as.numeric(names(mat)),
each = unique(purrr::map_int(mat, length))),
~ data.frame(Cluster = paste0("C", hc(stats::dist(.x), k = .y))))
pal <- c("#66C2A5", "#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F",
"#E5C494", "#B3B3B3") # RColorBrewer Set2
annColors <- annCol %>%
purrr::map(~ list(Cluster = stats::setNames(
head(pal, dplyr::n_distinct(.)),
levels(unlist(.))
)))
purrr::pwalk(list(dat, annCol, annColors, main), ~ {
NMF::aheatmap(
x = ..1,
color = "PuBuGn",
Rowv = FALSE,
Colv = TRUE,
labRow = NA,
labCol = NA,
hclustfun = function(d) stats::hclust(d, method = "average"),
annCol = ..2,
annColors = ..3,
main = ..4
)
})
}
#' @rdname graphs
#' @export
graph_tracking <- function(cl) {
if (inherits(cl, "array")) {
cl <- consensus_combine(cl, element = "class")
}
dat <- cl %>%
purrr::imap(~ `colnames<-`(.x, paste(.y, colnames(.x), sep = "."))) %>%
do.call(cbind, .) %>%
as.data.frame() %>%
tidyr::gather("Group", "Class", names(.)) %>%
tidyr::separate("Group", c("k", "Method"), sep = "\\.") %>%
cbind(Samples = seq_len(unique(purrr::map_int(cl, nrow)))) %>%
dplyr::mutate_at(dplyr::vars(c("Class", "Method", "Samples")), factor)
if (length(unique(dat$k)) > 1) {
p <- ggplot(dat, aes(x = !!sym("Samples"), y = !!sym("k"))) +
geom_tile(aes(fill = !!sym("Class"))) +
facet_wrap(~Method) +
scale_fill_brewer(palette = "Set2") +
ggtitle("Tracking Cluster Assignments Across k") +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank())
print(p)
return(p)
}
}
#' @rdname graphs
#' @export
graph_all <- function(x) {
mat <- consensus_combine(x, element = "matrix")
cl <- consensus_combine(x, element = "class")
graph_cdf(mat)
graph_delta_area(mat)
graph_heatmap(mat)
graph_tracking(cl)
}
#' Comparing ranked Algorithms vs internal indices (ii) in heatmap
#' @inheritParams dice
#' @param E object in `dice`
#' @param clusters object in `dice`
#' @noRd
algii_heatmap <- function(data, nk, E, clusters, ref.cl = NULL) {
# Cluster list to keep
cl.list <- E %>%
consensus_combine(element = "class") %>%
magrittr::extract(as.character(nk))
# Final cluster object construction depends on value of nk
if (length(nk) > 1) {
fc <- purrr::map2(cl.list, nk,
~ magrittr::set_colnames(.x, paste_k(colnames(.), .y))) %>%
purrr::map2(split_clusters(clusters), cbind) %>%
purrr::map(~ apply(., 2, relabel_class, ref.cl = ref.cl %||% .[, 1])) %>%
do.call(cbind, .) %>%
as.data.frame()
} else {
fc <- cl.list %>%
do.call(cbind, .) %>%
cbind.data.frame(clusters) %>%
purrr::map_df(relabel_class, ref.cl = ref.cl %||% .[, 1])
}
# Internal indices
ii <- ivi_table(fc, data)
# Heatmap: order algorithms by ranked ii, remove indices with NaN
hm <- ii %>%
dplyr::select(-.data$Algorithms) %>%
magrittr::extract(match(consensus_rank(ii, 1)$top.list, rownames(.)),
purrr::map_lgl(., ~ all(!is.nan(.x))))
# Plot heatmap with annotated colours, column scaling, no further reordering
NMF::aheatmap(
hm,
annCol = data.frame(Criteria = c(rep("Maximized", 5),
rep("Minimized", ncol(hm) - 5))),
annColors = list(Criteria = stats::setNames(c("darkgreen", "deeppink4"),
c("Maximized", "Minimized"))),
Colv = NA, Rowv = NA, scale = "column", col = "PiYG",
main = "Ranked Algorithms on Internal Validity Indices"
)
}
#' Split clusters matrix into list based on value of k
#' @noRd
split_clusters <- function(clusters) {
tc <- t(clusters)
split.data.frame(x = tc,
f = stringr::str_split_fixed(
string = rownames(tc),
pattern = " ",
n = 2
)[, 2]) %>%
purrr::map(t)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.