cola.R
In cola: A Framework for Consensus Partitioning

## ---- echo = FALSE, message = FALSE---------------------------------------------------------------
library(markdown)
options(markdown.HTML.options = c(options('markdown.HTML.options')[[1]], "toc"))
library(knitr)
knitr::opts_chunk$set(
    error = FALSE,
    tidy  = FALSE,
    message = FALSE,
    fig.align = "center")
options(markdown.HTML.stylesheet = "custom.css")
options(rmarkdown.html_vignette.check_title = FALSE)
options(width = 100)
library(ComplexHeatmap)
library(circlize)
library(cola)

## -------------------------------------------------------------------------------------------------
library(cola)

## ---- eval = FALSE--------------------------------------------------------------------------------
#  # code is only for demonstration
#  mat = adjust_matrix(mat)

## ---- eval = FALSE--------------------------------------------------------------------------------
#  # code is only for demonstration
#  res = consensus_partition(mat,
#      top_value_method = "MAD",
#      top_n = c(1000, 2000, 3000),
#      partition_method = "kmeans",
#      max_k = 6,
#      p_sampling = 0.8,
#      partition_repeat = 50,
#      anno = NULL)

## ---- eval = FALSE--------------------------------------------------------------------------------
#  # code is only for demonstration
#  rl = run_all_consensus_partition_methods(mat,
#      top_value_method = c("SD", "MAD", ...),
#      partition_method = c("hclust", "kmeans", ...),
#      mc.cores = ...)
#  cola_report(rl, output_dir = ...)

## -------------------------------------------------------------------------------------------------
all_top_value_methods()

## -------------------------------------------------------------------------------------------------
register_top_value_methods(
    max = function(mat) matrixStats::rowMaxs(mat),
    QCD = function(mat) {
        qa = matrixStats::rowQuantiles(mat, probs = c(0.25, 0.75))
        (qa[, 2] - qa[, 1])/(qa[, 2] + qa[, 1])
    })
all_top_value_methods()

## -------------------------------------------------------------------------------------------------
remove_top_value_methods(c("max", "QCD"))
all_top_value_methods()

## ---- echo = FALSE, fig.width = 5, fig.height = 5-------------------------------------------------
source("ATC.R")
par(mar = c(4, 4, 1, 1))
ATC_definition()

## ---- echo = FALSE, fig.width = 8, fig.height = 8-------------------------------------------------
ATC_simulation()

## ---- eval = FALSE--------------------------------------------------------------------------------
#  # code is only for demonstration
#  register_top_value_methods(
#      ATC_spearman = function(m) ATC(m, method = "spearman"),
#      ATC_bicor = function(m) ATC(m, cor_fun = WGCNA::bicor)
#  )

## -------------------------------------------------------------------------------------------------
all_partition_methods()

## -------------------------------------------------------------------------------------------------
register_partition_methods(
    random = function(mat, k) {
        sample(letters[1:k], ncol(mat), replace = TRUE)
    }
)

## -------------------------------------------------------------------------------------------------
remove_partition_methods("random")

## ---- eval = FALSE--------------------------------------------------------------------------------
#  # code is only for demonstration
#  register_partition_methods(
#      hclust_cor = function(mat, k) cutree(hclust(as.dist(1 - cor(mat))), k)
#  )

## ---- eval = FALSE--------------------------------------------------------------------------------
#  # code is only for demonstration
#  library(kohonen)
#  register_partition_methods(
#      SOM = function(mat, k, ...) {
#          kr = floor(sqrt(ncol(mat)))
#          somfit = som(t(mat), grid = somgrid(kr, kr, "hexagonal"), ...)
#          m = somfit$codes[[1]]
#          m = m[seq_len(nrow(m)) %in% somfit$unit.classif, ]
#          cl = cutree(hclust(dist(m)), k)
#          group = numeric(ncol(mat))
#          for(cl_unique in unique(cl)) {
#              ind = as.numeric(gsub("V", "", names(cl)[which(cl == cl_unique)]))
#              l = somfit$unit.classif %in% ind
#              group[l] = cl_unique
#          }
#          group
#      }
#  )
#  library(NMF)
#  register_partition_methods(
#      NMF = function(mat, k, ...) {
#          fit = nmf(mat, rank = k, ...)
#          apply(fit@fit@H, 2, which.max)
#      }, scale_method = "max-min"
#  )

## ---- eval = FALSE--------------------------------------------------------------------------------
#  # code is only for demonstration
#  register_SOM()
#  register_NMF()

## ---- echo = FALSE, fig.width = 9, fig.height = 4-------------------------------------------------
data(cola_rl)
m1 = get_consensus(cola_rl["SD:kmeans"], k = 2)
m2 = get_consensus(cola_rl["SD:kmeans"], k = 3)
grid.newpage()
pushViewport(viewport(layout = grid.layout(nr = 1, nc = 2)))
pushViewport(viewport(layout.pos.row = 1, layout.pos.col = 1))
draw(Heatmap(m1, name = "matrix1", show_row_dend = FALSE, show_column_dend = FALSE, col = colorRamp2(c(0, 1), c("white", "blue"))), show_heatmap_legend = TRUE, newpage = FALSE, padding = unit(c(5, 5, 5, 5), "mm"))
popViewport()
pushViewport(viewport(layout.pos.row = 1, layout.pos.col = 2))
draw(Heatmap(m2, name = "matrix2", show_row_dend = FALSE, show_column_dend = FALSE, col = colorRamp2(c(0, 1), c("white", "blue"))), show_heatmap_legend = TRUE, newpage = FALSE, padding = unit(c(5, 5, 5, 5), "mm"))
popViewport(2)

## ---- echo = FALSE, fig.width = 6, fig.height = 5-------------------------------------------------
source("silhouette.R")

## ---- echo = FALSE, fig.width = 14, fig.height = 14/3---------------------------------------------
m1 = get_consensus(cola_rl["SD:kmeans"], k = 2)
m2 = get_consensus(cola_rl["SD:kmeans"], k = 3)

grid.newpage()
pushViewport(viewport(layout = grid.layout(nr = 1, nc = 3)))
pushViewport(viewport(layout.pos.row = 1, layout.pos.col = 1))
draw(Heatmap(m1, name = "matrix1", show_row_dend = FALSE, show_column_dend = FALSE, col = colorRamp2(c(0, 1), c("white", "blue"))), show_heatmap_legend = TRUE, newpage = FALSE, padding = unit(c(5, 5, 5, 5), "mm"))
popViewport()
pushViewport(viewport(layout.pos.row = 1, layout.pos.col = 2))
draw(Heatmap(m2, name = "matrix2", show_row_dend = FALSE, show_column_dend = FALSE, col = colorRamp2(c(0, 1), c("white", "blue"))), show_heatmap_legend = TRUE, newpage = FALSE, padding = unit(c(5, 5, 5, 5), "mm"))
popViewport()

pushViewport(viewport(layout.pos.row = 1, layout.pos.col = 3))
f1 = ecdf(m1[upper.tri(m1, diag = FALSE)])
f2 = ecdf(m2[upper.tri(m2, diag = FALSE)])
pushViewport(viewport(xscale = c(0, 1), yscale = c(0, 1), x = unit(2, "cm"), y = unit(2, "cm"),
    just = c("left", "bottom"), width = unit(1, "npc") - unit(2.5, "cm"), height = unit(1, "npc") - unit(2.5, "cm")))
x = seq(0, 1, by = 0.01)
grid.lines(x, f1(x), gp = gpar(col = "red"))
grid.lines(x, f2(x), gp = gpar(col = "blue"))
grid.segments(c(0.1, 0.9), c(0, 0), c(0.1, 0.9), c(1, 1), gp = gpar(col = "grey", lty = 2))
grid.points(c(0.1, 0.9), f1(c(0.1, 0.9)), pch = 16, gp = gpar(col = "red"))
grid.points(c(0.1, 0.9), f2(c(0.1, 0.9)), pch = 16, gp = gpar(col = "blue"))
grid.xaxis(gp = gpar(fontsize = 10)); grid.text("x", x = 0.5, y = unit(-1.5, "cm"))
grid.yaxis(gp = gpar(fontsize = 10)); grid.text("P(X <= x)", x = unit(-1.5, "cm"), y = 0.5, rot = 90)
lgd = Legend(labels = c("matrix 1", "matrix 2"), type = "lines", 
    legend_gp = gpar(col = c("red", "blue")))
draw(lgd, x = unit(1, "npc"), y = unit(0.2, "npc"), just = "right")
upViewport(2)

## ---- echo = FALSE, fig.width = 14, fig.height = 14/5---------------------------------------------
grid.newpage()
pushViewport(viewport(layout = grid.layout(nr = 1, nc = 5)))
for(k in 2:6) {
    m = get_consensus(cola_rl["SD:kmeans"], k = k)
    pushViewport(viewport(layout.pos.row = 1, layout.pos.col = k-1))
    draw(Heatmap(m, show_row_dend = FALSE, show_column_dend = FALSE, 
        col = colorRamp2(c(0, 1), c("white", "blue"))), show_heatmap_legend = FALSE, 
        newpage = FALSE, column_title = paste0("k = ", k))
    popViewport()
}
popViewport()

## ---- echo = FALSE, fig.width = 10, fig.height = 5------------------------------------------------
par(mfrow = c(1, 2))
plot_ecdf(cola_rl["SD:kmeans"], lwd = 1)
area_increased = get_stats(cola_rl["SD:kmeans"])[, "area_increased"]
plot(2:6, area_increased, type = "b", xlab = "k", ylab = "area increased")

## ---- eval = FALSE--------------------------------------------------------------------------------
#  # code is only for demonstration
#  rl = run_all_consensus_partition_methods(mat,
#      top_value_method = c("SD", "MAD", ...),
#      partition_method = c("hclust", "kmeans", ...),
#      mc.cores = ...)

## ---- eval = FALSE--------------------------------------------------------------------------------
#  # code is only for demonstration
#  collect_plots(rl, fun = consensus_heatmap, k = ...)
#  collect_plots(rl, fun = membership_heatmap, k = ...)
#  collect_plots(rl, fun = get_signatures, k = ...)

## ---- eval = FALSE--------------------------------------------------------------------------------
#  # code is only for demonstration
#  collect_classes(rl, k = ...)

## -------------------------------------------------------------------------------------------------
data(golub_cola)

## -------------------------------------------------------------------------------------------------
golub_cola

## -------------------------------------------------------------------------------------------------
golub_cola["ATC:skmeans"]

## ---- fig.width = 10, fig.height = 10-------------------------------------------------------------
res = golub_cola["ATC:skmeans"] # the ConsensusPartition object
select_partition_number(res)

## -------------------------------------------------------------------------------------------------
consensus_heatmap(res, k = 3)

## -------------------------------------------------------------------------------------------------
membership_heatmap(res, k = 3)

## -------------------------------------------------------------------------------------------------
dimension_reduction(res, k = 3)

## ---- results = "hide"----------------------------------------------------------------------------
get_signatures(res, k = 3)

## ---- results = "hide"----------------------------------------------------------------------------
get_signatures(res, k = 3, top_signatures = 100)

## -------------------------------------------------------------------------------------------------
collect_classes(res)

## ---- results = "hide", fig.width = 14, fig.height = 14/5*4---------------------------------------
collect_plots(res)

## ---- fig.width = 14, fig.height = 14/5*4, results = "hide"---------------------------------------
collect_plots(golub_cola, fun = consensus_heatmap, k = 3)

## ---- fig.width = 10------------------------------------------------------------------------------
collect_classes(golub_cola, k = 3)

## ---- fig.width = 10, fig.height = 10-------------------------------------------------------------
collect_stats(golub_cola, k = 3)

## ---- eval = FALSE--------------------------------------------------------------------------------
#  # code is only for demonstration
#  mat = adjust_matrix(mat) # for some datasets, you don't need this line.
#  rl = run_all_consensus_partition_methods(mat, mc.cores = ...)
#  cola_report(rl, output_dir = ...) # Alles ist da!

## -------------------------------------------------------------------------------------------------
sessionInfo()