Nothing
# Functions related to meta-clusters (groups of highly replicable clusters).
#' Extracts groups of reciprocal top hits from a 1-vs-best AUROC matrix.
#'
#' Note that meta-clusters are *not* cliques, but connected components, e.g.,
#' if 1<->2 and 1<->3 are reciprocal top hits, {1, 2, 3} is a meta-cluster,
#' independently from the relationship between 2 and 3.
#'
#' @param best_hits Matrix of AUROCs produced by MetaNeighborUS.
#' @param threshold AUROC threshold. Two clusters belong to the same
#' meta-cluster if they are reciprocal top hits and their similarity exceeds
#' the threshold *both* ways
#' (AUROC(1->2) > threshold *AND* AUROC(2->1) > threshold).
#'
#' @return A named list, where names are default meta-cluster names, and values
#' are vectors of cluster names, one vector per meta-cluster. The last element
#' of the list is called "outliers" and contains all clusters that had no match
#' in any other dataset.
#'
#' @export
extractMetaClusters <- function(best_hits, threshold = 0) {
comp <- igraph::components(make_graph(best_hits, threshold))
result <- list()
outliers <- c()
for (i in seq_len(comp$no)) {
members <- names(which(comp$membership == i))
if (length(members) > 1) {
result[[paste0("meta_cluster", length(result)+1)]] <- members
} else {
outliers <- c(outliers, members)
}
}
result$outliers <- outliers
return(result)
}
# Build undirected graph where vertices are clusters and edges are reciprocal
# top hits.
make_graph <- function(best_hits, threshold = 0) {
adj <- 0*best_hits
# keep hits above threshold
adj[best_hits > threshold] <- 1
# keep only reciprocal hits
adj <- adj * t(adj)
igraph::graph_from_adjacency_matrix(adj)
}
#' Summarize meta-cluster information in a table.
#'
#' @param meta_clusters Meta-cluster list generated by extractMetaClusters.
#' @param best_hits Matrix of AUROCs used to extract meta-clusters.
#' @param outlier_label Element of meta-cluster list containing outlier
#' clusters.
#'
#' @return A data.frame. Column "meta_cluster" contains meta-cluster names,
#' "clusters" lists the clusters belonging to each meta-cluster,
#' "n_studies" is the number of studies spanned by the meta-cluster,
#' "score" is the average similarity between meta-cluster members
#' (average AUROC, NAs are treated as 0).
#'
#' @export
scoreMetaClusters <- function(meta_clusters, best_hits,
outlier_label = "outliers") {
best_hits[is.na(best_hits)] <- 0
modules <- meta_clusters[names(meta_clusters) != outlier_label]
mc_summary <- lapply(names(modules), function(mc_name) {
clusters <- meta_clusters[[mc_name]]
data.frame(
meta_cluster = mc_name,
clusters = paste(clusters, collapse = "; "),
n_studies = length(unique(getStudyId(clusters))),
score = mean(best_hits[clusters, clusters])
)
})
result <- do.call(rbind.data.frame, mc_summary)
result <- result[order(result$n_studies, result$score, decreasing=TRUE),]
outliers <- data.frame(
meta_cluster = outlier_label,
clusters = paste(meta_clusters[[outlier_label]], collapse = "; "),
n_studies = 1,
score = NA
)
result <- rbind(result, outliers)
rownames(result) <- result$meta_cluster
return(result)
}
#' Plot meta-cluster badges, each badge is a small AUROC heatmap restricted to
#' a specific meta-cluster.
#'
#' @param meta_clusters Meta-cluster list generated by extractMetaClusters.
#' @param best_hits Matrix of AUROCs used to extract meta-clusters.
#' @param reorder Reorder datasets by similarity for each badge? By default,
#' the same dataset ordering is used for each badge.
#' @param cex Size factor controling label size.
#' @param study_cols Named vector where values are RGB colors and names are
#' unique study identifiers (corresponding to study_id).
#' If NULL, a default color palette is used.
#' @param auroc_cols Vector containing RGB colors used to encode AUROC levels.
#' The length of auroc_cols must correspond to the length of auroc_breaks - 1.
#' @param auroc_breaks Numeric vector used to bin AUROC values for color coding.
#'
#' @export
plotMetaClusters <- function(
meta_clusters, best_hits, reorder=FALSE, cex = 1, study_cols = NULL,
auroc_breaks = c(0, 0.5, 0.7, 0.9, 0.95, 0.99, 1),
auroc_cols = grDevices::colorRampPalette(c("white", "blue"))(length(auroc_breaks)-1)
) {
if (length(meta_clusters) == 0) { return(list()); }
dendrogram <- if (reorder) "both" else "none"
if (is.null(study_cols)) {
study_ids <- unique(getStudyId(unlist(meta_clusters)))
study_cols <- gg_color(length(study_ids))
names(study_cols) <- study_ids
}
for (i in seq_along(meta_clusters)) {
c <- meta_clusters[[i]]
dat <- best_hits[c, c]
comp_cols <- study_cols[getStudyId(rownames(dat))]
comp_cell_types <- getCellType(rownames(dat))
if (reorder) {
new_order <- stats::as.dendrogram(order_sym_matrix(dat))
} else {
new_order <- FALSE
}
gplots::heatmap.2(
dat, margins = c(10,10),
labRow = comp_cell_types, labCol = comp_cell_types,
key.xlab="AUROC", key.title=NA, cexRow = cex, cexCol = cex,
trace = "none", breaks = auroc_breaks, col = auroc_cols,
Rowv = new_order, Colv = new_order, dendrogram = dendrogram,
RowSideColors = rev(comp_cols), ColSideColors = comp_cols,
revC = TRUE, main = names(meta_clusters)[i]
)
graphics::par(lend = 1)
graphics::legend(
"topright", inset = c(0, 0), legend = names(study_cols),
col = study_cols, pt.cex = 1, cex = 1, lwd = 10, bty="n"
)
}
}
# Emulate default ggplot colors
gg_color <- function(n) {
hues <- seq(15, 375, length = n + 1)
grDevices::hcl(h = hues, l = 65, c = 100)[1:n]
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.