R/meta_clusters.R

Defines functions gg_color plotMetaClusters scoreMetaClusters make_graph extractMetaClusters

Documented in extractMetaClusters plotMetaClusters scoreMetaClusters

# Functions related to meta-clusters (groups of highly replicable clusters).

#' Extracts groups of reciprocal top hits from a 1-vs-best AUROC matrix.
#'
#' Note that meta-clusters are *not* cliques, but connected components, e.g.,
#' if 1<->2 and 1<->3 are reciprocal top hits, {1, 2, 3} is a meta-cluster,
#' independently from the relationship between 2 and 3.
#'
#' @param best_hits Matrix of AUROCs produced by MetaNeighborUS.
#' @param threshold AUROC threshold. Two clusters belong to the same
#' meta-cluster if they are reciprocal top hits and their similarity exceeds
#' the threshold *both* ways
#' (AUROC(1->2) > threshold *AND* AUROC(2->1) > threshold).
#'
#' @return A named list, where names are default meta-cluster names, and values
#' are vectors of cluster names, one vector per meta-cluster. The last element
#' of the list is called "outliers" and contains all clusters that had no match
#' in any other dataset.
#'
#' @export
extractMetaClusters <- function(best_hits, threshold = 0) {
  comp <- igraph::components(make_graph(best_hits, threshold))
  result <- list()
  outliers <- c()
  for (i in seq_len(comp$no)) {
    members <- names(which(comp$membership == i))
    if (length(members) > 1) {
      result[[paste0("meta_cluster", length(result)+1)]] <- members
    } else {
      outliers <- c(outliers, members)
    }
  }
  result$outliers <- outliers
  return(result)
}

# Build undirected graph where vertices are clusters and edges are reciprocal
# top hits.
make_graph <- function(best_hits, threshold = 0) {
  adj <- 0*best_hits
  # keep hits above threshold
  adj[best_hits > threshold] <- 1
  # keep only reciprocal hits
  adj <- adj * t(adj)
  igraph::graph_from_adjacency_matrix(adj)
}

#' Summarize meta-cluster information in a table.
#'
#' @param meta_clusters Meta-cluster list generated by extractMetaClusters.
#' @param best_hits Matrix of AUROCs used to extract meta-clusters.
#' @param outlier_label Element of meta-cluster list containing outlier
#' clusters.
#'
#' @return A data.frame. Column "meta_cluster" contains meta-cluster names,
#' "clusters" lists the clusters belonging to each meta-cluster,
#' "n_studies" is the number of studies spanned by the meta-cluster,
#' "score" is the average similarity between meta-cluster members
#' (average AUROC, NAs are treated as 0).
#'
#' @export
scoreMetaClusters <- function(meta_clusters, best_hits,
                              outlier_label = "outliers") {
    best_hits[is.na(best_hits)] <- 0
    modules <- meta_clusters[names(meta_clusters) != outlier_label]
    mc_summary <- lapply(names(modules), function(mc_name) {
        clusters <- meta_clusters[[mc_name]]
        data.frame(
            meta_cluster = mc_name,
            clusters = paste(clusters, collapse = "; "),
            n_studies = length(unique(getStudyId(clusters))),
            score = mean(best_hits[clusters, clusters])
        )
    })
    result <- do.call(rbind.data.frame, mc_summary)
    result <- result[order(result$n_studies, result$score, decreasing=TRUE),]
    outliers <- data.frame(
        meta_cluster = outlier_label,
        clusters = paste(meta_clusters[[outlier_label]], collapse = "; "),
        n_studies = 1,
        score = NA
    )
    result <- rbind(result, outliers)
    rownames(result) <- result$meta_cluster
    return(result)
}

#' Plot meta-cluster badges, each badge is a small AUROC heatmap restricted to
#' a specific meta-cluster.
#'
#' @param meta_clusters Meta-cluster list generated by extractMetaClusters.
#' @param best_hits Matrix of AUROCs used to extract meta-clusters.
#' @param reorder Reorder datasets by similarity for each badge? By default,
#' the same dataset ordering is used for each badge.
#' @param cex Size factor controling label size.
#' @param study_cols Named vector where values are RGB colors and names are
#' unique study identifiers (corresponding to study_id).
#' If NULL, a default color palette is used.
#' @param auroc_cols Vector containing RGB colors used to encode AUROC levels. 
#' The length of auroc_cols must correspond to the length of auroc_breaks - 1.
#' @param auroc_breaks Numeric vector used to bin AUROC values for color coding.
#'
#' @export
plotMetaClusters <- function(
    meta_clusters, best_hits, reorder=FALSE, cex = 1, study_cols = NULL,
    auroc_breaks = c(0, 0.5, 0.7, 0.9, 0.95, 0.99, 1),
    auroc_cols = grDevices::colorRampPalette(c("white", "blue"))(length(auroc_breaks)-1)
) {
  if (length(meta_clusters) == 0) { return(list()); }

  dendrogram <- if (reorder) "both" else "none"
  if (is.null(study_cols)) {
      study_ids <- unique(getStudyId(unlist(meta_clusters)))
      study_cols <- gg_color(length(study_ids))
      names(study_cols) <- study_ids
  }
    
  for (i in seq_along(meta_clusters)) {
    c <- meta_clusters[[i]]
    dat <- best_hits[c, c]
    comp_cols <- study_cols[getStudyId(rownames(dat))]
    comp_cell_types <- getCellType(rownames(dat))
    if (reorder) {
      new_order <- stats::as.dendrogram(order_sym_matrix(dat))
    } else {
      new_order <- FALSE
    }
    gplots::heatmap.2(
      dat, margins = c(10,10),
      labRow = comp_cell_types, labCol = comp_cell_types,
      key.xlab="AUROC", key.title=NA, cexRow = cex, cexCol = cex,
      trace = "none", breaks = auroc_breaks, col = auroc_cols, 
      Rowv = new_order, Colv = new_order, dendrogram = dendrogram,
      RowSideColors = rev(comp_cols), ColSideColors = comp_cols,
      revC = TRUE, main = names(meta_clusters)[i]
    )
    graphics::par(lend = 1)
    graphics::legend(
      "topright", inset = c(0, 0), legend = names(study_cols),
      col = study_cols, pt.cex = 1, cex = 1, lwd = 10, bty="n"
    )
  }
}

# Emulate default ggplot colors
gg_color <- function(n) {
  hues <- seq(15, 375, length = n + 1)
  grDevices::hcl(h = hues, l = 65, c = 100)[1:n]
}

Try the MetaNeighbor package in your browser

Any scripts or data that you put into this service are public.

MetaNeighbor documentation built on Nov. 8, 2020, 5:40 p.m.