#' @title SummarizeHeatmaps
#'
#' @description Summarize heatmap count matrices from individual bam samples.
#'
#' @details This function takes a list of heatmap count matrices (from individual bam files) and summarizes them (by user defined groups).
#' The matrices should have unique row names and consistent dimensions. This implementation is optimized for performance.
#'
#' @param counts A list of counts across peak summits generated by SummitHeatmap.
#' Each element must be a matrix with unique row names.
#' @param sampleList A list where each element is a character vector of sample names that should be grouped.
#' The name of each element of this list specifies the group name.
#' @param summarizing Function to use for summarizing data. Defaults to mean.
#' Must be a function that can be applied to a numeric vector (e.g., mean, median, sum).
#' @param verbose Logical indicating whether to print progress messages. Default is TRUE.
#'
#' @return A list of summarized (by group) counts across peak summits.
#'
#' @examples
#' counts <- list(matrix(rnorm(21000,2,1),ncol=21,
#' nrow=100,dimnames=list(1:100,-10:10)),
#' matrix(rnorm(21000,2,1),ncol=21,
#' nrow=100,dimnames=list(1:100,-10:10)),
#' matrix(rnorm(21000,2,1),ncol=21,
#' nrow=100,dimnames=list(1:100,-10:10)),
#' matrix(rnorm(21000,2,1),ncol=21,
#' nrow=100,dimnames=list(1:100,-10:10)))
#' bamNames <- c("counts1","counts2","counts3","counts4")
#' names(counts) <- bamNames
#' SummarizeHeatmaps(counts,sampleList=list
#' (countsA=c("counts1","counts2"),countsB=c("counts3","counts4")))
#'
#'@importFrom utils head
#'
#' @export
SummarizeHeatmaps <- function(counts, sampleList, summarizing = mean, verbose = TRUE) {
# Fast input validation
if (!is.list(counts)) stop("'counts' must be a list of matrices")
if (length(counts) == 0) stop("'counts' list is empty")
if (is.null(names(counts))) stop("'counts' list elements must be named")
if (!is.list(sampleList)) stop("'sampleList' must be a list")
if (length(sampleList) == 0) stop("'sampleList' is empty")
if (is.null(names(sampleList))) stop("'sampleList' elements must be named to specify group names")
if (!is.function(summarizing)) stop("'summarizing' must be a function (e.g., mean, median, sum)")
# Check first matrix once
if (!is.matrix(counts[[1]])) stop("First element in 'counts' is not a matrix")
if (is.null(rownames(counts[[1]]))) stop("First matrix does not have row names")
# Store dimensions once
nrow_first <- nrow(counts[[1]])
ncol_first <- ncol(counts[[1]])
# Quick check for duplicate rownames in first matrix
if (anyDuplicated(rownames(counts[[1]])) > 0) {
dups <- duplicated(rownames(counts[[1]]))
stop(sprintf("First matrix contains %d duplicated rownames: %s",
sum(dups), paste(head(rownames(counts[[1]])[dups], 5), collapse=", ")))
}
# Check all matrices in one pass
is_valid <- vapply(seq_along(counts)[-1], function(i) {
if (!is.matrix(counts[[i]])) {
stop(sprintf("Element %s in 'counts' is not a matrix", names(counts)[i]))
}
if (is.null(rownames(counts[[i]]))) {
stop(sprintf("Matrix %s does not have row names", names(counts)[i]))
}
if (nrow(counts[[i]]) != nrow_first || ncol(counts[[i]]) != ncol_first) {
stop(sprintf("Matrix %s has inconsistent dimensions (%d×%d vs %d×%d)",
names(counts)[i], nrow(counts[[i]]), ncol(counts[[i]]), nrow_first, ncol_first))
}
if (anyDuplicated(rownames(counts[[i]])) > 0) {
dups <- duplicated(rownames(counts[[i]]))
stop(sprintf("Matrix %s contains %d duplicated rownames: %s",
names(counts)[i], sum(dups), paste(head(rownames(counts[[i]])[dups], 5), collapse=", ")))
}
return(TRUE)
}, logical(1))
# Check if all samples in sampleList exist in counts
all_samples <- names(counts)
for (i in seq_along(sampleList)) {
missing_samples <- setdiff(sampleList[[i]], all_samples)
if (length(missing_samples) > 0) {
stop(sprintf("Group '%s' contains samples that don't exist in counts: %s",
names(sampleList)[i], paste(missing_samples, collapse=", ")))
}
}
# Pre-allocate output list
meanCounts <- vector("list", length(sampleList))
names(meanCounts) <- names(sampleList)
# Process each group
for (i in seq_along(sampleList)) {
group_name <- names(meanCounts)[i]
if (verbose) message(sprintf("Summarizing group '%s' with %d samples", group_name, length(sampleList[[i]])))
# Check if group has at least one sample
if (length(sampleList[[i]]) == 0) {
warning(sprintf("Group '%s' contains no samples, skipping", group_name))
next
}
# Extract samples for this group
group_samples <- sampleList[[i]]
# Optimization: For single sample groups, just copy the matrix
if (length(group_samples) == 1) {
meanCounts[[i]] <- counts[[group_samples]]
next
}
# For multiple samples, optimize by using arrays directly
tryCatch({
# Pre-allocate a 3D array to store all matrices in this group
arr_size <- c(length(group_samples), nrow_first, ncol_first)
arr <- array(0, dim = arr_size)
# Fill the array more efficiently
for (j in seq_along(group_samples)) {
arr[j,,] <- counts[[group_samples[j]]]
}
# Apply the summarizing function along the first dimension
meanCounts[[i]] <- apply(arr, c(2, 3), summarizing)
# Ensure proper dimension names
rownames(meanCounts[[i]]) <- rownames(counts[[1]])
colnames(meanCounts[[i]]) <- colnames(counts[[1]])
}, error = function(e) {
stop(sprintf("Error when summarizing group '%s': %s", group_name, e$message))
})
}
return(meanCounts)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.