#' Summarizes the TopDom Best Scores across TopDomOverlapScores
#'
#' @param fit A [base::list] of K `TopDomOverlapScores` objects
#'
#' @param weights A character string specifying how overlap scores across
#' domains should be weighted.
#'
#' @param domain_length (optional) A length-two numeric vector (min, max)
#' specifying the minimum and maximum length of domains to include in the
#' summary. Any domains with lengths outside this range will be discarded.
#' If `NULL` (default) or `c(-Inf, +Inf)`, all domains are included.
#'
#' @param drop_reference If `TRUE`, then the reference partition is dropped.
#'
#' @return A [base::data.frame] with (K-1) rows.
#'
#' @importFrom stats mad quantile sd
#' @importFrom matrixStats weightedMean weightedSd weightedMad
# @importFrom Hmisc wtd.quantile
#' @export
overlap_score_summary <- function(fit, weights = c("uniform", "by_length"), domain_length = NULL, drop_reference = TRUE) {
stopifnot(is.list(fit))
lapply(fit, FUN = function(x) stopifnot(inherits(x, "TopDomOverlapScores"), length(x) == 1L))
weights <- match.arg(weights, choices = c("uniform", "by_length"))
if (!is.null(domain_length)) stopifnot(is.numeric(domain_length), length(domain_length) == 2L, !anyNA(domain_length))
n <- length(fit)
stopifnot(n >= 2L)
attrs <- attributes(fit)
stopifnot(is.list(attrs), length(attrs) > 1L)
partition_by <- attrs[["partition_by"]]
stopifnot(length(partition_by) == 1L, !is.na(partition_by), is.character(partition_by))
if (partition_by %in% c("reads_by_half", "cells_by_half")) {
stopifnot(n == 2L)
}
chr <- attrs[["chromosome"]]
stopifnot(length(chr) == 1L, !is.na(chr), is.character(chr))
bin_size <- attrs[["bin_size"]]
stopifnot(length(bin_size) == 1L, !is.na(bin_size), is.numeric(bin_size), bin_size >= 1L)
min_cell_size <- attrs[["min_cell_size"]]
stopifnot(length(min_cell_size) == 1L, !is.na(min_cell_size), is.numeric(min_cell_size), min_cell_size >= 1L)
ref <- attrs[["reference_partition"]]
stopifnot(length(ref) == 1L, !is.na(ref), ref >= 1L)
if (drop_reference) fit <- fit[-ref]
data <- lapply(fit, FUN = function(x) {
if (!inherits(x, "TopDomOverlapScores")) return(data.frame(best_score = double(0L), best_length = integer(0L)))
y <- x[[chr]]
y[c("best_score", "best_length")]
})
## Filter by domain lengths?
if (!is.null(domain_length)) {
data <- lapply(data, FUN = function(xy) {
stopifnot(is.data.frame(xy))
len <- xy[["best_length"]]
keep <- (domain_length[1] <= len & len <= domain_length[2])
xy[keep, ]
})
}
summary <- lapply(data, FUN = function(xy) {
stopifnot(is.data.frame(xy))
count <- nrow(xy)
score <- xy[["best_score"]]
length <- xy[["best_length"]]
## Drop missing values
keep <- !is.na(score) & !is.na(length) & length > 0L
score <- score[keep]
length <- length[keep]
n <- length(score)
if (weights == "uniform") {
mean_hat <- mean(score)
quantile_hat <- quantile(score)
sd_hat <- sd(score)
mad_hat <- mad(score)
} else if (weights == "by_length") {
## FIXME: Workaround for https://github.com/harrelfe/Hmisc/issues/119
wtd.quantile <- Hmisc::wtd.quantile
mean_hat <- weightedMean(score, w = length)
quantile_hat <- wtd.quantile(score, weights = length)
## Make names consistent with stats::quantile() names
names(quantile_hat) <- gsub("^[ ]+", "", names(quantile_hat))
sd_hat <- weightedSd(score, w = length)
mad_hat <- weightedMad(score, w = length)
}
data.frame(
chromosome = chr,
min_cell_size = min_cell_size,
bin_size = bin_size,
fraction = attrs[["fraction"]]["test"],
ref_fraction = attrs[["fraction"]]["reference"],
mean = mean_hat,
as.list(quantile_hat),
sd = sd_hat,
mad = mad_hat,
count = count,
n = n,
check.names = FALSE,
stringsAsFactors = FALSE
)
})
summary <- do.call(rbind, summary)
attr(summary, "weights") <- weights
attr(summary, "partition_by") <- partition_by
if (!drop_reference) attr(summary, "reference_partition") <- ref
attr(summary, "seed") <- attrs[["seed"]]
summary
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.