#
# grouped counts converted to proportions
#
# Similar to shifts, but we end up with the same number of rows as we started
#
# Weights are simply total reads
#
weighted_proportions <- function(mat, name) {
n <- nrow(mat)
m <- ncol(mat)
totals <- colSums(mat)
total <- sum(totals)
good <- totals > 0
props <- matrix(NA, nrow=n,ncol=m)
weights <- matrix(0, nrow=n,ncol=m)
props[,good] <- t(t(mat[,good]) / totals[good])
mean_props <- rowSums(mat) / max(1,total)
# Bernoulli variance
per_read_vars <- mean_props*(1-mean_props)
weights[,good] <- outer(rep(1,n), totals[good])
rownames(props) <- rownames(mat)
rownames(weights) <- rownames(mat)
df <- data.frame(
group=name,
total_reads=total,
per_read_var=per_read_vars,
stringsAsFactors=FALSE)
rownames(df) <- rownames(mat)
list(props=props, weights=weights, df=df)
}
counts_proportions_inner <- function(counts, groups, typecast) {
relevant <- unique(unlist(groups))
counts <- as.matrix(counts[relevant,,drop=FALSE])
results <- map(seq_along(groups), function(i) {
weighted_proportions(counts[groups[[i]],,drop=FALSE], names(groups)[i])
})
rm(counts)
props <- do.call(rbind, map(results, "props"))
weights <- do.call(rbind, map(results, "weights"))
df <- do.call(rbind, map(results, "df"))
rm(results)
SummarizedExperiment(
assays=list(
x=typecast(props),
weights=typecast(weights)),
rowData=df)
}
#' Produce a weitrix of proportions within groups
#'
#' Produce a weitrix of proportions between 0 and 1.
#' The input is read counts at a collection of features
#' in a collection of samples.
#' The features need to be grouped, for example by gene.
#' The proportions will add to 1 within each group.
#'
#' @param counts
#' A matrix of read counts.
#' Rows are peaks and columns are samples.
#'
#' @param grouping
#' A data frame defining the grouping of features.
#' Should have a column "group" naming the group and
#' a column "name" naming the feature
#' (corresponding to \code{rownames(counts)}).
#'
#' @param verbose
#' If TRUE, output some debugging and progress information.
#'
#' @param typecast
#' A function to convert a matrix to a matrix-like type.
#' Applied at the chunk level, before all chunks are \code{rbind}ed.
#' Allows use of memory-efficient matrix representations.
#'
#' @return
#' A SummarizedExperiment object with metadata fields marking it as a weitrix.
#'
#' @examples
#' grouping <- data.frame(
#' group=c("A","A","A","B","B"),
#' name=c("p1","p2","p3","p4","p5"))
#'
#' counts <- rbind(
#' p1=c(1,2,0),
#' p2=c(0,1,0),
#' p3=c(1,0,0),
#' p4=c(0,0,1),
#' p5=c(0,2,1))
#'
#' wei <- counts_proportions(counts, grouping)
#'
#' weitrix_x(wei)
#' weitrix_weights(wei)
#' rowData(wei)
#'
#' @export
counts_proportions <- function(counts, grouping, verbose=TRUE, typecast=identity) {
assert_that(
is.data.frame(grouping),
"group" %in% colnames(grouping),
"name" %in% colnames(grouping))
groups <- split(grouping$name, grouping$group)
parts <-
partitions(length(groups), ncol(counts)/length(groups)*nrow(counts))
if (verbose)
message("Calculating proportions in ",length(parts)," blocks")
result <- lapply(parts, function(part) {
counts_proportions_inner(counts, groups[part], typecast=typecast)
})
result <- do.call(rbind, result)
colnames(result) <- colnames(counts)
result <- bless_weitrix(result, "x", "weights")
metadata(result)$weitrix$trend_formula <-
"~log(per_read_var)+well_knotted_spline(log(total_reads),3)"
result
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.