#' @title Aggregate different rows assigned to the same ID by calculating a
#' weighted mean
#'
#' @description First row means are calculated to summarize across replicates
#' identified by the groupCol in the colData. Then different row means that are
#' assigned to the same feature ID given by the idCol in the rowData are
#' summarized by calculating a weighted mean. This weighted mean is the sum of
#' the squared row means divided by the sum of the row means. If all row means
#' are 0, they remain 0 in the output.
#'
#' @author Fiona Ross
#'
#' @param SE a \code{SummarizedExperiment} object that contains an assay with
#' values to be aggregated, a colData column that assigns samples to their
#' group and a rowData column with IDs to indicate which rows to combine.
#' @param assay the name of the assay in the SummarizedExperiment object that
#' should be aggregated.
#' @param idCol the column name in the rowData of the SummarizedExperiment
#' indicating the feature ID.
#' @param groupCol the column name in the colData of the SummarizedExperiment
#' indicating which columns belong to the same group and should be averaged as
#' replicates, before the weighted mean is calculated across rows.
#' @param log2Transformed a \code{logical} indicating whether values in the
#' assay are log2 transformed. If log2Transformed is TRUE, an exponential
#' transformation will be applied before aggregating the values and another
#' log transformation afterwards.
#'
#' @return The output is a \code{data.frame} with one column for each of the
#' unique names in the groupCol and one row for each of the unique IDs in the
#' idCol. The row and column names are the respective unique values. The entries
#' represent the weighted means for each unique feature ID. If all the input
#' values were NA, the aggregated value is also NA, while for all zero, the
#' output remains zero. If log2Transformed is true the output will be log2
#' transformed again.
#'
#' @examples
#' set.seed(123)
#' meansRows <- sample(1:100, 10, replace = TRUE)
#' dat <- unlist(lapply(meansRows, function(m) {
#' rnorm(n = 5, mean = m, sd = 0.1*m)
#' }))
#' ma <- matrix(dat, nrow = 10, ncol = 5, byrow = TRUE)
#' IDs <- data.frame(ID = sample(c("A", "B", "C", "D"), size = 10, replace = TRUE))
#' Groups <- data.frame(group = c("Y","Y", "Z", "Z", "Z"))
#' mockSE <- SummarizedExperiment::SummarizedExperiment(
#' assays = list(counts = ma),
#' rowData = IDs,
#' colData = Groups)
#' weightedMeanByID(mockSE, "counts", idCol = "ID", log2Transformed = FALSE)
#'
#' @importFrom stats aggregate weighted.mean
#' @importFrom SummarizedExperiment assay assays rowData colData
#' @importFrom dplyr mutate across where
#' @export
weightedMeanByID <- function(SE, assay, idCol = "GENEID", groupCol = "group",
log2Transformed = TRUE) {
.assertPackagesAvailable("stats")
if (!is(SE, "SummarizedExperiment")) {
stop("Provide data to aggregate in a SummarizedExperiment")
}
if (!is.character(assay)) {
stop("Provide the assay name as a character")
}
if (!is.character(idCol)) {
stop("Provide the name of the rowData column as a character")
}
if (!is.character(groupCol)) {
stop("Provide the name of the colData column as a character")
}
if (!is.logical(log2Transformed)) {
stop("log2Transformed must be a logical indicating if",
" data is log2 transformed")
}
if (!idCol %in% colnames(SummarizedExperiment::rowData(SE))) {
stop("idCol must be an existing column name in the rowData of the",
" SummarizedExperiment object")
}
if (!groupCol %in% colnames(SummarizedExperiment::colData(SE))) {
stop("groupCol must be an existing column name in the colData of the",
" SummarizedExperiment object")
}
if (!assay %in% names(SummarizedExperiment::assays(SE))) {
stop("The assay name must be an existing assay of the",
" SummarizedExperiment object")
}
if (any(is.na(SummarizedExperiment::assay(SE)))) {
warning("The assay contains NAs, remaining values will be aggregated.")
}
if (log2Transformed) {
ma <- 2^SummarizedExperiment::assay(SE, assay)
} else {
ma <- SummarizedExperiment::assay(SE, assay)
}
uniqueIDs <- unique(SummarizedExperiment::rowData(SE)[, idCol])
weightedMeanByID <- do.call(cbind, lapply(unique(SE[[groupCol]]), function(g) {
sel <- SE[[groupCol]] == g
ma <- ma[, sel, drop = FALSE]
promoterenr <- as.data.frame(rowMeans(ma, na.rm = TRUE))
promoterenr$gene_id <- SummarizedExperiment::rowData(SE)[, idCol]
weightedMeanByID <- stats::aggregate(
. ~ gene_id,
data = promoterenr,
FUN = function(x) {
if (sum(x) != 0) {
stats::weighted.mean(x, x)
} else if (sum(x) == 0) {
0
}
})
if (log2Transformed) {
weightedMeanByID <- weightedMeanByID |>
dplyr::mutate(across(where(is.numeric), log2))
}
rownames(weightedMeanByID) <- weightedMeanByID$gene_id
weightedMeanByID$gene_id <- NULL
colnames(weightedMeanByID) <- "weightedMean"
if (any(!unique(promoterenr$gene_id) %in% rownames(weightedMeanByID))) {
append <- unique(promoterenr$gene_id)[!unique(promoterenr$gene_id) %in% rownames(weightedMeanByID)]
append <- data.frame(weightedMean = rep(NA, length(append)), row.names = append)
weightedMeanByID <- rbind(weightedMeanByID, append)
}
colnames(weightedMeanByID) <- g
weightedMeanByID <- weightedMeanByID[uniqueIDs, , drop = FALSE]
return(weightedMeanByID)
}))
return(weightedMeanByID)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.