#' Construct data of percent-spliced-in (PSI) matrices and group-average PSIs
#'
#' `makeMatrix()` constructs a matrix of PSI values of the given alternative
#' splicing events (ASEs).\cr\cr
#' `makeMeanPSI()` constructs a table of "average" PSI values, with samples
#' grouped by a number of given conditions (e.g. "group A" and "group B") of a
#' given condition category (e.g. condition "treatment").
#' See details below.
#' @details
#' Note that this function takes the geometric mean of PSI, by first converting
#' all values to logit(PSI), taking the average logit(PSI) values of each
#' condition, and then converting back to PSI using inverse logit.
#'
#' Samples with low splicing coverage (either due to insufficient sequencing
#' depth or low gene expression) are excluded from calculation of mean PSIs.
#' The threshold can be set using `depth_threshold`. Excluding these samples is
#' appropriate because the uncertainty of PSI is high when the total included /
#' excluded count is low. Note that events where all samples in a condition is
#' excluded will return a value of `NaN`.
#'
#' Using logit-transformed PSI values is appropriate because PSI values are
#' bound to the (0,1) interval, and are often thought to be beta-distributed.
#' The link function often used with beta-distributed models is the logit
#' function, which is defined as `logit(x) = function(x) log(x / (1 - x))`,
#' and is equivalent to [stats::qlogis]. Its inverse is equivalent to
#' [stats::plogis].
#'
#' Users wishing to calculate arithmetic means of PSI are advised to use
#' [makeMatrix], followed by [rowMeans] on subsetted sample columns.
#' @param se (Required) A \linkS4class{NxtSE} object generated by [makeSE]
#' @param event_list A character vector containing the names of ASE events
#' (as given by the `EventName` column of differential ASE results table
#' generated by one of the [ASE-methods], or
#' the rownames of the \linkS4class{NxtSE} object)
#' @param sample_list (default = `colnames(se)`) In `makeMatrix()`, a list of
#' sample names in the given experiment to be included in the returned matrix
#' @param method In `makeMatrix()`, rhe values to be returned
#' (default = "PSI"). It can
#' alternately be "logit" which returns logit-transformed PSI values, or
#' "Z-score" which returns Z-score-transformed PSI values
#' @param depth_threshold (default = 10) Samples with the number of reads
#' supporting either included or excluded isoforms below this values are
#' excluded
#' @param logit_max PSI values close to 0 or 1 are rounded up/down
#' to `plogis(-logit_max)` and `plogis(logit_max)`, respectively. See details.
#' @param na.percent.max (default = 0.1) The maximum proportion of values in
#' the given dataset that were transformed to `NA` because of low splicing
#' depth. ASE events where there are a higher proportion (default 10%) `NA`
#' values will be excluded from the final matrix. Most heatmap
#' functions will spring an error if there are too many NA values in any
#' given row. This option caps the number of NA values to avoid returning
#' this error.
#' @param condition The name of the column containing the condition values in
#' `colData(se)`
#' @param conditionList A list (or vector) of condition values of which to
#' calculate mean PSIs
#' @return
#' For `makeMatrix`: A matrix of PSI (or alternate) values, with
#' columns as samples and rows as ASE events.
#'
#' For `makeMeanPSI`: A 3 column data frame, with the first column containing
#' `event_list` list of ASE events, and the last 2 columns containing the
#' average PSI values of the nominator and denominator conditions.
#' @examples
#' se <- SpliceWiz_example_NxtSE()
#'
#' colData(se)$treatment <- rep(c("A", "B"), each = 3)
#'
#' event_list <- rowData(se)$EventName
#'
#' mat <- makeMatrix(se, event_list[1:10])
#'
#' diag_values <- makeMeanPSI(se, event_list,
#' condition = "treatment",
#' conditionList = list("A", "B")
#' )
#' @name make_plot_data
#' @aliases makeMatrix makeMeanPSI
#' @md
NULL
#' @describeIn make_plot_data constructs a matrix of PSI values of the given
#' alternative splicing events (ASEs)
#' @export
makeMatrix <- function(
se,
event_list,
sample_list = colnames(se),
method = c("PSI", "logit", "Z-score"),
depth_threshold = 10, logit_max = 5, na.percent.max = 0.1
) {
if (!any(event_list %in% rownames(se))) .log(
"None of events in event_list matches those in the NxtSE object")
method <- match.arg(method)
inc <- as.matrix(assay(se, "Included")[
event_list, sample_list, drop = FALSE])
exc <- as.matrix(assay(se, "Excluded")[
event_list, sample_list, drop = FALSE])
mat <- inc / (inc + exc)
mat[inc + exc < depth_threshold] <- NA
if(!is.na(na.percent.max)) {
which_exclude <- rowSums(is.na(mat)) < na.percent.max * ncol(mat)
mat <- mat[which_exclude, , drop = FALSE]
}
if (method == "PSI") {
# essentially M/Cov
} else if (method == "logit") {
mat <- qlogis(mat)
mat[mat > logit_max] <- logit_max
mat[mat < -logit_max] <- -logit_max
} else if (method == "Z-score") {
mat <- mat - rowMeans(mat)
mat <- mat / rowSds(mat)
}
return(as.matrix(mat))
}
#' @describeIn make_plot_data constructs a table of "average" PSI values
#' @export
makeMeanPSI <- function(
se, event_list = rownames(se),
condition,
conditionList,
depth_threshold = 10, logit_max = 10
) {
if (!any(event_list %in% rownames(se))) .log(
"None of events in event_list matches those in the NxtSE object")
mat <- makeMatrix(
se, event_list,
colnames(se)[colData(se)[, condition] %in% conditionList],
method = "logit",
depth_threshold = depth_threshold,
logit_max = logit_max,
na.percent.max = NA
)
if(nrow(mat) < length(event_list) || !all(rownames(mat) == event_list)) {
.log("Error retrieving matrix via makeMatrix")
}
# use logit method to calculate geometric mean
df <- data.frame(EventName = event_list)
for(cond in conditionList) {
if(cond %in% colData(se)[, condition]) {
cond_samples <- colnames(se)[
colData(se)[, condition] == cond]
df$newcond <- plogis(rowMeans(mat[, cond_samples], na.rm = TRUE))
colnames(df)[ncol(df)] <- paste(condition, cond, sep = "_")
} else {
.log(paste(
cond, "not found in", condition, "-", cond, "ignored"
), "message")
df$newcond <- rep(NA, nrow(df))
colnames(df)[ncol(df)] <- paste(condition, cond, sep = "_")
}
}
return(df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.