#' Canonical Correspondence Analysis and Redundancy Analysis
#'
#' These functions perform Canonical Correspondence Analysis on data stored
#' in a \code{SummarizedExperiment}.
#'
#' @inheritParams getDominant
#' @inheritParams getDissimilarity
#'
#' @details
#' For \code{run*} a
#' \code{\link[SingleCellExperiment:SingleCellExperiment]{SingleCellExperiment}}
#' or a derived object.
#'
#' @param formula If \code{x} is a
#' \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}}
#' a formula can be supplied. Based on the right-hand side of the given formula
#' \code{colData} is subset to \code{variables}.
#'
#' \code{variables} and \code{formula} can be missing, which turns the CCA analysis
#' into a CA analysis and dbRDA into PCoA/MDS.
#'
#' @param variables \code{Character scalar}. When \code{x} is a \code{SummarizedExperiment},
#' \code{variables} can be used to specify variables from \code{colData}.
#'
#' When \code{x} is a matrix, \code{variables} is a \code{data.frame} or
#' an object coercible to one containing the variables to use.
#'
#' All variables are used. Please subset, if you want to consider only some of them.
#' \code{variables} and \code{formula} can be missing, which turns the CCA analysis
#' into a CA analysis and dbRDA into PCoA/MDS.
#'
#' @param test.signif \code{Logical scalar}. Should the PERMANOVA and analysis of
#' multivariate homogeneity of group dispersions be performed.
#' (Default: \code{TRUE})
#'
#' @param altexp \code{Character scalar} or \code{integer scalar}. Specifies an alternative experiment
#' containing the input data.
#'
#' @param name \code{Character scalar}. A name for the column of the
#' \code{colData} where results will be stored. (Default: \code{"CCA"})
#'
#' @param scores \code{Character scalar}. Specifies scores to be returned. Must be
#' 'wa' (site scores found as weighted averages (cca) or weighted sums (rda) of
#' v with weights Xbar, but the multiplying effect of eigenvalues removed) or
#' 'u' ((weighted) orthonormal site scores). (Default: \code{'wa'})
#'
#' @param exprs_values Deprecated. Use \code{assay.type} instead.
#'
#' @param ... additional arguments passed to vegan::cca or vegan::dbrda and
#' other internal functions.
#' \itemize{
#' \item{\code{method} a dissimilarity measure to be applied in dbRDA and
#' possible following homogeneity test. (By default: \code{method="euclidean"})}
#' \item{\code{scale}: \code{Logical scalar}. Should the expression values be
#' standardized? \code{scale} is disabled when using \code{*RDA} functions.
#' Please scale before performing RDA. (Default: \code{TRUE})}
#' \item{\code{na.action}: \code{function}. Action to take when missing
#' values for any of the variables in \code{formula} are encountered.
#' (Default: \code{na.fail})}
#' \item{\code{full} \code{Logical scalar}. should all the results from the
#' significance calculations be returned. When \code{full=FALSE}, only
#' summary tables are returned. (Default: \code{FALSE})}
#' \item{\code{homogeneity.test}: \code{Character scalar}. Specifies
#' the significance test used to analyse \code{vegan::betadisper} results.
#' Options include 'permanova' (\code{vegan::permutest}), 'anova'
#' (\code{stats::anova}) and 'tukeyhsd' (\code{stats::TukeyHSD}).
#' (By default: \code{homogeneity.test="permanova"})}
#' \item{\code{permutations} a numeric value specifying the number of permutations
#' for significance testing in \code{vegan::anova.cca}. (By default: \code{permutations=999})}
#' }
#'
#' @details
#' *CCA functions utilize \code{vegan:cca} and *RDA functions \code{vegan:dbRDA}.
#' By default, dbRDA is done with euclidean distances, which is equivalent to
#' RDA.
#'
#' Significance tests are done with \code{vegan:anova.cca} (PERMANOVA). Group
#' dispersion, i.e., homogeneity within groups is analyzed with
#' \code{vegan:betadisper} (multivariate homogeneity of groups dispersions (variances))
#' and statistical significance of homogeneity is tested with a test
#' specified by \code{homogeneity.test} parameter.
#'
#' @return
#' For \code{getCCA} a matrix with samples as rows and CCA dimensions as
#' columns. Attributes include calculated \code{cca}/\code{rda} object and
#' significance analysis results.
#'
#' For \code{addCCA} a modified \code{x} with the results stored in
#' \code{reducedDim} as the given \code{name}.
#'
#' @name runCCA
#' @seealso
#' For more details on the actual implementation see \code{\link[vegan:cca]{cca}}
#' and \code{\link[vegan:dbrda]{dbrda}}
#'
#' @examples
#' library(miaViz)
#' data("enterotype", package = "mia")
#' tse <- enterotype
#'
#' # Perform CCA and exclude any sample with missing ClinicalStatus
#' tse <- addCCA(tse,
#' formula = data ~ ClinicalStatus,
#' na.action = na.exclude)
#'
#' # Plot CCA
#' plotCCA(tse, "CCA",
#' colour_by = "ClinicalStatus")
#'
#' # Fetch significance results
#' attr(reducedDim(tse, "CCA"), "significance")
#'
#' tse <- transformAssay(tse, method = "relabundance")
#'
#' # Specify dissimilarity measure
#' tse <- addRDA(tse,
#' formula = data ~ ClinicalStatus,
#' assay.type = "relabundance",
#' method = "bray",
#' name = "RDA_bray")
#'
#' # To scale values when using *RDA functions, use
#' # transformAssay(MARGIN = "features", ...)
#' tse <- transformAssay(tse,
#' method = "standardize",
#' MARGIN = "features")
#'
#' # Data might include taxa that do not vary. Remove those because after
#' # z-transform their value is NA
#' tse <- tse[rowSums(is.na(assay(tse, "standardize"))) == 0, ]
#'
#' # Calculate RDA
#' tse <- addRDA(tse,
#' formula = data ~ ClinicalStatus,
#' assay.type = "standardize",
#' name = "rda_scaled",
#' na.action = na.omit)
#'
#' # Plot RDA
#' plotRDA(tse, "rda_scaled",
#' colour_by = "ClinicalStatus")
#'
#' # A common choice along with PERMANOVA is ANOVA when statistical significance
#' # of homogeneity of groups is analysed. Moreover, full significance test
#' # results can be returned.
#' tse <- addRDA(tse,
#' formula = data ~ ClinicalStatus,
#' homogeneity.test = "anova",
#' full = TRUE)
#'
#' # Example showing how to pass extra parameters, such as 'permutations',
#' # to anova.cca
#' tse <- addRDA(tse,
#' formula = data ~ ClinicalStatus,
#' permutations = 500)
#'
NULL
#' @rdname runCCA
setGeneric("getCCA", signature = c("x"),
function(x, ...)
standardGeneric("getCCA"))
#' @rdname runCCA
setGeneric("addCCA", signature = c("x"),
function(x, ...)
standardGeneric("addCCA"))
#' @rdname runCCA
setGeneric("getRDA", signature = c("x"),
function(x, ...)
standardGeneric("getRDA"))
#' @rdname runCCA
setGeneric("addRDA", signature = c("x"),
function(x, ...)
standardGeneric("addRDA"))
.remove_special_functions_from_terms <- function(terms){
names(terms) <- terms
m <- regexec("^Condition\\(([^\\(\\)]*)\\)$|^([^\\(\\)]*)$", terms)
m <- regmatches(terms, m)
terms <- vapply(m,
function(n){
n <- n[seq.int(2L,length(n))]
n[n != ""]
},
character(1))
terms
}
.get_dependent_var_name <- function(formula){
# Get dependent variable from factors
dep_var <- rownames(attr(terms(formula), "factors"))[1]
# If it is NULL, get it from the formula
if( is.null(dep_var) ){
dep_var <- as.character(formula)[2]
}
return(dep_var)
}
#' @importFrom stats as.formula na.fail
.calculate_cca <- function(
x, formula, variables, scores, scale = TRUE, na.action = na.fail, ...){
# input check
if(!.is_a_bool(scale)){
stop("'scale' must be TRUE or FALSE.", call. = FALSE)
}
#
x <- as.matrix(t(x))
variables <- as.data.frame(variables)
# Instead of letting na.action pass through, give informative error
# about missing values.
if( any(is.na(variables)) && isTRUE(all.equal(na.action, na.fail)) ){
stop("Variables contain missing values. Set na.action to na.exclude",
" to remove samples with missing values.", call. = FALSE)
}
if(ncol(variables) > 0L && !missing(formula)){
dep_var_name <- .get_dependent_var_name(formula)
assign(dep_var_name, x)
# recast formula in current environment
form <- as.formula(paste(as.character(formula)[c(2,1,3)],
collapse = " "))
cca <- vegan::cca(form, data = variables, scale = scale,
na.action = na.action, ...)
X <- cca$CCA
} else if(ncol(variables) > 0L) {
cca <- vegan::cca(X = x, Y = variables, scale = scale,
na.action = na.action, ...)
X <- cca$CCA
} else {
cca <- vegan::cca(X = x, scale = scale, na.action = na.action, ...)
X <- cca$CA
}
# Create the matrix to be returned
ans <- X[[scores]]
attr(ans, "rotation") <- X$v
attr(ans, "eigen") <- X$eig
attr(ans, "cca") <- cca
ans
}
#' @export
#' @rdname runCCA
setMethod("getCCA", "ANY",
function(x, ...){
.calculate_cca(x, ...)
})
#' @importFrom stats terms
#' @importFrom SummarizedExperiment colData
.get_variables_from_data_and_formula <- function(x, formula){
if(missing(formula)){
return(NULL)
}
terms <- rownames(attr(terms(formula),"factors"))
terms <- terms[terms != as.character(formula)[2L]]
terms <- .remove_special_functions_from_terms(terms)
if(!all(terms %in% colnames(colData(x)))){
stop("All variables on the right hand side of 'formula' must be ",
"present in colData(x).", call. = FALSE)
}
colData(x)[,terms,drop=FALSE]
}
.get_formula_from_data_and_variables <- function(x, variables){
# Check that variables specify columns from colData
if( all(!is.character(variables)) ){
stop("'variables' should be a character specifying variables from ",
"colData(x).",
call. = FALSE)
}
if(!all(variables %in% colnames(colData(x)))){
stop("All variables must be ",
"present in colData(x).", call. = FALSE)
}
# Create a formula based on variables
formula <- as.formula(paste0("x ~ ",
paste(variables, collapse = " + ")))
formula
}
#' @export
#' @rdname runCCA
setMethod("getCCA", "SummarizedExperiment",
function(x, formula, variables, test.signif = TRUE,
assay.type = assay_name, assay_name = exprs_values, exprs_values = "counts",
scores = "wa", ...)
{
# Check assay.type and get assay
.check_assay_present(assay.type, x)
mat <- assay(x, assay.type)
# Check test.signif
if( !.is_a_bool(test.signif) ){
stop("'test.signif' must be TRUE or FALSE.", call. = FALSE)
}
if( !(.is_a_string(scores) && scores %in% c("wa", "u")) ){
stop("'scores' must be 'wa' or 'u'.",
call. = FALSE)
}
# If formula is missing but variables are not
if( !missing(variables) && missing(formula) ){
# Create a formula based on variables
formula <- .get_formula_from_data_and_variables(x, variables)
# Get the data from colData
variables <- colData(x)[ , variables, drop = FALSE]
} else{
# Otherwise if formula is provided, get variables based on formula
# (If formula is not provided variables is just empty data.frame)
variables <- .get_variables_from_data_and_formula(x, formula)
}
cca <- .calculate_cca(mat, formula, variables, scores, ...)
# Test significance if specified
if( test.signif ){
res <- .test_rda(mat, attr(cca, "cca"), variables, ...)
attr(cca, "significance") <- res
}
return(cca)
}
)
#' @export
#' @rdname runCCA
#' @aliases getCCA
calculateCCA <- function(x,...){
getCCA(x,...)
}
#' @export
#' @rdname runCCA
#' @importFrom SingleCellExperiment reducedDim<-
setMethod("addCCA", "SingleCellExperiment",
function(x, formula, variables, altexp = NULL, name = "CCA", ...)
{
if (!is.null(altexp)) {
y <- altExp(x, altexp)
} else {
y <- x
}
# Calculate CCA
cca <- getCCA(y, formula, variables, ...)
# Add object to reducedDim
x <- .add_object_to_reduceddim(x, cca, name = name, ...)
return(x)
}
)
#' @export
#' @rdname runCCA
#' @aliases addCCA
runCCA <- function(x,...){
addCCA(x,...)
}
#' @importFrom vegan sppscores<-
#' @importFrom stats na.fail
.calculate_rda <- function(
x, formula, variables, scores, method = distance,
distance = "euclidean", na.action = na.fail, ...){
#
# Transpose and ensure that the table is in matrix format
x <- as.matrix(t(x))
# If formula is missing (vegan:dbrda requires formula)
if( missing(formula) ){
formula <- x ~ 1
}
# Get the dependent variable
dep_var_name <- .get_dependent_var_name(formula)
# Dependent variable is the assay x. It does not matter what is the left-side
# of the formula; it is always the assay
assign(dep_var_name, x)
# recast formula in current environment
form <- as.formula(paste(as.character(formula)[c(2,1,3)],
collapse = " "))
# If variables are provided
if( !missing(variables) ){
# Convert into data.frame
variables <- as.data.frame(variables)
# Instead of letting na.action pass through, give informative error
# about missing values.
if( any(is.na(variables)) && isTRUE(all.equal(na.action, na.fail)) ){
stop("Variables contain missing values. Set na.action to ",
"na.exclude to remove samples with missing values.",
call. = FALSE)
}
# Calculate RDA with variables
rda <- vegan::dbrda(formula = form, data = variables, distance = method,
na.action = na.action, ...)
} else{
# Otherwise calculate RDA without variables
rda <- vegan::dbrda(formula = form, distance = method,
na.action = na.action, ...)
}
# Get CCA
if( !is.null(rda$CCA) ){
X <- rda$CCA
# Get species scores. Get only those samples that are included in rda
# object (some might missing due missing metadata)
species_scores <- x[ rownames(X[[scores]]), ]
} else{
# If variable(s) do not explain inertia at all, CCA is NULL. Then take CA
X <- rda$CA
# Get species scores (whole data since metadata was not in input)
species_scores <- x
# If scores is "wa", but they are not available
if( scores == "wa" ){
warning("'wa' scores are not available. Defaults to 'u'.", call. = FALSE)
scores <- "u"
}
}
# Add species scores since they are missing from dbrda object (in cca they are included)
sppscores(rda) <- species_scores
# Create the matrix to be returned
ans <- X[[scores]]
attr(ans, "rotation") <- X$v
attr(ans, "eigen") <- X$eig
attr(ans, "rda") <- rda
ans
}
# Add RDA/CCA to reducedDim
.add_object_to_reduceddim <- function(
tse, rda, name, subset.result = TRUE, ...){
# Test subset
if( !.is_a_bool(subset.result) ){
stop("'subset.result' must be TRUE or FALSE.", call. = FALSE)
}
#
# If samples do not match / there were samples without appropriate metadata
# and they are now removed
if( !all(colnames(tse) %in% rownames(rda)) && subset.result ){
# Take a subset
tse <- tse[ , rownames(rda) ]
# Give a message
warning(
"Certain samples are removed from the result because they did ",
"not include sufficient metadata.", call. = FALSE)
} else if( !all(colnames(tse) %in% rownames(rda)) && !subset.result ){
# If user do not want to subset the data
# Save attributes from the object
attr <- attributes(rda)
attr <- attr[ !names(attr) %in% c("dim", "dimnames")]
# Find samples that are removed
samples_not_found <- setdiff(colnames(tse), rownames(rda))
# Create an empty matrix
mat <- matrix(nrow = length(samples_not_found), ncol=ncol(rda))
rownames(mat) <- samples_not_found
# Combine the data and order it in correct order
rda <- rbind(rda, mat)
rda <- rda[colnames(tse), ]
# Add attributes
attr <- c(attributes(rda), attr)
attributes(rda) <- attr
}
# Add object to reducedDIm
reducedDim(tse, name) <- rda
return(tse)
}
# Perform PERMANOVA and homogeneity analysis to RDA object
#' @importFrom vegan anova.cca betadisper
#' @importFrom stats anova
.test_rda <- function(mat, rda, variables, ...){
# Perform permanova for whole model and for variables
permanova_model <- anova.cca(rda, by = NULL, ...)
if( !is.null(variables) ){
res <- .test_rda_vars(
mat, rda, variables, permanova_model, by = "margin", ...)
} else{
res <- list(permanova = permanova_model)
}
return(res)
}
# Test association of variables to ordination
.test_rda_vars <- function(
mat, rda, variables, permanova_model, by = "margin", full = FALSE,
homogeneity.test = "permanova", method = distance, distance = "euclidean",
...){
# Check full parameter
if( !.is_a_bool(full) ){
stop("'full' must be TRUE or FALSE.", call. = FALSE)
}
# Check homogeneity.test
if( !(is.character(homogeneity.test) &&
length(homogeneity.test) == 1 &&
homogeneity.test %in% c("permanova", "anova", "tukeyhsd")) ){
stop("'homogeneity.test' must be 'permanova', 'anova', or 'tukeyhsd'.",
call. = FALSE)
}
#
# Perform PERMANOVA
permanova <- anova.cca(rda, by = by, ...)
# Create a table from the results
# PERMANOVAs
table_model <- as.data.frame(permanova_model)
permanova_tab <- as.data.frame(permanova)
# Take only model results
permanova_tab <- rbind(table_model[1, ], permanova_tab)
# Add info about total variance
permanova_tab[ , "Total variance"] <- permanova_tab["Model", 2] + permanova_tab["Residual", 2]
# Add info about explained variance
permanova_tab[ , "Explained variance"] <- permanova_tab[ , 2] / permanova_tab[ , "Total variance"]
# Perform homogeneity analysis
mat <- t(mat)
# Get the dissimilarity matrix based on original dissimilarity index
# provided by user. If the analysis is CCA, disable method; calculate
# always euclidean distances because CCA is based on Euclidean distances.
if( length(class(rda)) == 1 && is(rda, 'cca') ){
dist_mat <- vegdist(mat, method = "euclidean")
} else{
dist_mat <- vegdist(mat, method = method, ...)
}
# For all variables run the analysis
homogeneity <- lapply(colnames(variables), function(x){
# Get variable values
var <- variables[[x]]
# Run betadisper.
# Suppress possible warnings: "some squared distances are negative and changed to zero"
# Suppress possible messages: "missing observations due to 'group' removed"
suppressWarnings(
suppressMessages(
betadisper_res <- betadisper(dist_mat, group = var)
)
)
# Run significance test
significance <- .homogeneity_significance(betadisper_res, homogeneity.test, ...)
# Return the results as a list
models <- list(betadisper_res, significance[["obj"]])
names(models) <- c("betadisper", homogeneity.test)
res <- list(
models = models,
table = significance[["table"]]
)
return(res)
})
names(homogeneity) <- colnames(variables)
# Get models
homogeneity_model <- lapply(homogeneity, function(x) x[["models"]])
names(homogeneity_model) <- colnames(variables)
# Get tables
homogeneity_tab <- lapply(homogeneity, function(x) x[["table"]])
# Combine tables
homogeneity_tab <- do.call(rbind, homogeneity_tab)
# Return whole data or just a tables
if( full ){
res <- list(
permanova = list(
summary = permanova_tab,
model = permanova_model,
variables = permanova),
homogeneity = list(
summary = homogeneity_tab,
variables = homogeneity_model)
)
} else{
res <- list(
permanova = permanova_tab,
homogeneity = homogeneity_tab)
}
return(res)
}
# Perform statistical test for group homogeneity results
.homogeneity_significance <- function(betadisper_res, homogeneity.test, ...){
# Run specified significance test
if( homogeneity.test == "anova" ){
res <- stats::anova(betadisper_res, ...)
} else if ( homogeneity.test == "tukeyhsd" ){
res <- stats::TukeyHSD(betadisper_res, ...)
} else{
res <- vegan::permutest(betadisper_res, ...)
}
# Get summary table from the results
if( homogeneity.test == "anova" ){
tab <- as.data.frame(res)
} else if( homogeneity.test == "tukeyhsd" ){
tab <- res[["group"]]
} else{
tab <- res[["tab"]]
}
# Modify permanova/anova table
if( homogeneity.test != "tukeyhsd" ){
# Add info about total variance
tab[ , "Total variance"] <- tab["Groups", "Sum Sq"] + tab["Residuals", "Sum Sq"]
# Add info about explained variance
tab[ , "Explained variance"] <- tab[ , "Sum Sq"] / tab[ , "Total variance"]
# Get only groups row (drop residuals row)
tab <- tab[1, ]
}
# Create list from the object and results
res <- list(
obj = res,
table = tab
)
return(res)
}
#' @export
#' @rdname runCCA
setMethod("getRDA", "ANY",
function(x, ...){
.calculate_rda(x, ...)
})
#' @export
#' @rdname runCCA
setMethod("getRDA", "SummarizedExperiment",
function(x, formula, variables, test.signif = TRUE,
assay.type = assay_name, assay_name = exprs_values, exprs_values = "counts",
scores = "wa", ...)
{
# Check assay.type and get assay
.check_assay_present(assay.type, x)
mat <- assay(x, assay.type)
# Check test.signif
if( !.is_a_bool(test.signif) ){
stop("'test.signif' must be TRUE or FALSE.", call. = FALSE)
}
if( !(.is_a_string(scores) && scores %in% c("wa", "u", "v")) ){
stop("'scores' must be 'wa', 'u', or 'v'.",
call. = FALSE)
}
# If formula is missing but variables are not
if( !missing(variables) && missing(formula) ){
# Create a formula based on variables
formula <- .get_formula_from_data_and_variables(x, variables)
# Get the data from colData
variables <- colData(x)[ , variables, drop = FALSE]
} else{
# Otherwise if formula is provided, get variables based on formula
# (If formula is not provided variables is just empty data.frame)
variables <- .get_variables_from_data_and_formula(x, formula)
}
# Calculate RDA
rda <- .calculate_rda(mat, formula, variables, scores, ...)
# Test significance if specified
if( test.signif ){
res <- .test_rda(mat, attr(rda, "rda"), variables, ...)
attr(rda, "significance") <- res
}
return(rda)
}
)
#' @export
#' @rdname runCCA
#' @aliases getRDA
calculateRDA <- function(x,...){
getRDA(x,...)
}
#' @export
#' @rdname runCCA
#' @importFrom SingleCellExperiment reducedDim<-
setMethod("addRDA", "SingleCellExperiment",
function(x, formula, variables, altexp = NULL, name = "RDA", ...)
{
if (!is.null(altexp)) {
y <- altExp(x, altexp)
} else {
y <- x
}
# Calculate RDA
rda <- getRDA(y, formula, variables, ...)
# Add object to reducedDim
x <- .add_object_to_reduceddim(x, rda, name = name, ...)
return(x)
}
)
#' @export
#' @rdname runCCA
#' @aliases addRDA
runRDA <- function(x,...){
addRDA(x,...)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.