## Package dependencies. library() stops with an error when a package is
## missing, whereas require() only returns FALSE and lets the script continue
## until some later call fails obscurely.
library(vegan)
library(scales)
library(ape)
library(phyloseq)
#' Compute incidence matrix of a tree
#'
#' @title incidenceMatrix
#' @param phy Required. A \code{phylo} class object
#' @return An incidence matrix M of size ntaxa(tree) x nedges(tree) where
#'         M[i,j] is set to 1 if taxon i descends from edge j and 0 otherwise.
#'         The edge leading to tip i is attached as attribute "pendant.edges".
#' @note incidenceMatrix assumes that the tree is rooted. If it is not, a
#'       warning is issued and the resulting matrix may be meaningless.
incidenceMatrix <- function(phy) {
  if (!is.rooted(phy)) {
    warning("Tree is not rooted, incidence matrix may be meaningless")
  }
  ## Construct incidence matrix of the tree (taxa x edge matrix).
  ## Columns are named after the child node of each edge.
  ntaxa <- length(phy$tip.label)
  nedges <- nrow(phy$edge)
  incidence <- matrix(0,
                      nrow = ntaxa,
                      ncol = nedges,
                      dimnames = list(phy$tip.label, phy$edge[, 2]))
  ## Incidence of internal edges. prop.part() returns clade compositions
  ## indexed by (shifted) internal node number; the first clade is the root
  ## and has no parent edge, hence it is skipped. seq_along(...)[-1] is empty
  ## when there is a single partition (no 2:1 reversed-sequence trap).
  phy.part <- prop.part(phy)
  for (i in seq_along(phy.part)[-1]) {
    edge <- which(phy$edge[, 2] == i + ntaxa) ## node numbers are shifted by ntaxa
    incidence[phy.part[[i]], edge] <- 1
  }
  ## Incidence of pendant edges, set in one vectorized assignment.
  ## pendant.edges[i] is the edge leading to tip i.
  pendant.edges <- match(seq_len(ntaxa), phy$edge[, 2])
  incidence[cbind(seq_len(ntaxa), pendant.edges)] <- 1
  attr(incidence, "pendant.edges") <- pendant.edges
  return(incidence)
}
#' Compute edge PCA components of a metagenomic sample
#' using the edgePCA idea developed in Matsen and Evans (2012)
#' with regularization of the edges for the PCA part as proposed in
#' Shen, H. and Huang, J. Z. (2008).
#'
#' @title edgePCA
#' @param physeq Required. A \code{\link{phyloseq-class}} class object
#' @param number.components Optional. Numeric. Number of components used in the PCA, defaults to 2.
#' @param method Optional. PCA method. Any unambiguous abbreviation of 'normal',
#'                 'sparse' and 'regularized'. Defaults to 'normal' (the first
#'                 choice, as selected by match.arg).
#' @param number.edges Optional. Integer. Number of non-zero loadings for each
#'                 component in the sparse PCA setting.
#' @param ... Optional. Additional parameters passed on to sparsePca,
#'                 regularizedPca and pca, such as number of iterations and
#'                 convergence criterion.
#' @return An \code{edgepca} class object with components:
#'  - \item{loadings}: The loadings of the principal component axis
#'  - \item{scores}: The score of the samples
#'  - \item{values}: A data frame with components eigenvalues, relative eigenvalues and
#'                   cumulative eigenvalues.
#' @note scores and extract_eigenvalues methods are associated with 'edgepca'.
#' @references Shen, H. and Huang, J. Z. (2008). Sparse principal component
#'             analysis via regularized low rank matrix approximation. _Journal
#'             of Multivariate Analysis_ *99*, 1015-1034.
#' @references Matsen, F. A. and Evans, S. (2012). Edge Principal Components
#'             and Squash Clustering: Using the Special Structure of Phylogenetic
#'             Placement Data for Sample Comparison. _Plos One_ *8*(3):e56859.
#' @seealso \code{\link{pca}}, \code{\link{sparsePca}}, \code{\link{regularizedPca}},
edgePCA <- function(physeq,
                    number.components = 2,
                    method = c("normal", "sparse", "regularized"),
                    number.edges = 50,
                    ...) {
  ## Extract otu_table matrix, oriented samples x taxa
  x <- as(otu_table(physeq), "matrix")
  if (taxa_are_rows(physeq)) { x <- t(x) }
  phy <- phy_tree(physeq)
  ## Scale counts to frequencies (row-wise normalization)
  x <- x/rowSums(x)
  ## Construct incidence matrix of the tree and transform it to contrasts:
  ## entries become +1 (descends from edge) / -1 (does not)
  incidence <- incidenceMatrix(phy)
  incidence <- 2 * (incidence - 0.5)
  ## Order community table according to edge order and create
  ## mass difference matrix (samples x edges)
  x <- x[ , rownames(incidence), drop = FALSE]
  mdm <- x %*% incidence
  ## Correct number.edges if needed: recycle a scalar to one value per
  ## component, and fall back to number.edges[1] on a length mismatch
  if (length(number.edges) == 1) {
    number.edges <- rep(number.edges, number.components)
  }
  if (length(number.edges) != number.components) {
    warning("number.edges is not long enough, set to repeats of number.edges[1]")
    number.edges <- rep(number.edges[1], number.components)
  }
  ## Compute the first number.components axes from the mass difference matrix
  method <- match.arg(method)
  results <- switch(method,
                    sparse = sparsePca(mdm, number.components, number.edges, ...),
                    normal = pca(mdm, number.components, ...),
                    regularized = regularizedPca(mdm, number.components, ...))
  class(results) <- c("edgepca", class(results))
  return(results)
}
#' Perform a principal component analysis on a given data matrix using
#' Singular Value Decomposition.
#'
#' @title pca
#' @param x Required. Numeric matrix.
#' @param number.components Optional. Integer. Number of components returned by the PCA.
#'                 Defaults to NULL which returns all components.
#' @param center Optional. Logical. Should x be shifted to be (column-wise) 0-centered?
#'                 Defaults to TRUE. Alternatively, a vector of length equal to the
#'                 number of columns in x. The value is passed on to scale.
#' @param scale. Optional. Logical. Should the variables be normalized to have unit
#'                 variance? Defaults to FALSE. Alternatively, a vector of length
#'                 equal to the number of columns in x. The value is passed on to scale.
#' @param ... Optional. Ignored. Accepted so that callers (e.g. \code{edgePCA})
#'                 can forward method-specific arguments without raising an error.
#' @return A list (class "pca") with components
#'  - \item{number.components}: The number of components used
#'  - \item{loadings}: The matrix of variable loadings
#'  - \item{scores}: The matrix of sample scores
#'  - \item{values}: A list with components
#'    - \item{Eigenvalues}: the eigenvalues of the covariance/correlation
#'    - \item{Relative_eig}: the normalized eigenvalues (sum up to 1)
#'    - \item{Cumul_eig}: the cumulative eigenvalues (useful for scree plot)
#'  - \item{center, scale}: centering and scaling vectors, if used.
#'                 Otherwise, FALSE.
#' @seealso \code{\link{edgePCA}}, \code{\link{sparsePca}}, \code{\link{regularizedPca}},
pca <- function(x, number.components = NULL, center = TRUE, scale. = FALSE, ...) {
  x <- as.matrix(x)
  ## Construct variable and sample names
  variable.names <- colnames(x)
  if (is.null(variable.names)) {
    variable.names <- paste("var", seq_len(ncol(x)), sep = ".")
  }
  sample.names <- rownames(x)
  if (is.null(sample.names)) {
    sample.names <- seq_len(nrow(x))
  }
  ## Center/scale the matrix; scale() records the vectors used as attributes
  x <- scale(x, center = center, scale = scale.)
  cen <- attr(x, "scaled:center")
  sc <- attr(x, "scaled:scale")
  if (!is.null(sc) && any(sc == 0)) {
    stop("cannot rescale a constant/zero column to unit variance")
  }
  ## Check for missing values
  if (any(is.na(x))) {
    stop("cannot handle missing values")
  }
  ## Cap the number of components; min() silently drops a NULL argument,
  ## so the NULL default yields all min(n, p) components
  number.components <- min(number.components, ncol(x), nrow(x))
  ## SVD: x = U D V'; loadings are V, scores are x V
  x.svd <- svd(x, nu = 0, nv = number.components)
  loadings <- x.svd$v
  scores <- x %*% loadings
  ## Eigenvalues of the covariance (or correlation) matrix
  eigenvalues <- (x.svd$d)^2 / max(1, nrow(x) - 1)
  ## Add sensible names to loadings and scores
  dimnames(loadings) <- list(variable.names,
                             paste("Axis", seq_len(ncol(loadings)), sep = "."))
  dimnames(scores) <- list(sample.names,
                           paste("Axis", seq_len(ncol(loadings)), sep = "."))
  ## Compute relative and cumulative eigenvalues
  values <- list(Eigenvalues = eigenvalues,
                 Relative_eig = eigenvalues / sum(eigenvalues),
                 Cumul_eig = cumsum(eigenvalues) / sum(eigenvalues))
  ## Construct results and return it.
  ## BUG FIX: ifelse(center, cen, FALSE) is vectorized over the length-1 test
  ## and returned only cen[1]; keep the full centering/scaling vectors
  ## (same convention as stats::prcomp).
  result <- list(number.components = number.components,
                 scores = scores,
                 loadings = loadings,
                 values = values,
                 center = if (is.null(cen)) FALSE else cen,
                 scale = if (is.null(sc)) FALSE else sc)
  class(result) <- "pca"
  return(invisible(result))
}
#' Perform a regularized principal component analysis on a given data matrix using
#' Singular Value Decomposition and shrinkage of the singular values.
#'
#' @title regularizedPca
#' @param x Required. Numeric matrix.
#' @param number.components Optional. Integer. Number of components used for denoising.
#'                 Defaults to 10 and is automatically reduced to min(dim(x)) - 1
#'                 if 10 is too high.
#' @param center Optional. Logical. Should x be shifted to be (column-wise) 0-centered?
#'                 Defaults to TRUE. Alternatively, a vector of length equal to the
#'                 number of columns in x. The value is passed on to scale.
#' @param scale. Optional. Logical. Should the variables be normalized to have unit
#'                 variance? Defaults to FALSE. Alternatively, a vector of length
#'                 equal to the number of columns in x. The value is passed on to scale.
#' @return A list (classes "rpca", "pca") with components
#'  - \item{number.components}: The number of components used
#'  - \item{loadings}: The matrix of variable loadings
#'  - \item{scores}: The matrix of sample scores
#'  - \item{values}: A list with components
#'    - \item{Eigenvalues}: the shrunken eigenvalues
#'    - \item{Relative_eig}: the normalized eigenvalues (sum up to 1)
#'    - \item{Cumul_eig}: the cumulative eigenvalues (useful for scree plot)
#'  - \item{center, scale}: centering and scaling vectors, if used.
#'                 Otherwise, FALSE.
#' @seealso \code{\link{pca}}, \code{\link{edgePCA}}, \code{\link{sparsePca}}
regularizedPca <- function(x, number.components = 10, center = TRUE, scale. = FALSE) {
  x <- as.matrix(x)
  n <- nrow(x)
  p <- ncol(x)
  ## Construct variable and sample names
  variable.names <- colnames(x)
  if (is.null(variable.names)) {
    variable.names <- paste("var", seq_len(p), sep = ".")
  }
  sample.names <- rownames(x)
  if (is.null(sample.names)) {
    sample.names <- seq_len(n)
  }
  ## Center/scale the matrix; scale() records the vectors used as attributes
  x <- scale(x, center = center, scale = scale.)
  cen <- attr(x, "scaled:center")
  sc <- attr(x, "scaled:scale")
  if (!is.null(sc) && any(sc == 0)) {
    stop("cannot rescale a constant/zero column to unit variance")
  }
  ## Check for missing values
  if (any(is.na(x))) {
    stop("cannot handle missing values")
  }
  ## Choose number of components (strictly smaller than both dimensions so
  ## that at least one singular value is left to estimate the noise level)
  if (is.null(number.components)) {
    stop("Please specify the number of components used for denoising")
  } else {
    number.components <- min(number.components, ncol(x) - 1, nrow(x) - 1)
  }
  ## apply svd
  x.svd <- svd(x, nu = number.components, nv = number.components)
  ## Shrink eigenvalues: the residual singular values estimate the noise
  ## variance, which is then subtracted from the leading components.
  ## NOTE(review): the denominator is presumably a degrees-of-freedom
  ## correction in the spirit of regularized PCA (Josse & Husson) — confirm
  ## against the intended reference; it can be non-positive for small n, p.
  S <- number.components
  sigma.square <- sum(x.svd$d[-c(1:S)]^2) / (n*p - p - n*S - p*S + S^2)
  shrinked.eigenvalues <- (x.svd$d[1:S]^2 -
                           ifelse(p < n, n, p * (1 - 1/n)) * sigma.square) / x.svd$d[1:S]
  ## Extract eigenvectors (loadings), eigenvalues and scores.
  ## BUG FIX: diag(v) on a length-1 vector builds an identity matrix of size
  ## v; force the intended S x S diagonal matrix with nrow = S.
  loadings <- x.svd$v
  scores <- x.svd$u %*% diag(shrinked.eigenvalues, nrow = S)
  eigenvalues <- shrinked.eigenvalues
  ## Add sensible names to loadings and scores
  dimnames(loadings) <- list(variable.names,
                             paste("Axis", seq_len(ncol(loadings)), sep = "."))
  dimnames(scores) <- list(sample.names,
                           paste("Axis", seq_len(ncol(loadings)), sep = "."))
  ## Compute relative and cumulative eigenvalues
  values <- list(Eigenvalues = eigenvalues,
                 Relative_eig = eigenvalues / sum(eigenvalues),
                 Cumul_eig = cumsum(eigenvalues) / sum(eigenvalues))
  ## Construct results and return it.
  ## BUG FIX: ifelse(center, cen, FALSE) returned only cen[1]; keep the whole
  ## centering/scaling vectors when they were used.
  result <- list(number.components = number.components,
                 scores = scores,
                 loadings = loadings,
                 values = values,
                 center = if (is.null(cen)) FALSE else cen,
                 scale = if (is.null(sc)) FALSE else sc)
  class(result) <- c("rpca", "pca")
  return(invisible(result))
}
#' Perform a sparse principal component analysis on a given data matrix using
#' the NIPALS algorithm.
#'
#' @title sparsePca
#' @param x Required. Numeric matrix.
#' @param number.components Optional. Integer. Number of components to extract.
#'                 Defaults to 10 and is automatically reduced to min(dim(x))
#'                 if 10 is too high.
#' @param number.variables Optional. Integer vector of length number.components specifying
#'                 the number of non-zero loadings for each component. By default, all
#'                 loadings can be different from 0 (no sparsity).
#' @param center Optional. Logical. Should x be shifted to be (column-wise) 0-centered?
#'                 Defaults to TRUE. Alternatively, a vector of length equal to the
#'                 number of columns in x. The value is passed on to scale.
#' @param scale. Optional. Logical. Should the variables be normalized to have unit
#'                 variance? Defaults to FALSE. Alternatively, a vector of length
#'                 equal to the number of columns in x. The value is passed on to scale.
#' @param iter.max Optional. Integer. The maximum number of NIPALS iterations for
#'                 each component. Defaults to 500.
#' @param tol Optional. A (small) positive numeric used to check convergence of loadings
#'                 for each component. Defaults to 1e-09.
#' @return A list (classes "sparsePca", "pca") with components
#'  - \item{number.components}: The number of components used
#'  - \item{loadings}: The matrix of variable loadings
#'  - \item{scores}: The matrix of sample scores
#'  - \item{values}: A list with components
#'    - \item{Eigenvalues}: per-component explained sum of squares
#'    - \item{Relative_eig}: the normalized eigenvalues (sum up to 1)
#'    - \item{Cumul_eig}: the cumulative eigenvalues (useful for scree plot)
#'  - \item{center, scale}: centering and scaling vectors, if used.
#'                 Otherwise, FALSE.
#' @seealso \code{\link{pca}}, \code{\link{edgePCA}}, \code{\link{regularizedPca}},
sparsePca <- function(x, number.components = 10,
                      number.variables = rep(ncol(x), number.components),
                      center = TRUE, scale. = FALSE,
                      iter.max = 500, tol = 1e-09) {
  x <- as.matrix(x)
  n <- nrow(x)
  p <- ncol(x)
  ## Construct variable and sample names
  variable.names <- colnames(x)
  if (is.null(variable.names)) {
    variable.names <- paste("var", seq_len(p), sep = ".")
  }
  sample.names <- rownames(x)
  if (is.null(sample.names)) {
    sample.names <- seq_len(n)
  }
  ## Center/scale the matrix; scale() records the vectors used as attributes
  x <- scale(x, center = center, scale = scale.)
  cen <- attr(x, "scaled:center")
  sc <- attr(x, "scaled:scale")
  if (!is.null(sc) && any(sc == 0)) {
    stop("cannot rescale a constant/zero column to unit variance")
  }
  ## Check for missing values
  if (any(is.na(x))) {
    stop("cannot handle missing values")
  }
  ## Check consistency of number.variables
  if (length(number.variables) != number.components) {
    stop("number.variables must be a vector of length ", number.components, ".")
  }
  if (any(number.variables > p)) {
    wrong.components <- which(number.variables > p)
    warning("You selected more than p variables for components",
            paste(wrong.components, collapse = ", "),
            ". Automatically changed to p.")
    number.variables[wrong.components] <- p
  }
  ## Check number of components
  number.components <- min(number.components, n, p)
  ## Storage for scores (u), loadings (v), explained variance (d) and
  ## convergence diagnostics of the NIPALS iterations
  vect.d <- rep(NA, number.components)
  iterations <- rep(0, number.components)
  convergence <- rep(FALSE, number.components)
  mat.u <- matrix(NA, nrow = n, ncol = number.components)
  mat.v <- matrix(NA, nrow = p, ncol = number.components)
  x.new <- x
  for (h in seq_len(number.components)) {
    ## Initialization with the leading singular triplet of the deflated matrix
    x.svd <- svd(x.new, nu = 1, nv = 1)
    u.new <- x.svd$u[, 1]
    v.new <- x.svd$d[1] * x.svd$v[, 1]
    v.conv <- u.conv <- FALSE
    if (h >= 2) {
      x.h <- x %*% mat.v[, 1:(h - 1)]
    }
    variables.h <- p - number.variables[h] ## number of variables to discard (at least)
    iter <- 0
    ## Alternate updates of v and u until both stabilize
    while ((!v.conv || !u.conv) && iter < iter.max) {
      iter <- iter + 1
      v.temp <- t(x.new) %*% u.new
      u.old <- u.new
      v.old <- v.new
      ## Update and sparsify v by soft-thresholding, discarding (at least)
      ## variables.h coordinates.
      ## NOTE(review): when variables.h == 0 (no sparsity requested) v is
      ## never updated inside the loop and keeps its SVD initialization —
      ## confirm this is intended.
      if (variables.h != 0) {
        threshold <- sort(abs(v.temp))[variables.h]
        v.new <- ifelse(abs(v.temp) > threshold,
                        v.temp - sign(v.temp) * threshold,
                        0)
      }
      ## Update and normalize u; for h >= 2, orthogonalize against previous
      ## components by taking least-squares residuals
      if (h == 1) {
        u.new <- as.vector(x.new %*% v.new)
        u.new <- u.new / sqrt(sum(u.new^2))
      } else {
        u.new <- lsfit(y = x %*% v.old,
                       x = x.h,
                       intercept = FALSE,
                       tolerance = tol)$res
        u.new <- u.new / sqrt(sum(u.new^2))
      }
      ## Check convergence of both vectors
      if (sum((u.new - u.old)^2) < tol) {
        u.conv <- TRUE
      }
      if (sum((v.new - v.old)^2) < tol) {
        v.conv <- TRUE
      }
    }
    ## Keep log of convergence
    iterations[h] <- iter
    convergence[h] <- (v.conv && u.conv)
    ## Normalize v and store the component
    mat.v[, h] <- v.new / sqrt(sum(v.new^2))
    mat.u[, h] <- u.new
    ## Deflate x.new with its leading singular triplet
    x.new <- x.new - x.svd$d[1] * tcrossprod(x.svd$u, x.svd$v)
    ## Total sum of squares of x projected onto the span of the first h
    ## loadings (cumulative explained variance)
    x.reconstructed.h <- x %*% mat.v[, 1:h, drop = FALSE] %*%
      solve(t(mat.v[, 1:h, drop = FALSE]) %*% mat.v[, 1:h, drop = FALSE]) %*%
      t(mat.v[, 1:h, drop = FALSE])
    vect.d[h] <- sum(x.reconstructed.h^2)
  }
  ## Extract eigenvectors (loadings), eigenvalues and scores.
  ## BUG FIX: the per-component eigenvalues were computed as -diff(c(0, vect.d)),
  ## but vect.d is increasing in h, so the negation produced negative values;
  ## plain successive differences give the variance captured by each component.
  loadings <- mat.v
  scores <- mat.u
  eigenvalues <- diff(c(0, vect.d))
  ## Add sensible names to loadings and scores
  dimnames(loadings) <- list(variable.names,
                             paste("Axis", seq_len(ncol(loadings)), sep = "."))
  dimnames(scores) <- list(sample.names,
                           paste("Axis", seq_len(ncol(loadings)), sep = "."))
  ## Compute relative and cumulative eigenvalues
  values <- list(Eigenvalues = eigenvalues,
                 Relative_eig = eigenvalues / sum(eigenvalues),
                 Cumul_eig = cumsum(eigenvalues) / sum(eigenvalues))
  ## Construct results and return it.
  ## BUG FIX: ifelse(center, cen, FALSE) returned only cen[1]; keep the whole
  ## centering/scaling vectors when they were used.
  result <- list(number.components = number.components,
                 scores = scores,
                 loadings = loadings,
                 values = values,
                 center = if (is.null(cen)) FALSE else cen,
                 scale = if (is.null(sc)) FALSE else sc)
  class(result) <- c("sparsePca", "pca")
  return(invisible(result))
}
#' Perform a sparse principal component analysis on a given data matrix using
#' the iterative soft-thresholding algorithm from Shen and Huang.
#'
#' @title sparsePca.lambda
#' @param x Required. Numeric matrix.
#' @param number.components Optional. Integer. Number of components to extract.
#'                 Defaults to 10 and is automatically reduced to min(dim(x))
#'                 if 10 is too high.
#' @param lambda Required. Regularization (soft-thresholding) parameter applied
#'                 to the loadings at each iteration.
#' @param center Optional. Logical. Should x be shifted to be (column-wise) 0-centered?
#'                 Defaults to TRUE. Alternatively, a vector of length equal to the
#'                 number of columns in x. The value is passed on to scale.
#' @param scale. Optional. Logical. Should the variables be normalized to have unit
#'                 variance? Defaults to FALSE. Alternatively, a vector of length
#'                 equal to the number of columns in x. The value is passed on to scale.
#' @param iter.max Optional. Integer. The maximum number of iterations for each
#'                 component. Defaults to 500.
#' @param tol Optional. A (small) positive numeric used to check convergence of loadings
#'                 for each component. Defaults to 1e-09.
#' @references Shen, H. and Huang, J. Z. (2008). Sparse principal component
#'             analysis via regularized low rank matrix approximation. _Journal
#'             of Multivariate Analysis_ *99*, 1015-1034.
#' @return A list (classes "sparsePca", "pca") with components
#'  - \item{number.components}: The number of components used
#'  - \item{lambda}: The regularization parameter used
#'  - \item{loadings}: The matrix of variable loadings
#'  - \item{scores}: The matrix of sample scores
#'  - \item{values}: A list with components
#'    - \item{Eigenvalues}: per-component explained variance fractions
#'    - \item{Relative_eig}: the normalized eigenvalues (sum up to 1)
#'    - \item{Cumul_eig}: the cumulative eigenvalues (useful for scree plot)
#'  - \item{center, scale}: centering and scaling vectors, if used.
#'                 Otherwise, FALSE.
#' @seealso \code{\link{pca}}, \code{\link{edgePCA}}, \code{\link{regularizedPca}},
sparsePca.lambda <- function(x, number.components = 10,
                             lambda,
                             center = TRUE, scale. = FALSE,
                             iter.max = 500, tol = 1e-09) {
  x <- as.matrix(x)
  n <- nrow(x)
  p <- ncol(x)
  ## Construct variable and sample names
  variable.names <- colnames(x)
  if (is.null(variable.names)) {
    variable.names <- paste("var", seq_len(p), sep = ".")
  }
  sample.names <- rownames(x)
  if (is.null(sample.names)) {
    sample.names <- seq_len(n)
  }
  ## Center/scale the matrix; scale() records the vectors used as attributes
  x <- scale(x, center = center, scale = scale.)
  cen <- attr(x, "scaled:center")
  sc <- attr(x, "scaled:scale")
  if (!is.null(sc) && any(sc == 0)) {
    stop("cannot rescale a constant/zero column to unit variance")
  }
  ## Check for missing values
  if (any(is.na(x))) {
    stop("cannot handle missing values")
  }
  ## BUG FIX: a consistency check on `number.variables` (an argument of
  ## sparsePca, not of this function) was copy-pasted here; it referenced an
  ## undefined object and made every call fail. It has been removed.
  ## Check number of components
  number.components <- min(number.components, n, p)
  ## Storage for scores (u), loadings (v), explained variance (d) and
  ## convergence diagnostics of the iterations
  vect.d <- rep(NA, number.components)
  iterations <- rep(0, number.components)
  convergence <- rep(FALSE, number.components)
  mat.u <- matrix(NA, nrow = n, ncol = number.components)
  mat.v <- matrix(NA, nrow = p, ncol = number.components)
  x.new <- x
  for (h in seq_len(number.components)) {
    ## Initialization: u and v are solutions of the unpenalized approximation
    ## problem, i.e. u %*% t(v) is the best rank one approximation of x.new
    x.svd <- svd(x.new, nu = 1, nv = 1)
    u.new <- x.svd$u[, 1]
    v.new <- x.svd$d[1] * x.svd$v[, 1]
    v.conv <- u.conv <- FALSE
    iter <- 0
    ## Alternate soft-thresholded updates of v and normalized updates of u
    while ((!v.conv || !u.conv) && iter < iter.max) {
      iter <- iter + 1
      u.old <- u.new
      v.old <- v.new
      ## Update v using soft thresholding at level lambda
      z <- t(x.new) %*% u.new
      v.new <- ifelse(abs(z) > lambda, sign(z) * (abs(z) - lambda), 0)
      ## Update and normalize u
      u.new <- as.vector(x.new %*% v.new)
      u.new <- u.new / sqrt(sum(u.new^2))
      ## Check convergence of both vectors
      if (sum((u.new - u.old)^2) < tol) {
        u.conv <- TRUE
      }
      if (sum((v.new - v.old)^2) < tol) {
        v.conv <- TRUE
      }
    }
    ## Keep log of convergence
    iterations[h] <- iter
    convergence[h] <- (v.conv && u.conv)
    ## Store the component. As in Shen & Huang, v is deliberately NOT
    ## normalized here (it carries the scale of the rank-one approximation).
    mat.v[, h] <- v.new
    mat.u[, h] <- u.new
    ## Deflate x.new with the rank one approximation
    x.new <- x.new - tcrossprod(u.new, v.new)
    ## Fraction of total variance explained by x reconstructed with the
    ## first h components.
    ## BUG FIX: the reconstruction is u %*% t(v); the previous code computed
    ## mat.u %*% mat.v, which is non-conformable (n x h times p x h).
    x.reconstructed.h <- tcrossprod(mat.u[, 1:h, drop = FALSE],
                                    mat.v[, 1:h, drop = FALSE])
    vect.d[h] <- sum(x.reconstructed.h^2) / sum(x^2)
  }
  ## Extract eigenvectors (loadings), eigenvalues and scores.
  ## NOTE(review): vect.d holds cumulative explained fractions; appending 1
  ## adds a residual "unexplained" term, so Eigenvalues has length
  ## number.components + 1 — confirm downstream consumers expect this.
  loadings <- mat.v
  scores <- mat.u
  eigenvalues <- diff(c(0, vect.d, 1))
  ## Add sensible names to loadings and scores
  dimnames(loadings) <- list(variable.names,
                             paste("Axis", seq_len(ncol(loadings)), sep = "."))
  dimnames(scores) <- list(sample.names,
                           paste("Axis", seq_len(ncol(loadings)), sep = "."))
  ## Compute relative and cumulative eigenvalues
  values <- list(Eigenvalues = eigenvalues,
                 Relative_eig = eigenvalues / sum(eigenvalues),
                 Cumul_eig = cumsum(eigenvalues) / sum(eigenvalues))
  ## Construct results and return it.
  ## BUG FIX: ifelse(center, cen, FALSE) returned only cen[1]; keep the whole
  ## centering/scaling vectors when they were used.
  result <- list(number.components = number.components,
                 lambda = lambda,
                 scores = scores,
                 loadings = loadings,
                 values = values,
                 center = if (is.null(cen)) FALSE else cen,
                 scale = if (is.null(sc)) FALSE else sc)
  class(result) <- c("sparsePca", "pca")
  return(invisible(result))
}
#' S3 method extracting scores (sample or variable coordinates) from a pca object
#'
#' @title scores.pca
#' @param pca Required. A \code{\link{pca}} class object.
#' @param choices Optional. Ordination axes to return. If missing, all axes are returned.
#' @param display Optional. Unambiguous abbreviation of 'variables' or 'samples'.
#'                For compatibility with vegan and phyloseq, 'variables' can be replaced
#'                by 'species' or 'taxa' and 'samples' can be replaced by 'sites'.
#'                Defaults to 'sites'.
#' @param ... Optional. Additional arguments passed on from scores.default. Unused.
#' @return A matrix of scores for the axes in 'choices'.
#' @seealso \code{\link{pca}}, \code{\link{edgePCA}}, \code{\link{regularizedPca}}, \code{\link{sparsePca}},
scores.pca <- function(pca, choices,
                       display = c("sites", "species", "samples", "variables", "taxa"),
                       ...) {
  display <- match.arg(display)
  ## Sample coordinates live in $scores, variable coordinates in $loadings
  result <- switch(display,
                   sites = ,
                   samples = pca$scores,
                   species = ,
                   variables = ,
                   taxa = pca$loadings)
  ## Restrict to the requested axes, silently dropping out-of-range ones
  if (!missing(choices)) {
    valid <- choices[choices <= ncol(result)]
    result <- result[, valid, drop = FALSE]
  }
  return(result)
}
#' Extract supporting taxa from an edge pca
#'
#' @title support_taxa
#' @param physeq Required. A \code{\link{phyloseq-class}} class instance.
#' @param epca Required. An \code{\link{edgepca}} class object
#' @param choices Optional. Ordination axes for which supporting taxa are extracted.
#'                Defaults to all axes.
#' @param threshold Optional. Numeric. Threshold that a taxon's absolute loading
#'                must exceed to be extracted. Defaults to 0.
#' @param number Optional. Integer. Maximum number of taxa to extract from each axis.
#'                Defaults to all taxa.
#' @note If number and threshold are specified, extracts at most \code{number} taxa whose loadings
#'       exceed threshold for each axis.
#' @return A character vector of taxa names.
support_taxa <- function(physeq, epca, choices = seq(epca$number.components),
                         threshold = 0, number = ntaxa(physeq)) {
  ## Work on absolute loadings: support strength ignores the sign
  x <- abs(epca$loadings)
  choices <- choices[choices <= ncol(x)]
  x <- x[, choices, drop = FALSE]
  ## Keep only the loadings of pendant edges (one edge per tip)
  tree <- phy_tree(physeq)
  pendant.edges <- match(seq_along(tree$tip.label), tree$edge[, 2])
  x <- x[pendant.edges, , drop = FALSE]
  ## Zero out loadings that do not pass the threshold
  x <- x * (x > threshold)
  ## Initialize taxa set
  taxa.set <- rep(FALSE, ntaxa(physeq))
  names(taxa.set) <- taxa_names(physeq)
  ## For each component, flag the (at most) `number` largest supports.
  ## BUG FIX: the previous 1:number indexing produced NA entries whenever
  ## number exceeded the number of tips; cap it with min().
  for (comp in seq_len(ncol(x))) {
    z <- x[, comp]
    names(z) <- tree$tip.label
    cur.taxa.set <- sort(z, decreasing = TRUE)[seq_len(min(number, length(z)))]
    cur.taxa.set <- names(cur.taxa.set)[cur.taxa.set > 0]
    taxa.set[cur.taxa.set] <- TRUE
  }
  return(names(taxa.set)[taxa.set])
}
#' Function for plotting a tree with edge fattened and colored according to
#' the loadings of an edgePCA results.
#'
#' @title plotLoadings
#' @param physeq Required. A \code{\link{phyloseq-class}} class instance.
#' @param epca Required. An \code{\link{edgepca}} class object
#' @param choices Required. Ordination axes.
#' @param fatten.by Required. Aesthetics that will be used for fattening.
#'                Subset of 'size' and 'alpha'. Defaults to 'size'.
#' @param fatten.tips Optional. Logical. Should tips be fattened like pendant edges.
#'                Defaults to FALSE.
#' @param width.lim Optional. Numeric. Minimum and maximal edge after fattening.
#'                Defaults to c(0.1, 4). Set to c(0, x) for true linear scaling.
#' @param color Optional. Color vector of length 2 used to distinguish positive
#'                loading edges (color[1]) and negative loading edges (color[2]).
#'                Defaults to c("#EF8A62", "#67A9CF")
#' @param color.tip.by Optional. Either a single character string matching a variable
#'                name in the corresponding tax_table of `physeq`, or a factor with
#'                the same length as the number of taxa in `physeq`. If NULL, nothing happens.
#' @param missing.color Optional. Color used for tips and edges with 0 loading.
#'                Defaults to NULL. If NULL, nothing happens. Use "white", "transparent",
#'                or par("bg") to remove them from plot.
#' @param ... Additional arguments passed on to plot.phylo
#'
#' @return Nothing, this function is used for its side effect of plotting a tree
#' @note NOTE(review): when color.tip.by is not NULL this calls color_edges(),
#'       which is not defined in this file — confirm it is available elsewhere
#'       in the package.
#' @seealso \code{\link{edgePCA}}
plotLoadings <- function(physeq, epca, choices, fatten.by = c("size"), fatten.tips = FALSE,
                         width.lim = c(0.1, 4), color = c("#EF8A62", "#67A9CF"),
                         color.tip.by = NULL, missing.color = NULL,
                         ...) {
  ## Check fatten.by
  if (any( !fatten.by %in% c("size", "alpha"))) {
    stop("fatten.by must be a subset of c(\"size\", \"alpha\")")
  }
  ## Extract loading vectors for the requested axes
  x <- epca$loadings
  choices <- choices[choices <= ncol(x)]
  x <- x[ , choices, drop = FALSE]
  ## Extract tree; pendant.edges[i] is the edge leading to tip i
  tree <- phy_tree(physeq)
  pendant.edges <- match(1:length(tree$tip.label) , tree$edge[ , 2])
  tip.loadings <- x[pendant.edges, , drop = FALSE]
  ## Edge colors: color[1] for positive loadings, color[2] otherwise
  edge.color <- ifelse(x > 0, color[1], color[2])
  ## Tip colors default to the color of their pendant edge
  tip.color <- edge.color[pendant.edges, , drop = FALSE]
  ## Fattening factor: |loading| linearly rescaled into width.lim
  fattening.factor <- rescale(abs(x), to = width.lim)
  ## Fatten edges by size
  if ("size" %in% fatten.by) {
    edge.width <- fattening.factor
  } else {
    edge.width <- rep(1, length(x))
  }
  dim(edge.width) <- dim(x)
  ## If alpha, fatten edges by alpha (transparency proportional to loading)
  if ("alpha" %in% fatten.by) {
    scaled.alpha <- fattening.factor / width.lim[2]
    edge.color <- alpha(edge.color, scaled.alpha)
    dim(edge.color) <- dim(x)
  }
  ## Color tips from a taxonomic variable, overriding the loading colors
  if (!is.null(color.tip.by)) {
    tip.color <- color_edges(physeq, group = color.tip.by,
                             tip.only = TRUE, method = "majority")
    legend.palette <- tip.color$palette
    tip.color <- tip.color$tip
    tip.color <- matrix(rep(tip.color, ncol(x)), ncol = ncol(x))
  }
  ## Fatten tips like their pendant edges
  if (fatten.tips) {
    tip.width <- edge.width[pendant.edges, , drop = FALSE]
    if ("alpha" %in% fatten.by) {
      dim(scaled.alpha) <- dim(x)
      scaled.alpha <- scaled.alpha[pendant.edges, , drop = FALSE]
      tip.color <- alpha(tip.color, scaled.alpha)
      dim(tip.color) <- dim(scaled.alpha)
    }
  }
  ## Set missing edges and possibly tips to missing.color
  if (!is.null(missing.color)) {
    edge.color[x == 0] <- missing.color
    tip.color[tip.loadings == 0] <- missing.color
  }
  ## Plot tree
  ## Prepare arguments for plot.phylo
  args <- list(x = tree, edge.width = 1, edge.color = "black")
  args <- c(args, list(...))
  ## Prepare layout if there are multiple trees (one panel per axis)
  oldmar <- par("mar")
  n.axis <- ncol(x)
  if (n.axis > 1) {
    layout.nrow <- floor(sqrt(n.axis))
    layout.ncol <- ceiling(n.axis / layout.nrow)
    par(mfcol = c(layout.nrow, layout.ncol), mar = c(0, 0, 1, 0))
    for (i in 1:n.axis) {
      args[["edge.width"]] <- edge.width[ , i]
      args[["edge.color"]] <- edge.color[ , i]
      args[["tip.color"]] <- tip.color[ , i]
      if (fatten.tips) {
        args[["cex"]] <- tip.width[ , i]
      }
      do.call("plot.phylo", args)
      title(paste("Axis", choices[i]))
    }
  } else {
    par(mar = c(0, 0, 1, 0))
    args[["edge.width"]] <- edge.width[ , 1]
    args[["edge.color"]] <- edge.color[ , 1]
    args[["tip.color"]] <- tip.color[ , 1]
    if (fatten.tips) {
      args[["cex"]] <- tip.width[ , 1]
    }
    do.call("plot.phylo", args)
    title(paste("Axis", choices))
  }
  ## Restore the caller's margins (mfcol is deliberately left as set)
  par(mar = oldmar)
}
################################################################################
# S3 method extract_eigenvalue for the pca class.
# Used by `plot_scree` to pull the eigenvalue vector out of different
# types of ordination objects.
#' @keywords internal
# for pca (pca) objects
extract_eigenvalue.pca <- function(ordination) {
  ## Return the raw eigenvalue vector stored on the pca result
  ordination$values$Eigenvalues
}
################################################################################
#' Method for plotting an edgepca object with enhanced axes. Expands
#' \code{\link{plotLoadings}} and \code{\link{plot_ordination}}
#'
#' @title plot_edgepca
#' @param physeq Required. A \code{\link{phyloseq-class}} object
#' @param epca Required. A \code{\link{edgepca}} class object
#' @param axes Optional. Ordination axes. Defaults to c(1, 2).
#' @param widths Optional. A vector of 2 values for the widths of columns. Defaults to c(1, 1)
#' @param heights Optional. A vector of 2 values for the heights of rows. Defaults to c(1, 1)
#' @param args.loadings Optional. List of additional arguments passed on to plotLoadings.
#'                Defaults to NULL
#' @param ... Optional. Additional argument passed on to plot_ordination
#' @return Nothing. The function is called for its side effect of plotting.
#' @note NOTE(review): this function relies on baseViewports() (package gridBase),
#'       pushViewport/grid.draw/upViewport (package grid) and
#'       ggplot_build/ggplot_gtable (package ggplot2), none of which are loaded
#'       at the top of this file — confirm they are attached before calling.
#' @seealso \code{\link{pca}}, \code{\link{edgePCA}}, \code{\link{regularizedPca}}, \code{\link{sparsePca}},
plot_edgepca <- function(physeq, epca, axes = c(1, 2),
                         widths = c(1, 1), heights = c(1, 1),
                         args.loadings = NULL, ...) {
  ## Setup graphic device: panels 2 and 3 host the two loading trees,
  ## panel 1 (bottom-right) hosts the ordination; top-right cell stays empty
  layout(matrix(c(2, 0, 3, 1), 2, 2), widths = widths, heights = heights)
  ## Plot first loading
  args1 <- list(physeq = physeq, epca = epca, choices = axes[1])
  args.loadings <- c(args1, args.loadings)
  do.call("plotLoadings", args = args.loadings)
  ## Plot second loading (reuse the same argument list, swap the axis)
  args.loadings[["choices"]] <- axes[2]
  do.call("plotLoadings", args = args.loadings)
  ## Plot ordination using plot_ordination and ggplot2, drawn inside the
  ## current base-graphics figure region via grid viewports
  plot.new()
  vps <- baseViewports()
  pushViewport(vps$figure)
  p <- plot_ordination(physeq = physeq, ordination = epca, axes = axes, ...)
  g <- ggplot_gtable(ggplot_build(p))
  grid.draw(g)
  upViewport()
}
## Scraper artifact (website boilerplate, not R code) — commented out so the
## file parses:
## "Add the following code to your website.
##  For more information on customizing the embed code, read Embedding Snippets."