#' Draw a between class analysis (BCA) plot.
#'
#' @param physeq Phyloseq object.
#' @param factor Name of the sample metadata column to perform BCA.
#' @param color Name of the sample metadata column to color the samples with.
#' @param label Taxonomic annotation of the taxon to be shown in the biplot.
#' @param ntaxa Maximum number of driver taxa to display. Actual number shown might be lower than the specified number.
#'
#' @return None.
#' @export
#'
#' @import ggplot2
#' @import ggrepel
#'
#' @examples
#' \dontrun{
#' biplot_bca(physeq, factor = "Enterotype", color = "Enterotype", label = "Genus")
#' biplot_bca(physeq, factor = "Enterotype", color = "Diet", label = "Genus", ntaxa = 5)
#' }
biplot_bca <- function(physeq, factor = "Enterotype", color = "Enterotype", label = "Genus", ntaxa = 10) {
data <- as.data.frame(otu_table(physeq))
sample_data <- as.data.frame(sample_data(physeq))
taxnames <- as.data.frame(tax_table(physeq))
factors <- as.factor(sample_data[[factor]])
classes = length(levels(factors))
obs.pca <- dudi.pca(data, scannf=F, nf=10)
obs.bet <- bca(obs.pca, fac=as.factor(factors), scannf=F, nf=classes-1)
drivers <- obs.bet$co
components <- colnames(drivers)
# Sum-squared components
drivers$Total <- apply(drivers**2, 1, sum)
# Names
names <- taxnames[label]
colnames(names) <- NULL
drivers<- cbind(Name = names, drivers)
# Flag top 5 in each component
drivers$Selected <- FALSE
for (x in c(components, "Total")) {
drivers <- dplyr::arrange(drivers, desc(abs(get(x))))
drivers[1:ntaxa,]$Selected <- TRUE
}
# Need names
drivers$Selected <- drivers$Selected & !is.na(drivers$Name)
drivers <- dplyr::filter(drivers, Selected == TRUE)
drivers <- dplyr::arrange(drivers, desc(Total))
drivers <- head(drivers, ntaxa)
row.names(drivers) <- drivers$Name
drivers$Total <- NULL
drivers$Name <- NULL
drivers$Selected <- NULL
color_factors <- as.factor(sample_data[[color]])
colors <- brewer.pal(length(levels(color_factors)), my_palette[[color]])
s.class(obs.bet$ls, fac=color_factors, grid=F, cell=0, cstar=0, col=colors)
s.arrow(drivers*7, clabel=0.75, boxes = TRUE, add.plot=TRUE)
s.class(obs.bet$ls, fac=color_factors, cpoint=0, grid=F, cell=0, cstar=0, col=colors, add.plot=TRUE)
rm(drivers)
rm(obs.pca)
rm(obs.bet)
rm(colors)
rm(factors)
rm(classes)
rm(names)
rm(data)
}
#' Draw a principal coordinate biplot using Bray-Curtis dissimilarity measure.
#'
#' @param physeq Phyloseq object.
#' @param color Name of the sample metadata column to color the samples with.
#' @param shape Name of the sample metadata column to assign shapes for the samples.
#' @param axis1 Index of the principal component to draw in X-axis.
#' @param axis2 Index of the principal component to draw in Y-axis.
#' @param show.taxa logical. Should the taxa be shown in the biplot? Without taxa, this becomes a standard PCoA plot.
#' @param label Taxonomic annotation of the taxon to be shown in the biplot.
#' @param repel logical. Should the taxa names in the biplot use ggrepel to display all taxa in a non-overlapping manner? If FALSE, then only non-overlapping taxa are shown to improve readability.
#' @param show.legend logical. Should the legend be drawn?
#' @param custom_palette Custom palette list to use (see example).
#'
#' @return ggplot object with the biplot
#' @export
#'
#' @examples
#' \dontrun{
#'
#' # Show samples and taxa in a biplot
#' biplot_pcoa(physeq, color="Diet")
#'
#' # Don't show taxa - equivalient to plot_pcoa()
#' biplot_pcoa(physeq, color="Diet", show.taxa = FALSE)
#'
#' # Specify color and shape
#' biplot_pcoa(physeq, color="Diet", shape="Diet", point_size = 2)
#' biplot_pcoa(physeq, color="Diet", shape="Diet", axis1 = 1, axis2 = 3, show.legend = FALSE, point_size = 2)
#'
#' # custom palette can be just names of ColorBrewer palettes
#'
#' pal = list(Diet = "Set2", Enterotype = "Pastel1", Significance = "PuRd")
#' biplot_pcoa(physeq, color="Diet", shape="Diet", point_size = 2, custom_palette = pal) + ggtitle("Fecal samples") + theme(plot.title = element_text(size = 7, face = "bold", hjust = 0.5), legend.position = "right")
#'
#' # Or it can also be named lists for all possible values.
#'
#' diet_colors = c(Control="black", HSD="#4A57A2")
#' ET_colors = c("red", "black", "blue")
#' names(ET_colors) = c("ET1", "ET2", "ET3")
#' pal = list(Diet = diet_colors, Enterotype = ET_colors)
#'
#' biplot_pcoa(physeq, color="Diet", shape="Diet", custom_palette = pal)
#' biplot_pcoa(physeq, color="Enterotype", shape="Diet", custom_palette = pal)
#' }
biplot_pcoa <- function(physeq, color = "Group", shape = NULL, axis1 = 1, axis2 = 2, show.taxa = TRUE, label = "Genus", repel = FALSE, show.legend = TRUE, custom_palette = NULL) {
dist <- phyloseq::distance(physeq =physeq, method="bray")
ordination <- phyloseq::ordinate(physeq, method = "PCoA", distance = dist)
axes = c(axis1, axis2)
DF <- phyloseq::plot_ordination(physeq, ordination, axes = axes, type="biplot", justDF = TRUE)
x = colnames(DF)[1]
y = colnames(DF)[2]
ord_map = aes_string(x = x, y = y, color = color, shape = shape, na.rm = TRUE)
label_map <- aes_string(x = x, y = y, label = label, na.rm = TRUE)
# Get strength of the axes
if (length(extract_eigenvalue(ordination)[axes]) > 0) {
eigvec = extract_eigenvalue(ordination)
fracvar = eigvec[axes]/sum(eigvec)
percvar = round(100 * fracvar, 1)
}
# Get samples and taxa separately
DF_Taxa <- DF[DF$id.type == "Taxa",]
DF_Samples <- DF[DF$id.type == "Samples",]
# Pick top 50 taxa based on their loadings weighted by variance
score <- fracvar[1]*(DF_Taxa[x]^2) + fracvar[2]*(DF_Taxa[y]^2)
colnames(score) <- NULL
DF_Taxa <- cbind(DF_Taxa, Score = score)
rm(score)
DF_Taxa <- dplyr::arrange(DF_Taxa, desc(Score))
DF_Taxa <- head(DF_Taxa, 50)
# Plot
Tr <-
ggplot(DF_Samples, ord_map) +
geom_point(na.rm = TRUE, size = 2, show.legend = show.legend) +
theme(aspect.ratio = 1)
#scale_size_manual("type", values = c(Samples = 2, Taxa = 4)) +
#scale_shape_manual(values=GP1.shape) +
#geom_segment(data = DF_Taxa, x = DF_Taxa$Axis.1, y = DF_Taxa$Axis.2, xend = 0, yend = 0)
# Add color
color_values <- get_my_palette_colors(custom_palette, color, levels(DF_Samples[[color]]), offset=0)
Tr <- Tr + scale_color_manual(values = color_values)
if (show.taxa) {
if (repel) {
Tr <- Tr +
geom_text_repel(label_map, data = rm.na.phyloseq(DF_Taxa, label),
size = 3,
color = "black",
vjust = "center",
hjust = "middle",
show.legend = FALSE,
na.rm = TRUE)
} else {
Tr <- Tr +
geom_text(label_map, data = rm.na.phyloseq(DF_Taxa, label),
check_overlap = TRUE,
size = 3,
color = "black",
vjust = "center",
hjust = "middle",
show.legend = FALSE,
na.rm = TRUE)
}
}
# Add variance in axes
strivar = paste0("PC", axes, " [", percvar, "%]")
Tr = Tr + xlab(strivar[1]) + ylab(strivar[2])
return(Tr)
}
#' Draw a principal coordinate plot using Bray-Curtis dissimilarity measure.
#'
#' @param physeq Phyloseq object.
#' @param color Name of the sample metadata column to color the samples with. If NULL, first column will be used.
#' @param shape Name of the sample metadata column to assign shapes for the samples.
#' @param axis1 Index of the principal component to draw in X-axis.
#' @param axis2 Index of the principal component to draw in Y-axis.
#' @param show.legend logical. Should the legend be drawn?
#' @param point_size Size of the data points in the PCoA.
#' @param custom_palette Custom palette list to use (see example).
#'
#' @return ggplot object with the biplot
#'
#' @export
#'
#' @examples
#' \dontrun{
#' plot_pcoa(physeq, color="Diet")
#' plot_pcoa(physeq, color="Diet", shape="Diet", point_size = 2)
#' plot_pcoa(physeq, color="Diet", shape="Diet", axis1 = 1, axis2 = 3, show.legend = FALSE, point_size = 2)
#'
#' # custom palette can be just names of ColorBrewer palettes
#'
#' pal = list(Diet = "Set2", Enterotype = "Pastel1", Significance = "PuRd")
#' plot_pcoa(physeq, color="Diet", shape="Diet", point_size = 2, custom_palette = pal) + ggtitle("Fecal samples") + theme(plot.title = element_text(size = 7, face = "bold", hjust = 0.5), legend.position = "right")
#'
#' # Or it can also be named lists for all possible values.
#'
#' diet_colors = c(Control="black", HSD="#4A57A2")
#' ET_colors = c("red", "black", "blue")
#' names(ET_colors) = c("ET1", "ET2", "ET3")
#' pal = list(Diet = diet_colors, Enterotype = ET_colors)
#'
#' plot_pcoa(physeq, color="Diet", shape="Diet", custom_palette = pal)
#' plot_pcoa(physeq, color="Enterotype", shape="Diet", custom_palette = pal)
#' }
plot_pcoa <- function(physeq, color = NULL, shape = NULL, axis1 = 1, axis2 = 2, show.legend = TRUE, point_size = 2, custom_palette = NULL) {
dist <- phyloseq::distance(physeq =physeq, method="bray")
ordination <- phyloseq::ordinate(physeq, method = "PCoA", distance = dist)
axes = c(axis1, axis2)
DF <- phyloseq::plot_ordination(physeq, ordination, axes = axes, type="samples", justDF = TRUE)
x = colnames(DF)[1]
y = colnames(DF)[2]
if (is.null(color)) {
color = sample_data(physeq)[1]
}
ord_map = aes_string(x = x, y = y, color = color, shape = shape, na.rm = TRUE)
# Get strength of the axes
if (length(extract_eigenvalue(ordination)[axes]) > 0) {
eigvec = extract_eigenvalue(ordination)
fracvar = eigvec[axes]/sum(eigvec)
percvar = round(100 * fracvar, 1)
}
# Plot
Tr <-
ggplot(DF, ord_map) +
geom_point(na.rm = TRUE, size = point_size, show.legend = show.legend) +
theme(aspect.ratio = 1)
# Add color
color_values <- get_my_palette_colors(custom_palette, color, levels(DF[[color]]), offset=0)
Tr <- Tr + scale_color_manual(values = color_values)
# Add variance in axes
strivar = paste0("PC", axes, " [", percvar, "%]")
Tr = Tr + xlab(strivar[1]) + ylab(strivar[2])
return(Tr)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.