# Plot prioritisation by ? diversity
# Author: Walter Xie
# Accessed on 3 Mar 2016
#' @name PlotPrioritisation
#' @title Plot prioritisation
#'
#' @description Ranking of sample plots by their contributions
#' to the total biodiversity.
#'
#' @details \code{getPlotPrior.JostDiver} computes plot prioritisation
#' by Jost diversity calculated from \pkg{vegetarian} \code{\link{d}}.
#' It uses a greedy algorithm to remove plots sequentially
#' so as to minimize the loss of diversity among the remaining plots,
#' which always chooses the 1st plot if there are multi-results
#' in each prioritisation loop.
#' Rank 1 is the most important plot and removed at the last,
#' \emph{n} is the least important and removed in the beginning.
#'
#' @param t.community.matrix A transposed matrix from community matrix,
#' where rows are plots (Use plots instead of subplots.), columns are OTUs.
#' @param lev Level of diversity to be calculated.
#' Will accept: 'alpha', 'beta', or 'gamma'.
#' @param q Order of the diversity measure.
#' Defaults to the Shannon case where q = 1.
#' @param order.by How the result is ordered.
#' Choose from 'sample', 'rank', or 'diversity'.
#' @return
#' A data frame with 2 columns: \emph{rank, diversity}.
#' Rank 1 is the most important plot,
#' \emph{n} is the least important, and row.names are plot names.
#' For example,
#' \tabular{rrr}{
#' \tab rank \tab diversity\cr
#' CM30c39 \tab 28 \tab 1845.785714\cr
#' CM30c44 \tab 27 \tab 1875.888889\cr
#' CM31a5 \tab 26 \tab 1899.653846
#' }
#' @export
#' @keywords plot prioritisation
#' @examples
#' plot.prior.g1 <- getPlotPrior.JostDiver(t.community.matrix, lev="gamma", q=1)
#'
#' @rdname PlotPrioritisation
getPlotPrior.JostDiver <- function(t.community.matrix, lev=c("alpha","beta","gamma"),
q=1, order.by=c("sample","rank","diversity")){
tmpCM <- t.community.matrix
removedSites <- c()
lev <- match.arg(lev)
require(vegetarian)
jost.diver <- d(t.community.matrix, lev=lev, q=q)
cat("Original community", paste0(lev, "(", q, ")"), "=", jost.diver,
"for", nrow(t.community.matrix), "samples.\n")
# add orignal diversity
maxRemainedDiversities <- c(jost.diver)
for (ra in nrow(t.community.matrix):2) {
if(ra != nrow(tmpCM)) stop("incorrect nrow(tmpCM) ", nrow(tmpCM), " != ra ", ra, " !")
maxRemainedDiv = 0
removedSiteId = 0
for (siteId in 1:nrow(tmpCM)) {
tmpDiv <- d(tmpCM[-siteId,], lev=lev, q=q)
if (tmpDiv > maxRemainedDiv) {
maxRemainedDiv = tmpDiv
removedSiteId = siteId
}
# print(paste("siteId = ", siteId, ", remove siteId = ", removedSiteId, ", maxRemainedDiv = ", maxRemainedDiv, sep=""))
}
if(maxRemainedDiv < 1 | removedSiteId < 1)
stop("incorrect max remained ", lev, "(", q, ") = ", maxRemainedDiv,
", when sites = ", nrow(tmpCM), " !")
cat("remove sample", rownames(tmpCM)[removedSiteId], ": max remained",
paste0(lev, "(", q, ")"), "=", maxRemainedDiv, "\n")
removedSites <- c(removedSites, rownames(tmpCM)[removedSiteId])
maxRemainedDiversities <- c(maxRemainedDiversities, maxRemainedDiv)
# remove sample from cm
tmpCM <- tmpCM[-removedSiteId,]
}
removedSites <- c(removedSites, rownames(tmpCM)[1]) # the last
cat("Last sample remained :", rownames(tmpCM)[1], "\n")
if(length(removedSites) != length(maxRemainedDiversities))
stop("length(maxRemainedDiversities) ", length(maxRemainedDiversities),
" - 1 != sites ", length(div.df$rank), " !")
# 1st removed (biggest rank) div.df$rank is the least important
div.df <- data.frame(row.names=removedSites, rank=nrow(t.community.matrix):1,
diversity=maxRemainedDiversities)
div.df <- orderDiversityDF(div.df, order.by)
return(div.df)
}
#' @details \code{getPlotPrior.PhyloAlpha} calculates plot prioritisation
#' by phylogenetic alpha diversity from \code{\link{phylo.alpha}}.
#' It also can return the ranks based on species richness (SR),
#' but they may be different to ranks calculated from
#' \code{getPlotPrior.JostDiver} using gamma0 (also species richness).
#'
#' @param phylo.tree,... The parameters passed to \code{\link{phylo.alpha}}.
#' @param taxa.match Logical, if taxa in phylogenies do not match OTUs in the community.
#' If TRUE, as default, to use t.community.matrix and phylo.tree directly,
#' otherwise to call \code{\link{match.phylo.comm}}
#' before calculate phylogenetic alpha diversity.
#' @export
#' @keywords plot prioritisation
#' @examples
#' phylo.alpha <- getPlotPrior.PhyloAlpha(t.community.matrix, phylo.tree)
#'
#' @rdname PlotPrioritisation
getPlotPrior.PhyloAlpha <- function(t.community.matrix, phylo.tree, taxa.match=TRUE,
order.by=c("sample","rank","diversity"), ...) {
if (!taxa.match) {
require(picante)
combined <- match.phylo.comm(phylo.tree, t.community.matrix)
pd.alpha <- ComMA::phylo.alpha(combined$comm, combined$phy, ...)
} else {
pd.alpha <- ComMA::phylo.alpha(t.community.matrix, phylo.tree, ...)
}
# add rank
pd.alpha <- pd.alpha[order(pd.alpha$PD, decreasing = T),]
pd.df <- data.frame(row.names=rownames(pd.alpha), rank=1:nrow(pd.alpha), diversity=pd.alpha$PD)
pd.df <- orderDiversityDF(pd.df, order.by)
pd.alpha <- pd.alpha[order(pd.alpha$SR, decreasing = T),]
sr.df <- data.frame(row.names=rownames(pd.alpha), rank=1:nrow(pd.alpha), diversity=pd.alpha$SR)
sr.df <- orderDiversityDF(sr.df, order.by)
list(PD=pd.df, SR=sr.df)
}
#' @details \code{getPlotPrior} is a generic function including both
#' \code{getPlotPrior.JostDiver} and \code{getPlotPrior.PhyloAlpha},
#' and it also handles multiple communities.
#'
#' @param cm.list The list of community matrices.
#' @param is.transposed If TRUE, then the community matrix is already
#' transposed to be the valid input of \code{\link{vegdist}}.
#' Default to FASLE to transpose.
#' @param tre.list A list of phylo tree objects for 'pd.alpha' and 'sp.rich',
#' corresponding to \code{cm.list}. Default to an empty list.
#' @param diversities The vector of diversities used to compute plot prioritisation.
#' The values are 'gamma0','gamma1','beta0','beta1','pd.alpha','sp.rich'.
#' The first two are calculated by \code{\link{d}},
#' the last two by \code{\link{pd}}.
#' @export
#' @keywords plot prioritisation
#' @examples
#' plot.prior.list <- getPlotPrior(cm.list, is.transposed=FALSE, diversities=c("gamma1","beta1"))
#' plot.prior.list <- getPlotPrior(cm.list, is.transposed=FALSE, tre.list=tre.list, diversities=c("gamma1","beta1","pd.alpha","sp.rich"))
#' @rdname PlotPrioritisation
getPlotPrior <- function(cm.list, is.transposed=FALSE, tre.list=list(), taxa.match=TRUE,
diversities=c("gamma0","gamma1","beta0","beta1","pd.alpha","sp.rich")) {
cat("Plot prioritisation at", length(cm.list), "data sets",
ifelse(length(tre.list)>0, paste(", and", length(tre.list), "trees"), ""), ".\n")
# add names
if (is.null(names(cm.list)))
names(cm.list) <- 1:length(cm.list)
plot.prior.list <- list()
for (i in 1:length(cm.list)) {
cm.name <- names(cm.list)[i]
# if transposed
if (!is.transposed)
t.cm <- ComMA::transposeDF(cm.list[[i]])
else
t.cm <- cm.list[[i]]
for (d in 1:length(diversities)) {
div <- diversities[d]
if (div=="gamma0") {
plot.prior <- ComMA::getPlotPrior.JostDiver(t.cm, lev="gamma", q=0)
} else if (div=="gamma1") {
plot.prior <- ComMA::getPlotPrior.JostDiver(t.cm, lev="gamma", q=1)
} else if (div=="beta0") {
plot.prior <- ComMA::getPlotPrior.JostDiver(t.cm, lev="beta", q=0)
} else if (div=="beta1") {
plot.prior <- ComMA::getPlotPrior.JostDiver(t.cm, lev="beta", q=1)
} else if (div=="pd.alpha" || div=="sp.rich") {
if(length(tre.list) < 1 || length(tre.list) != length(cm.list))
stop("Invalid 'tre.list': a phylo tree object is required ",
"for each cm to caculate 'pd.alpha' or 'sp.rich' !")
plot.prior.2 <- ComMA::getPlotPrior.PhyloAlpha(t.cm, tre.list[[i]], taxa.match=taxa.match)
if (div=="pd.alpha")
plot.prior <- plot.prior.2$PD
else
plot.prior <- plot.prior.2$SR
} else {
plot.prior <- NULL
warning("Invalid diversity : ", div, " !")
}
plot.prior.list[[div]][[cm.name]] <- plot.prior
}
}
plot.prior.list[["diversities"]] <- diversities
plot.prior.list[["cm.names"]] <- names(cm.list)
return(plot.prior.list)
}
#' @details \code{mergePlotPriorListOfDF} merges a list of data frames
#' produced by \code{getPlotPrior} into one. Their columns must contain
#' 'rank' and 'diversity'.
#'
#' @param plot.prior.list The output from \code{getPlotPrior}.
#' @param suffixes The vector of suffixes added to distinguish colmun names
#' merged by different data frames. Its length must equal to \code{plot.prior.list}.
#' @export
#' @keywords plot prioritisation
#' @examples
#' pp.df.list <- mergePlotPriorListOfDF(plot.prior.list)
#'
#' @rdname PlotPrioritisation
mergePlotPriorListOfDF <- function(plot.prior.list, suffixes=c(), rm.prefix=TRUE) {
diversities <- plot.prior.list[["diversities"]]
if (length(suffixes) == 0)
suffixes <- plot.prior.list[["cm.names"]]
pp.df.list <- list()
for (d in 1:length(diversities)) {
div <- diversities[d]
if (length(plot.prior.list[[div]]) != length(suffixes))
stop("Invalid list : length(plot.prior.list[[div]]) != length(suffixes) !")
pp.df <- ComMA::mergeListOfDF(plot.prior.list[[div]], suffixes=suffixes)
rank.df <- pp.df[, grepl("^rank", colnames(pp.df))]
div.df <- pp.df[, grepl("^diversity", colnames(pp.df))]
if (rm.prefix) {
colnames(rank.df) <- gsub("rank.", "", colnames(rank.df), ignore.case = T)
colnames(div.df) <- gsub("diversity.", "", colnames(div.df), ignore.case = T)
}
pp.df.list[["rank"]][[div]] <- rank.df
pp.df.list[["diversity"]][[div]] <- div.df
}
return(pp.df.list)
}
#' @details \code{plotPrioritisation} produces the list of \code{\link{ggHeatmap}}
#' given a list of output from \code{mergePlotPriorListOfDF}.
#'
#' @param pp.df.list The output from \code{mergePlotPriorListOfDF}.
#' @param at The values of heatmap, choose from "rank" or "diversity".
#' @param row.id What row names are representing.
#' @export
#' @keywords plot prioritisation
#' @examples
#' heatmap.list <- plotPrioritisation(pp.df.list, add.label=T)
#'
#' @rdname PlotPrioritisation
plotPrioritisation <- function(pp.df.list, at=c("rank","diversity"), x.levels=c(), y.levels=c(),
add.label=FALSE, label.digits=1, guide="colourbar", ...) {
rank.df.list <-pp.df.list[[match.arg(at)]]
if (is.null(names(rank.df.list)))
names(rank.df.list) <- 1:length(rank.df.list)
require(gg1L)
heatmap.list <- list()
for (i in 1:length(rank.df.list)) {
pp.df <- rank.df.list[[i]]
midpoint <- round(mean(c(max(pp.df), min(pp.df))))
pp.df[,"samples"] <- rownames(pp.df)
p.hm <- gg1L::ggHeatmap(pp.df, melt.id="samples", title="", x.levels=x.levels, y.levels=y.levels,
add.label=add.label, label.digits=label.digits, guide=guide,
midpoint=midpoint, low="#f46d43", mid="#ffffbf", high="#3288bd" )
heatmap.list[[names(rank.df.list)[i]]] <- p.hm
}
return(heatmap.list)
}
#' @details \code{plotPrioritisation.Phyloseq} produces
#' one clusterd heatmap using \code{\link{phyloseq}} \code{\link{plot_heatmap}}
#' and attaches an additional plot of a selected attribute
#' from environmental meta-data, such as Elevation.
#'
#' @param pp.df One element of the 'rank' list from \code{mergePlotPriorListOfDF}.
#' @param attr.df Environmental meta-data. Rows are samples,
#' and must include all rownames from \code{pp.df}
#' @param y2.id,y2.lab The column of \code{attr.df} used
#' to plot the 2nd figure on the right side, default to "Elevation".
#' @param grid.widths A unit vector giving the width of each two columns,
#' used by \code{\link{plot_grid}} in \pkg{cowplot} and default to \code{c(8, 2)}.
#'
#' @note Use \code{theme_set(theme_bw(base_size=8))} to remove the grey backgroud.
#' @export
#' @keywords plot prioritisation
#' @examples
#' hm.elv <- plotPrioritisation.Phyloseq(pp.df.list[["rank"]][[1]], env.plot, grid.widths = c(10,2))
#' plot(hm.elv$heatmap)
#'
#' @rdname PlotPrioritisation
plotPrioritisation.Phyloseq <- function(pp.df, attr.df, y2.id="Elevation", y2.lab="Elevation (m)",
x.lab="Amplicon dataset", y.lab="Sample plot",
grid.widths = c(8, 2), ...) {
if (! y2.id %in% colnames(attr.df) )
stop("Invalid y2.id,", y2.id, "not exsit in meta data column names !\n")
midpoint <- mean(c(max(pp.df), min(pp.df)))
require(phyloseq)
phy <- phyloseq(otu_table(pp.df, taxa_are_rows = FALSE))
ph <- plot_heatmap(phy, method = "RDA", distance = "euclidean")
require(ggplot2)
ph <- ph + scale_fill_gradient2(low="#f46d43", mid="#ffffbf", high="#3288bd", midpoint = midpoint) +
xlab(x.lab) + ylab(y.lab) + coord_flip() +
theme(legend.position="none", axis.text.x=element_text(size = 8, angle = -45, vjust = 1),
axis.text.y = element_text(size = 8),
panel.margin = unit(c(0,-0.5,0,0), "cm"))
### Extract data for secondary plot ###
gb = ggplot_build(ph)
p.ord <- as.data.frame(gb$data[[1]])
# Sample col is defined by phyloseq
p.ord$Sample <- gb$plot$data$Sample
p.ord[,y2.id] <- attr.df[match(p.ord$Sample, rownames(attr.df)), y2.id]
p2 <- ggplot(data = p.ord, aes_string("Sample", y2.id, group = "1")) + geom_line() +
coord_flip() + ylab(y2.lab) + xlab("") +
theme(axis.text.x = element_text(size = 8),
axis.text.y = element_blank(), axis.ticks.y = element_blank(),
panel.margin = unit(c(0,0,0,-0.5), "cm"))
require(cowplot)
list(heatmap=plot_grid(ph, p2, align = 'h', rel_widths = grid.widths),
gg1=ph, gg2=p2)
}
#' @details \code{plotPrioritisation.NeatMap} produces
#' one clusterd heatmap using \code{\link{NeatMap}}
#' and attaches an additional plot of a selected attribute
#' from environmental meta-data, such as Elevation.
#'
#' @export
#' @keywords plot prioritisation
#' @examples
#' hm.elv <- plotPrioritisation.NeatMap(pp.df.list[["rank"]][[1]], env.plot, grid.widths = c(10,2))
#' plot(hm.elv$heatmap)
#'
#' @rdname PlotPrioritisation
plotPrioritisation.NeatMap <- function(pp.df, attr.df, y2.id="Elevation", y2.lab="Elevation (m)",
x.lab="Amplicon dataset", y.lab="Sample plot",
grid.widths = c(8, 3), ...) {
if (! y2.id %in% colnames(attr.df) )
stop("Invalid y2.id,", y2.id, "not exsit in meta data column names !\n")
midpoint <- mean(c(max(pp.df), min(pp.df)))
require(ggplot2)
require("NeatMap")
p <- make.heatmap1(as.matrix(pp.df),
column.method = "complete", column.metric="euclidean", # Column ordering method
row.method ="complete", row.metric = "euclidean",
#column.cluster.method = "none", # Column clustering method
#row.cluster.method = "none",
column.cluster.method = "complete", # Column clustering method
row.cluster.method = "complete",
column.labels = colnames(pp.df), row.labels = rownames(pp.df),
column.label.size = 2.5, row.label.size = 2.5) +
scale_fill_gradient2(low="#f46d43", mid="#ffffbf", high="#3288bd", midpoint = midpoint) +
xlab(x.lab) + ylab(y.lab) + #coord_flip() +
theme(panel.grid = element_blank(), panel.border = element_blank(),
axis.text = element_blank(), axis.ticks = element_blank(), legend.position="none",
plot.margin = unit(c(0,2,2,0),"cm"), strip.text.x = element_text(colour = "red"))
### Get data for secondary plot ###
gb = ggplot_build(p)
#grid.draw(gb)
#build$data[[4]]$y
#build$data[[4]]$label
#p.ord <- as.data.frame(gb$data[[2]]) # Without clusters
p.ord <- as.data.frame(gb$data[[4]]) # With clusters
#colnames(p.ord)[[1]] <- "Plot"
p.ord <- p.ord[order(p.ord$y), ]
p.ord$label <- factor(p.ord$label, levels = p.ord$label, ordered = TRUE)
p.ord[,y2.id] <- attr.df[match(p.ord$label, rownames(attr.df)),y2.id]
p2 <- ggplot(data = p.ord, aes_string("label", y2.id, group = "1")) + geom_line() +
coord_flip() + ylab(y2.lab) + xlab("") +
theme(axis.text.x = element_text(size = 8),
axis.text.y = element_blank(), axis.ticks.y = element_blank(),
plot.margin = unit(c(0,2,2,0),"cm"))
require(cowplot)
list(heatmap=plot_grid(p, p2, align = 'h', rel_widths = grid.widths),
gg1=p, gg2=p2)
}
############ internal #############
orderDiversityDF <- function(div.df, order.by=c("sample","rank","diversity")) {
order.by <- match.arg(order.by)
if (order.by=="rank") {
div.df <- div.df[order(div.df$rank),]
} else if (order.by=="diversity") {
div.df <- div.df[order(div.df$rank, decreasing = T),]
} else {
div.df <- div.df[order(rownames(div.df)),]
}
return(div.df)
}
############ old code #############
# maximum and minimum Jost diversity of all possible combinations of m_comb plots
# TODO old code, need to re-write
getCombPlotPrior.JostDiver <- function(t.community.matrix, m_comb) {
plotsComb <- combn(rownames(t.community.matrix), m_comb)
d.comb <- matrix(0,nrow=9,ncol=ncol(plotsComb))
rownames(d.comb) <- paste("$^",qs,"D_\\",levels,"$", sep="") #$^0D_\\gamma$
for (i in 1:ncol(plotsComb)) {
plots <- plotsComb[,i]
matrixName <- paste(plots, collapse=" ")
plotId <- which(rownames(t.community.matrix) %in% plots)
if (length(plotId) != m_comb)
stop(paste("Cannot find plot index from community matrix", plots))
diversity.table <- ComMA::diversityTable(t.community.matrix[plotId,])
for (j in 1:9) {
d.comb[j,i] <- unlist(diversity.table)[j]
}
}
maxD <- matrix(0,nrow=9,ncol=(nrow(t.community.matrix)+1))
rownames(maxD) <- paste("$^",qs,"D_\\",levels,"$", sep="") #$^0D_\\gamma$
colnames(maxD) <- c(rownames(t.community.matrix), "Maximum")
minD <- matrix(0,nrow=9,ncol=(nrow(t.community.matrix)+1))
rownames(minD) <- paste("$^",qs,"D_\\",levels,"$", sep="") #$^0D_\\gamma$
colnames(minD) <- c(rownames(t.community.matrix), "Minimum")
for (j in 1:9) {
maxD[j,ncol(maxD)] <- max(d.comb[j,])
maxId <- which(d.comb[j,] == max(d.comb[j,]))
plots <- plotsComb[,maxId]
for (p in plots) {
plotId <- which(colnames(maxD) %in% p)
maxD[j,plotId] <- maxD[j,plotId] + 1
}
# convert frequency to ratio, last col is max value
maxD[j,-ncol(maxD)] <- maxD[j,-ncol(maxD)]/max(maxD[j,-ncol(maxD)])
minD[j,ncol(minD)] <- min(d.comb[j,])
minId <- which(d.comb[j,] == min(d.comb[j,]))
plots <- plotsComb[,minId]
for (p in plots) {
plotId <- which(colnames(minD) %in% p)
minD[j,plotId] <- minD[j,plotId] + 1
}
# convert frequency to ratio, last col is min value
minD[j,-ncol(minD)] <- minD[j,-ncol(minD)]/max(minD[j,-ncol(minD)])
print(paste("max", rownames(d.comb)[j], "index =", maxId, "; min index =", minId))
}
# maxD[,ncol(maxD)] <- formatC(signif(maxD[,ncol(maxD)],digits=5), digits=5,format="fg", flag="#")
# minD[,ncol(minD)] <- formatC(signif(minD[,ncol(minD)],digits=5), digits=5,format="fg", flag="#")
# maxD[maxD==0] <- ""
# minD[minD==0] <- ""
print(maxD)
print(minD)
list("maxD" = maxD, "minD" = minD)
}
# give same rank to the same remaining
# TODO in development
getPlotPrior.JostDiver2 <- function(t.community.matrix, lev, q){
tmpCM <- t.community.matrix[order(rownames(t.community.matrix)),]
ranks<-c()
removedSites <- c()
maxRemainedDiversities<-c()
while(nrow(tmpCM) > 1) {
divs <- c()
for (siteId in 1:nrow(tmpCM)) {
tmpDiv <- d(tmpCM[-siteId,], lev, q)
divs <- c(divs, tmpDiv)
print(paste("siteId = ", siteId, ", tmpDiv = ", tmpDiv, sep=""))
}
maxRemainedDiv = max(divs)
removedSiteId = which(divs==maxRemainedDiv)
if(maxRemainedDiv < 0 | length(removedSiteId) < 1)
stop(paste("incorrect max remained ", lev, "(", q, ") = ", maxRemainedDiv, ", when sites = ", nrow(tmpCM), sep=""))
print(paste("remove site ", rownames(tmpCM)[removedSiteId], ", max remained ", lev, "(", q, ") = ", maxRemainedDiv, sep=""))
ranks<-c(ranks, rep(nrow(tmpCM), length(removedSiteId)))
removedSites <- c(removedSites, rownames(tmpCM)[removedSiteId])
maxRemainedDiversities <- c(maxRemainedDiversities, rep(maxRemainedDiv, length(removedSiteId)))
tmpCM <- tmpCM[-removedSiteId,]
}
if (nrow(tmpCM) == 1) {
ranks<-c(ranks, 1)
removedSites <- c(removedSites, rownames(tmpCM)[1]) # the last
maxRemainedDiversities <- c(maxRemainedDiversities, 0)
}
if(length(ranks) != length(maxRemainedDiversities))
stop(paste("length of maxRemainedDiversities ", length(maxRemainedDiversities), " - 1 != sites ", length(ranks), sep=""))
# 1st removed (biggest rank) div.df$rank is the least important
div.df <- data.frame(row.names=removedSites, rank=ranks, diversity=maxRemainedDiversities)
div.df <- div.df[order(rownames(div.df)),]
return(div.df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.