#' Creates bar graph of relative taxonomic abundance from SummarizedExperiment object.
#' @export
plot_bar_graph <- function(ExpObj = NULL, glomby = NULL, absolute = FALSE, samplesToKeep = NULL, featuresToKeep = NULL, groupby = NULL, label_samples = FALSE, colourby = NULL, subsetby = NULL, applyfilters = NULL, featcutoff = NULL, GenomeCompletenessCutoff = NULL, PPM_normalize_to_bases_sequenced = FALSE, total_units = 100, threshold = 2, cat_pos = -2, cat_text_size = 3, legend_text_size = 4, border_color = "white", cdict = NULL, feature_cdict = NULL, addtit = NULL, grid = TRUE, ignoreunclassified = FALSE, class_to_ignore = "N_A", ...) {
require(reshape2)
require(Polychrome)
valid_vars <- c(groupby, subsetby)[which(!is.na(c(groupby, subsetby)))]
#Hardwire PctFromCtgscutoff, as this should never be used without filtering because of huge amounts of false positives when evaluating taxonomic information from unassembled reads. The use of classifying unassembled reads is deprecated in JAMS and the default is to NOT classify unassembled reads, so this is usually not an issue.
PctFromCtgscutoff <- c(70, 50)
transp <- TRUE
#Vet experiment object
obj <- ExpObjVetting(ExpObj = ExpObj, samplesToKeep = samplesToKeep, featuresToKeep = featuresToKeep, glomby = glomby, variables_to_fix = valid_vars, class_to_ignore = class_to_ignore)
if (!is.null(groupby)) {
if (!is.null(obj[[groupby]])) {
obj <- obj[,order(obj[[groupby]])]
} else {
flog.error(str_c(groupby, " is not a valid column of your metadata"))
groupby <- NULL
}
}
analysis <- metadata(obj)$analysis
if (!is.null(glomby)){
analysisname <- glomby
} else {
analysisname <- analysis
}
presetlist <- declare_filtering_presets(analysis = analysis, applyfilters = applyfilters, featcutoff = featcutoff, GenomeCompletenessCutoff = GenomeCompletenessCutoff, PctFromCtgscutoff = PctFromCtgscutoff)
if (!(is.null(subsetby))){
subset_points <- sort(unique(colData(obj)[, which(colnames(colData(obj)) == subsetby)]))
} else {
subset_points <- "none"
}
#Create list vector to hold plots
gvec <- list()
plotnum <- 1
if (absolute) {
bartitbase <- paste("Absolute abundances of", analysisname)
} else {
bartitbase <- paste("Relative abundances of", analysisname)
}
for (sp in 1:length(subset_points)){
if (!(is.null(subsetby))){
sp_samplesToKeep <- rownames(colData(obj))[which(colData(obj)[ , subsetby] == subset_points[sp])]
flog.info(paste("Plotting within", subset_points[sp]))
subsetname <- subset_points[sp]
bartit <- paste(bartitbase, "within", subset_points[sp])
} else {
sp_samplesToKeep <- rownames(colData(obj))
subsetname <- "no_sub"
bartit <- bartitbase
}
bartit <- paste(c(bartit, presetlist$filtermsg), collapse = "\n")
if (absolute) {
currobj <- obj
} else {
currobj <- filter_experiment(ExpObj = obj, featcutoff = presetlist$featcutoff, samplesToKeep = sp_samplesToKeep, featuresToKeep = featuresToKeep, asPPM = TRUE, PPM_normalize_to_bases_sequenced = PPM_normalize_to_bases_sequenced, GenomeCompletenessCutoff = presetlist$GenomeCompletenessCutoff, PctFromCtgscutoff = presetlist$PctFromCtgscutoff)
}
currpt <- as.data.frame(colData(currobj))
if (PPM_normalize_to_bases_sequenced == TRUE){
bartit <- paste(c(bartit, "Normalized to total number of bases sequenced in sample"), collapse = "\n")
} else if (!absolute) {
bartit <- paste(c(bartit, "Normalized to number of bases for analysis in sample"), collapse = "\n")
}
#Get counts matrix
countmat <- as.matrix(assays(currobj)$BaseCounts)
#Protect against rows with empty data
rowsToKeep <- which(rowSums(countmat) > 0 & rownames(countmat) != "")
countmat <- countmat[rowsToKeep, ]
if (ignoreunclassified == TRUE){
dunno <- c(paste(analysis, "none", sep = "_"), "LKT__d__Unclassified", "LKT__Unclassified")
rowsToKeep <- which(!(rownames(countmat) %in% dunno))
countmat <- countmat[rowsToKeep, ]
}
#Re-normalize to percentage
if (!absolute) {
for (colm in 1:ncol(countmat)){
countmat[ , colm] <- (countmat[ , colm] / sum(countmat[ , colm])) * total_units
}
}
if (threshold > 0) {
new_counts <- countmat[rowMeans(countmat) > threshold,, drop=FALSE ]
if (absolute) {
misc_counts <- colSums(countmat) - colSums(new_counts)
} else {
misc_counts <- total_units - colSums(new_counts)
}
new_counts <- rbind(new_counts, Misc_Low_Abundance = misc_counts)
} else {
new_counts <- countmat
}
mdf <- reshape2::melt(new_counts)
colnames(mdf) <- c(analysisname, "Sample", "Abundance")
fill_sum <- aggregate(as.formula(paste("Abundance ~", analysisname)), mdf, sum)
mdf[ , analysisname] <- factor(mdf[ , analysisname], levels = fill_sum[order(-fill_sum$Abundance), analysisname])
mdf$Sample <- factor(mdf$Sample, levels = colnames(new_counts))
p <- ggplot(mdf, aes_string(x = "Sample", y = "Abundance", fill = analysisname))
p <- p + geom_bar(stat = "identity", position = "stack", color = border_color, size = 0)
if(!is.null(addtit)) {
bartit <- str_c(bartit, " ", addtit)
}
p <- p + ggtitle(bartit)
p <- p + theme(axis.title.x = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank())
p <- p + theme(plot.margin = unit(c(0, 1, 1, 0.5), "lines"))
p <- p + theme(strip.background=element_rect(fill = "white", colour = NA))
p <- p + theme(panel.background = element_rect(fill = "white", colour = "white"))
p <- p + theme(plot.margin = margin(c(1, 1, 3, 1), unit = "lines"))
p <- p + theme(axis.line = element_blank())
p <- p + theme(legend.position = "bottom")
p <- p + theme(legend.text = element_text(colour = "black", size = legend_text_size, face="italic"))
p <- p + theme(legend.title = element_text(colour = "black", size = legend_text_size))
p <- p + theme(title = element_text(size = 8))
if (label_samples) {
p <- p + theme(axis.text.x = element_text(angle=90))
}
if(!(is.null(cdict))) {
ct <- cdict[[colourby]]
groupcols <- setNames(as.character(ct$Colour), as.character(ct$Name))
p <- p + scale_color_manual(values = groupcols)
}
if (is.null(feature_cdict)){
feature_cdict <- createPalette(nrow(new_counts), c("#ff0000", "#00ff00", "#0000ff"))
names(feature_cdict) <- rownames(new_counts)
}
feature_cdict <- feature_cdict[rownames(new_counts)] # remove entries not on graph
p <- p + scale_fill_manual(values = feature_cdict)
if (!is.null(groupby)) {
cat_values <- as.factor(currpt[ , groupby])
for (level in levels(cat_values)) {
startx <- min(which(cat_values==level))
endx <- max(which(cat_values==level))
p <- p + annotate("segment", x=startx, xend=endx, y = cat_pos, yend = cat_pos)
p <- p + annotate("text", label = level, x = startx + (endx - startx) / 2, y = 2 * cat_pos, cex = cat_text_size)
}
}
return(p)
gvec[[plotnum]] <- p
plotnum <- plotnum + 1
}
return(gvec)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.