#' Plot stacked abundace barplots
#'
#' @param dataset MicroVis dataset. Defaults to the active dataset
#' @param relative Whether to plot relative abundance. Defaults to TRUE
#' @param factor Factor to group samples by
#' @param stratify Whether to stratify the groups. Defaults to FALSE
#' @param bySample Whether to show a bar for each sample instead of aggregating
#' samples by groups. Defaults to FALSE
#' @param allSamples Whether to plot a single graph for all the samples in the
#' dataset. Defaults to FALSE
#' @param unfiltered Whether to use data before or after filtering out features.
#' Defaults to TRUE (uses data before filtering)
#' @param label_unknown Whether to group unknown taxa separate. Defaults to TRUE
#' @param rank Rank at which to use features
#' @param top Number of features to show based on greatest relative abundance.
#' Defaults to top 15 features by relative abundance
#' @param top_by Select the top features either by the max, min, or mean relative
#' abundance in the groups, or the summed relative abundance across all groups.
#' Defaults to mean (in other words, for each feature its mean
#' relative abundance values across the samples will be used to determine
#' whether it is a top feature).
#' @param ftlist (Optional) List of features to consider. Defaults to all features
#' at the rank
#' @param plotSigs Plot only the significant features? Defaults to FALSE
#' @param alpha Significance threshold for selecting significant features. Defaults
#' to 0.05
#' @param separateLegend Whether to separate the legend from the plot. Defaults
#' to FALSE
#' @param paired_ids Plot samples paired by "factor", with each pair having
#' a unique combination of IDs determined by this variable (as a vector)
#'
#' @return Stacked bar plot of feature abundances of selected features with the
#' other features lumped into an "Other" category
#' @export
#'
plotStackedBars <- function(dataset=NULL, relative=T,
factor=NULL, stratify=F,
bySample=F, allSamples=F, paired_ids = c(),
unfiltered=T,label_unknown=T,rank=NA,
top=15, top_by='mean',
ftlist=c(),plotSigs=F,alpha=0.05,
separateLegend=F) {
if(is.null(dataset)) dataset <- get('active_dataset',envir = mvEnv)
if(is.null(dataset$name)) dataset_name <- 'active_dataset'
else dataset_name <- dataset$name
taxaranks <- get('taxaRanks',envir = mvEnv)
if(unfiltered) tempds <- clearProcessing(dataset, temp = T, silent = T)
else tempds <- clearNormalization(dataset, temp = T, silent = T)
if(!(rank %in% intersect(taxaranks,names(tempds$data$proc)))) {
rank <- tempds$data$proc$active_rank
} else tempds$data$proc$active_rank <- rank
if(label_unknown) tempds <- filterNAs(tempds,
ranks = c(rank),
temp=T, silent = T)
data <- mvmelt(tempds)
factor <- dataset$active_factor
mdcolnum <- ncol(tempds$metadata)
fts <- colnames(data[(mdcolnum+1):ncol(data)])
fts <- fts[!(fts %in% c('Other','Unknown'))]
paired_ids <- paired_ids[paired_ids %in% colnames(tempds$metadata)]
if(length(ftlist)) fig_name <- paste0('selected_',rank)
else fig_name <- rank
if(plotSigs) {
sigfts <- listsigs(dataset=dataset,
factor=factor,
ranks=rank,
alpha=alpha,
silent=T,
dataset_name=dataset_name)
if(!length(sigfts)) message('\nNo significant features were found\n')
else fig_name <- paste0('sig-alpha_',alpha,fig_name)
ftlist <- c(ftlist,sigfts)
}
# If pairing samples, this step should occur after getting a list of
# significant features, but before manipulating the data for representation
# (ie aggregating less common features into the "Other" group and
# determining relative abundance).
if(length(paired_ids)) {
pairedSamples <- T
paired_samples <- getSamples(dataset, id_cols = paired_ids, complete = factor)$sample
data.paired <- data[data$sample %in% paired_samples,]
cat('\n',nrow(data.paired),'out of',nrow(data),'samples form complete pairs\n')
data <- data.paired
id <- paste0(paired_ids,collapse = '_')
data[[id]] <- apply(data[paired_ids], 1, paste, collapse = '_')
} else pairedSamples <- F
ftlist <- ftlist[ftlist %in% fts]
if(length(ftlist)) {
if(length(ftlist)>1) {
fts <- ftlist
} else {
data$Other <- rowSums(data[fts[!(fts %in% ftlist)]])
data[fts[!(fts %in% ftlist)]] <- NULL
fts <- c(ftlist,'Other','Unknown')
}
} else if(label_unknown) fts <- c(fts,'Unknown')
if(relative) {
data[fts] <- data.frame(t(apply(data[fts], 1, function(x) x/sum(x))))
abundance_type <- 'Relative Abundance'
} else {
abundance_type <- 'Absolute Abundance'
}
if(bySample) {
data$sample <- as.character(data$sample)
if(!pairedSamples) id <- 'sample'
fig_name <- paste0(fig_name,'_bySample')
} else if(allSamples) {
data <- data.frame(t(colMeans(data[fts])))
colnames(data) <- fts
abundance_type <- paste0('Mean ',abundance_type)
} else {
if(stratify) {
facet <- select.list(names(dataset$factors)[!(names(dataset$factors) %in% factor)],
graphics = T)
if(is.null(facet)) stratify <- F
else {
# compareby <- paste(factor,facet,sep = '+')
data <- data[c(factor,facet,fts)]
fig_name <- paste0(fig_name,'_stratified')
}
} else {
data <- data[c(factor,fts)]
}
aggformula <- formula(paste('. ~',factor))
# data <- aggregate(aggformula, data, function(x) sum(x))
agglist <- list()
for(ft in fts) agglist[[ft]] <- aggregate(aggformula,data[c(factor,ft)],
function(x) mean(x,na.rm=T))
data <- purrr::reduce(agglist,merge)
abundance_type <- paste0('Mean ',abundance_type)
}
if(top<length(fts)) {
if(allSamples | !(top_by %in% c('max','min','mean','sum'))) top_by <- 'mean'
ftstats <- data.frame(max=apply(data[fts],2,function(x) max(x)),
min=apply(data[fts],2,function(x) min(x)),
mean=apply(data[fts],2,function(x) mean(x)),
sum=apply(data[fts],2,function(x) sum(x)))
ftstats <- ftstats[!(rownames(ftstats) %in% c('Other','Unknown')),]
# If unknowns are to be included separately, make sure they don't count
# towards the number of named features shown
fts <- fts[!(fts %in% 'Unknown')]
if(top_by=='max') low_abun <- rownames(dplyr::slice_min(ftstats, order_by=max, n=(length(fts)-top)))
if(top_by=='min') low_abun <- rownames(dplyr::slice_min(ftstats, order_by=min, n=(length(fts)-top)))
if(top_by=='mean') low_abun <- rownames(dplyr::slice_min(ftstats, order_by=mean, n=(length(fts)-top)))
if(top_by=='sum') low_abun <- rownames(dplyr::slice_min(ftstats, order_by=sum, n=(length(fts)-top)))
# Add the unknown feature back to the feature list (fts) if it is to be labeled
# separately
if(label_unknown) fts <- c(fts,'Unknown')
if(is.null(data$Other)) data$Other <- rep(0,nrow(data))
data$Other <- data$Other + rowSums(data[low_abun])
data[low_abun] <- NULL
fts <- c(fts[!(fts %in% low_abun)],'Other','Unknown')
fig_name <- paste0('top',top,fig_name)
}
data_pivoted <- data %>% tidyr::pivot_longer(all_of(fts),
names_to=rank,
values_to=abundance_type)
namedfts <- unique(data_pivoted[[rank]][!(data_pivoted[[rank]] %in% c('Other','Unknown'))])
data_pivoted[[rank]] <- factor(data_pivoted[[rank]],
levels=c('Unknown','Other',namedfts))
if(bySample) {
p <- ggplot(data_pivoted, aes(x=as.numeric(factor(.data[[id]])), y=.data[[abundance_type]]))+
geom_area(aes(fill=.data[[rank]]))+
scale_x_continuous(id, labels = unique(data_pivoted[[id]]),
breaks = unique(as.numeric(factor(data_pivoted[[id]]))))+
ggpubr::theme_pubr()+
labs(fill=capitalize(rank))+
facet_wrap(facets = factor, scales = 'free_x',ncol = 1)+
theme(axis.title = element_text(size=25),
axis.text = element_text(size=22),
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.position = 'right',
legend.title = element_text(size = 22),
legend.text = element_text(size=15),
legend.key.size = unit(1,'cm'),
strip.text = element_text(size = 18))
if(pairedSamples) {
p <- p+theme(axis.title.x = element_blank(),
axis.text.x = element_text(angle = 60, vjust = 0.5, size=15),
axis.ticks.x = element_line())
separateLegend <- T
}
} else if(allSamples) {
p <- ggpubr::ggbarplot(data_pivoted,y=abundance_type,
fill=tempds$data$proc$active_rank,color='white')+
labs(fill=capitalize(rank),
x=dataset$factors[[factor]]$name_text)+
theme(axis.title = element_text(size=25),
axis.text = element_text(size=22),
legend.position = 'right',
legend.title = element_text(size = 22),
legend.text = element_text(size=15),
legend.key.size = unit(1,'cm'))
} else {
p <- ggpubr::ggbarplot(data_pivoted,x=factor,y=abundance_type,
fill=tempds$data$proc$active_rank,color='white')+
labs(fill=capitalize(rank),
x=dataset$factors[[factor]]$name_text)+
theme(axis.title = element_text(size=25),
axis.text = element_text(size=22),
legend.position = 'right',
legend.title = element_text(size = 22),
legend.text = element_text(size=15),
legend.key.size = unit(1,'cm'))
}
if(stratify) {
p <- facet(p,facet.by=facet)+
theme(strip.text = element_text(size = 18))
}
if(separateLegend) {
if(!exists('p_legend',inherits = F)) p_legend <- as_ggplot(get_legend(p))
p <- p+theme(legend.position = 'none')
fig_name <- paste0(fig_name,'_nolegend')
legend_output_location <- paste0(dataset$results_path,'/Results_',Sys.Date(),'/Stacked Bar Graphs/')
show(p_legend)
}
show(p)
save_directory <- saveResults(dataset,foldername = 'Stacked Bar Graphs',
filename = fig_name,
factors = dataset$factors,
active_factor = dataset$active_factor,
width = 12, height = 8,
stat_results = list(stats=data_pivoted),
suffix = paste0('_',abundance_type),
verbose = F)
if(!is.null(save_directory)) {
if(exists('p_legend')) {
ggsave(save_directory,
filename=paste0('Legend_top',top,'.png'),
plot=p_legend,
device = 'png',
width = 20,
height = 8,
dpi=600)
}
cat('\nFigure(s) saved to:\n ',save_directory,'\n')
}
activate(dataset)
if(allSamples) {
total_props <- data_pivoted[data_pivoted[[rank]] %in% namedfts,]
colnames(total_props) <- c('Group.1','x')
} else if(bySample) {
total_props <- aggregate(data_pivoted[[abundance_type]][data_pivoted[[rank]] %in% namedfts],
by=list(data_pivoted[[rank]][data_pivoted[[rank]] %in% namedfts]),
mean)
} else {
total_props <- aggregate(data_pivoted[[abundance_type]][data_pivoted[[rank]] %in% namedfts],
by=list(data_pivoted[[factor]][data_pivoted[[rank]] %in% namedfts]),
sum)
}
cat('\nTop',top,'features make up:\n')
if(allSamples | bySample) cat('',paste0(signif(100*sum(total_props$x),3),'%'),
'across all samples\n')
if(!bySample & !allSamples) for(grp in total_props$Group.1) {
cat('',paste0(signif(100*mean(total_props$x[total_props$Group.1==grp]),
3),'%'),'in',grp,'\n')
}
if('Unknown' %in% data_pivoted[[rank]]) {
cat('\n',
paste0(signif((100*mean(data_pivoted[[abundance_type]][data_pivoted[[rank]]=='Unknown'])),
3),'%'),'of',rank,'are unknown')
}
cat('\n\n')
return(p)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.