#' Plot stackbars from PAC
#'
#' \code{PAC_stackbar} Generates a graph that stack classes up to 100% or on total reads.
#'
#' Given a PAC object the function will attempt to extract group information
#' from Pheno, class information from Anno, and summarize this over the data
#' in Counts or norm to generate a stacked (percent or total counts) bar.
#'
#' @family PAC analysis
#'
#' @seealso \url{https://github.com/Danis102} for updates on the current
#' package.
#'
#' @param PAC PAC-list object.
#'
#' @param anno_target List with:
#' 1st object being character vector of target
#' column(s) in Anno, 2nd object being a character
#' vector of the target biotype(s) in the target column
#' (1st object). Important, the 2nd object is order
#' sensitive, meaning that categories will appear in
#' the same order in the stacked bargraph.
#' (default=NULL)
#'
#'
#' @param pheno_target List with:
#' 1st object being character vector of target
#' column(s) in Pheno, 2nd object being a character
#' vector of the target group(s) in the target column
#' (1st object). Important, the 2nd object is order
#' sensitive, meaning that categories will appear in
#' the same order in the stacked bargraph.
#' (default=NULL)
#'
#' @param color Character vector with rgb colour hex codes in the same length
#' as the number of biotypes. For example see:
#' https://www.coolgenerator.com/rgb-color-generator. color=NULL will
#' generate the default color scheme.
#'
#' @param width Integer adjusting the width of the bars (default=1). Works best
#' with few or singular bars.
#'
#' @param no_anno Logical. If TRUE sequences with no annotations will be
#' plotted, while FALSE will skip sequences with 'no_anno' in the column
#' defined by anno_target (default=TRUE). Note that you can always use
#' \code{PAC_filter} to remove anno_targets from PAC prior to plotting.
#'
#' @param total Logical, whether the total counts should be added at the bottom
#' of each graph (default=TRUE).
#'
#' @param summary Character vector defining whether to stack individual samples
#' in each stack, or using a group mean of samples sharing the same names in
#' the specified pheno_target. If summary="samples" individual samples will be
#' plotted, if summary="pheno" means of pheno_target will be plotted, while if
#' summary= all" a mean of all samples will be plotted. (Default="samples").
#'
#' @param norm Character vector defining what data to base analysis on, e.g
#' "counts" for raw counts (default), "cpm" for normalized data or any other
#' column in norm section of PAC object.
#'
#' @param plot Character vector defining how data is to be presented in stack
#' bar, where default is "percent", showing the percentage of the anno_target
#' of all reads. Other option is "total", where the total amount of
#' counts/normalized reads are stacked in one stack per anno_target.
#'
#' @return A stacked bar plot generated by ggplot2
#'
#' @examples
#'
#' load(system.file("extdata", "drosophila_sRNA_pac_filt_anno.Rdata",
#' package = "seqpac", mustWork = TRUE))
#'
#' ##########################################
#' ### Stacked bars in seqpac
#' ##----------------------------------------
#'
#' # Choose an anno_target and plot samples (summary="samples")
#' PAC_stackbar(pac, anno_target=list("Biotypes_mis0"))
#'
#' # 'no_anno' and 'other' will always end on top not matter the order
#' ord_bio <- as.character(sort(unique(anno(pac)$Biotypes_mis3)))
#' p1 <- PAC_stackbar(pac, anno_target=list("Biotypes_mis0", ord_bio))
#' p2 <- PAC_stackbar(pac, anno_target=list("Biotypes_mis0", rev(ord_bio)))
#' cowplot::plot_grid(plotlist=list(p1, p2))
#' # (Hint: if you don't want them to appear on top, rename them)
#'
#' # Reorder samples by pheno_targets
#' PAC_stackbar(pac, pheno_target=list("batch"), summary="samples",
#' anno_target=list("Biotypes_mis0"))
#'
#' # Summarized over pheno_target
#' # (as default PAC_stackbar orders by pheno_target but plots all samples,
#' # unless summary="pheno")
#' PAC_stackbar(pac, anno_target=list("Biotypes_mis0"),
#' summary="pheno", pheno_target=list("stage"))
#'
#' # Summarized over a grand mean of all samples
#' PAC_stackbar(pac, anno_target=list("Biotypes_mis0"), summary="all")
#'
#' @export
PAC_stackbar <- function(PAC, anno_target=NULL, pheno_target=NULL, color=NULL,
width=1.0, no_anno=TRUE, total=TRUE,
summary="samples", norm="counts", plot="percent"){
## Check S4
if(isS4(PAC)){
tp <- "S4"
PAC <- as(PAC, "list")
}else{
tp <- "S3"
}
stopifnot(PAC_check(PAC))
Sample <- Value <- Category <- tot_counts <- NULL
## Prepare targets
if(!is.null(pheno_target)){
if(length(pheno_target)==1){
if(is(PAC$Pheno[,pheno_target[[1]]], "factor")){
pheno_target[[2]] <- levels(PAC$Pheno[,pheno_target[[1]]])
}else{
pheno_target[[2]] <- as.character(unique(PAC$Pheno[,pheno_target[[1]]]))
}
}
}else{
PAC$Pheno$eXtra_Col <- rownames(PAC$Pheno)
pheno_target <- list(NA)
pheno_target[[1]] <- "eXtra_Col"
pheno_target[[2]] <- PAC$Pheno$eXtra_Col
}
if(!is.null(anno_target)){
if(length(anno_target)==1){
if(is(PAC$Anno[,anno_target[[1]]], "factor")){
anno_target[[2]] <- levels(PAC$Anno[,anno_target[[1]]])
}else{
anno_target[[2]] <- as.character(unique(PAC$Anno[,anno_target[[1]]]))
}
}
}
## Subset
PAC_sub <- PAC_filter(PAC, subset_only=TRUE,
pheno_target=pheno_target, anno_target=anno_target)
anno <- PAC_sub$Anno
pheno <- PAC_sub$Pheno
data <- PAC_sub$Counts
if(norm == "counts"){
data <- PAC_sub$Counts
}
else{
data <- PAC_sub$norm[norm][[1]]
}
### Removing no_Anno
if(no_anno==FALSE){
data <- data[!as.character(anno[, anno_target[[1]]]) == "no_anno",]
anno <- anno[!as.character(anno[, anno_target[[1]]]) == "no_anno",]
}
### Totaling all counts per sample and sums each per biotype
if(summary=="all"){
tot_cnts <- mean(colSums(data))
names(tot_cnts) <- "all"
data_shrt <- stats::aggregate(data, list(anno[, anno_target[[1]]]), "sum")
data_shrt <- data.frame(Group.1=data_shrt[,1],
all= rowMeans(data_shrt[,-1]))
}else{
data_shrt <- stats::aggregate(data, list(anno[, anno_target[[1]]]), "sum")
if(summary %in% c("sample","samples")){
tot_cnts <- colSums(data)
}
if(summary=="pheno"){
x <- split(pheno, pheno[, pheno_target[[1]]])
data_pheno_shrt <- as.data.frame(data_shrt$Group.1)
for(i in seq.int(length(x))){
names <- as.data.frame(x[i])
names <- rownames(names)
data_pheno <- data_shrt[, colnames(data_shrt) %in% names,drop=FALSE]
data_pheno <- rowMeans(data_pheno)
data_pheno_shrt <- as.data.frame(cbind(data_pheno_shrt, data_pheno))}
colnames(data_pheno_shrt) <- c("Group.1", names(x))
data_shrt <- data_pheno_shrt
tot_cnts <- colSums(data_shrt[,-1,drop=FALSE])
}
}
data_shrt_total <- data_shrt
data_shrt_perc <- data_shrt
data_shrt_perc[,-1] <- "NA"
for (i in seq.int(length(tot_cnts))){
data_shrt_perc[,1+i] <- data_shrt[,1+i]/tot_cnts[i]
}
data_long_perc <- reshape2::melt(data_shrt_perc, id.vars = "Group.1")
data_long_tot <- reshape2::melt(data_shrt_total, id.vars = "Group.1")
colnames(data_long_perc) <- c("Category", "Sample", "Value")
colnames(data_long_tot) <- c("Category", "Sample", "Value")
data_long_perc$Value <- data_long_perc$Value * 100
if(plot=="total"){
data_long_perc<-data_long_tot
}
## Fix order
# Anno
bio <- anno_target[[2]]
extra <- which(bio %in% c("no_anno", "other"))
if(length(extra)>0){
bio <- c(sort(bio[extra]), bio[-extra])
}
data_long_perc$Category <- factor(as.character(data_long_perc$Category),
levels=bio)
# Pheno
if(is.null(pheno_target)){
data_long_perc$Sample <- factor(as.character(data_long_perc$Sample),
levels=as.character(
unique(data_long_perc$Sample)))
}else{
if(summary %in% c("sample","samples")){
stopifnot(any(!rownames(PAC$Pheno) %in% as.character(
data_long_perc$Sample))==FALSE)
sampl_ord <- do.call("c", split(rownames(PAC$Pheno),
factor(PAC$Pheno[,pheno_target[[1]]],
levels=pheno_target[[2]])))
data_long_perc$Sample <- factor(as.character(data_long_perc$Sample),
levels=as.character(sampl_ord))
data_long_perc <- data_long_perc[!is.na(data_long_perc$Sample),,drop=FALSE]
}
}
## Add total counts
tot_cnts <- tot_cnts[match(names(tot_cnts), unique(data_long_perc$Sample))]
data_long_perc$tot_counts <- ""
if(total==TRUE){
trg_1st <- levels(data_long_perc$Category)[
length(levels(data_long_perc$Category))]
data_long_perc$tot_counts[data_long_perc$Category == trg_1st] <- tot_cnts
}
### Plot data
if(is.null(color)){
n_extra <- length(extra)
colfunc <- grDevices::colorRampPalette(c("#094A6B", "#EBEBA6", "#9D0014"))
if(n_extra==1){color <- c(colfunc(length(bio)-1), "#6E6E6E")}
if(n_extra==2){color <- c(colfunc(length(bio)-2), "#6E6E6E", "#BCBCBD")}
if(n_extra==0){color <- colfunc(length(bio))}
}else{
color <- rev(color)
}
p1 <- ggplot2::ggplot(data_long_perc,
ggplot2::aes(x=Sample, y=Value, fill=Category)) +
ggplot2::geom_bar(stat="identity", col="black", width=width, linewidth=0.3) +
ggplot2::geom_text(ggplot2::aes(label=tot_counts), nudge_y=-3, nudge_x=0,
angle = 0, color="black", size=4)+
ggplot2::geom_hline(yintercept=0, col="black")+
{ if(plot=="percent")ggplot2::coord_cartesian(ylim = c(-2, 100)) }+
{ if(plot=="percent") ggplot2::ylab("Percent of total reads")} +
{if(plot=="total") ggplot2::ylab("Total reads")} +
ggplot2::geom_hline(yintercept=100, col="black")+
ggplot2::scale_fill_manual(values=rev(color))+
ggplot2::theme_classic()+
ggplot2::theme(
axis.ticks.length.y=ggplot2::unit(.25, "cm"),
plot.caption = ggplot2::element_text(size=12, face= "bold"),
axis.title.y = ggplot2::element_text(size=16, face= "bold"),
axis.line=ggplot2::element_blank(),
axis.title.x = ggplot2::element_blank(),
axis.text = ggplot2::element_text(size=12),
axis.text.x = ggplot2::element_text(angle=45, hjust=1),
panel.background = ggplot2::element_blank())
return(p1)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.