Nothing
#' @title
#' Create \code{trans_abund} object for plotting taxonomic abundance.
#'
#' @description
#' This class is a wrapper for the taxonomic abundance transformations and visualization.
#' The converted data style is the long-format for \code{ggplot2} plot.
#' The plotting methods include bar plot, boxplot, heatmap, pie chart and line chart.
#'
#' @export
trans_abund <- R6Class(classname = "trans_abund",
public = list(
#' @param dataset default NULL; the object of \code{\link{microtable}} class.
#' @param taxrank default "Phylum"; taxonomic rank.
#' @param show default 0; the relative abundance threshold for filtering the taxa with low abundance.
#' @param ntaxa default 10; how many taxa are selected to show. Taxa are ordered by abundance from high to low.
#' This parameter does not conflict with the parameter \code{show}. Both can be used. \code{ntaxa = NULL} means it is unavailable.
#' @param groupmean default NULL; calculate mean abundance for each group. Select a column name in \code{microtable$sample_table}.
#' @param group_morestats default FALSE; only available when \code{groupmean} parameter is provided;
#' Whether output more statistics for each group, including min, max, median and quantile;
#' Thereinto, quantile25 and quantile75 denote 25\% and 75\% quantiles, respectively.
#' @param delete_taxonomy_lineage default TRUE; whether delete the taxonomy lineage in front of the target level.
#' @param delete_taxonomy_prefix default TRUE; whether delete the prefix of taxonomy, such as "g__".
#' @param prefix default NULL; character string; available when \code{delete_taxonomy_prefix = T};
#' default NULL reprensents using the "letter+__", e.g. "k__" for Phylum level;
#' Please provide the customized prefix when it is not standard, otherwise the program can not correctly recognize it.
#' @param use_percentage default TRUE; show the abundance percentage.
#' @param input_taxaname default NULL; character vector; input taxa names for selecting some taxa.
#' @param high_level default NULL; a taxonomic rank, such as "Phylum", used to add the taxonomic information of higher level.
#' It is necessary for the legend with nested taxonomic levels in the bar plot.
#' @param high_level_fix_nsub default NULL; an integer, used to fix the number of selected abundant taxa in each taxon from higher taxonomic level.
#' If the total number under one taxon of higher level is less than the high_level_fix_nsub, the total number will be used.
#' When \code{high_level_fix_nsub} is provided, the taxa number of higher level is calculated as: \code{ceiling(ntaxa/high_level_fix_nsub)}.
#' Note that \code{ntaxa} means either the parameter \code{ntaxa} or the taxonomic number obtained by filtering according to the \code{show} parameter.
#' @return \code{data_abund} stored in the object. The column 'all_mean_abund' reprensents mean relative abundance across all the samples.
#' So the values in one taxon are all same across all the samples.
#' If the sum of column 'Abundance' in one sample is larger than 1, the 'Abundance', 'SD' and 'SE' has been multiplied by 100.
#' @examples
#' \donttest{
#' data(dataset)
#' t1 <- trans_abund$new(dataset = dataset, taxrank = "Phylum", ntaxa = 10)
#' }
initialize = function(
dataset = NULL,
taxrank = "Phylum",
show = 0,
ntaxa = 10,
groupmean = NULL,
group_morestats = FALSE,
delete_taxonomy_lineage = TRUE,
delete_taxonomy_prefix = TRUE,
prefix = NULL,
use_percentage = TRUE,
input_taxaname = NULL,
high_level = NULL,
high_level_fix_nsub = NULL
){
check_microtable(dataset)
check_taxa_abund(dataset)
sample_table <- dataset$sample_table
if("Sample" %in% colnames(sample_table)){
colnames(sample_table)[colnames(sample_table) == "Sample"] <- "Sample_replace"
}
if(! taxrank %in% names(dataset$taxa_abund)){
stop("The input taxrank: ", taxrank, " is not found! Please check it!")
}
abund_data <- dataset$taxa_abund[[taxrank]] %>%
rownames_to_column(var = "Taxonomy") %>%
reshape2::melt(id.vars = "Taxonomy") %>%
`colnames<-`(c("Taxonomy", "Sample", "Abundance"))
check_nd <- grepl("__$", abund_data$Taxonomy)
if(any(check_nd)){
abund_data$Taxonomy[check_nd] %<>% paste0(., "unidentified")
}
if(delete_taxonomy_lineage | delete_taxonomy_prefix){
if(delete_taxonomy_prefix){
if(is.null(prefix)){
prefix <- ".__"
}
abund_data$Taxonomy %<>% gsub(prefix, "", .)
}
if(delete_taxonomy_lineage){
abund_data$Taxonomy %<>% gsub(".*\\|", "", .)
}
}
abund_data %<>% dplyr::group_by(!!! syms(c("Taxonomy", "Sample"))) %>%
dplyr::summarise(Abundance = sum(Abundance)) %>%
as.data.frame(stringsAsFactors = FALSE)
abund_data$Taxonomy %<>% as.character
mean_abund <- tapply(abund_data$Abundance, abund_data$Taxonomy, FUN = mean)
# add mean abundance for all samples
all_mean_abund <- data.frame(Taxonomy = names(mean_abund), all_mean_abund = mean_abund)
rownames(all_mean_abund) <- NULL
abund_data %<>% {suppressWarnings(dplyr::left_join(., rownames_to_column(sample_table), by = c("Sample" = "rowname")))}
if(!is.null(groupmean)){
message(groupmean, " column is used to calculate mean abundance ...")
abund_data <- microeco:::summarySE_inter(abund_data, measurevar = "Abundance", groupvars = c("Taxonomy", groupmean), more = group_morestats)
colnames(abund_data)[colnames(abund_data) == "Mean"] <- "Abundance"
colnames(abund_data)[colnames(abund_data) == groupmean] <- "Sample"
if(is.factor(sample_table[, groupmean])){
abund_data$Sample %<>% factor(., levels = levels(sample_table[, groupmean]))
}
}
abund_data <- dplyr::left_join(abund_data, all_mean_abund, by = c("Taxonomy" = "Taxonomy"))
if(!is.null(high_level)){
if(length(high_level) > 1){
warning("Input high_level has multiple elements! Only select the first one!")
high_level <- high_level[1]
}
message("Add higher taxonomic level into the table ...")
if(! high_level %in% colnames(dataset$tax_table)){
stop("Provided high_level must be a colname of input dataset$tax_table!")
}else{
extract_tax_table <- dataset$tax_table[, c(high_level, taxrank)] %>% unique
if(!delete_taxonomy_lineage){
stop("The delete_taxonomy_lineage should be FALSE when high_level is provided!")
}
if(delete_taxonomy_prefix){
extract_tax_table[, taxrank] %<>% gsub(prefix, "", .)
}
abund_data <- dplyr::left_join(abund_data, extract_tax_table, by = c("Taxonomy" = taxrank))
}
}
use_taxanames <- as.character(rev(names(sort(mean_abund))))
if(!is.null(ntaxa)){
ntaxa_theshold <- ntaxa_use <- ntaxa
}else{
ntaxa_theshold <- ntaxa_use <- sum(mean_abund > show)
}
if(ntaxa_use > sum(mean_abund > show)){
ntaxa_use <- sum(mean_abund > show)
}
use_taxanames %<>% .[!grepl("unidentified|unculture|Incertae.sedis", .)]
if(is.null(input_taxaname)){
if(is.null(high_level_fix_nsub)){
if(length(use_taxanames) > ntaxa_use){
use_taxanames %<>% .[1:ntaxa_use]
}
}else{
high_level_n <- ceiling(ntaxa_use/high_level_fix_nsub)
high_level_ordered_taxa <- abund_data[match(names(mean_abund), abund_data$Taxonomy), high_level] %>% unique %>% .[1:high_level_n]
use_taxanames <- lapply(high_level_ordered_taxa, function(x){
tmp <- abund_data[abund_data[, high_level] == x, ]
tmp <- names(mean_abund) %>% .[. %in% tmp$Taxonomy]
tmp[1:ifelse(length(tmp) < high_level_fix_nsub, length(tmp), high_level_fix_nsub)]
}) %>% unlist
}
}else{
if(!any(input_taxaname %in% use_taxanames)){
stop("The input_taxaname does not match to taxa names! Please check the input!")
}else{
use_taxanames <- input_taxaname[input_taxaname %in% use_taxanames]
}
}
if(!is.null(high_level)){
# sort the taxa in high levels according to abundance sum
tmp <- abund_data[abund_data$Taxonomy %in% use_taxanames, c(high_level, "Abundance")]
tmp <- tapply(tmp$Abundance, tmp[, high_level], FUN = sum)
data_taxanames_highlevel <- as.character(names(sort(tmp, decreasing = TRUE)))
self$data_taxanames_highlevel <- data_taxanames_highlevel
}
if(ntaxa_theshold < sum(mean_abund > show) | show == 0){
if(use_percentage == T){
abund_data$Abundance %<>% {. * 100}
if("SE" %in% colnames(abund_data)) abund_data$SE %<>% {. * 100}
if("SD" %in% colnames(abund_data)) abund_data$SD %<>% {. * 100}
ylabname <- "Relative abundance (%)"
}else{
ylabname <- "Relative abundance"
}
}else{
ylabname <- paste0("Relative abundance (", taxrank, " > ", show*100, "%)")
}
self$use_percentage <- use_percentage
self$ylabname <- ylabname
self$taxrank <- taxrank
self$data_abund <- abund_data
self$data_taxanames <- use_taxanames
self$high_level <- high_level
message('The transformed abundance data is stored in object$data_abund ...')
},
#' @description
#' Bar plot.
#'
#' @param color_values default \code{RColorBrewer::brewer.pal}(8, "Dark2"); colors palette for the bars.
#' @param bar_type default "full"; "full" or "notfull"; if \code{"full"}, total abundance are summed to 1 or 100 percentage.
#' @param others_color default "grey90"; the color for "others" taxa.
#' @param facet default NULL; a character vector for the facet; group column name of \code{sample_table}, such as, \code{"Group"};
#' If multiple facets are needed, please provide ordered names, such as \code{c("Group", "Type")}.
#' The latter should have a finer scale than the former one;
#' Please adjust the facet orders in the plot by assigning factors in \code{sample_table} before creating \code{trans_abund} object or
#' assigning factors in the \code{data_abund} table of \code{trans_abund} object.
#' When multiple facets are used, please first install package \code{ggh4x} using the command \code{install.packages("ggh4x")}.
#' @param order_x default NULL; vector; used to order the sample names in x axis; must be the samples vector, such as \code{c("S1", "S3", "S2")}.
#' @param x_axis_name NULL; a character string; a column name of sample_table in dataset; used to show the sample names in x axis.
#' @param barwidth default NULL; bar width, see \code{width} in \code{\link{geom_bar}}.
#' @param use_alluvium default FALSE; whether add alluvium plot. If \code{TRUE}, please first install \code{ggalluvial} package.
#' @param clustering default FALSE; whether order samples by the clustering.
#' @param clustering_plot default FALSE; whether add clustering plot.
#' If \code{clustering_plot = TRUE}, \code{clustering} will be also TRUE in any case for the clustering.
#' @param cluster_plot_width default 0.2, the dendrogram plot width; available when \code{clustering_plot = TRUE}.
#' @param facet_color default "grey95"; facet background color.
#' @param strip_text default 11; facet text size.
#' @param legend_text_italic default FALSE; whether use italic in legend.
#' @param xtext_angle default 0; number ranging from 0 to 90; used to adjust x axis text angle to reduce text overlap;
#' @param xtext_size default 10; x axis text size.
#' @param xtext_keep default TRUE; whether retain x text.
#' @param xtitle_keep default TRUE; whether retain x title.
#' @param ytitle_size default 17; y axis title size.
#' @param coord_flip default FALSE; whether flip cartesian coordinates so that horizontal becomes vertical, and vertical becomes horizontal.
#' @param ggnested default FALSE; whether use nested legend. Need \code{ggnested} package to be installed (https://github.com/gmteunisse/ggnested).
#' To make it available, please assign \code{high_level} parameter when creating the object.
#' @param high_level_add_other default FALSE; whether add 'Others' (all the unknown taxa) in each taxon of higher taxonomic level.
#' Only available when \code{ggnested = TRUE}.
#' @return ggplot2 object.
#' @examples
#' \donttest{
#' t1$plot_bar(facet = "Group", xtext_keep = FALSE)
#' }
plot_bar = function(
color_values = RColorBrewer::brewer.pal(8, "Dark2"),
bar_type = "full",
others_color = "grey90",
facet = NULL,
order_x = NULL,
x_axis_name = NULL,
barwidth = NULL,
use_alluvium = FALSE,
clustering = FALSE,
clustering_plot = FALSE,
cluster_plot_width = 0.2,
facet_color = "grey95",
strip_text = 11,
legend_text_italic = FALSE,
xtext_angle = 0,
xtext_size = 10,
xtext_keep = TRUE,
xtitle_keep = TRUE,
ytitle_size = 17,
coord_flip = FALSE,
ggnested = FALSE,
high_level_add_other = FALSE
){
plot_data <- self$data_abund
# try to filter useless columns
plot_data %<>% .[, ! colnames(.) %in% c("N", "SD", "SE", "Median", "Min", "Max", "quantile25", "quantile75", "all_mean_abund")]
use_taxanames <- self$data_taxanames
if(ggnested){
if(is.null(self$high_level)){
stop("The high_level is necessary when ggnested = TRUE! Please assign high_level parameter when creating the object!")
}
if(high_level_add_other){
plot_data$Taxonomy[!plot_data$Taxonomy %in% use_taxanames] <- "Others"
use_taxanames %<>% c(., "Others")
new_data <- plot_data %>% dplyr::group_by(!!! syms(c(self$high_level, "Taxonomy", "Sample"))) %>%
dplyr::summarise(Abundance = sum(Abundance)) %>%
as.data.frame(stringsAsFactors = FALSE)
plot_data_merge <- plot_data[, ! colnames(plot_data) %in% c(self$high_level, "Taxonomy", "Abundance"), drop = FALSE] %>% unique
plot_data <- dplyr::left_join(new_data, plot_data_merge, by = c("Sample" = "Sample"))
}
bar_type <- "notfull"
}
if(bar_type == "full"){
# make sure that taxonomy info are all in selected use_taxanames in case of special data
if(!all(plot_data$Taxonomy %in% use_taxanames)){
plot_data$Taxonomy[!plot_data$Taxonomy %in% use_taxanames] <- "Others"
new_data <- plot_data %>% dplyr::group_by(!!! syms(c("Taxonomy", "Sample"))) %>%
dplyr::summarise(Abundance = sum(Abundance)) %>%
as.data.frame(stringsAsFactors = FALSE)
plot_data_merge <- plot_data[, ! colnames(plot_data) %in% c("Taxonomy", "Abundance"), drop = FALSE] %>% unique
plot_data <- dplyr::left_join(new_data, plot_data_merge, by = c("Sample" = "Sample"))
plot_data$Taxonomy %<>% factor(., levels = rev(c(use_taxanames, "Others")))
}else{
plot_data$Taxonomy %<>% factor(., levels = rev(use_taxanames))
}
}else{
if(ggnested){
plot_data %<>% .[.[, self$high_level] %in% self$data_taxanames_highlevel, ]
plot_data[, self$high_level] %<>% factor(., levels = self$data_taxanames_highlevel)
}
plot_data %<>% {.[.$Taxonomy %in% use_taxanames, ]}
# two legend ordering types depending on ggnested
if(ggnested){
plot_data$Taxonomy %<>% factor(., levels = use_taxanames)
}else{
plot_data$Taxonomy %<>% factor(., levels = rev(use_taxanames))
}
}
# order x axis samples
plot_data <- private$adjust_axis_facet(
plot_data = plot_data,
x_axis_name = x_axis_name,
order_x = order_x
)
# arrange plot_data--Abundance according to the Taxonomy-group column factor-levels
plot_data <- plot_data[unlist(lapply(levels(plot_data$Taxonomy), function(x) which(plot_data$Taxonomy == x))),]
if(!ggnested){
if(any(grepl("Others", as.character(plot_data$Taxonomy)))){
bar_colors_use <- expand_colors(color_values, length(unique(plot_data$Taxonomy)) - 1)
bar_colors_use <- c(bar_colors_use, others_color)
}else{
bar_colors_use <- expand_colors(color_values, length(unique(plot_data$Taxonomy)))
}
}else{
# high_level determine the colors
bar_colors_use <- expand_colors(color_values, length(unique(plot_data[, self$high_level])))
}
if(clustering | clustering_plot){
data_clustering <- reshape2::dcast(plot_data, Sample ~ Taxonomy, value.var = "Abundance", fun.aggregate = sum) %>%
`row.names<-`(.[,1]) %>% .[, -1]
tmp_hclust <- hclust(dist(data_clustering))
order_x_clustering <- tmp_hclust %>% {.$labels[.$order]} %>% as.character
plot_data$Sample %<>% factor(., levels = order_x_clustering)
}
if(use_alluvium){
p <- ggplot(plot_data, aes(
x = Sample, y = Abundance,
fill = Taxonomy, color = Taxonomy,
weight = Abundance,
alluvium = Taxonomy, stratum = Taxonomy
)) +
ggalluvial::geom_flow(alpha = .4, width = 3/15) +
ggalluvial::geom_stratum(width = .2) +
scale_color_manual(values = rev(bar_colors_use))
}else{
if(ggnested){
p <- ggnested::ggnested(plot_data, aes_meco(x = "Sample", y = "Abundance", main_group = self$high_level, sub_group = "Taxonomy"), main_palette = bar_colors_use)
}else{
p <- ggplot(plot_data, aes_meco(x = "Sample", y = "Abundance", fill = "Taxonomy"))
}
if(bar_type == "full"){
if(self$use_percentage == T){
p <- p + geom_bar(stat = "identity", position = "stack", show.legend = T, width = barwidth)
}else{
p <- p + geom_bar(stat = "identity", position = "fill", show.legend = T, width = barwidth)
}
}else{
p <- p + geom_bar(stat = "identity", position = "stack", show.legend = T, width = barwidth)
}
}
if(!ggnested){
p <- p + scale_fill_manual(values = rev(bar_colors_use))
}
p <- p + xlab("") + ylab(self$ylabname)
if(!is.null(facet)){
if(coord_flip){
facet_formula <- reformulate(".", paste0(facet, collapse = " + "))
}else{
facet_formula <- reformulate(facet, ".")
}
if(length(facet) == 1){
p <- p + facet_grid(facet_formula, scales = "free", space = "free")
}else{
p <- p + ggh4x::facet_nested(facet_formula, nest_line = element_line(linetype = 2), scales = "free", space = "free")
}
p <- p + theme(strip.background = element_rect(fill = facet_color, color = facet_color), strip.text = element_text(size=strip_text))
p <- p + scale_y_continuous(expand = c(0, 0.01))
}else{
if(bar_type == "full" & self$use_percentage == FALSE){
p <- p + scale_y_continuous(limits = c(0, 1), expand = c(0, 0))
}else{
p <- p + scale_y_continuous(expand = c(0, 0))
}
}
p <- p + theme(panel.grid = element_blank(), panel.border = element_blank()) +
theme(axis.line.y = element_line(color = "grey60", linetype = "solid", lineend = "square"))
if(legend_text_italic == T) {
p <- p + theme(legend.text = element_text(face = 'italic'))
}
if(clustering_plot){
if(! coord_flip){
message("Rotate the axis automatically to add the clustering plot ...")
coord_flip <- TRUE
}
}
p <- p + private$ggplot_xtext_type(xtext_angle = xtext_angle, xtext_size = xtext_size, xtext_keep = xtext_keep, coord_flip = coord_flip)
p <- p + theme(axis.title.y = element_text(size = ytitle_size))
if(xtitle_keep == F){
p <- p + theme(axis.title.x = element_blank())
}
p <- p + guides(fill = guide_legend(title = self$taxrank))
if(use_alluvium | ggnested){
p <- p + guides(color = guide_legend(title = self$taxrank))
}
if(coord_flip){
p <- p + coord_flip()
}
if(clustering_plot){
left_plot <- ggtree::ggtree(tmp_hclust, hang = 0)
p %<>% aplot::insert_left(left_plot, width = cluster_plot_width)
}
p
},
#' @description
#' Plot the heatmap.
#'
#' @param color_values default rev(RColorBrewer::brewer.pal(n = 11, name = "RdYlBu"));
#' colors palette for the plotting.
#' @param facet default NULL; a character vector for the facet; a group column name of \code{sample_table}, such as, \code{"Group"};
#' If multiple facets are needed, please provide ordered names, such as \code{c("Group", "Type")}.
#' The latter should have a finer scale than the former one;
#' Please adjust the facet orders in the plot by assigning factors in \code{sample_table} before creating \code{trans_abund} object or
#' assigning factors in the \code{data_abund} table of \code{trans_abund} object.
#' When multiple facets are used, please first install package \code{ggh4x} using the command \code{install.packages("ggh4x")}.
#' @param x_axis_name NULL; a character string; a column name of sample_table used to show the sample names in x axis.
#' @param order_x default NULL; vector; used to order the sample names in x axis; must be the samples vector, such as, c("S1", "S3", "S2").
#' @param withmargin default TRUE; whether retain the tile margin.
#' @param plot_numbers default FALSE; whether plot the number in heatmap.
#' @param plot_text_size default 4; If plot_numbers TRUE, text size in plot.
#' @param plot_breaks default NULL; The legend breaks.
#' @param margincolor default "white"; If withmargin TRUE, use this as the margin color.
#' @param plot_colorscale default "log10"; color scale.
#' @param min_abundance default .01; the minimum abundance percentage in plot.
#' @param max_abundance default NULL; the maximum abundance percentage in plot, NULL reprensent the max percentage.
#' @param strip_text default 11; facet text size.
#' @param xtext_size default 10; x axis text size.
#' @param ytext_size default 11; y axis text size.
#' @param xtext_keep default TRUE; whether retain x text.
#' @param xtitle_keep default TRUE; whether retain x title.
#' @param grid_clean default TRUE; whether remove grid lines.
#' @param xtext_angle default 0; number ranging from 0 to 90; used to adjust x axis text angle to reduce text overlap;
#' @param legend_title default "\% Relative\\nAbundance"; legend title text.
#' @param pheatmap default FALSE; whether use pheatmap package to plot the heatmap.
#' @param ... paremeters pass to pheatmap when pheatmap = TRUE.
#' @return ggplot2 object or grid object based on pheatmap.
#' @examples
#' \donttest{
#' t1 <- trans_abund$new(dataset = dataset, taxrank = "Genus", ntaxa = 40)
#' t1$plot_heatmap(facet = "Group", xtext_keep = FALSE, withmargin = FALSE)
#' }
plot_heatmap = function(
color_values = rev(RColorBrewer::brewer.pal(n = 11, name = "RdYlBu")),
facet = NULL,
x_axis_name = NULL,
order_x = NULL,
withmargin = TRUE,
plot_numbers = FALSE,
plot_text_size = 4,
plot_breaks = NULL,
margincolor = "white",
plot_colorscale = "log10",
min_abundance = 0.01,
max_abundance = NULL,
strip_text = 11,
xtext_size = 10,
ytext_size = 11,
xtext_keep = TRUE,
xtitle_keep = TRUE,
grid_clean = TRUE,
xtext_angle = 0,
legend_title = "% Relative\nAbundance",
pheatmap = FALSE,
...
){
plot_data <- self$data_abund
use_taxanames <- self$data_taxanames
plot_data %<>% {.[.$Taxonomy %in% use_taxanames, ]}
if(pheatmap == FALSE){
# order x axis samples
plot_data <- private$adjust_axis_facet(plot_data = plot_data, x_axis_name = x_axis_name, order_x = order_x)
if (is.null(min_abundance)){
min_abundance <- ifelse(min(plot_data$Abundance) > 0.001, min(plot_data$Abundance), 0.001)
}
if (is.null(max_abundance)){
max_abundance <- max(plot_data$Abundance)
}
plot_data$Taxonomy %<>% factor(., levels = rev(use_taxanames))
p <- ggplot(plot_data, aes(x = .data[["Sample"]], y = .data[["Taxonomy"]], label = .data[[formatC("Abundance", format = "f", digits = 1)]]))
if(withmargin == T){
p <- p + geom_tile(aes(fill = Abundance), colour = margincolor, size = 0.5)
}else{
p <- p + geom_tile(aes(fill = Abundance))
}
p <- p + theme(axis.text.y = element_text(size = 12)) + theme(plot.margin = unit(c(0.3, 0.3, 0.3, 0.3), "cm"))
if (plot_numbers == T){
abund <- plot_data
abund$Abundance <- round(abund$Abundance, 1)
p <- p + geom_text(data = abund, size = plot_text_size, colour = "grey10")
}
if (is.null(plot_breaks)){
p <- p + scale_fill_gradientn(colours = color_values, trans = plot_colorscale, na.value = "#00008B", limits = c(min_abundance, max_abundance))
}else{
p <- p + scale_fill_gradientn(colours = color_values, trans = plot_colorscale, breaks=plot_breaks, na.value = "#00008B",
limits = c(min_abundance, max_abundance))
}
if(!is.null(facet)){
if(length(facet) == 1){
p <- p + facet_grid(reformulate(facet, "."), scales = "free", space = "free")
}else{
p <- p + ggh4x::facet_nested(reformulate(facet), nest_line = element_line(linetype = 2), scales = "free", space = "free")
}
p <- p + theme(strip.background = element_rect(color = "white", fill = "grey92"), strip.text = element_text(size=strip_text))
}
p <- p + labs(x = "", y = "", fill = legend_title)
if (!is.null(ytext_size)){
p <- p + theme(axis.text.y = element_text(size = ytext_size))
}
p <- p + private$ggplot_xtext_type(xtext_angle = xtext_angle, xtext_size = xtext_size, xtext_keep = xtext_keep)
if(grid_clean){
p <- p + theme(panel.border = element_blank(), panel.grid = element_blank())
}
p
} else {
# first to wide format
wide_table <- reshape2::dcast(plot_data, Taxonomy ~ Sample, value.var = "Abundance") %>%
`row.names<-`(.[,1]) %>%
.[, -1, drop = FALSE]
# check sd for each feature, if 0, delete
if(any(apply(wide_table, MARGIN = 1, FUN = function(x) sd(x) == 0))){
select_rows <- apply(wide_table, MARGIN = 1, FUN = function(x) sd(x) != 0)
wide_table %<>% {.[select_rows, ]}
}
p <- pheatmap::pheatmap(
wide_table,
...
)
p$gtable
}
},
#' @description
#' Box plot.
#'
#' @param color_values default \code{RColorBrewer::brewer.pal}(8, "Dark2"); colors palette for the box.
#' @param group default NULL; a column name of sample table to show abundance across groups.
#' @param show_point default FALSE; whether show points in plot.
#' @param point_color default "black"; If show_point TRUE; use the color
#' @param point_size default 3; If show_point TRUE; use the size
#' @param point_alpha default .3; If show_point TRUE; use the transparency.
#' @param plot_flip default FALSE; Whether rotate plot.
#' @param boxfill default TRUE; Whether fill the box with colors.
#' @param middlecolor default "grey95"; The middle line color.
#' @param middlesize default 1; The middle line size.
#' @param xtext_angle default 0; number ranging from 0 to 90; used to adjust x axis text angle to reduce text overlap;
#' @param xtext_size default 10; x axis text size.
#' @param ytitle_size default 17; y axis title size.
#' @param ... parameters pass to \code{\link{geom_boxplot}} function.
#' @return ggplot2 object.
#' @examples
#' \donttest{
#' t1$plot_box(group = "Group")
#' }
plot_box = function(
color_values = RColorBrewer::brewer.pal(8, "Dark2"),
group = NULL,
show_point = FALSE,
point_color = "black",
point_size = 3,
point_alpha = .3,
plot_flip = FALSE,
boxfill = TRUE,
middlecolor = "grey95",
middlesize = 1,
xtext_angle = 0,
xtext_size = 10,
ytitle_size = 17,
...
){
plot_data <- self$data_abund
use_taxanames <- self$data_taxanames
plot_data %<>% {.[.$Taxonomy %in% use_taxanames, ]}
plot_data$Taxonomy %<>% factor(., levels = use_taxanames)
p <- ggplot(plot_data, aes(x = .data[["Taxonomy"]], y = .data[["Abundance"]]))
p <- p + ylab(self$ylabname) + guides(col = guide_legend(reverse = TRUE)) + xlab("")
if (plot_flip == T){
p <- p + coord_flip()
}
if(is.null(group)) {
p <- p + geom_boxplot(color = color_values[1], ...)
} else {
color_values <- expand_colors(color_values, length(unique(plot_data[, group])))
if(boxfill == T){
p <- p + geom_boxplot(aes(color = .data[[group]], fill = .data[[group]]), ...)
p <- p + scale_fill_manual(values = color_values)
p <- p + scale_color_manual(values = color_values) + guides(color = "none")
## Change the default middle line
dat <- ggplot_build(p)$data[[1]]
p <- p + geom_segment(data=dat, aes(x=xmin, xend=xmax, y=middle, yend=middle), colour = middlecolor, size=middlesize)
} else {
p <- p + geom_boxplot(aes(color = .data[[group]]), ...) + scale_color_manual(values = color_values)
}
}
if(show_point == T){
p <- p + geom_point(size = point_size, color = point_color, alpha = point_alpha, position = "jitter")
}
p <- p + private$ggplot_xtext_type(xtext_angle = xtext_angle, xtext_size = xtext_size)
p <- p + theme(axis.title.y = element_text(size = ytitle_size)) + scale_y_continuous(expand = c(0, 0.01))
if(!is.null(group)) {
p <- p + guides(fill=guide_legend(title=group))
}
p
},
#' @description
#' Plot the line chart.
#'
#' @param color_values default \code{RColorBrewer::brewer.pal}(8, "Dark2"); colors palette for the points and lines.
#' @param plot_SE default TRUE; TRUE: the errorbar is \eqn{mean±se}; FALSE: the errorbar is \eqn{mean±sd}.
#' @param position default position_dodge(0.1); Position adjustment, either as a string (such as "identity"), or the result of a call to a position adjustment function.
#' @param errorbar_size default 1; errorbar line size.
#' @param errorbar_width default 0.1; errorbar width.
#' @param point_size default 3; point size for taxa.
#' @param point_alpha default 0.8; point transparency.
#' @param line_size default 0.8; line size.
#' @param line_alpha default 0.8; line transparency.
#' @param line_type default 1; an integer; line type.
#' @param xtext_angle default 0; number ranging from 0 to 90; used to adjust x axis text angle to reduce text overlap;
#' @param xtext_size default 10; x axis text size.
#' @param ytitle_size default 17; y axis title size.
#' @return ggplot2 object.
#' @examples
#' \donttest{
#' t1 <- trans_abund$new(dataset = dataset, taxrank = "Genus", ntaxa = 5)
#' t1$plot_line(point_size = 3)
#' t1 <- trans_abund$new(dataset = dataset, taxrank = "Genus", ntaxa = 5, groupmean = "Group")
#' t1$plot_line(point_size = 5, errorbar_size = 1, xtext_angle = 30)
#' }
plot_line = function(
color_values = RColorBrewer::brewer.pal(8, "Dark2"),
plot_SE = TRUE,
position = position_dodge(0.1),
errorbar_size = 1,
errorbar_width = 0.1,
point_size = 3,
point_alpha = 0.8,
line_size = 0.8,
line_alpha = 0.8,
line_type = 1,
xtext_angle = 0,
xtext_size = 10,
ytitle_size = 17
){
plot_data <- self$data_abund
use_taxanames <- self$data_taxanames
plot_data %<>% {.[.$Taxonomy %in% use_taxanames, ]}
plot_data$Taxonomy %<>% factor(., levels = use_taxanames)
color_values <- expand_colors(color_values, length(use_taxanames))
p <- ggplot(plot_data, aes(x = .data[["Sample"]], y = .data[["Abundance"]], color = .data[["Taxonomy"]], group = .data[["Taxonomy"]]))
if(("SE" %in% colnames(plot_data)) & plot_SE){
p <- p + geom_errorbar(aes(ymin = Abundance - SE, ymax = Abundance + SE), width = errorbar_width, position = position, linewidth = errorbar_size)
}else{
if(("SD" %in% colnames(plot_data)) & plot_SE){
p <- p + geom_errorbar(aes(ymin = Abundance - SD, ymax = Abundance + SD), width = errorbar_width, position = position, linewidth = errorbar_size)
}
}
p <- p + geom_point(size = point_size, alpha = point_alpha, position = position)
p <- p + geom_line(size = line_size, alpha = line_alpha, linetype = line_type, position = position)
p <- p + ylab(self$ylabname) + guides(col = guide_legend(title=self$taxrank, reverse = TRUE)) + xlab("")
p <- p + private$ggplot_xtext_type(xtext_angle = xtext_angle, xtext_size = xtext_size)
p <- p + theme(axis.title.y = element_text(size = ytitle_size)) + scale_y_continuous(expand = c(0, 0.01))
p <- p + scale_color_manual(values = color_values)
p
},
#' @description
#' Pie chart.
#'
#' @param color_values default \code{RColorBrewer::brewer.pal}(8, "Dark2"); colors palette for each section.
#' @param facet_nrow default 1; how many rows in the plot.
#' @param strip_text default 11; sample title size.
#' @param add_label default FALSE; Whether add the percentage label in each section of pie chart.
#' @param legend_text_italic default FALSE; whether use italic in legend.
#' @return ggplot2 object.
#' @examples
#' \donttest{
#' t1 <- trans_abund$new(dataset = dataset, taxrank = "Phylum", ntaxa = 6, groupmean = "Group")
#' t1$plot_pie(facet_nrow = 1)
#' }
plot_pie = function(
color_values = RColorBrewer::brewer.pal(8, "Dark2"),
facet_nrow = 1,
strip_text = 11,
add_label = FALSE,
legend_text_italic = FALSE
){
plot_data <- self$data_abund
use_taxanames <- self$data_taxanames
# sum others to one
if(any(!plot_data$Taxonomy %in% use_taxanames)){
plot_data$Taxonomy[!plot_data$Taxonomy %in% use_taxanames] <- "Others"
plot_data %<>% dplyr::group_by(!!! syms(c("Taxonomy", "Sample"))) %>%
dplyr::summarise(Abundance = sum(Abundance)) %>%
as.data.frame(stringsAsFactors = FALSE)
plot_data$Taxonomy %<>% factor(., levels = c(use_taxanames, "Others"))
color_values <- expand_colors(color_values, length(use_taxanames) + 1)
}else{
color_values <- expand_colors(color_values, length(use_taxanames))
}
plot_data$label <- paste0(round(plot_data$Abundance, 1), "%")
p <- ggplot(plot_data, aes(x = '', y = Abundance, fill = Taxonomy, label = label)) +
geom_bar(width = 1, stat = "identity") +
coord_polar("y", start = 0)
if(add_label){
p <- p + ggrepel::geom_label_repel(position = position_stack(vjust = 0.5), show.legend = FALSE)
}
p <- p + private$blank_theme +
scale_fill_manual(values = color_values) +
theme(axis.text.x = element_blank()) +
facet_wrap(~Sample, nrow = facet_nrow) +
theme(strip.text = element_text(size = strip_text)) +
guides(fill = guide_legend(title = self$taxrank))
if(legend_text_italic == T) {
p <- p + theme(legend.text = element_text(face = 'italic'))
}
p
},
#' @description
#' Donut chart based on the \code{ggpubr::ggdonutchart} function.
#'
#' @param color_values default \code{RColorBrewer::brewer.pal}(8, "Dark2"); colors palette for the donut.
#' @param label default TRUE; whether show the percentage label.
#' @param facet_nrow default 1; how many rows in the plot.
#' @param legend_text_italic default FALSE; whether use italic in legend.
#' @param ... parameters passed to \code{ggpubr::ggdonutchart}.
#' @return combined ggplot2 objects list, generated by \code{ggpubr::ggarrange} function.
#' @examples
#' \dontrun{
#' t1 <- trans_abund$new(dataset = dataset, taxrank = "Phylum", ntaxa = 6, groupmean = "Group")
#' t1$plot_donut(label = TRUE)
#' }
plot_donut = function(
color_values = RColorBrewer::brewer.pal(8, "Dark2"),
label = TRUE,
facet_nrow = 1,
legend_text_italic = FALSE,
...
){
plot_data <- self$data_abund
use_taxanames <- self$data_taxanames
# sum others to one
if(any(!plot_data$Taxonomy %in% use_taxanames)){
plot_data$Taxonomy[!plot_data$Taxonomy %in% use_taxanames] <- "Others"
plot_data %<>% dplyr::group_by(!!! syms(c("Taxonomy", "Sample"))) %>%
dplyr::summarise(Abundance = sum(Abundance)) %>%
as.data.frame(stringsAsFactors = FALSE)
plot_data$Taxonomy %<>% factor(., levels = c(use_taxanames, "Others"))
color_values <- expand_colors(color_values, length(use_taxanames) + 1)
}else{
color_values <- expand_colors(color_values, length(use_taxanames))
}
plot_data$label <- paste0(round(plot_data$Abundance, 1), "%")
# use ggarrange, because facet can not seperate the labels of all the samples
plot_list <- list()
for(i in unique(plot_data$Sample)){
tmp <- plot_data[plot_data$Sample == i, ]
p <- ggpubr::ggdonutchart(tmp, "Abundance", fill = "Taxonomy", label = "label", color = "white", palette = color_values, ...) +
guides(fill = guide_legend(title=self$taxrank))
theme(axis.text.y = element_blank())
if(label == F){
p <- p + theme(axis.text.x = element_blank())
}
if(legend_text_italic == T) {
p <- p + theme(legend.text = element_text(face = 'italic'))
}
plot_list[[i]] <- p
}
facet_ncol <- ceiling(length(plot_list)/facet_nrow)
ggpubr::ggarrange(plotlist = plot_list, nrow = facet_nrow, ncol = facet_ncol, labels = names(plot_list), common.legend = TRUE, legend = "bottom")
},
#' @description
#' Radar chart based on the \code{ggradar} package (https://github.com/ricardo-bion/ggradar).
#'
#' @param color_values default \code{RColorBrewer::brewer.pal}(8, "Dark2"); colors palette for samples.
#' @param ... parameters passed to \code{ggradar::ggradar} function except group.colours parameter.
#' @return ggplot2 object.
#' @examples
#' \dontrun{
#' t1 <- trans_abund$new(dataset = dataset, taxrank = "Phylum", ntaxa = 6, groupmean = "Group")
#' t1$plot_radar()
#' }
plot_radar = function(
color_values = RColorBrewer::brewer.pal(8, "Dark2"),
...
){
plot_data <- self$data_abund
use_taxanames <- self$data_taxanames
color_values <- expand_colors(color_values, length(use_taxanames))
plot_data <- plot_data[plot_data$Taxonomy %in% use_taxanames, ]
if(self$use_percentage){
plot_data$Abundance %<>% {./100}
}
tmp_data <- reshape2::dcast(plot_data, Taxonomy ~ Sample, value.var = "Abundance")
colnames(tmp_data)[1] <- "group"
tmp_data$group %<>% factor(., levels = use_taxanames)
# https://github.com/ricardo-bion/ggradar
ggradar::ggradar(tmp_data, group.colours = color_values, ...)
},
#' @description
#' Ternary diagrams based on the \code{ggtern} package.
#'
#' @param color_values default \code{RColorBrewer::brewer.pal}(8, "Dark2"); colors palette for the samples.
#' @param color_legend_guide_size default 4; The size of legend guide for color.
#' @return ggplot2 object.
#' @examples
#' \dontrun{
#' t1 <- trans_abund$new(dataset = dataset, taxrank = "Phylum", ntaxa = 6, groupmean = "Group")
#' t1$plot_tern()
#' }
plot_tern = function(
color_values = RColorBrewer::brewer.pal(8, "Dark2"),
color_legend_guide_size = 4
){
data_abund <- self$data_abund
use_taxanames <- self$data_taxanames
if(length(unique(data_abund$Sample)) != 3){
stop("Ternary diagrams can only be used for three samples!")
}
data_abund %<>% {.[.$Taxonomy %in% use_taxanames, ]}
data_abund$Taxonomy %<>% factor(., levels = use_taxanames)
plot_data <- reshape2::dcast(data_abund, Taxonomy + all_mean_abund ~ Sample, value.var = "Abundance")
colnames(plot_data)[2] <- "Abundance"
if(is.factor(data_abund$Sample)){
sample_names <- levels(data_abund$Sample)
}else{
sample_names <- unique(data_abund$Sample)
}
color_values <- expand_colors(color_values, length(use_taxanames))
p <- ggtern::ggtern(data = plot_data, aes_meco(x = sample_names[1], y = sample_names[2], z = sample_names[3])) +
theme_bw() +
geom_point(aes(color = Taxonomy, size = Abundance)) +
scale_color_manual(values = color_values) +
scale_fill_manual(values = color_values) +
guides(color = guide_legend(override.aes = list(size = color_legend_guide_size)))
p
},
#' @description
#' Print the trans_abund object.
print = function(){
cat("trans_abund object:\n")
cat(paste("data_abund have", ncol(self$data_abund), "columns: ", paste0(colnames(self$data_abund), collapse = ", "), "\n"))
cat(paste("data_abund have total", length(unique(as.character(self$data_abund$Taxonomy))), "taxa\n"))
cat(paste("taxrank: ", self$taxrank, "\n"))
if(!is.null(self$data_taxanames)){
if(length(self$data_taxanames) > 50){
cat(paste("data_taxanames: ", length(self$data_taxanames), "taxa\n"))
}else{
cat(paste("data_taxanames: ", paste0(self$data_taxanames, collapse = ", "), "\n"))
}
}
invisible(self)
}
),
private = list(
adjust_axis_facet = function(plot_data, x_axis_name, order_x){
# order x axis samples and facet
if(!is.null(x_axis_name)){
colnames(plot_data)[colnames(plot_data) == "Sample"] <- "Sample_rownames_before"
if(! x_axis_name %in% colnames(plot_data)){
stop(paste("No", x_axis_name, "found in the column names of sample_table!"))
}else{
colnames(plot_data)[colnames(plot_data) == x_axis_name] <- "Sample"
}
}
if(!is.null(order_x)){
if(length(order_x) == 1){
stop("This may be wrong. Only one sample used to order the samples!")
}else{
plot_data$Sample %<>% factor(., levels = order_x)
}
}
plot_data
},
ggplot_xtext_type = function(xtext_angle, xtext_size, xtext_keep = TRUE, coord_flip = FALSE){
if(coord_flip){
if(xtext_keep){
theme(axis.text.y = element_text(colour = "black", size = xtext_size))
}else{
theme(axis.ticks.y = element_blank(), axis.text.y = element_blank())
}
}else{
if(xtext_keep){
ggplot_xtext_anglesize(xtext_angle, xtext_size)
}else{
theme(axis.ticks.x = element_blank(), axis.text.x = element_blank())
}
}
},
blank_theme =
theme_minimal() +
theme(
axis.title = element_blank(),
panel.border = element_blank(),
panel.grid=element_blank(),
axis.ticks = element_blank(),
legend.position="right",
plot.title=element_text(size=14, face="bold")
)
),
lock_objects = FALSE,
lock_class = FALSE
)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.