draw_significant_taxa_heatmap <-
function(df,
physeq,
group,
group1 = NULL,
group2 = NULL,
block = NULL,
max_abundance_for_color = NULL,
filename = NA) {
tax_labels <- paste0(df$Annotation, " [", substr(df$Taxon, 4, nchar(df$Taxon)), "]")
taxa_data <- data.frame(Significance = as.factor(df$sig))
rownames(taxa_data) <- df$Taxon
pruned_ps <- prune_taxa(df$Taxon, physeq)
if (is.null(group1) & is.null(group2)) {
my_group_fac <- factor(sample_data(physeq)[[group]])
my_fac_levels <- levels(my_group_fac)
if (length(my_fac_levels) > 2) {
stop(
paste(
group,
"has",
length(my_fac_levels),
"levels, but you did not specify which levels to compare"
)
)
}
group1 = my_fac_levels[1]
group2 = my_fac_levels[2]
}
my_heat_map <-
make_heat_map_physeq_levels(
pruned_ps,
group_var = group,
group1 = group1,
group2 = group2,
block_var = block,
max_abundance_for_color = max_abundance_for_color,
tax_labels = tax_labels,
taxa_data = taxa_data,
tax_order = df$Taxon,
gradient_steps = c(0.05, 0.15, 0.3, 1)
)
return(my_heat_map)
}
#' Draw a heatmap of the OTU abundances in a phyloseq object.
#'
#' @param physeq Phyloseq object
#' @param taxa_data Taxa annotation to show.
#' @param group Name of column in sample metadata to group samples on (1st layer).
#' @param compare Vector of values in \code{group} column to compare (e.g., HFD vs Control).
#' @param block Name of column in sample metadata to subgroup samples on (2nd layer).
#' @param max_abundance_for_color Maximum abundance to use in the color scale.
#' @param border_color Border color for the heatmap cells.
#' @param filename Name of file to save the heatmap figure.
#' @param custom_palette Custom palette list to use (see example).
#' @param log_transform boolean specifying if data should be log-transformed.
#' @param show_rownames boolean specifying if column names are be shown.
#' @param gradient_steps
#' @param cellwidth \code{cellwidth} argument to \code{pheatmap()}.
#' @param cellheight \code{cellheight} argument to \code{pheatmap()}.
#' @param gaps_col \code{gaps_col} argument to \code{pheatmap()}.
#'
#' @return ggplot object.
#' @export
#'
#' @import phyloseq
#'
#' @examples
#' \dontrun{
#' # Assuming that the 'physeq' variable has the genera for taxa, this will draw a heatmap of
#' # only the 4 genera listed in 'taxa_data' with samples first grouped by Diet and
#' # then by Enterotype. It will annotate the taxa based on their Phylum. Heatmap cells will have
#' # "grey60" borders.
#' pal = list(Diet = "Set2", Enterotype = "Pastel1", Significance = "PuRd")
#' taxa_data = list(Bacteroides = "Bacteroidetes", Prevotella = "Bacteroidetes", Roseburia = "Firmicutes", Blautia = "Firmicutes")
#' names(taxa_data) = c("Phylum")
#' draw_taxa_heatmap(physeq, taxa_data, group = "Diet", block = "Enterotype", custom_palette = pal, filename = "heatmap.pdf")
#'
#' # Heatmap without borders
#' draw_taxa_heatmap(physeq, taxa_data, ..., border_color = NULL)
#'
#' # Heatmap with black borders
#' draw_taxa_heatmap(physeq, taxa_data, ..., border_color = "black")
#' }
draw_taxa_heatmap <-
function(physeq,
taxa_data,
group,
compare = NULL,
block = NULL,
max_abundance_for_color = NULL,
log_transform = FALSE,
border_color = "grey60",
filename = NA,
show_rownames = TRUE,
gradient_steps = c(0.01, 0.1, 0.5, 1),
cellwidth = NA,
cellheight = NA,
gaps_col = NULL,
custom_palette = NULL) {
# Which taxa to show?
if (!is.null(taxa_data)) {
shown_taxa <- rownames(taxa_data)
pruned_ps <- prune_taxa(shown_taxa, physeq)
} else {
shown_taxa <- taxa_names(physeq)
pruned_ps <- physeq
}
# Make taxa names for display
# If not unique, add rownames from physeq
tax_table <- tax_table(pruned_ps)
tax_labels <- apply(tax_table, 1, get_pretty_taxon_name)
if (length(tax_labels) != length(unique(tax_labels))) {
rownames_bak <- names(tax_labels)
tax_labels <- paste0(tax_labels, " [", rownames(tax_table), "]")
names(tax_labels) <- rownames_bak
rm(rownames_bak)
}
# Thorsten uses gradient_steps = c(0.05, 0.15, 0.3, 1)
my_heat_map <-
make_heat_map_from_physeq(
pruned_ps,
group = group,
compare = compare,
block = block,
max_abundance_for_color = max_abundance_for_color,
log_transform = log_transform,
border_color = border_color,
tax_labels = tax_labels,
taxa_data = taxa_data,
tax_order = shown_taxa,
show_rownames = show_rownames,
cellwidth = cellwidth,
cellheight = cellheight,
gaps_col = gaps_col,
gradient_steps = gradient_steps,
filename = filename,
custom_palette = custom_palette
)
return(my_heat_map)
}
#' Internal function to draw heatmap from a physeq object.
#'
#' @param physeq Phyloseq object
#' @param group Name of column in sample metadata to group samples on (1st layer).
#' @param compare Vector of values in \code{group} column to compare (e.g., HFD vs Control).
#' @param max_abundance_for_color Maximum abundance to use in the color scale of 50 levels. If null, the 99.5th percentile in the data is used. If maximum value in the data is higher than this, this will be level 49 and max will be level 50.
#' @param taxa_data Taxa annotation to show.
#' @param tax_labels Character vector of names to be shown for each taxon. If NULL, names in the physeq object will be used.
#' @param tax_order Character vector of the original taxon names in the order you want them shown. If NULL, order in tax_labels will be used.
#' @param gradient_steps The steps for the 5-phase 50-level gradient is drawn.
#' @param block Name of column in sample metadata to subgroup samples on (2nd layer).
#' @param border_color Border color for the heatmap cells.
#' @param filename Name of file to save the heatmap figure.
#'
#' @importFrom grDevices colorRampPalette
#' @import pheatmap
#'
#' @return pheatmap object.
#'
make_heat_map_from_physeq <-
function(physeq,
taxa_data = NULL,
group,
block = NULL,
compare = NULL,
max_abundance_for_color = NULL,
log_transform = FALSE,
border_color = "grey60",
filename = NA,
tax_labels = NULL,
tax_order = NULL,
show_rownames = TRUE,
cellwidth = NA,
cellheight = NA,
gaps_col = NULL,
gradient_steps,
custom_palette = NULL
) {
if (taxa_are_rows(physeq)) {
physeq <- t(physeq)
}
DF_CT <- as(otu_table(physeq), "matrix") # taxa are columns
DF_CT <- as.data.frame(t(DF_CT)) # taxa are rows now
# Order taxa based on the order in tax_order
if (!is.null(tax_order)) {
DF_CT <- DF_CT[tax_order,]
}
# Order samples based on group and block
sam_info <- sample_data(physeq)
rownames(sam_info) <- NULL
sam_info[[group]] <- as.factor(sam_info[[group]])
if (!is.null(block)) {
sam_info[[block]] <- as.factor(sam_info[[block]])
sam_info <- dplyr::select(sam_info, one_of("ID", group, block))
sam_info <- dplyr::arrange(sam_info, get(group), get(block))
} else {
sam_info <- dplyr::select(sam_info, one_of("ID", group))
sam_info <- dplyr::arrange(sam_info, get(group))
}
# Select samples
if (!is.null(compare)) {
sam_info <- dplyr::filter(sam_info, get(group) == compare[1] | get(group) == compare[2])
if (dim(sam_info)[1]<1) {
stop(paste0("Selecting samples from ", group, "={", paste(compare, collapse=","), "} results in 0 samples!"))
}
}
rownames(sam_info) <- sam_info$ID
sam_info$ID <- NULL
DF_CT <- DF_CT[,rownames(sam_info)]
# Set up colors
group_colors = get_my_palette_colors(custom_palette, group, levels(sam_info[[group]]))
block_colors = NULL
if (!is.null(block)) {
block_colors = get_my_palette_colors(custom_palette, block, levels(sam_info[[block]]))
}
tax_colors = lapply(colnames(taxa_data), function(x){get_my_palette_colors(custom_palette, x, levels(taxa_data[[x]]))})
names(tax_colors) <- colnames(taxa_data)
ann_colors <- list(group_colors, block_colors)
names(ann_colors) <- c(group, block)
ann_colors <- c(ann_colors, tax_colors)
if (is.null(tax_labels)) {
tax_labels <- rownames(DF_CT)
}
if (length(tax_labels) != nrow(DF_CT)) {
stop("the given tax_names must be a character vector of length = ntaxa(physeq)")
}
tax_labels[is.na(tax_labels)] <- "NA"
#rownames(DF_CT) <- make.unique(tax_labels)
tax_labels <- tax_labels[tax_order]
# Set up heatmap range breaks
if (is.null(max_abundance_for_color)) {
max_abundance_for_color <- quantile(unlist(DF_CT), .995)[[1]]
}
l_DF <- unlist(DF_CT)
max_in_data <- max(l_DF)
# Set up colors first
myPaletteLength = 50
myBaseLength = 5
myBlocks = myPaletteLength / myBaseLength
#myColors = colorRampPalette(c("red", viridis(myBaseLength)))(myPaletteLength+1)
myColors = colorRampPalette(c("white", brewer.pal(myBaseLength, "Reds")))(myPaletteLength+1)
# Set up abundance breaks for the colors above
if (log_transform) {
# Log transformation of DF
min_in_data <- min(l_DF[l_DF>0])
min_in_data <- floor(log10(min_in_data)-0.5)
max_abundance_for_color <- ceiling(log10(max_abundance_for_color))
max_in_data <- ceiling(log10(max_in_data))
DF_CT <- log10(DF_CT)
DF_CT[DF_CT == -Inf] <- min_in_data
# Gradient steps
myBreaks <- seq(from=min_in_data, to=max_abundance_for_color, length=myPaletteLength+1)
} else {
gradient_steps <- c(0, 1e-14, gradient_steps)
myBreaks = gradient_steps*max_abundance_for_color
myBreaks = c(unlist(lapply(1:myBaseLength, function(i) {seq(from=myBreaks[i], to=myBreaks[i+1], length.out=myBlocks+1)[1:myBlocks]})),myBreaks[myBaseLength+1])
}
if (max_in_data > max_abundance_for_color) {
myBreaks[myPaletteLength+1] = max_in_data
}
hm.parameters <- list(DF_CT,
color = myColors,
breaks = myBreaks,
border_color = border_color,
cellwidth = cellwidth,
cellheight = cellheight,
show_rownames = (show_rownames & (dim(DF_CT)[1] < 75)), # Show tax label only when < 60 taxa
show_colnames = FALSE,
cluster_rows = FALSE, cluster_cols = FALSE,
scale = "none",
legend = TRUE,
annotation_names_col = TRUE,
annotation_col = sam_info,
annotation_names_row = FALSE,
annotation_row = NULL,
labels_row = tax_labels,
annotation_legend = TRUE,
annotation_colors = ann_colors,
drop_levels = TRUE,
font_size = 10,
fontsize_row = 8,
fontsize_col = 8,
fontsize_num = 6,
gaps_col = gaps_col,
#gaps_col = c(7,14,21,28),
filename = filename
)
do.call("pheatmap", hm.parameters)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.