R/heatmap.R

draw_significant_taxa_heatmap <-
  function(df,
           physeq,
           group,
           group1 = NULL,
           group2 = NULL,
           block = NULL,
           max_abundance_for_color = NULL,
           filename = NA) {
    tax_labels <- paste0(df$Annotation, " [", substr(df$Taxon, 4, nchar(df$Taxon)), "]")
    taxa_data <- data.frame(Significance = as.factor(df$sig))
    rownames(taxa_data) <- df$Taxon

    pruned_ps <- prune_taxa(df$Taxon, physeq)
    if (is.null(group1) & is.null(group2)) {
      my_group_fac <- factor(sample_data(physeq)[[group]])
      my_fac_levels <- levels(my_group_fac)
      if (length(my_fac_levels) > 2) {
        stop(
          paste(
            group,
            "has",
            length(my_fac_levels),
            "levels, but you did not specify which levels to compare"
          )
        )
      }
      group1 = my_fac_levels[1]
      group2 = my_fac_levels[2]
    }
    my_heat_map <-
      make_heat_map_physeq_levels(
        pruned_ps,
        group_var = group,
        group1 = group1,
        group2 = group2,
        block_var = block,
        max_abundance_for_color = max_abundance_for_color,
        tax_labels = tax_labels,
        taxa_data = taxa_data,
        tax_order = df$Taxon,
        gradient_steps = c(0.05, 0.15, 0.3, 1)
      )
    return(my_heat_map)
  }

#' Draw a heatmap of the OTU abundances in a phyloseq object.
#'
#' @param physeq      Phyloseq object
#' @param taxa_data   Taxa annotation to show.
#' @param group       Name of column in sample metadata to group samples on (1st layer).
#' @param compare     Vector of values in \code{group} column to compare (e.g., HFD vs Control).
#' @param block       Name of column in sample metadata to subgroup samples on (2nd layer).
#' @param max_abundance_for_color Maximum abundance to use in the color scale.
#' @param border_color      Border color for the heatmap cells.
#' @param filename          Name of file to save the heatmap figure.
#' @param custom_palette    Custom palette list to use (see example).
#' @param log_transform     boolean specifying if data should be log-transformed.
#' @param show_rownames     boolean specifying if column names are be shown.
#' @param gradient_steps
#' @param cellwidth         \code{cellwidth} argument to \code{pheatmap()}.
#' @param cellheight        \code{cellheight} argument to \code{pheatmap()}.
#' @param gaps_col          \code{gaps_col} argument to \code{pheatmap()}.
#'
#' @return ggplot object.
#' @export
#'
#' @import phyloseq
#'
#' @examples
#' \dontrun{
#' # Assuming that the 'physeq' variable has the genera for taxa, this will draw a heatmap of
#' # only the 4 genera listed in 'taxa_data' with samples first grouped by Diet and
#' # then by Enterotype. It will annotate the taxa based on their Phylum. Heatmap cells will have
#' # "grey60" borders.
#' pal = list(Diet = "Set2", Enterotype = "Pastel1", Significance = "PuRd")
#' taxa_data = list(Bacteroides = "Bacteroidetes", Prevotella = "Bacteroidetes", Roseburia = "Firmicutes", Blautia = "Firmicutes")
#' names(taxa_data) = c("Phylum")
#' draw_taxa_heatmap(physeq, taxa_data, group = "Diet", block = "Enterotype", custom_palette = pal, filename = "heatmap.pdf")
#'
#' # Heatmap without borders
#' draw_taxa_heatmap(physeq, taxa_data, ..., border_color = NULL)
#'
#' # Heatmap with black borders
#' draw_taxa_heatmap(physeq, taxa_data, ..., border_color = "black")
#' }
draw_taxa_heatmap <-
  function(physeq,
           taxa_data,
           group,
           compare = NULL,
           block = NULL,
           max_abundance_for_color = NULL,
           log_transform = FALSE,
           border_color = "grey60",
           filename = NA,
           show_rownames = TRUE,
           gradient_steps = c(0.01, 0.1, 0.5, 1),
           cellwidth = NA,
           cellheight = NA,
           gaps_col = NULL,
           custom_palette = NULL) {

    # Which taxa to show?
    if (!is.null(taxa_data)) {
      shown_taxa <- rownames(taxa_data)
      pruned_ps <- prune_taxa(shown_taxa, physeq)
    } else {
      shown_taxa <- taxa_names(physeq)
      pruned_ps <- physeq
    }

    # Make taxa names for display
    # If not unique, add rownames from physeq

    tax_table <- tax_table(pruned_ps)
    tax_labels <- apply(tax_table, 1, get_pretty_taxon_name)
    if (length(tax_labels) != length(unique(tax_labels))) {
      rownames_bak <- names(tax_labels)
      tax_labels <- paste0(tax_labels, " [", rownames(tax_table), "]")
      names(tax_labels) <- rownames_bak
      rm(rownames_bak)
    }

    # Thorsten uses gradient_steps = c(0.05, 0.15, 0.3, 1)

    my_heat_map <-
      make_heat_map_from_physeq(
        pruned_ps,
        group = group,
        compare = compare,
        block = block,
        max_abundance_for_color = max_abundance_for_color,
        log_transform = log_transform,
        border_color = border_color,
        tax_labels = tax_labels,
        taxa_data = taxa_data,
        tax_order = shown_taxa,
        show_rownames = show_rownames,
        cellwidth = cellwidth,
        cellheight = cellheight,
        gaps_col = gaps_col,
        gradient_steps = gradient_steps,
        filename = filename,
        custom_palette = custom_palette
      )
    return(my_heat_map)
  }


#' Internal function to draw heatmap from a physeq object.
#'
#' @param physeq Phyloseq object
#' @param group Name of column in sample metadata to group samples on (1st layer).
#' @param compare Vector of values in \code{group} column to compare (e.g., HFD vs Control).
#' @param max_abundance_for_color Maximum abundance to use in the color scale of 50 levels. If null, the 99.5th percentile in the data is used. If maximum value in the data is higher than this, this will be level 49 and max will be level 50.
#' @param taxa_data Taxa annotation to show.
#' @param tax_labels Character vector of names to be shown for each taxon. If NULL, names in the physeq object will be used.
#' @param tax_order Character vector of the original taxon names in the order you want them shown. If NULL, order in tax_labels will be used.
#' @param gradient_steps The steps for the 5-phase 50-level gradient is drawn.
#' @param block Name of column in sample metadata to subgroup samples on (2nd layer).
#' @param border_color Border color for the heatmap cells.
#' @param filename Name of file to save the heatmap figure.
#'
#' @importFrom grDevices colorRampPalette
#' @import pheatmap
#'
#' @return pheatmap object.
#'
make_heat_map_from_physeq <-
  function(physeq,
           taxa_data = NULL,
           group,
           block = NULL,
           compare = NULL,
           max_abundance_for_color = NULL,
           log_transform = FALSE,
           border_color = "grey60",
           filename = NA,
           tax_labels = NULL,
           tax_order = NULL,
           show_rownames = TRUE,
           cellwidth = NA,
           cellheight = NA,
           gaps_col = NULL,
           gradient_steps,
           custom_palette = NULL
           ) {

    if (taxa_are_rows(physeq)) {
      physeq <- t(physeq)
    }

    DF_CT <- as(otu_table(physeq), "matrix") # taxa are columns
    DF_CT <- as.data.frame(t(DF_CT)) # taxa are rows now

    # Order taxa based on the order in tax_order
    if (!is.null(tax_order)) {
      DF_CT <- DF_CT[tax_order,]
    }

    # Order samples based on group and block

    sam_info <- sample_data(physeq)
    rownames(sam_info) <- NULL
    sam_info[[group]] <- as.factor(sam_info[[group]])
    if (!is.null(block)) {
      sam_info[[block]] <- as.factor(sam_info[[block]])
      sam_info <- dplyr::select(sam_info, one_of("ID", group, block))
      sam_info <- dplyr::arrange(sam_info, get(group), get(block))
    } else {
      sam_info <- dplyr::select(sam_info, one_of("ID", group))
      sam_info <- dplyr::arrange(sam_info, get(group))
    }

    # Select samples

    if (!is.null(compare)) {
      sam_info <- dplyr::filter(sam_info, get(group) == compare[1] | get(group) == compare[2])
      if (dim(sam_info)[1]<1) {
        stop(paste0("Selecting samples from ", group, "={", paste(compare, collapse=","), "} results in 0 samples!"))
      }
    }

    rownames(sam_info) <- sam_info$ID
    sam_info$ID <- NULL
    DF_CT <- DF_CT[,rownames(sam_info)]

    # Set up colors

    group_colors = get_my_palette_colors(custom_palette, group, levels(sam_info[[group]]))
    block_colors = NULL
    if (!is.null(block)) {
      block_colors = get_my_palette_colors(custom_palette, block, levels(sam_info[[block]]))
    }

    tax_colors = lapply(colnames(taxa_data), function(x){get_my_palette_colors(custom_palette, x, levels(taxa_data[[x]]))})
    names(tax_colors) <- colnames(taxa_data)
    ann_colors <- list(group_colors, block_colors)
    names(ann_colors) <- c(group, block)
    ann_colors <- c(ann_colors, tax_colors)

    if (is.null(tax_labels)) {
      tax_labels <- rownames(DF_CT)
    }

    if (length(tax_labels) != nrow(DF_CT)) {
      stop("the given tax_names must be a character vector of length = ntaxa(physeq)")
    }

    tax_labels[is.na(tax_labels)] <- "NA"
    #rownames(DF_CT) <- make.unique(tax_labels)
    tax_labels <- tax_labels[tax_order]

    # Set up heatmap range breaks

    if (is.null(max_abundance_for_color)) {
      max_abundance_for_color <- quantile(unlist(DF_CT), .995)[[1]]
    }
    l_DF <- unlist(DF_CT)
    max_in_data <- max(l_DF)

    # Set up colors first

    myPaletteLength = 50
    myBaseLength = 5
    myBlocks = myPaletteLength / myBaseLength
    #myColors = colorRampPalette(c("red", viridis(myBaseLength)))(myPaletteLength+1)
    myColors = colorRampPalette(c("white", brewer.pal(myBaseLength, "Reds")))(myPaletteLength+1)

    # Set up abundance breaks for the colors above

    if (log_transform) {
      # Log transformation of DF

      min_in_data <- min(l_DF[l_DF>0])
      min_in_data <- floor(log10(min_in_data)-0.5)

      max_abundance_for_color <- ceiling(log10(max_abundance_for_color))
      max_in_data <- ceiling(log10(max_in_data))

      DF_CT <- log10(DF_CT)
      DF_CT[DF_CT == -Inf] <- min_in_data

      # Gradient steps
      myBreaks <- seq(from=min_in_data, to=max_abundance_for_color, length=myPaletteLength+1)
    } else {
      gradient_steps <- c(0, 1e-14, gradient_steps)
      myBreaks = gradient_steps*max_abundance_for_color
      myBreaks = c(unlist(lapply(1:myBaseLength, function(i) {seq(from=myBreaks[i], to=myBreaks[i+1], length.out=myBlocks+1)[1:myBlocks]})),myBreaks[myBaseLength+1])
    }

    if (max_in_data > max_abundance_for_color) {
      myBreaks[myPaletteLength+1] = max_in_data
    }

    hm.parameters <- list(DF_CT,
                          color = myColors,
                          breaks = myBreaks,
                          border_color = border_color,
                          cellwidth = cellwidth,
                          cellheight = cellheight,
                          show_rownames = (show_rownames & (dim(DF_CT)[1] < 75)), # Show tax label only when < 60 taxa
                          show_colnames = FALSE,
                          cluster_rows = FALSE, cluster_cols = FALSE,
                          scale = "none",
                          legend = TRUE,
                          annotation_names_col = TRUE,
                          annotation_col = sam_info,
                          annotation_names_row = FALSE,
                          annotation_row = NULL,
                          labels_row = tax_labels,
                          annotation_legend = TRUE,
                          annotation_colors = ann_colors,
                          drop_levels = TRUE,
                          font_size = 10,
                          fontsize_row = 8,
                          fontsize_col = 8,
                          fontsize_num = 6,
                          gaps_col = gaps_col,
                          #gaps_col = c(7,14,21,28),
                          filename = filename
    )
    do.call("pheatmap", hm.parameters)
  }
TBrach/MicrobiomeX documentation built on May 14, 2019, 2:28 p.m.