R/visualization.R
In umiAnalyzer: Tools for Analyzing Sequencing Data with Unique Molecular Identifiers

Documented in AmpliconHeatmap AmpliconPlot BarcodeFamilyHistogram QCplot timeSeriesGrid UmiCountsPlot

#' Generate QC plots
#'
#' Visualize the UMI count for each selected assay and sample for a given
#' consensus depth. This is useful to detect differences in coverage,
#' especially for multiplexed assays.
#'
#' @param object Requires a UMI sample or UMI experiment object
#' @param group.by String. Which variable should be used as a factor on the x-axis. Default is sample
#' @param plotDepth Which consensus depth to plot
#' @param assays (Optional) user-supplied list of assays to plot. Default is all.
#' @param samples (Optional) user-supplied list of samples to plot. Default is all.
#' @param theme ggplot theme to use.
#' @param option Color palette to use, either ggplot default or viridis colors.
#' @param direction If viridis colors are used, choose orientation of color scale.
#' @param toggle_mean Show mean or median
#' @param center Choose mean or median
#' @param line_col Choose color for mean/median line
#' @param angle Angle of labels on x-axis.
#' @param plotly Should plotly be used for rendering?
#'
#' @export
#'
#' @import ggplot2
#' @import dplyr
#' @importFrom magrittr "%>%" "%<>%"
#' @importFrom stats median
#' @importFrom viridis scale_fill_viridis
#' @importFrom plotly ggplotly
#' 
#' @return A ggplot object 
#' 
#' @examples
#' library(umiAnalyzer)
#' 
#' main = system.file('extdata', package = 'umiAnalyzer')
#' samples <- list.dirs(path = main, full.names = FALSE, recursive = FALSE)
#' simsen <- createUmiExperiment(experimentName = 'example',mainDir = main,sampleNames = samples)
#' 
#' depth_plot <- QCplot(simsen)
#'
QCplot <- function(
  object,
  group.by = 'sample',
  plotDepth = 3,
  assays = NULL,
  samples = NULL,
  theme = 'classic',
  option = 'viridis',
  direction = 'default',
  toggle_mean = TRUE,
  center = 'mean',
  line_col = 'blue',
  angle = 0,
  plotly = FALSE
  ) {

  if (missing(x = object)) {
    stop('Must provide a umiExperiment object and filter names')
  } else if(!class(object) == 'UMIexperiment'){
    stop('Object is not of class UMIexperiment.')
  }

  cons.table <- object@cons.data
  summary.table <- object@summary.data

  # Consensus depth plot per assay
  #cdepths <- summary.table %>% dplyr::filter(
  #  .data$assay != '',
  #  .data$depth == plotDepth
  #)

  cdepths <- cons.table %>%
    dplyr::filter(
      .data$Name != '',
      .data$`Consensus group size` == plotDepth
    )


  if (!is.null(assays)) {
    cdepths <- cdepths %>%
      dplyr::filter(.data$Name %in% assays)
  }

  if (!is.null(samples)) {
    cdepths <- cdepths %>%
      dplyr::filter(.data$`Sample Name` %in% samples)
  }

  cdepths <- dplyr::rename(cdepths, UMIcount = .data$Coverage, assay = .data$Name)

  cdepths <- cdepths %>%
    dplyr::group_by(.data$`Sample Name`, .data$assay) %>%
    dplyr::summarise(UMIcount = mean(.data$UMIcount))

  cdepths


  # Set assay and sample to factor for better plotting
  cdepths$assay %<>% as.factor
  cdepths$`Sample Name` %<>% as.factor

  #print(as.data.frame(cdepths))

  # From the ggplot2 vignette:
  # https://github.com/tidyverse/ggplot2/releases
  # aes_() replaces aes_q(). It also supports formulas, so the most concise
  # SE version of aes(carat, price) is now aes_(~carat, ~price). You may
  # want to use this form in packages, as it will avoid spurious R CMD check
  # warnings about undefined global variables.

  # Use selected plotting theme
  use_theme <- select_theme(theme = theme)

  if (group.by == 'assay') {
    depth_plot <- ggplot(cdepths, aes_(x = ~assay, y = ~UMIcount, fill=~(`Sample Name`))) +
      geom_bar(position = 'dodge', stat = 'identity') +
      use_theme +
      theme(
        axis.title.x = element_blank(),
        axis.text.x = element_text(size = 14, angle = angle, hjust = 1),
        axis.text.y = element_text(size = 14)
      ) +
      labs(
        title = paste('Consensus ', plotDepth, ' depths by assay', sep = ""),
        subtitle = paste(
          'Mean depth: ', round(mean(cdepths$UMIcount)),
          'Median depth: ', round(median(cdepths$UMIcount))
        ),
        caption = ''
      )
  } else if (group.by == 'sample') {
    depth_plot <- ggplot(cdepths, aes_(x = ~(`Sample Name`), y = ~UMIcount, fill=~assay)) +
      geom_bar(position = 'dodge', stat = 'identity') +
      use_theme +
      theme(
        axis.title.x = element_blank(),
        axis.text.x = element_text(size = 14, angle = angle),
        axis.text.y = element_text(size = 14)
      ) +
      labs(
        title = paste('Consensus ', plotDepth, ' depths by sample', sep = ''),
        subtitle = paste(
          'Mean depth: ', round(mean(cdepths$UMIcount)),
          'Median depth: ', round(median(cdepths$UMIcount))
        ),
        caption = ''
      )
  }

  if(toggle_mean){

    if(center == 'mean'){
      depth_plot <- depth_plot + geom_hline(
        yintercept = mean(cdepths$UMIcount),
        linetype = 'dashed',
        color = line_col)
    } else {
      depth_plot <- depth_plot + geom_hline(
        yintercept = median(cdepths$UMIcount),
        linetype = 'dashed',
        color = line_col)
    }


  }

  if(option != 'default'){

    if(direction == 'default'){
      orientation = 1
    } else {
      orientation = -1
    }

    depth_plot <- depth_plot + viridis::scale_fill_viridis(
      discrete = TRUE,
      option = option,
      direction = orientation
    )
  }

  if(plotly) {
    depth_plot <- plotly::ggplotly(depth_plot)
  }

  # Plot consensus depth distribution
  return(depth_plot)
}

#' Plot UMI counts
#'
#' Visualize the number detected UMI for each consensus depth cut-off. This may
#' may helpful in choosing the right consensus depth for your analysis, by
#' checking the number of reads still available for each assay and sample
#' for your chosen cut-off.
#'
#' @param object Requires a UMI sample or UMI experiment object
#' @param amplicons (Optional) user-supplied list of assays to plot. Default is all.
#' @param samples (Optional) user-supplied list of samples to plot. Default is all.
#' @param theme Plotting theme, default is classic
#' @param option Color palette. Default uses ggplot standard, otherwise viridis options.
#' @param direction If using viridis colors should the scale be inverted or default?
#'
#' @export
#'
#' @import ggplot2
#' @import dplyr
#' @importFrom magrittr "%>%" "%<>%"
#' @importFrom stats median
#' @importFrom viridis scale_fill_viridis
#' 
#' @return A UMIexperiment object
#' 
#' @examples
#' library(umiAnalyzer)
#' 
#' main = system.file('extdata', package = 'umiAnalyzer')
#' samples <- list.dirs(path = main, full.names = FALSE, recursive = FALSE)
#' simsen <- createUmiExperiment(experimentName = 'example',mainDir = main,sampleNames = samples)
#' simsen <- filterUmiObject(simsen)
#' 
#' count_plot <- UmiCountsPlot(simsen)
#'
UmiCountsPlot <- function(
  object,
  amplicons = NULL,
  samples = NULL,
  theme = 'classic',
  option = 'viridis',
  direction = 1
  ) {

  # Read summary data from object
  data <- object@summary.data %>%
    dplyr::filter(
      !is.na(.data$assay),
      .data$depth > 0
    )

  # Select amplicons
  if (!is.null(amplicons)) {
    data <- data %>%
      dplyr::filter(.data$assay %in% amplicons)
  }

  # Select samples
  if (!is.null(samples)) {
    data <- data %>%
      dplyr::filter(.data$sample %in% samples)
  }

  # Generate ggplot object
  data$depth %<>% as.factor

  # Use selected plotting theme
  use_theme <- select_theme(theme = theme)

  # If color option is not default use the viridis package for color palettes.
  # https://cran.r-project.org/web/packages/viridis/vignettes/intro-to-viridis.html
  if(option != 'default') {
    plot <- ggplot(
      data = data, aes(
        x=.data$depth,
        y=.data$UMIcount, 
        fill=sample)
      ) +
      use_theme +
      viridis::scale_fill_viridis(
        discrete = TRUE,
        option = option,
        direction = direction
        ) +
        geom_col(alpha=0.6) +
        facet_grid(assay ~ sample) +
        ylab('UMI count') +
        xlab('Consensus depth cut-off')
  } else {
    plot <- ggplot(data, aes(x=.data$depth, y=.data$UMIcount, fill=sample)) +
      use_theme +
      geom_col(alpha=0.6) +
      facet_grid(assay ~ sample) +
      ylab('UMI count') +
      xlab('Consensus depth cut-off')
  }

  # Return object and plot
  return(plot)
}

#' Generate Amplicon plots
#'
#' Plots variant allele frequencies or alternate allele counts for chosen
#' samples and assays.
#'
#' @param object Requires a UMI sample or UMI experiment object
#' @param filter.name Name of the filter to be plotted.
#' @param cut.off How many variant reads are necessary to consider a variant above background? Default is 5 reads.
#' @param min.count Minimum variants counts to plot, default is 0.
#' @param min.vaf Minimum variants allele frequency to plot, default is 0.
#' @param amplicons (Optional) character vector of amplicons to be plotted.
#' @param samples (Optional) character vector of samples to be plotted.
#' @param abs.count Should absolute counts be plotted instead of frequencies? Default is FALSE.
#' @param theme Plotting theme to use, default is classic.
#' @param option Color palette to use.
#' @param y_min Minimum y-axis value, default is 0
#' @param y_max Maximum y-axis value, default is NULL (autoscale)
#' @param direction Orientation of the color palette.
#' @param plot.text Should non-references bases be indicated above the bar?
#' @param plot.ref If true show reference base instead of position on x-axis.
#' @param stack.plot Show all variant alleles in a stacked bar plot.
#' @param classic.plot Show classical debarcer amplicon plot with raw error.
#' @param fdr False-discovery-rate cut-off for variants.
#' @param use.caller Should data from variant caller be used? Default is FALSE
#' @param use.plotly Should plotly be used instead of the regular ggplot device? Default is TRUE
#' @param font.size Font size
#' @param angle Font angle
#'
#' @export
#'
#' @import ggplot2
#' @importFrom plotly ggplotly
#' @importFrom scales rescale_none
#' @importFrom magrittr "%>%" "%<>%"
#' @importFrom dplyr filter
#' @importFrom viridis scale_fill_viridis
#' 
#' @return A UMIexperiment object containing a ggplot object with the
#' amplicon plot.
#'
#' @examples
#' library(umiAnalyzer)
#' 
#' main = system.file('extdata', package = 'umiAnalyzer')
#' samples <- list.dirs(path = main, full.names = FALSE, recursive = FALSE)
#' simsen <- createUmiExperiment(experimentName = 'example',mainDir = main,sampleNames = samples)
#' simsen <- filterUmiObject(simsen)
#'
#' amplicon_plot <- AmpliconPlot(simsen)
#' 
#' 
AmpliconPlot <- function(
  object,
  filter.name = 'default',
  cut.off = 5,
  min.count = 0,
  min.vaf = 0,
  amplicons = NULL,
  samples = NULL,
  abs.count = FALSE,
  y_min = 0,
  y_max = NULL,
  theme = 'classic',
  option = 'default',
  direction = 'default',
  plot.text = FALSE,
  plot.ref = TRUE,
  stack.plot = FALSE,
  classic.plot = FALSE,
  fdr = 0.05,
  font.size = 6,
  angle = 45,
  use.caller = FALSE,
  use.plotly = TRUE
  ) {

  if (missing(x = object)) {
    stop("Must provide a umiExperiment object and filter names")
  } else if(!class(object) == "UMIexperiment"){
    stop("Object is not of class UMIexperiment.")
  } else if(!is.logical(abs.count)){
    warning("abs.count needs to be of type boolean. Using defaults.")
    abs.count = FALSE
  } else if (!is.numeric(cut.off) || cut.off < 0) {
    warning("cut.off needs to be a positive integer. Using defaults.")
    cut.off = 5
  } else if(is.null(object@filters[filter.name][[1]])) {
    if(!is.null(object@filters$default)){
      warning("Requested filter not found, using default.")
    } else {
      stop("Filter not found. Have you run filterUmiObject?")
    }
  }

  # Use depth cut-off to call variants
  cons.table.default <- getFilteredData(
    object = object,
    name = filter.name
  )
  
  cons.table.default$Variants <- ifelse(
    test = cons.table.default$`Max Non-ref Allele Count` >= cut.off, 
    yes = "Variant",
    no = "Background"
  )

  if(use.caller){
    # Check if variant caller has been run on object
    if(!identical(dim(object@variants), dim(tibble()))) {
      cons.table <- object@variants
      cons.table$Variants <- ifelse(
        test = cons.table$p.adjust <= fdr, 
        yes = "Variant", 
        no = "Background"
      )
    } else {
      warning("Variant caller has not been run, using default cut-off instead!")
      cons.table <- cons.table.default
    }
  } else {
    cons.table <- cons.table.default
  }

  # Make variables factors to ensure equidistance on the x-axis
  cons.table$`Sample Name` %<>% as.factor
  cons.table$Position %<>% as.factor
  cons.table$Name %<>% as.factor
  cons.table$sample %<>% as.factor
  cons.table$`Consensus group size` %<>% as.factor

  # Filter amplicons and samples based on user selection
  cons.table <- filterConsensusTable(
    cons.table,
    amplicons = amplicons,
    samples = samples
  )

  # Filter position based on minimum VAF and counts, default is to use all positions.
  cons.table <- cons.table %>%
    dplyr::filter(
      .data$`Max Non-ref Allele Frequency` >= min.vaf/100,
      .data$`Max Non-ref Allele Count` >= min.count
    )

  # Get raw error data (cons0)
  raw_error <- object@raw.error

  # Make variables factors to ensure equidistance on the x-axis
  raw_error$`Sample Name` %<>% as.factor
  raw_error$Position %<>% as.factor
  raw_error$Name %<>% as.factor
  raw_error$sample %<>% as.factor
  raw_error$`Consensus group size` %<>% as.factor

  # Filter selected amplicons and samples
  raw_error <- filterConsensusTable(
    raw_error,
    amplicons = amplicons,
    samples = samples
  )

  classic_data <- dplyr::bind_rows(cons.table,raw_error)

  # Use selected plotting theme
  use_theme <- select_theme(theme = theme)

  # Set maximum y-axis limit to largest variant allele frequency in data
  # rounded up to the nearest integer.
  if(is.null(y_max)){
    y_max <- ceiling(100*max(cons.table$`Max Non-ref Allele Frequency`))
  }

  # If classic plot is chosen make a raw vs consN plot
  classic_plot <- ggplot(classic_data, aes_(
      x = ~Position,
      y = ~(100 * .data$`Max Non-ref Allele Frequency`),
      fill = ~(`Consensus group size`))
    ) +
    use_theme +
    geom_bar(stat = 'identity', width=.5, position = 'dodge') +
    theme(
      axis.text.x = element_text(
        size = font.size,
        angle = angle,
        hjust = 1
      ),
      axis.title.x = element_blank()
      ) +
    ylab('Variant Allele Frequency (%)') +
    xlab('Assay') +
    scale_y_continuous(
      limits = c(y_min,y_max),
      oob = scales::rescale_none
    ) +
    facet_grid(`Sample Name` ~ Name, scales = 'free_x', space = 'free_x')

  # If the plot is too big, limit number of positions plotted;
  # also output tabular output as an html table
  n_samples <- length(unique(cons.table$`Sample Name`))
  n_positions <- length(unique(cons.table$Position))


  if ( (n_samples > 6) | (n_positions > 500) ) {
    amplicon_plot <- ggplot(
      cons.table, aes_(
        x = ~ Name,
        y = ~ (100 * .data$`Max Non-ref Allele Frequency`))
      ) +
      geom_point(
        mapping = aes(
          col = .data$Variants,
          size = .data$`Max Non-ref Allele Count`)
      ) +
      use_theme +
      theme(
        axis.text.x = element_text(
          size = font.size,
          angle = angle,
          hjust = 1
        ),
        axis.title.x = element_blank()
        ) +
      ylab("Variant Allele Frequency (%)") +
      xlab("Assay") +
      labs(
        title = "Maximum variant allele frequency by assay",
        caption = "Each dot represenst a position. All samples are included.
           Blue dots represent positions with at least 5 variant alleles."
      )
  } else {
    if(abs.count) {
      amplicon_plot <- ggplot(cons.table, aes_(
        x = ~Position,
        y = ~`Max Non-ref Allele Count`,
        fill = ~Variants)
      ) +
        use_theme +
        geom_bar(stat = "identity") +
        theme(
          axis.text.x = element_text(size = font.size, angle = angle, hjust = 1),
          axis.title.x = element_blank()
          ) +
        ylab("Variant UMI count") +
        xlab("Assay") +
        facet_grid(`Sample Name` ~ Name, scales = "free_x", space = "free_x") +
        geom_hline(
          yintercept = 0,
          color = 'gray50',
          size = 0.3
        )
    } else {

      cons.table <- cons.table %>%
        dplyr::mutate(
          `Max Non-ref Allele Frequency` = 100*.data$`Max Non-ref Allele Frequency`
          )

      amplicon_plot <- ggplot(cons.table, aes_(
        x = ~Position,
        y = ~`Max Non-ref Allele Frequency`,
        fill = ~Variants)
        ) +
        use_theme +
        geom_bar(stat = "identity") +
        theme(
          axis.text.x = element_text(
            size = font.size,
            angle = angle,
            hjust = 1
          ),
          axis.title.x = element_blank()
          ) +
        ylab("Variant Allele Frequency (%)") +
        xlab("Assay") +
        scale_y_continuous(
          limits=c(y_min,y_max),
          oob = scales::rescale_none
        ) +
        facet_grid(`Sample Name` ~ Name, scales = "free_x", space = "free_x") +
        geom_hline(
          yintercept = 0,
          color = 'gray50',
          size = 0.3
        )
    }

    if(classic.plot){
      amplicon_plot <- classic_plot
    }

    if(plot.text){
      amplicon_plot <- amplicon_plot +
        geom_text(
          data = cons.table,
          mapping = aes_(label = ~(`Max Non-ref Allele`)),
          position = position_dodge(width = 1),
          size = 4
        )
    }

    if(plot.ref){
      amplicon_plot <- amplicon_plot +
        scale_x_discrete(
          breaks = cons.table$Position,
          labels = cons.table$Reference
        ) +
        theme(
          axis.text.x = element_text(
            size = font.size,
            angle = 0
            ),
          axis.title.x = element_blank()
        )
    }
  }


  if(direction == 'default'){
    orientation = 1
  } else {
    orientation = -1
  }

  if(option != 'default'){

    # If not using default color scheme use either
    # (1) Colors from viridis package
    if( option %in% c('viridis','magma','plasma','inferno','cividis') ){
      amplicon_plot <- amplicon_plot + viridis::scale_fill_viridis(
        discrete = TRUE,
        option = option,
        direction = orientation
      )
    } else {
    # (2) Colors from ggplot: Accent, Dark2, Paired, Pastel1, Pastel2, Set1, Set2, Set3
      amplicon_plot <- amplicon_plot +
        ggplot2::scale_fill_brewer(
          palette = option
        )
    }
  }

  if(stack.plot){
    amplicon_plot <- stacked_amplicon_plot(
      cons.data = cons.table,
      theme = theme,
      plot.ref = plot.ref,
      abs.count = abs.count,
      option = option,
      direction = orientation
    )
  }

  # Show plot and add ggplot object to the UMIexperiment object
  if(use.plotly){
    amplicon_plot <- plotly::ggplotly(amplicon_plot)
  }

  return(amplicon_plot)
}


#' Amplicon heatmap
#'
#' Generates a heatmap of mutations with sample clustering using pheatmap.
#'
#' @param object Requires a UMI sample or UMI experiment object
#' @param filter.name Name of the filter to be plotted.
#' @param cut.off How many variant reads are necessary to consider a variant above background? Default is 5 reads.
#' @param amplicons (Optional) character vector of amplicons to be plotted.
#' @param samples (Optional) character vector of samples to be plotted.
#' @param left.side Show assays or sample on the left side of the heatmap. Default is assays
#' @param abs.count Logical. Should absolute counts be used instead of frequencies?
#' @param font.size Font size to use for sample labels
#'
#' @export
#'
#' @import magrittr
#' @importFrom pheatmap pheatmap
#' @importFrom tidyr spread
#' @importFrom dplyr select
#' @importFrom grDevices dev.off
#'
#' @return A graphics object
#' 
#' @examples
#' \dontrun{
#' library(umiAnalyzer)
#' 
#' main = system.file('extdata', package = 'umiAnalyzer')
#' samples <- list.dirs(path = main, full.names = FALSE, recursive = FALSE)
#' simsen <- createUmiExperiment(experimentName = 'example',mainDir = main,sampleNames = samples)
#' simsen <- filterUmiObject(simsen)
#' 
#' hmap <- AmpliconHeatmap(simsen)
#' }
#'
AmpliconHeatmap <- function(
  object,
  filter.name = 'default',
  cut.off = 5,
  left.side = 'columns',
  amplicons = NULL,
  samples = NULL,
  abs.count = FALSE,
  font.size = 10
  #n_col = 5,
  #colours = 'Blues'
){

  # Check if variant caller has been run on object
  if (identical(dim(object@variants), dim(tibble()))) {
    cons.table <- getFilteredData(
      object = object,
      name = filter.name
    )

    cons.table$Variants <- ifelse(cons.table$`Max Non-ref Allele Count` >= cut.off, 'Variant', 'Background')
  } else {
    cons.table <- object@variants
    cons.table$Variants <- ifelse(cons.table$p.adjust <= 0.05, 'Variant', 'Background')
  }

  # Make variables factors to ensure equidistance on the x-axis
  cons.table$`Sample Name` %<>% as.factor
  cons.table$Position %<>% as.factor
  cons.table$Name %<>% as.factor
  cons.table$sample %<>% as.factor
  cons.table$`Consensus group size` %<>% as.factor

  # Select samples and amplicons chosen by user
  cons.table <- filterConsensusTable(
    cons.table,
    amplicons = amplicons,
    samples = samples
  )

  # Do not cluster if only a single sample has been selected
  if(length(samples) > 1){
    do.cluster = TRUE
  } else {
    do.cluster = FALSE
  }

  # Should absolute counts or frequencies be plotted?
  if(abs.count){
    cons_wide <- cons.table %>%
      dplyr::select(.data$`Sample Name`, .data$Position, .data$Name, .data$`Max Non-ref Allele Count`)  %>%
      tidyr::spread(.data$`Sample Name`, .data$`Max Non-ref Allele Count`)
  } else {
    cons_wide <- cons.table %>%
      dplyr::select(.data$`Sample Name`, .data$Position, .data$Name, .data$`Max Non-ref Allele Frequency`)  %>%
      tidyr::spread(.data$`Sample Name`, .data$`Max Non-ref Allele Frequency`)
  }

  # Create matrix for heatmap and plot heatmap
  cons_mat <- cons_wide %>% dplyr::select(-c(.data$Position, .data$Name))
  cons_mat <- as.matrix(cons_mat)
  rownames(cons_mat) <- cons_wide$Position

  hcluster_clean <- data.frame(Amplicon = as.factor(cons_wide$Name))
  rownames(hcluster_clean) <- cons_wide$Position


  if(left.side == 'columns'){
    heatmap_DNA_clean <- pheatmap::pheatmap(
      mat = 100*cons_mat,
      angle_col = 45,
      scale = 'none',
      #color = RColorBrewer::brewer.pal(n = n_col, name = colours),
      color = c("#EFF3FF", "#BDD7E7", "#6BAED6", "#3182BD", "#08519C"),
      cluster_rows = FALSE,
      cluster_cols = do.cluster,
      clustering_method = 'ward.D2',
      drop_levels = TRUE,
      fontsize_col = font.size,
      annotation_row = hcluster_clean,
      show_rownames = FALSE,
      na_col = 'gray80'
    )
  } else{
    heatmap_DNA_clean <- pheatmap::pheatmap(
      mat = t(100*cons_mat),
      angle_col = 45,
      scale = 'none',
      #color = RColorBrewer::brewer.pal(n = n_col, name = colours),
      color = c("#EFF3FF", "#BDD7E7", "#6BAED6", "#3182BD", "#08519C"),
      cluster_rows = do.cluster,
      cluster_cols = FALSE,
      clustering_method = 'ward.D2',
      drop_levels = TRUE,
      fontsize_row = font.size,
      annotation_col = hcluster_clean,
      show_rownames = TRUE,
      show_colnames = FALSE,
      na_col = 'gray80'
    )

  }

  grDevices::dev.off()
  return(heatmap_DNA_clean)
}

#' Consensus depth histograms
#'
#' Generate histograms for the frequency of barcode family depths.
#'
#' @param object Requires a UMI sample or UMI experiment object
#' @param xMin Minimum consensus family size to plot, default is 0.
#' @param xMax Maximum consensus family size to plot. Default is 100.
#' @param samples List of samples to be shown.
#' @param option Color scheme to use
#' @param direction If using viridis colors sets the orientation of color scale.
#' @param theme ggplot theme to use. Defaults to classic.
#'
#' @export
#'
#' @import ggplot2
#' @importFrom tibble is_tibble
#' @importFrom viridis scale_fill_viridis
#' 
#' @return A ggplot object
#'
#' @examples
#' library(umiAnalyzer)
#' main = system.file('extdata', package = 'umiAnalyzer')
#' simsen <- createUmiExperiment(main, importBam = TRUE)
#' barcode_dist <- BarcodeFamilyHistogram(simsen)
#'
BarcodeFamilyHistogram <- function(
  object,
  xMin = 0,
  xMax = 100,
  samples = NULL,
  option = 'viridis',
  direction = 1,
  theme = 'classic'
  ) {

  if (missing(x = object)) {
    stop("Must provide a umiExperiment object.")
  } else if(!is.numeric(xMin) && !is.numeric(xMax)){
    warning("xMin and xMax needs to be numerical. Using default values instead.")
    xMin = 0
    xMax = 10
  } else if(xMin < 0 || xMax < 0){
    warning("xMin and xMax need to be positive numbers. Using default values")
    xMin = 0
    xMax = 10
  } else if(xMax < xMin){
    warning("xMax smaller than xMin, using default values instead.")
    xMin = 0
    xMax = 100
  } else if(!is.character(samples) && !is.null(samples)){
    warning("Samples need to be a character or NULL. Using default.")
    samples = NULL
  }

  # Use selected plotting theme
  use_theme <- select_theme(theme = theme)

  # check if object is a UMIexperiment
  if (class(object)[1]== "UMIexperiment") {
    reads <- object@reads

    if (!is.null(samples)) {
      reads <- reads %>% dplyr::filter(.data$sample %in% samples)
    }

    cons_depth_plot <- ggplot(reads, aes(x = count, fill = sample)) +
      geom_histogram(binwidth = 1, alpha = 0.5) +
      use_theme +
      viridis::scale_fill_viridis(discrete = TRUE,option = option,direction = direction) +
      xlim(xMin, xMax) +
      theme(axis.text = element_text(size = 12),
            axis.title = element_text(size = 14, face = "bold")
      ) +
      facet_wrap(~sample) +
      ylab("Number of families") +
      xlab("Barcode familiy size")

    # Assign plot to UMIexperiment object
    object@plots$family_histogram <- cons_depth_plot

  } else {
    # a tibble containing read info is passed directly
    if (!is.null(samples)) {
      object <- object %>% dplyr::filter(.data$sample %in% samples)
    }

    cons_depth_plot <- ggplot(object, aes(x = count, fill = sample)) +
      geom_histogram(binwidth = 1, alpha = 0.5) +
      use_theme +
      viridis::scale_fill_viridis(discrete = TRUE,option = option,direction = direction) +
      xlim(xMin, xMax) +
      theme(axis.text = element_text(size = 12),
            axis.title = element_text(size = 14, face = "bold")
      ) +
      facet_wrap(~sample) +
      ylab("Number of families") +
      xlab("Barcode familiy size")
  }

  return(cons_depth_plot)
}

#' Plot all variant allele bases
#'
#' @import ggplot2
#' @importFrom forcats fct_relevel
#' @importFrom magrittr "%>%" "%<>%"
#'
#' @param cons.data Consensus data table
#' @param theme Plotting theme
#' @param plot.ref If true, shows reference base on x-axis
#' @param abs.count Plot absolute counts instead of frequencies.
#' @param option Color scheme
#' @param direction Direction of the color scale
#'
#' @noRd
#'
#' @return A ggplot object.
stacked_amplicon_plot <- function(
  cons.data,
  theme = 'classic',
  plot.ref = FALSE,
  abs.count = FALSE,
  option = 'Pastel1',
  direction = 1
  ){

  out.file <- tibble()
  for(j in 1:nrow(cons.data)) {
    row <- cons.data[j,]
    if( row$Reference == "A" ) {
      row$A <- 0
    }
    else if( row$Reference == "C" ) {
      row$C <- 0
    }
    else if( row$Reference == "G" ) {
      row$G <- 0
    }
    else if( row$Reference == "T" ) {
      row$T <- 0
    }
    out.file <- dplyr::bind_rows(out.file, row)
  }

  # Rename variables
  out.file <- out.file %>% dplyr::select(
    .data$`Sample Name`,
    .data$Name, .data$Position,
    .data$Coverage,.data$Reference,
    .data$A,.data$T,.data$C,.data$G,
    .data$N,.data$I,.data$D) %>%
    tidyr::gather(
      "variant",
      "count",
      -c(.data$Name,
         .data$`Sample Name`,
         .data$Position,
         .data$Reference,
         .data$Coverage
        )
      )

  out.file$Name %<>% as.factor
  out.file$Position %<>% as.factor
  out.file$variant %<>% as.factor

  out.file$variant  <- forcats::fct_relevel(
    .f = out.file$variant,
    "A","C","G","T","D","I","N"
  )

  # Use selected plotting theme
  use_theme <- select_theme(theme = theme)

  if(abs.count){
    # Stacked count plot.
    stacked <- ggplot(out.file, aes_(
      fill=~variant,
      y=~count,
      x=~Position)) +
      geom_bar(position="stack", stat="identity") +
      use_theme +
      ylab("Variant Allele Frequency (%)") +
      xlab("Assay") +
      facet_grid(`Sample Name` ~ Name, scales = "free_x", space = "free_x") +
      theme(axis.text.x = element_text(angle = 90))
  } else {
    # Stacked count plot.
    stacked <- ggplot(out.file, aes_(
      fill=~variant,
      y=~(100*count/Coverage),
      x=~Position)) +
      geom_bar(position="stack", stat="identity") +
      use_theme +
      ylab("Variant Allele Frequency (%)") +
      xlab("Assay") +
      facet_grid(`Sample Name` ~ Name, scales = "free_x", space = "free_x") +
      theme(axis.text.x = element_text(angle = 90))
  }

  if(plot.ref){
    stacked <- stacked +
      scale_x_discrete(
        breaks = out.file$Position,
        labels = out.file$Reference
      ) +
      theme(
        axis.text.x = element_text(size = 9, angle = 0)
      )
  }

  if(option == 'default'){
    option = 'Pastel1'
  }

  if(option != 'default'){
    # If not using default color scheme use either
    # (1) Colors from viridis package
    if( option %in% c('viridis','magma','plasma','inferno','cividis') ){
      stacked <- stacked + viridis::scale_fill_viridis(
        discrete = TRUE,
        option = option,
        direction = direction
      )
    } else {
      # (2) Colors from ggplot: Accent, Dark2, Paired, Pastel1, Pastel2, Set1, Set2, Set3
      stacked <- stacked +
        ggplot2::scale_fill_brewer(
          palette = option
        )
    }
  }

  return(stacked)

}

#' Plot counts by nucleotide change
#' @param cons.data A consensus data table
#' @param do.plot Logical. Should plot be shown?
#' @param option Color palette to use
#' @param direction If using viridis colors, choose orientation of palette
#' @param theme ggplot theme to use, default is classic.
#' @param plot.ref = TRUE
#'
#' @import tibble
#' @import dplyr
#' @import ggplot2
#' @importFrom magrittr "%>%" "%<>%"
#' @importFrom tidyr gather
#' @importFrom viridis scale_fill_viridis
#' 
#' @noRd
#'
#' @return A ggplot object.
#'
vizStackedCounts <- function(
  cons.data,
  do.plot = TRUE,
  option = 'viridis',
  direction = 1,
  theme = 'bw',
  plot.ref = TRUE
  ){

  # For each row in consensus data, set the reference count to 0.
  out.file <- tibble()
  for(j in 1:nrow(cons.data)) {
    row <- cons.data[j,]
    if( row$Reference == "A" ) {
      row$avg.A <- 0
    }
    else if( row$Reference == "C" ) {
      row$avg.C <- 0
    }
    else if( row$Reference == "G" ) {
      row$avg.G <- 0
    }
    else if( row$Reference == "T" ) {
      row$avg.T <- 0
    }
    out.file <- dplyr::bind_rows(out.file, row)
  }

  # Rename variables
  out.file <- out.file %>% dplyr::select(
    .data$Name, .data$Position, .data$group.by,
    .data$avg.Depth,.data$Reference,
    .data$avg.A,.data$avg.T,.data$avg.C,.data$avg.G,
    .data$avg.N,.data$avg.I,.data$avg.D) %>%
    dplyr::rename(">A"= .data$avg.A,
                  ">T"= .data$avg.T,
                  ">C"= .data$avg.C,
                  ">G"= .data$avg.G,
                  ">N"= .data$avg.N,
                  ">I"= .data$avg.I,
                  ">D"= .data$avg.D) %>%
    tidyr::gather("variant","count", -c(.data$Name,
                                        .data$Position,
                                        .data$group.by,
                                        .data$Reference,
                                        .data$avg.Depth))

  out.file$variant  <- forcats::fct_relevel(
    .f = out.file$variant,
    ">A",">C",">G",">T",">D",">I",">N"
  )

  # Use selected plotting theme
  use_theme <- select_theme(theme = theme)

  # Stacked count plot.
  stacked <- ggplot(
    data = out.file, 
    mapping = aes_(
      fill=~variant,
      y=~count, 
      x=~Position
      )
    ) +
    geom_bar( stat="identity") +
    use_theme +
    facet_grid(. ~ Name, scales = "free_x", space = "free_x") +
    theme(axis.text.x = element_text(angle = 90))

  allowed_colours <- c('viridis','magma','plasma','inferno','cividis',
                       'Accent', 'Dark2', 'Paired', 'Pastel1', 'Pastel2',
                       'Set1', 'Set2', 'Set3')

  if(! option %in% allowed_colours){
    option = 'Set1'
  }

  if( option %in% c('viridis','magma','plasma','inferno','cividis') ){
    stacked <- stacked + viridis::scale_fill_viridis(
      discrete = TRUE,
      option = option
    )
  } else {
    # (2) Colours from ggplot: Accent, Dark2, Paired, Pastel1, Pastel2, Set1, Set2, Set3
    stacked <- stacked +
      ggplot2::scale_fill_brewer(
        palette = option
      )
  }

  if(plot.ref){
    stacked <- stacked +
      scale_x_discrete(
        breaks = out.file$Position,
        labels = out.file$Reference
      ) +
      theme(
        axis.text.x = element_text(
          size = 9,
          angle = 0
        )
      )
  }

  if(do.plot){
    return(stacked)
  } else {
    return(NULL)
  }
}


#' Plot time series data
#' 
#' Function for plotting time series or other meta data. Uses facet wrap to
#' display user-provided categorical variables.
#' 
#' @param object A consensus data table
#' @param filter.name "default"
#' @param cut.off 5
#' @param min.count 0
#' @param min.vaf 0
#' @param amplicons NULL
#' @param samples NULL
#' @param x_variable NULL
#' @param y_variable "Max Non-ref Allele Frequency"
#' @param columns "Sample Name"
#' @param rows "Name"
#' @param color_by "Name"
#' @param fdr 0.05
#' @param use.caller TRUE
#' @param bed_positions NULL
#'
#' @import tibble
#' @import dplyr
#' @import ggplot2
#' @importFrom magrittr "%>%" "%<>%"
#' @importFrom tidyr gather
#' @importFrom viridis scale_fill_viridis
#'
#' @return A ggplot object.
#'
#' @export
#' 
#' @examples
#' library(umiAnalyzer)
#'
#' main <- system.file("extdata", package = "umiAnalyzer")
#' simsen <- createUmiExperiment(main)
#' simsen <- filterUmiObject(simsen)
#' 
#' metaData <- system.file("extdata", "metadata.txt", package = "umiAnalyzer")
#' simsen <- importDesign(object = simsen,file = metaData)
#' 
#' bed_dir <- system.file("extdata", "simple.bed", package = "umiAnalyzer")
#' bed <- importBedFile(path = bed_dir)
#' 
#' time_plot <- timeSeriesGrid(simsen, x_variable = "time", bed_positions = bed)
#'
timeSeriesGrid <- function(
  object,
  filter.name = 'default',
  cut.off = 5,
  min.count = 0,
  min.vaf = 0,
  amplicons = NULL,
  samples = NULL,
  x_variable = NULL,
  y_variable = "Max Non-ref Allele Frequency",
  columns = "Sample Name",
  rows = "Name",
  color_by = "Name",
  fdr = 0.05,
  use.caller = TRUE,
  bed_positions = NULL
){

  # Use depth cut-off to call variants
  cons.table.default <- getFilteredData(object = object,name = filter.name)
  cons.table.default$Variants <- ifelse(cons.table.default$`Max Non-ref Allele Count` >= cut.off, "Variant", "Background")

  if(use.caller){
    # Check if variant caller has been run on object
    if(!identical(dim(object@variants), dim(tibble()))) {
      cons.table <- object@variants
      cons.table$Variants <- ifelse(cons.table$p.adjust <= fdr, "Variant", "Background")
    } else {
      warning("Variant caller has not been run, using default cut-off instead!")
      cons.table <- cons.table.default
    }
  } else {
    cons.table <- cons.table.default
  }

  design <- object@meta.data
  design <- as_tibble(design)
  colnames(design)[1] <- 'Sample Name'

  cons.table <- full_join(design,cons.table, by = 'Sample Name')

  # Filter amplicons and samples based on user selection
  cons.table <- filterConsensusTable(
    cons.table,
    amplicons = amplicons,
    samples = samples
  )

  # Filter position based on minimum VAF and counts, default is to use all positions.
  cons.table <- cons.table %>%
    dplyr::filter(
      .data$`Max Non-ref Allele Frequency` >= min.vaf/100,
      .data$`Max Non-ref Allele Count` >= min.count
    )

  if(!is.null(bed_positions)){
    # Select positions from bed file
    cons.table <- cons.table %>%
      dplyr::filter(.data$Position %in% bed_positions)
  }

  cons.table <- cons.table %>%
    dplyr::mutate("variant" = paste(.data$Name,' ', .data$Contig, ':', .data$Position))

  # Time course plots
  plot <- ggplot2::ggplot(
    data = cons.table,
    mapping = ggplot2::aes(
      x= !!as.symbol(x_variable),
      y= !!as.symbol(y_variable),
      col = .data$variant
    )) +
    ggplot2::geom_point(
      data = cons.table,
      size=3
    ) +
    ggplot2::geom_line() +
    ggplot2::scale_y_continuous(limits=c(0,NA)) +
    ggplot2::facet_wrap(
      vars(!!as.symbol(columns)),
      scales = 'free_y'
    ) +
    ggplot2::scale_color_brewer(palette = "Set1") +
    ggplot2::theme_bw()

  return(plot)
}


#' Plot coverage before and after normalization
#' 
#' @importFrom gridExtra grid.arrange
#' @importFrom magrittr "%>%" "%<>%"
#' 
#' @param cons.data Consensus data table
#' 
#' @noRd
#' 
#' @return A list of ggplot objects.
#' 
vizNormalization <- function(cons.data){
  
  cons.data$Name %<>% as.factor
  cons.data$replicate <- cons.data$group.by
  
  # plot coverage before nomralization per assay and group by replicate
  p1 <- ggplot(cons.data, aes_(x=~Name, y=~Coverage)) +
    geom_boxplot(outlier.colour="red", outlier.shape=8,
                 outlier.size=4) +
    theme(axis.text.x = element_text(angle = 90)) +
    ggtitle("Before normalization") +
    facet_grid(. ~ replicate, scales = "free_x", space = "free_x")
  
  # plot coverage after normalization per assay and group by replicate
  p2 <- ggplot(cons.data, aes_(x=~Name, y=~normCoverage)) +
    geom_boxplot(outlier.colour="red", outlier.shape=8,
                 outlier.size=4) +
    theme(axis.text.x = element_text(angle = 90)) +
    ggtitle("After normalization") +
    facet_grid(. ~ replicate, scales = "free_x", space = "free_x")
  
  # group replicates
  merged <- list(p1,p2)
  
  return(merged)
}

#' View count normalization
#'
#' @param object A UMIexperiment object containing norm plots
#' @param do.plot should plot be shown? If false returns a grid.arrange object
#'
#' @noRd
#' 
#' @importFrom gridExtra grid.arrange
#' 
#' @return A ggplot object
#'
viewNormPlot <- function(
  object,
  do.plot = TRUE
){
  
  plot <- grid.arrange(
    object@plots$norm_plot[[1]],
    object@plots$norm_plot[[2]],
    nrow = 1
  )
  
  if(do.plot){
    plot
  } else{
    return(plot)
  }
}


#' Generate Merged data plots
#'
#' @param object Requires a UMI sample or UMI experiment object
#' @param do.plot Logical. Should plots be shown.
#' @param cut.off How many variant reads are necessary to consider a variant above background? Default is 5 reads.
#' @param amplicons (Optional) character vector of amplicons to plot.
#' @param theme ggplot theme to use. Default is classic.
#' @param plot.ref = TRUE
#'
#' @noRd
#'
#' @import ggplot2
#' @importFrom dplyr filter group_by
#' @importFrom magrittr "%>%" "%<>%"
#' 
#' @return A UMIexperiment object
#'
vizMergedData <- function(
  object,
  cut.off = 5,
  amplicons = NULL,
  do.plot = TRUE,
  theme = 'classic',
  plot.ref = TRUE
){
  
  # Plotting maximum alternate alle count on merged data
  data <- object@merged.data %>%
    dplyr::rename(replicate = .data$group.by)
  data$Position %<>% as.factor
  
  if (!is.null(amplicons)) {
    data <- data %>% dplyr::filter(.data$Name %in% amplicons)
  }
  
  data$Variants <- ifelse(data$avg.MaxAC >= cut.off, 'Variant', 'Background')
  
  # Use selected plotting theme
  use_theme <- select_theme(theme = theme)
  
  plot <- ggplot(
    data = data,
    mapping = aes_(
      x=~Position,
      y=~avg.Max.AF,
      fill=~Variants
    )
  ) +
    geom_bar(stat = 'identity') +
    use_theme +
    geom_errorbar(
      mapping = aes_(
        ymin=~.data$avg.Max.AF,
        ymax=~(.data$avg.Max.AF + .data$std.MaxAF)
      ),
      width=.1
    ) +
    theme(axis.text.x = element_text(size = 2, angle = 90)) +
    facet_grid(replicate ~ Name, scales = 'free_x', space = 'free_x') +
    ylab("Variant Allele Frequency (%)") +
    xlab("Amplicon position") +
    ggplot2::scale_fill_brewer(palette = 'Set1')
  
  
  if(plot.ref){
    plot <- plot +
      scale_x_discrete(
        breaks = data$Position,
        labels = data$Reference
      ) +
      theme(
        axis.text.x = element_text(
          size = 9,
          angle = 0
        )
      )
  }
  
  if(do.plot) {
    print(plot)
    object@plots$merged_amplicons <- plot
  } else {
    object@plots$merged_amplicons <- plot
  }
}
Any scripts or data that you put into this service are public.
umiAnalyzer documentation built on Nov. 25, 2021, 9:07 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
umiAnalyzer
Tools for Analyzing Sequencing Data with Unique Molecular Identifiers

R/visualization.R
In umiAnalyzer: Tools for Analyzing Sequencing Data with Unique Molecular Identifiers

Defines functions vizMergedData viewNormPlot vizNormalization timeSeriesGrid vizStackedCounts stacked_amplicon_plot BarcodeFamilyHistogram AmpliconHeatmap AmpliconPlot UmiCountsPlot QCplot

Documented in AmpliconHeatmap AmpliconPlot BarcodeFamilyHistogram QCplot timeSeriesGrid UmiCountsPlot

Try the umiAnalyzer package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

umiAnalyzer Tools for Analyzing Sequencing Data with Unique Molecular Identifiers

R/visualization.R In umiAnalyzer: Tools for Analyzing Sequencing Data with Unique Molecular Identifiers

Defines functions vizMergedData viewNormPlot vizNormalization timeSeriesGrid vizStackedCounts stacked_amplicon_plot BarcodeFamilyHistogram AmpliconHeatmap AmpliconPlot UmiCountsPlot QCplot

Documented in AmpliconHeatmap AmpliconPlot BarcodeFamilyHistogram QCplot timeSeriesGrid UmiCountsPlot

Try the umiAnalyzer package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

umiAnalyzer
Tools for Analyzing Sequencing Data with Unique Molecular Identifiers

R/visualization.R
In umiAnalyzer: Tools for Analyzing Sequencing Data with Unique Molecular Identifiers