R/visualization.R

Defines functions vizMergedData viewNormPlot vizNormalization timeSeriesGrid vizStackedCounts stacked_amplicon_plot BarcodeFamilyHistogram AmpliconHeatmap AmpliconPlot UmiCountsPlot QCplot

Documented in AmpliconHeatmap AmpliconPlot BarcodeFamilyHistogram QCplot timeSeriesGrid UmiCountsPlot

#' Generate QC plots
#'
#' Visualize the UMI count for each selected assay and sample for a given
#' consensus depth. This is useful to detect differences in coverage,
#' especially for multiplexed assays.
#'
#' @param object Requires a UMI sample or UMI experiment object
#' @param group.by String. Which variable should be used as a factor on the x-axis. Default is sample
#' @param plotDepth Which consensus depth to plot
#' @param assays (Optional) user-supplied list of assays to plot. Default is all.
#' @param samples (Optional) user-supplied list of samples to plot. Default is all.
#' @param theme ggplot theme to use.
#' @param option Color palette to use, either ggplot default or viridis colors.
#' @param direction If viridis colors are used, choose orientation of color scale.
#' @param toggle_mean Show mean or median
#' @param center Choose mean or median
#' @param line_col Choose color for mean/median line
#' @param angle Angle of labels on x-axis.
#' @param plotly Should plotly be used for rendering?
#'
#' @export
#'
#' @import ggplot2
#' @import dplyr
#' @importFrom magrittr "%>%" "%<>%"
#' @importFrom stats median
#' @importFrom viridis scale_fill_viridis
#' @importFrom plotly ggplotly
#' 
#' @return A ggplot object 
#' 
#' @examples
#' library(umiAnalyzer)
#' 
#' main = system.file('extdata', package = 'umiAnalyzer')
#' samples <- list.dirs(path = main, full.names = FALSE, recursive = FALSE)
#' simsen <- createUmiExperiment(experimentName = 'example',mainDir = main,sampleNames = samples)
#' 
#' depth_plot <- QCplot(simsen)
#'
QCplot <- function(
  object,
  group.by = 'sample',
  plotDepth = 3,
  assays = NULL,
  samples = NULL,
  theme = 'classic',
  option = 'viridis',
  direction = 'default',
  toggle_mean = TRUE,
  center = 'mean',
  line_col = 'blue',
  angle = 0,
  plotly = FALSE
  ) {

  if (missing(x = object)) {
    stop('Must provide a umiExperiment object and filter names')
  } else if(!class(object) == 'UMIexperiment'){
    stop('Object is not of class UMIexperiment.')
  }

  cons.table <- object@cons.data
  summary.table <- object@summary.data

  # Consensus depth plot per assay
  #cdepths <- summary.table %>% dplyr::filter(
  #  .data$assay != '',
  #  .data$depth == plotDepth
  #)

  cdepths <- cons.table %>%
    dplyr::filter(
      .data$Name != '',
      .data$`Consensus group size` == plotDepth
    )


  if (!is.null(assays)) {
    cdepths <- cdepths %>%
      dplyr::filter(.data$Name %in% assays)
  }

  if (!is.null(samples)) {
    cdepths <- cdepths %>%
      dplyr::filter(.data$`Sample Name` %in% samples)
  }

  cdepths <- dplyr::rename(cdepths, UMIcount = .data$Coverage, assay = .data$Name)

  cdepths <- cdepths %>%
    dplyr::group_by(.data$`Sample Name`, .data$assay) %>%
    dplyr::summarise(UMIcount = mean(.data$UMIcount))

  cdepths


  # Set assay and sample to factor for better plotting
  cdepths$assay %<>% as.factor
  cdepths$`Sample Name` %<>% as.factor

  #print(as.data.frame(cdepths))

  # From the ggplot2 vignette:
  # https://github.com/tidyverse/ggplot2/releases
  # aes_() replaces aes_q(). It also supports formulas, so the most concise
  # SE version of aes(carat, price) is now aes_(~carat, ~price). You may
  # want to use this form in packages, as it will avoid spurious R CMD check
  # warnings about undefined global variables.

  # Use selected plotting theme
  use_theme <- select_theme(theme = theme)

  if (group.by == 'assay') {
    depth_plot <- ggplot(cdepths, aes_(x = ~assay, y = ~UMIcount, fill=~(`Sample Name`))) +
      geom_bar(position = 'dodge', stat = 'identity') +
      use_theme +
      theme(
        axis.title.x = element_blank(),
        axis.text.x = element_text(size = 14, angle = angle, hjust = 1),
        axis.text.y = element_text(size = 14)
      ) +
      labs(
        title = paste('Consensus ', plotDepth, ' depths by assay', sep = ""),
        subtitle = paste(
          'Mean depth: ', round(mean(cdepths$UMIcount)),
          'Median depth: ', round(median(cdepths$UMIcount))
        ),
        caption = ''
      )
  } else if (group.by == 'sample') {
    depth_plot <- ggplot(cdepths, aes_(x = ~(`Sample Name`), y = ~UMIcount, fill=~assay)) +
      geom_bar(position = 'dodge', stat = 'identity') +
      use_theme +
      theme(
        axis.title.x = element_blank(),
        axis.text.x = element_text(size = 14, angle = angle),
        axis.text.y = element_text(size = 14)
      ) +
      labs(
        title = paste('Consensus ', plotDepth, ' depths by sample', sep = ''),
        subtitle = paste(
          'Mean depth: ', round(mean(cdepths$UMIcount)),
          'Median depth: ', round(median(cdepths$UMIcount))
        ),
        caption = ''
      )
  }

  if(toggle_mean){

    if(center == 'mean'){
      depth_plot <- depth_plot + geom_hline(
        yintercept = mean(cdepths$UMIcount),
        linetype = 'dashed',
        color = line_col)
    } else {
      depth_plot <- depth_plot + geom_hline(
        yintercept = median(cdepths$UMIcount),
        linetype = 'dashed',
        color = line_col)
    }


  }

  if(option != 'default'){

    if(direction == 'default'){
      orientation = 1
    } else {
      orientation = -1
    }

    depth_plot <- depth_plot + viridis::scale_fill_viridis(
      discrete = TRUE,
      option = option,
      direction = orientation
    )
  }

  if(plotly) {
    depth_plot <- plotly::ggplotly(depth_plot)
  }

  # Plot consensus depth distribution
  return(depth_plot)
}

#' Plot UMI counts
#'
#' Visualize the number detected UMI for each consensus depth cut-off. This may
#' may helpful in choosing the right consensus depth for your analysis, by
#' checking the number of reads still available for each assay and sample
#' for your chosen cut-off.
#'
#' @param object Requires a UMI sample or UMI experiment object
#' @param amplicons (Optional) user-supplied list of assays to plot. Default is all.
#' @param samples (Optional) user-supplied list of samples to plot. Default is all.
#' @param theme Plotting theme, default is classic
#' @param option Color palette. Default uses ggplot standard, otherwise viridis options.
#' @param direction If using viridis colors should the scale be inverted or default?
#'
#' @export
#'
#' @import ggplot2
#' @import dplyr
#' @importFrom magrittr "%>%" "%<>%"
#' @importFrom stats median
#' @importFrom viridis scale_fill_viridis
#' 
#' @return A UMIexperiment object
#' 
#' @examples
#' library(umiAnalyzer)
#' 
#' main = system.file('extdata', package = 'umiAnalyzer')
#' samples <- list.dirs(path = main, full.names = FALSE, recursive = FALSE)
#' simsen <- createUmiExperiment(experimentName = 'example',mainDir = main,sampleNames = samples)
#' simsen <- filterUmiObject(simsen)
#' 
#' count_plot <- UmiCountsPlot(simsen)
#'
UmiCountsPlot <- function(
  object,
  amplicons = NULL,
  samples = NULL,
  theme = 'classic',
  option = 'viridis',
  direction = 1
  ) {

  # Read summary data from object
  data <- object@summary.data %>%
    dplyr::filter(
      !is.na(.data$assay),
      .data$depth > 0
    )

  # Select amplicons
  if (!is.null(amplicons)) {
    data <- data %>%
      dplyr::filter(.data$assay %in% amplicons)
  }

  # Select samples
  if (!is.null(samples)) {
    data <- data %>%
      dplyr::filter(.data$sample %in% samples)
  }

  # Generate ggplot object
  data$depth %<>% as.factor

  # Use selected plotting theme
  use_theme <- select_theme(theme = theme)

  # If color option is not default use the viridis package for color palettes.
  # https://cran.r-project.org/web/packages/viridis/vignettes/intro-to-viridis.html
  if(option != 'default') {
    plot <- ggplot(
      data = data, aes(
        x=.data$depth,
        y=.data$UMIcount, 
        fill=sample)
      ) +
      use_theme +
      viridis::scale_fill_viridis(
        discrete = TRUE,
        option = option,
        direction = direction
        ) +
        geom_col(alpha=0.6) +
        facet_grid(assay ~ sample) +
        ylab('UMI count') +
        xlab('Consensus depth cut-off')
  } else {
    plot <- ggplot(data, aes(x=.data$depth, y=.data$UMIcount, fill=sample)) +
      use_theme +
      geom_col(alpha=0.6) +
      facet_grid(assay ~ sample) +
      ylab('UMI count') +
      xlab('Consensus depth cut-off')
  }

  # Return object and plot
  return(plot)
}

#' Generate Amplicon plots
#'
#' Plots variant allele frequencies or alternate allele counts for chosen
#' samples and assays.
#'
#' @param object Requires a UMI sample or UMI experiment object
#' @param filter.name Name of the filter to be plotted.
#' @param cut.off How many variant reads are necessary to consider a variant above background? Default is 5 reads.
#' @param min.count Minimum variants counts to plot, default is 0.
#' @param min.vaf Minimum variants allele frequency to plot, default is 0.
#' @param amplicons (Optional) character vector of amplicons to be plotted.
#' @param samples (Optional) character vector of samples to be plotted.
#' @param abs.count Should absolute counts be plotted instead of frequencies? Default is FALSE.
#' @param theme Plotting theme to use, default is classic.
#' @param option Color palette to use.
#' @param y_min Minimum y-axis value, default is 0
#' @param y_max Maximum y-axis value, default is NULL (autoscale)
#' @param direction Orientation of the color palette.
#' @param plot.text Should non-references bases be indicated above the bar?
#' @param plot.ref If true show reference base instead of position on x-axis.
#' @param stack.plot Show all variant alleles in a stacked bar plot.
#' @param classic.plot Show classical debarcer amplicon plot with raw error.
#' @param fdr False-discovery-rate cut-off for variants.
#' @param use.caller Should data from variant caller be used? Default is FALSE
#' @param use.plotly Should plotly be used instead of the regular ggplot device? Default is TRUE
#' @param font.size Font size
#' @param angle Font angle
#'
#' @export
#'
#' @import ggplot2
#' @importFrom plotly ggplotly
#' @importFrom scales rescale_none
#' @importFrom magrittr "%>%" "%<>%"
#' @importFrom dplyr filter
#' @importFrom viridis scale_fill_viridis
#' 
#' @return A UMIexperiment object containing a ggplot object with the
#' amplicon plot.
#'
#' @examples
#' library(umiAnalyzer)
#' 
#' main = system.file('extdata', package = 'umiAnalyzer')
#' samples <- list.dirs(path = main, full.names = FALSE, recursive = FALSE)
#' simsen <- createUmiExperiment(experimentName = 'example',mainDir = main,sampleNames = samples)
#' simsen <- filterUmiObject(simsen)
#'
#' amplicon_plot <- AmpliconPlot(simsen)
#' 
#' 
AmpliconPlot <- function(
  object,
  filter.name = 'default',
  cut.off = 5,
  min.count = 0,
  min.vaf = 0,
  amplicons = NULL,
  samples = NULL,
  abs.count = FALSE,
  y_min = 0,
  y_max = NULL,
  theme = 'classic',
  option = 'default',
  direction = 'default',
  plot.text = FALSE,
  plot.ref = TRUE,
  stack.plot = FALSE,
  classic.plot = FALSE,
  fdr = 0.05,
  font.size = 6,
  angle = 45,
  use.caller = FALSE,
  use.plotly = TRUE
  ) {

  if (missing(x = object)) {
    stop("Must provide a umiExperiment object and filter names")
  } else if(!class(object) == "UMIexperiment"){
    stop("Object is not of class UMIexperiment.")
  } else if(!is.logical(abs.count)){
    warning("abs.count needs to be of type boolean. Using defaults.")
    abs.count = FALSE
  } else if (!is.numeric(cut.off) || cut.off < 0) {
    warning("cut.off needs to be a positive integer. Using defaults.")
    cut.off = 5
  } else if(is.null(object@filters[filter.name][[1]])) {
    if(!is.null(object@filters$default)){
      warning("Requested filter not found, using default.")
    } else {
      stop("Filter not found. Have you run filterUmiObject?")
    }
  }

  # Use depth cut-off to call variants
  cons.table.default <- getFilteredData(
    object = object,
    name = filter.name
  )
  
  cons.table.default$Variants <- ifelse(
    test = cons.table.default$`Max Non-ref Allele Count` >= cut.off, 
    yes = "Variant",
    no = "Background"
  )

  if(use.caller){
    # Check if variant caller has been run on object
    if(!identical(dim(object@variants), dim(tibble()))) {
      cons.table <- object@variants
      cons.table$Variants <- ifelse(
        test = cons.table$p.adjust <= fdr, 
        yes = "Variant", 
        no = "Background"
      )
    } else {
      warning("Variant caller has not been run, using default cut-off instead!")
      cons.table <- cons.table.default
    }
  } else {
    cons.table <- cons.table.default
  }

  # Make variables factors to ensure equidistance on the x-axis
  cons.table$`Sample Name` %<>% as.factor
  cons.table$Position %<>% as.factor
  cons.table$Name %<>% as.factor
  cons.table$sample %<>% as.factor
  cons.table$`Consensus group size` %<>% as.factor

  # Filter amplicons and samples based on user selection
  cons.table <- filterConsensusTable(
    cons.table,
    amplicons = amplicons,
    samples = samples
  )

  # Filter position based on minimum VAF and counts, default is to use all positions.
  cons.table <- cons.table %>%
    dplyr::filter(
      .data$`Max Non-ref Allele Frequency` >= min.vaf/100,
      .data$`Max Non-ref Allele Count` >= min.count
    )

  # Get raw error data (cons0)
  raw_error <- object@raw.error

  # Make variables factors to ensure equidistance on the x-axis
  raw_error$`Sample Name` %<>% as.factor
  raw_error$Position %<>% as.factor
  raw_error$Name %<>% as.factor
  raw_error$sample %<>% as.factor
  raw_error$`Consensus group size` %<>% as.factor

  # Filter selected amplicons and samples
  raw_error <- filterConsensusTable(
    raw_error,
    amplicons = amplicons,
    samples = samples
  )

  classic_data <- dplyr::bind_rows(cons.table,raw_error)

  # Use selected plotting theme
  use_theme <- select_theme(theme = theme)

  # Set maximum y-axis limit to largest variant allele frequency in data
  # rounded up to the nearest integer.
  if(is.null(y_max)){
    y_max <- ceiling(100*max(cons.table$`Max Non-ref Allele Frequency`))
  }

  # If classic plot is chosen make a raw vs consN plot
  classic_plot <- ggplot(classic_data, aes_(
      x = ~Position,
      y = ~(100 * .data$`Max Non-ref Allele Frequency`),
      fill = ~(`Consensus group size`))
    ) +
    use_theme +
    geom_bar(stat = 'identity', width=.5, position = 'dodge') +
    theme(
      axis.text.x = element_text(
        size = font.size,
        angle = angle,
        hjust = 1
      ),
      axis.title.x = element_blank()
      ) +
    ylab('Variant Allele Frequency (%)') +
    xlab('Assay') +
    scale_y_continuous(
      limits = c(y_min,y_max),
      oob = scales::rescale_none
    ) +
    facet_grid(`Sample Name` ~ Name, scales = 'free_x', space = 'free_x')

  # If the plot is too big, limit number of positions plotted;
  # also output tabular output as an html table
  n_samples <- length(unique(cons.table$`Sample Name`))
  n_positions <- length(unique(cons.table$Position))


  if ( (n_samples > 6) | (n_positions > 500) ) {
    amplicon_plot <- ggplot(
      cons.table, aes_(
        x = ~ Name,
        y = ~ (100 * .data$`Max Non-ref Allele Frequency`))
      ) +
      geom_point(
        mapping = aes(
          col = .data$Variants,
          size = .data$`Max Non-ref Allele Count`)
      ) +
      use_theme +
      theme(
        axis.text.x = element_text(
          size = font.size,
          angle = angle,
          hjust = 1
        ),
        axis.title.x = element_blank()
        ) +
      ylab("Variant Allele Frequency (%)") +
      xlab("Assay") +
      labs(
        title = "Maximum variant allele frequency by assay",
        caption = "Each dot represenst a position. All samples are included.
           Blue dots represent positions with at least 5 variant alleles."
      )
  } else {
    if(abs.count) {
      amplicon_plot <- ggplot(cons.table, aes_(
        x = ~Position,
        y = ~`Max Non-ref Allele Count`,
        fill = ~Variants)
      ) +
        use_theme +
        geom_bar(stat = "identity") +
        theme(
          axis.text.x = element_text(size = font.size, angle = angle, hjust = 1),
          axis.title.x = element_blank()
          ) +
        ylab("Variant UMI count") +
        xlab("Assay") +
        facet_grid(`Sample Name` ~ Name, scales = "free_x", space = "free_x") +
        geom_hline(
          yintercept = 0,
          color = 'gray50',
          size = 0.3
        )
    } else {

      cons.table <- cons.table %>%
        dplyr::mutate(
          `Max Non-ref Allele Frequency` = 100*.data$`Max Non-ref Allele Frequency`
          )

      amplicon_plot <- ggplot(cons.table, aes_(
        x = ~Position,
        y = ~`Max Non-ref Allele Frequency`,
        fill = ~Variants)
        ) +
        use_theme +
        geom_bar(stat = "identity") +
        theme(
          axis.text.x = element_text(
            size = font.size,
            angle = angle,
            hjust = 1
          ),
          axis.title.x = element_blank()
          ) +
        ylab("Variant Allele Frequency (%)") +
        xlab("Assay") +
        scale_y_continuous(
          limits=c(y_min,y_max),
          oob = scales::rescale_none
        ) +
        facet_grid(`Sample Name` ~ Name, scales = "free_x", space = "free_x") +
        geom_hline(
          yintercept = 0,
          color = 'gray50',
          size = 0.3
        )
    }

    if(classic.plot){
      amplicon_plot <- classic_plot
    }

    if(plot.text){
      amplicon_plot <- amplicon_plot +
        geom_text(
          data = cons.table,
          mapping = aes_(label = ~(`Max Non-ref Allele`)),
          position = position_dodge(width = 1),
          size = 4
        )
    }

    if(plot.ref){
      amplicon_plot <- amplicon_plot +
        scale_x_discrete(
          breaks = cons.table$Position,
          labels = cons.table$Reference
        ) +
        theme(
          axis.text.x = element_text(
            size = font.size,
            angle = 0
            ),
          axis.title.x = element_blank()
        )
    }
  }


  if(direction == 'default'){
    orientation = 1
  } else {
    orientation = -1
  }

  if(option != 'default'){

    # If not using default color scheme use either
    # (1) Colors from viridis package
    if( option %in% c('viridis','magma','plasma','inferno','cividis') ){
      amplicon_plot <- amplicon_plot + viridis::scale_fill_viridis(
        discrete = TRUE,
        option = option,
        direction = orientation
      )
    } else {
    # (2) Colors from ggplot: Accent, Dark2, Paired, Pastel1, Pastel2, Set1, Set2, Set3
      amplicon_plot <- amplicon_plot +
        ggplot2::scale_fill_brewer(
          palette = option
        )
    }
  }

  if(stack.plot){
    amplicon_plot <- stacked_amplicon_plot(
      cons.data = cons.table,
      theme = theme,
      plot.ref = plot.ref,
      abs.count = abs.count,
      option = option,
      direction = orientation
    )
  }

  # Show plot and add ggplot object to the UMIexperiment object
  if(use.plotly){
    amplicon_plot <- plotly::ggplotly(amplicon_plot)
  }

  return(amplicon_plot)
}


#' Amplicon heatmap
#'
#' Generates a heatmap of mutations with sample clustering using pheatmap.
#'
#' @param object Requires a UMI sample or UMI experiment object
#' @param filter.name Name of the filter to be plotted.
#' @param cut.off How many variant reads are necessary to consider a variant above background? Default is 5 reads.
#' @param amplicons (Optional) character vector of amplicons to be plotted.
#' @param samples (Optional) character vector of samples to be plotted.
#' @param left.side Show assays or sample on the left side of the heatmap. Default is assays
#' @param abs.count Logical. Should absolute counts be used instead of frequencies?
#' @param font.size Font size to use for sample labels
#'
#' @export
#'
#' @import magrittr
#' @importFrom pheatmap pheatmap
#' @importFrom tidyr spread
#' @importFrom dplyr select
#' @importFrom grDevices dev.off
#'
#' @return A graphics object
#' 
#' @examples
#' \dontrun{
#' library(umiAnalyzer)
#' 
#' main = system.file('extdata', package = 'umiAnalyzer')
#' samples <- list.dirs(path = main, full.names = FALSE, recursive = FALSE)
#' simsen <- createUmiExperiment(experimentName = 'example',mainDir = main,sampleNames = samples)
#' simsen <- filterUmiObject(simsen)
#' 
#' hmap <- AmpliconHeatmap(simsen)
#' }
#'
AmpliconHeatmap <- function(
  object,
  filter.name = 'default',
  cut.off = 5,
  left.side = 'columns',
  amplicons = NULL,
  samples = NULL,
  abs.count = FALSE,
  font.size = 10
  #n_col = 5,
  #colours = 'Blues'
){

  # Check if variant caller has been run on object
  if (identical(dim(object@variants), dim(tibble()))) {
    cons.table <- getFilteredData(
      object = object,
      name = filter.name
    )

    cons.table$Variants <- ifelse(cons.table$`Max Non-ref Allele Count` >= cut.off, 'Variant', 'Background')
  } else {
    cons.table <- object@variants
    cons.table$Variants <- ifelse(cons.table$p.adjust <= 0.05, 'Variant', 'Background')
  }

  # Make variables factors to ensure equidistance on the x-axis
  cons.table$`Sample Name` %<>% as.factor
  cons.table$Position %<>% as.factor
  cons.table$Name %<>% as.factor
  cons.table$sample %<>% as.factor
  cons.table$`Consensus group size` %<>% as.factor

  # Select samples and amplicons chosen by user
  cons.table <- filterConsensusTable(
    cons.table,
    amplicons = amplicons,
    samples = samples
  )

  # Do not cluster if only a single sample has been selected
  if(length(samples) > 1){
    do.cluster = TRUE
  } else {
    do.cluster = FALSE
  }

  # Should absolute counts or frequencies be plotted?
  if(abs.count){
    cons_wide <- cons.table %>%
      dplyr::select(.data$`Sample Name`, .data$Position, .data$Name, .data$`Max Non-ref Allele Count`)  %>%
      tidyr::spread(.data$`Sample Name`, .data$`Max Non-ref Allele Count`)
  } else {
    cons_wide <- cons.table %>%
      dplyr::select(.data$`Sample Name`, .data$Position, .data$Name, .data$`Max Non-ref Allele Frequency`)  %>%
      tidyr::spread(.data$`Sample Name`, .data$`Max Non-ref Allele Frequency`)
  }

  # Create matrix for heatmap and plot heatmap
  cons_mat <- cons_wide %>% dplyr::select(-c(.data$Position, .data$Name))
  cons_mat <- as.matrix(cons_mat)
  rownames(cons_mat) <- cons_wide$Position

  hcluster_clean <- data.frame(Amplicon = as.factor(cons_wide$Name))
  rownames(hcluster_clean) <- cons_wide$Position


  if(left.side == 'columns'){
    heatmap_DNA_clean <- pheatmap::pheatmap(
      mat = 100*cons_mat,
      angle_col = 45,
      scale = 'none',
      #color = RColorBrewer::brewer.pal(n = n_col, name = colours),
      color = c("#EFF3FF", "#BDD7E7", "#6BAED6", "#3182BD", "#08519C"),
      cluster_rows = FALSE,
      cluster_cols = do.cluster,
      clustering_method = 'ward.D2',
      drop_levels = TRUE,
      fontsize_col = font.size,
      annotation_row = hcluster_clean,
      show_rownames = FALSE,
      na_col = 'gray80'
    )
  } else{
    heatmap_DNA_clean <- pheatmap::pheatmap(
      mat = t(100*cons_mat),
      angle_col = 45,
      scale = 'none',
      #color = RColorBrewer::brewer.pal(n = n_col, name = colours),
      color = c("#EFF3FF", "#BDD7E7", "#6BAED6", "#3182BD", "#08519C"),
      cluster_rows = do.cluster,
      cluster_cols = FALSE,
      clustering_method = 'ward.D2',
      drop_levels = TRUE,
      fontsize_row = font.size,
      annotation_col = hcluster_clean,
      show_rownames = TRUE,
      show_colnames = FALSE,
      na_col = 'gray80'
    )

  }

  grDevices::dev.off()
  return(heatmap_DNA_clean)
}

#' Consensus depth histograms
#'
#' Generate histograms for the frequency of barcode family depths.
#'
#' @param object Requires a UMI sample or UMI experiment object
#' @param xMin Minimum consensus family size to plot, default is 0.
#' @param xMax Maximum consensus family size to plot. Default is 100.
#' @param samples List of samples to be shown.
#' @param option Color scheme to use
#' @param direction If using viridis colors sets the orientation of color scale.
#' @param theme ggplot theme to use. Defaults to classic.
#'
#' @export
#'
#' @import ggplot2
#' @importFrom tibble is_tibble
#' @importFrom viridis scale_fill_viridis
#' 
#' @return A ggplot object
#'
#' @examples
#' library(umiAnalyzer)
#' main = system.file('extdata', package = 'umiAnalyzer')
#' simsen <- createUmiExperiment(main, importBam = TRUE)
#' barcode_dist <- BarcodeFamilyHistogram(simsen)
#'
BarcodeFamilyHistogram <- function(
  object,
  xMin = 0,
  xMax = 100,
  samples = NULL,
  option = 'viridis',
  direction = 1,
  theme = 'classic'
  ) {

  if (missing(x = object)) {
    stop("Must provide a umiExperiment object.")
  } else if(!is.numeric(xMin) && !is.numeric(xMax)){
    warning("xMin and xMax needs to be numerical. Using default values instead.")
    xMin = 0
    xMax = 10
  } else if(xMin < 0 || xMax < 0){
    warning("xMin and xMax need to be positive numbers. Using default values")
    xMin = 0
    xMax = 10
  } else if(xMax < xMin){
    warning("xMax smaller than xMin, using default values instead.")
    xMin = 0
    xMax = 100
  } else if(!is.character(samples) && !is.null(samples)){
    warning("Samples need to be a character or NULL. Using default.")
    samples = NULL
  }

  # Use selected plotting theme
  use_theme <- select_theme(theme = theme)

  # check if object is a UMIexperiment
  if (class(object)[1]== "UMIexperiment") {
    reads <- object@reads

    if (!is.null(samples)) {
      reads <- reads %>% dplyr::filter(.data$sample %in% samples)
    }

    cons_depth_plot <- ggplot(reads, aes(x = count, fill = sample)) +
      geom_histogram(binwidth = 1, alpha = 0.5) +
      use_theme +
      viridis::scale_fill_viridis(discrete = TRUE,option = option,direction = direction) +
      xlim(xMin, xMax) +
      theme(axis.text = element_text(size = 12),
            axis.title = element_text(size = 14, face = "bold")
      ) +
      facet_wrap(~sample) +
      ylab("Number of families") +
      xlab("Barcode familiy size")

    # Assign plot to UMIexperiment object
    object@plots$family_histogram <- cons_depth_plot

  } else {
    # a tibble containing read info is passed directly
    if (!is.null(samples)) {
      object <- object %>% dplyr::filter(.data$sample %in% samples)
    }

    cons_depth_plot <- ggplot(object, aes(x = count, fill = sample)) +
      geom_histogram(binwidth = 1, alpha = 0.5) +
      use_theme +
      viridis::scale_fill_viridis(discrete = TRUE,option = option,direction = direction) +
      xlim(xMin, xMax) +
      theme(axis.text = element_text(size = 12),
            axis.title = element_text(size = 14, face = "bold")
      ) +
      facet_wrap(~sample) +
      ylab("Number of families") +
      xlab("Barcode familiy size")
  }

  return(cons_depth_plot)
}

#' Plot all variant allele bases
#'
#' @import ggplot2
#' @importFrom forcats fct_relevel
#' @importFrom magrittr "%>%" "%<>%"
#'
#' @param cons.data Consensus data table
#' @param theme Plotting theme
#' @param plot.ref If true, shows reference base on x-axis
#' @param abs.count Plot absolute counts instead of frequencies.
#' @param option Color scheme
#' @param direction Direction of the color scale
#'
#' @noRd
#'
#' @return A ggplot object.
stacked_amplicon_plot <- function(
  cons.data,
  theme = 'classic',
  plot.ref = FALSE,
  abs.count = FALSE,
  option = 'Pastel1',
  direction = 1
  ){

  out.file <- tibble()
  for(j in 1:nrow(cons.data)) {
    row <- cons.data[j,]
    if( row$Reference == "A" ) {
      row$A <- 0
    }
    else if( row$Reference == "C" ) {
      row$C <- 0
    }
    else if( row$Reference == "G" ) {
      row$G <- 0
    }
    else if( row$Reference == "T" ) {
      row$T <- 0
    }
    out.file <- dplyr::bind_rows(out.file, row)
  }

  # Rename variables
  out.file <- out.file %>% dplyr::select(
    .data$`Sample Name`,
    .data$Name, .data$Position,
    .data$Coverage,.data$Reference,
    .data$A,.data$T,.data$C,.data$G,
    .data$N,.data$I,.data$D) %>%
    tidyr::gather(
      "variant",
      "count",
      -c(.data$Name,
         .data$`Sample Name`,
         .data$Position,
         .data$Reference,
         .data$Coverage
        )
      )

  out.file$Name %<>% as.factor
  out.file$Position %<>% as.factor
  out.file$variant %<>% as.factor

  out.file$variant  <- forcats::fct_relevel(
    .f = out.file$variant,
    "A","C","G","T","D","I","N"
  )

  # Use selected plotting theme
  use_theme <- select_theme(theme = theme)

  if(abs.count){
    # Stacked count plot.
    stacked <- ggplot(out.file, aes_(
      fill=~variant,
      y=~count,
      x=~Position)) +
      geom_bar(position="stack", stat="identity") +
      use_theme +
      ylab("Variant Allele Frequency (%)") +
      xlab("Assay") +
      facet_grid(`Sample Name` ~ Name, scales = "free_x", space = "free_x") +
      theme(axis.text.x = element_text(angle = 90))
  } else {
    # Stacked count plot.
    stacked <- ggplot(out.file, aes_(
      fill=~variant,
      y=~(100*count/Coverage),
      x=~Position)) +
      geom_bar(position="stack", stat="identity") +
      use_theme +
      ylab("Variant Allele Frequency (%)") +
      xlab("Assay") +
      facet_grid(`Sample Name` ~ Name, scales = "free_x", space = "free_x") +
      theme(axis.text.x = element_text(angle = 90))
  }

  if(plot.ref){
    stacked <- stacked +
      scale_x_discrete(
        breaks = out.file$Position,
        labels = out.file$Reference
      ) +
      theme(
        axis.text.x = element_text(size = 9, angle = 0)
      )
  }

  if(option == 'default'){
    option = 'Pastel1'
  }

  if(option != 'default'){
    # If not using default color scheme use either
    # (1) Colors from viridis package
    if( option %in% c('viridis','magma','plasma','inferno','cividis') ){
      stacked <- stacked + viridis::scale_fill_viridis(
        discrete = TRUE,
        option = option,
        direction = direction
      )
    } else {
      # (2) Colors from ggplot: Accent, Dark2, Paired, Pastel1, Pastel2, Set1, Set2, Set3
      stacked <- stacked +
        ggplot2::scale_fill_brewer(
          palette = option
        )
    }
  }

  return(stacked)

}

#' Plot counts by nucleotide change
#' @param cons.data A consensus data table
#' @param do.plot Logical. Should plot be shown?
#' @param option Color palette to use
#' @param direction If using viridis colors, choose orientation of palette
#' @param theme ggplot theme to use, default is classic.
#' @param plot.ref = TRUE
#'
#' @import tibble
#' @import dplyr
#' @import ggplot2
#' @importFrom magrittr "%>%" "%<>%"
#' @importFrom tidyr gather
#' @importFrom viridis scale_fill_viridis
#' 
#' @noRd
#'
#' @return A ggplot object.
#'
vizStackedCounts <- function(
  cons.data,
  do.plot = TRUE,
  option = 'viridis',
  direction = 1,
  theme = 'bw',
  plot.ref = TRUE
  ){

  # For each row in consensus data, set the reference count to 0.
  out.file <- tibble()
  for(j in 1:nrow(cons.data)) {
    row <- cons.data[j,]
    if( row$Reference == "A" ) {
      row$avg.A <- 0
    }
    else if( row$Reference == "C" ) {
      row$avg.C <- 0
    }
    else if( row$Reference == "G" ) {
      row$avg.G <- 0
    }
    else if( row$Reference == "T" ) {
      row$avg.T <- 0
    }
    out.file <- dplyr::bind_rows(out.file, row)
  }

  # Rename variables
  out.file <- out.file %>% dplyr::select(
    .data$Name, .data$Position, .data$group.by,
    .data$avg.Depth,.data$Reference,
    .data$avg.A,.data$avg.T,.data$avg.C,.data$avg.G,
    .data$avg.N,.data$avg.I,.data$avg.D) %>%
    dplyr::rename(">A"= .data$avg.A,
                  ">T"= .data$avg.T,
                  ">C"= .data$avg.C,
                  ">G"= .data$avg.G,
                  ">N"= .data$avg.N,
                  ">I"= .data$avg.I,
                  ">D"= .data$avg.D) %>%
    tidyr::gather("variant","count", -c(.data$Name,
                                        .data$Position,
                                        .data$group.by,
                                        .data$Reference,
                                        .data$avg.Depth))

  out.file$variant  <- forcats::fct_relevel(
    .f = out.file$variant,
    ">A",">C",">G",">T",">D",">I",">N"
  )

  # Use selected plotting theme
  use_theme <- select_theme(theme = theme)

  # Stacked count plot.
  stacked <- ggplot(
    data = out.file, 
    mapping = aes_(
      fill=~variant,
      y=~count, 
      x=~Position
      )
    ) +
    geom_bar( stat="identity") +
    use_theme +
    facet_grid(. ~ Name, scales = "free_x", space = "free_x") +
    theme(axis.text.x = element_text(angle = 90))

  allowed_colours <- c('viridis','magma','plasma','inferno','cividis',
                       'Accent', 'Dark2', 'Paired', 'Pastel1', 'Pastel2',
                       'Set1', 'Set2', 'Set3')

  if(! option %in% allowed_colours){
    option = 'Set1'
  }

  if( option %in% c('viridis','magma','plasma','inferno','cividis') ){
    stacked <- stacked + viridis::scale_fill_viridis(
      discrete = TRUE,
      option = option
    )
  } else {
    # (2) Colours from ggplot: Accent, Dark2, Paired, Pastel1, Pastel2, Set1, Set2, Set3
    stacked <- stacked +
      ggplot2::scale_fill_brewer(
        palette = option
      )
  }

  if(plot.ref){
    stacked <- stacked +
      scale_x_discrete(
        breaks = out.file$Position,
        labels = out.file$Reference
      ) +
      theme(
        axis.text.x = element_text(
          size = 9,
          angle = 0
        )
      )
  }

  if(do.plot){
    return(stacked)
  } else {
    return(NULL)
  }
}


#' Plot time series data
#' 
#' Function for plotting time series or other meta data. Uses facet wrap to
#' display user-provided categorical variables.
#' 
#' @param object A consensus data table
#' @param filter.name "default"
#' @param cut.off 5
#' @param min.count 0
#' @param min.vaf 0
#' @param amplicons NULL
#' @param samples NULL
#' @param x_variable NULL
#' @param y_variable "Max Non-ref Allele Frequency"
#' @param columns "Sample Name"
#' @param rows "Name"
#' @param color_by "Name"
#' @param fdr 0.05
#' @param use.caller TRUE
#' @param bed_positions NULL
#'
#' @import tibble
#' @import dplyr
#' @import ggplot2
#' @importFrom magrittr "%>%" "%<>%"
#' @importFrom tidyr gather
#' @importFrom viridis scale_fill_viridis
#'
#' @return A ggplot object.
#'
#' @export
#' 
#' @examples
#' library(umiAnalyzer)
#'
#' main <- system.file("extdata", package = "umiAnalyzer")
#' simsen <- createUmiExperiment(main)
#' simsen <- filterUmiObject(simsen)
#' 
#' metaData <- system.file("extdata", "metadata.txt", package = "umiAnalyzer")
#' simsen <- importDesign(object = simsen,file = metaData)
#' 
#' bed_dir <- system.file("extdata", "simple.bed", package = "umiAnalyzer")
#' bed <- importBedFile(path = bed_dir)
#' 
#' time_plot <- timeSeriesGrid(simsen, x_variable = "time", bed_positions = bed)
#'
timeSeriesGrid <- function(
  object,
  filter.name = 'default',
  cut.off = 5,
  min.count = 0,
  min.vaf = 0,
  amplicons = NULL,
  samples = NULL,
  x_variable = NULL,
  y_variable = "Max Non-ref Allele Frequency",
  columns = "Sample Name",
  rows = "Name",
  color_by = "Name",
  fdr = 0.05,
  use.caller = TRUE,
  bed_positions = NULL
){

  # Use depth cut-off to call variants
  cons.table.default <- getFilteredData(object = object,name = filter.name)
  cons.table.default$Variants <- ifelse(cons.table.default$`Max Non-ref Allele Count` >= cut.off, "Variant", "Background")

  if(use.caller){
    # Check if variant caller has been run on object
    if(!identical(dim(object@variants), dim(tibble()))) {
      cons.table <- object@variants
      cons.table$Variants <- ifelse(cons.table$p.adjust <= fdr, "Variant", "Background")
    } else {
      warning("Variant caller has not been run, using default cut-off instead!")
      cons.table <- cons.table.default
    }
  } else {
    cons.table <- cons.table.default
  }

  design <- object@meta.data
  design <- as_tibble(design)
  colnames(design)[1] <- 'Sample Name'

  cons.table <- full_join(design,cons.table, by = 'Sample Name')

  # Filter amplicons and samples based on user selection
  cons.table <- filterConsensusTable(
    cons.table,
    amplicons = amplicons,
    samples = samples
  )

  # Filter position based on minimum VAF and counts, default is to use all positions.
  cons.table <- cons.table %>%
    dplyr::filter(
      .data$`Max Non-ref Allele Frequency` >= min.vaf/100,
      .data$`Max Non-ref Allele Count` >= min.count
    )

  if(!is.null(bed_positions)){
    # Select positions from bed file
    cons.table <- cons.table %>%
      dplyr::filter(.data$Position %in% bed_positions)
  }

  cons.table <- cons.table %>%
    dplyr::mutate("variant" = paste(.data$Name,' ', .data$Contig, ':', .data$Position))

  # Time course plots
  plot <- ggplot2::ggplot(
    data = cons.table,
    mapping = ggplot2::aes(
      x= !!as.symbol(x_variable),
      y= !!as.symbol(y_variable),
      col = .data$variant
    )) +
    ggplot2::geom_point(
      data = cons.table,
      size=3
    ) +
    ggplot2::geom_line() +
    ggplot2::scale_y_continuous(limits=c(0,NA)) +
    ggplot2::facet_wrap(
      vars(!!as.symbol(columns)),
      scales = 'free_y'
    ) +
    ggplot2::scale_color_brewer(palette = "Set1") +
    ggplot2::theme_bw()

  return(plot)
}


#' Plot coverage before and after normalization
#' 
#' @importFrom gridExtra grid.arrange
#' @importFrom magrittr "%>%" "%<>%"
#' 
#' @param cons.data Consensus data table
#' 
#' @noRd
#' 
#' @return A list of ggplot objects.
#' 
vizNormalization <- function(cons.data){
  
  cons.data$Name %<>% as.factor
  cons.data$replicate <- cons.data$group.by
  
  # plot coverage before nomralization per assay and group by replicate
  p1 <- ggplot(cons.data, aes_(x=~Name, y=~Coverage)) +
    geom_boxplot(outlier.colour="red", outlier.shape=8,
                 outlier.size=4) +
    theme(axis.text.x = element_text(angle = 90)) +
    ggtitle("Before normalization") +
    facet_grid(. ~ replicate, scales = "free_x", space = "free_x")
  
  # plot coverage after normalization per assay and group by replicate
  p2 <- ggplot(cons.data, aes_(x=~Name, y=~normCoverage)) +
    geom_boxplot(outlier.colour="red", outlier.shape=8,
                 outlier.size=4) +
    theme(axis.text.x = element_text(angle = 90)) +
    ggtitle("After normalization") +
    facet_grid(. ~ replicate, scales = "free_x", space = "free_x")
  
  # group replicates
  merged <- list(p1,p2)
  
  return(merged)
}

#' View count normalization
#'
#' @param object A UMIexperiment object containing norm plots
#' @param do.plot should plot be shown? If false returns a grid.arrange object
#'
#' @noRd
#' 
#' @importFrom gridExtra grid.arrange
#' 
#' @return A ggplot object
#'
viewNormPlot <- function(
  object,
  do.plot = TRUE
){
  
  plot <- grid.arrange(
    object@plots$norm_plot[[1]],
    object@plots$norm_plot[[2]],
    nrow = 1
  )
  
  if(do.plot){
    plot
  } else{
    return(plot)
  }
}


#' Generate Merged data plots
#'
#' @param object Requires a UMI sample or UMI experiment object
#' @param do.plot Logical. Should plots be shown.
#' @param cut.off How many variant reads are necessary to consider a variant above background? Default is 5 reads.
#' @param amplicons (Optional) character vector of amplicons to plot.
#' @param theme ggplot theme to use. Default is classic.
#' @param plot.ref = TRUE
#'
#' @noRd
#'
#' @import ggplot2
#' @importFrom dplyr filter group_by
#' @importFrom magrittr "%>%" "%<>%"
#' 
#' @return A UMIexperiment object
#'
vizMergedData <- function(
  object,
  cut.off = 5,
  amplicons = NULL,
  do.plot = TRUE,
  theme = 'classic',
  plot.ref = TRUE
){
  
  # Plotting maximum alternate alle count on merged data
  data <- object@merged.data %>%
    dplyr::rename(replicate = .data$group.by)
  data$Position %<>% as.factor
  
  if (!is.null(amplicons)) {
    data <- data %>% dplyr::filter(.data$Name %in% amplicons)
  }
  
  data$Variants <- ifelse(data$avg.MaxAC >= cut.off, 'Variant', 'Background')
  
  # Use selected plotting theme
  use_theme <- select_theme(theme = theme)
  
  plot <- ggplot(
    data = data,
    mapping = aes_(
      x=~Position,
      y=~avg.Max.AF,
      fill=~Variants
    )
  ) +
    geom_bar(stat = 'identity') +
    use_theme +
    geom_errorbar(
      mapping = aes_(
        ymin=~.data$avg.Max.AF,
        ymax=~(.data$avg.Max.AF + .data$std.MaxAF)
      ),
      width=.1
    ) +
    theme(axis.text.x = element_text(size = 2, angle = 90)) +
    facet_grid(replicate ~ Name, scales = 'free_x', space = 'free_x') +
    ylab("Variant Allele Frequency (%)") +
    xlab("Amplicon position") +
    ggplot2::scale_fill_brewer(palette = 'Set1')
  
  
  if(plot.ref){
    plot <- plot +
      scale_x_discrete(
        breaks = data$Position,
        labels = data$Reference
      ) +
      theme(
        axis.text.x = element_text(
          size = 9,
          angle = 0
        )
      )
  }
  
  if(do.plot) {
    print(plot)
    object@plots$merged_amplicons <- plot
  } else {
    object@plots$merged_amplicons <- plot
  }
}

Try the umiAnalyzer package in your browser

Any scripts or data that you put into this service are public.

umiAnalyzer documentation built on Nov. 25, 2021, 9:07 a.m.