R/plot_read_counts.R

Defines functions plot_read_counts

Documented in plot_read_counts

#' Plot read counts by library
#' 
#' Output plots of read counts by library, at coarse and fine scales. Libraries areordered by the total
#' number of reads. These plots can optionally be output to pdfs by specifing \code{file_prefix}. The first
#' plot shows all libraries, with a line at \code{threshold_line} million reads. The second plot zooms in
#' on the \code{n_lowcount} libraries with the lowest counts.
#' @param metrics matrix or data frame containing values of metrics. Should have metrics in columns and libraries in rows.
#' @param file_prefix a character string. If provided, the function outputs pdfs of the plots, named "{file_prefix}_{plot_name}.pdf". If not provided, the function prints to a plotting window.
#' @param plotdims a numeric vector, the size (in inches) of the plotting object. Either the size of the pdfs, or the size of the plotting windows.
#' @param threshold_line numeric, the values (in millions of reads) at which to plot a horizontal line.
#' @param n_lowcount numeric, the number of libraries to include in the plot of low-count libraries
#' @param color_by_var (optional) character string or integer identifying the column in \code{metrics} to color bars by. If not provided, bars are plotted in grey.
#' @param color_by_var_levels (optional) character vector defining the order of elements in the variable used for coloring bars; this order is used for the plot legend and to match the order of colors (if provided). If not provided, levels are taken from the factor levels (if color_by_var is a factor), or else are ordered by order of appearance in \code{metrics}.
#' @param color_var_lab (optional) string to be used as the title for the color legend.
#' @param my_cols (optional) vector of colors to use for plotting. If color_by_var is not numeric, should be a vector with one color for each level of \code{color_by_var}; if the number of values supplied is less than the numer of levels in color_by_var, additional values are interpolated using colorRampPalette. By default, uses a range from blue to red. Future updates will handle numeric vectors for \code{color_by_var}, as follows. \code{color_by_var} is numeric, should have two elements, providing the start and end points for a continuous color scale (generated by \code{scale_color_gradient}).
#' @param na_col color to use for NA values of \code{color_by_var}.
#' @param id_col numeric or character, the number or name of the column containing the read counts to plot. Used Defaults to "fastq_total_reads".
#' @param id_col numeric or character, the number or name of the column containing the library identifiers. Used to plot identifiers of low-count libraries. Defaults to "lib.id".
#' @export
plot_read_counts <-
  function(metrics,
           file_prefix=NULL, plotdims=c(9,6),
           threshold_line=5, n_lowcount=20,
           color_by_var=NULL, color_by_var_levels=NULL, color_var_lab=NULL,
           my_cols=c("red","blue"), na_col="grey50",
           id_col="lib.id", total_reads_col="fastq_total_reads"
           ) {
    checkmate::assert(
      checkmate::check_data_frame(metrics)
    )
    
    metrics <- arrange(metrics, fastq_total_reads)
    file_suffix <- "pdf"
    
    if (!is.null(color_by_var)) {
      if (!is.numeric(metrics[, color_by_var, drop=TRUE])) {
        if (is.null(color_by_var_levels)) {
          if (is.factor(metrics[, color_by_var, drop=TRUE])) {
            color_by_var_levels <- levels(metrics[, color_by_var, drop=TRUE])
          } else {
            color_by_var_levels <- as.character(unique(na.omit(metrics[, color_by_var, drop=TRUE])))
          }
        }
        metrics[, color_by_var] <-
          factor(metrics[, color_by_var, drop=TRUE], levels=color_by_var_levels)
        
        if (length(my_cols) < length(color_by_var_levels))
          my_cols <- colorRampPalette(colors=my_cols)(length(color_by_var_levels))
        names(my_cols) <- color_by_var_levels
      # } else {
      #   color_scale <- scale_color_gradient(low=my_cols[1], high=my_cols[2], na.value=na_col)
      }
      col <- my_cols[metrics[, color_by_var, drop=TRUE]]
      file_suffix <- paste0("color_by_", color_by_var, ".", file_suffix)
    } else col <- "grey"
    
    

    if (!is.null(file_prefix)) {
      pdf(file=paste0(file_prefix, ".read_count_all_libs.", file_suffix), w=plotdims[1], h=plotdims[2])
      on.exit(while ("pdf" %in% names(dev.list())) dev.off()) # close plotting device on exit (mostly important for errors that could leave pdf output open)
    } else quartz(w=plotdims[1], h=plotdims[2])
    barplot(metrics[, total_reads_col, drop=TRUE]/10^6, main="Read count for all libraries",
            xlab="libraries", ylab = "total reads (in millions)", col=col)
    abline(h=threshold_line)
    
    if (!is.null(file_prefix)) {
      pdf(file=paste0(file_prefix, ".read_count_lowcount_libs.", file_suffix), w=plotdims[1], h=plotdims[2])
      on.exit(while ("pdf" %in% names(dev.list())) dev.off()) # close plotting device on exit (mostly important for errors that could leave pdf output open)
    } else quartz(w=plotdims[1], h=plotdims[2])
    barplot(metrics[1:n_lowcount, total_reads_col, drop=TRUE]/10^6, main="Read count for low-count libraries",
            names.arg = metrics[1:n_lowcount, id_col, drop=TRUE],
            ylab = "total reads (in millions)", las=2, col=col)
    abline(h=threshold_line)
  }
mjdufort/RNAseQC documentation built on April 19, 2024, 3:13 p.m.