#' Cumulative fraction of reads per cell barcode
#'
#' Calculate cumulative fraction of reads per detected cell barcode. The
#' cumulative fraction can be plotted to estimate the number of sequenced cells
#' by identifying the "knee" of the distribution. For further details on
#' selection of cells, see the Drop-seq alignment cookbook provided by the
#' \href{http://mccarrolllab.com/dropseq/}{McCarroll lab}. This function is
#' implemented in \link[dropseqr]{plot_cumfrac_rpc} and is only required when
#' an alternative plot or analysis of the cumulative fraction is needed.
#'
#' @param read_counts A data.frame containing the number of reads per cell
#' barcode. Should consist of two columns, whereas the number of reads must in
#' the first and the barcode identity in the second column. This is typically
#' produced by running BAMTagHistogram from Drop-seq tools.
#'
#' @return A data.frame containing the original data in read_counts with an
#' added column containing the cumulative fraction.
#'
#' @examples
#' library(dropseqr)
#'
#' # load example dataset
#' data(reads_per_cell_barcode)
#'
#' # calculate cumulative fraction
#' cf <- cumfrac_rpc(reads_per_cell_barcode)
#'
#' @export
cumfrac_rpc <- function(read_counts){
# abort if read_counts is not a data.frame
if (class(read_counts) != "data.frame"){
stop("read_counts not of class data.frame!")
}
# set colnames for read_counts
colnames(read_counts)[1:2] <- c("reads", "cell_barcode")
# make sure that read_counts is ordered correctly
read_counts <- read_counts[order(read_counts[, 1], decreasing = TRUE), ]
# calculate cumulative sum
reads_cumsum <- cumsum(read_counts[, 1])
# calculate cumulative fraction
cumfrac <- reads_cumsum / max(reads_cumsum)
# add to read_counts
cbind(read_counts, cumfrac)
}
#' Plot cumulative fraction of reads per cell barcode
#'
#' Calculate and plot cumulative fraction of reads per detected cell barcode.
#' The cumulative fraction can be plotted to estimate the number of sequenced
#' cells by identifying the "knee" of the distribution. For further details on
#' selection of cells, see the Drop-seq alignment cookbook provided by the
#' \href{http://mccarrolllab.com/dropseq/}{McCarroll lab}.
#'
#' @param read_counts A data.frame containing the number of reads per cell
#' barcode. Should consist of two columns, whereas the number of reads must in
#' the first and the barcode identity in the second column. This is typically
#' produced by running BAMTagHistogram from Drop-seq tools.
#'
#' @param nbcs Number of cell barcodes to be plotted on the x-axis.
#'
#' @param title String containing plot title.
#'
#' @param size Size used for plotting the cumulative fraction and optional
#' ncell line.
#'
#' @param cumfrac_col Color for plotting the cumulative fraction line
#'
#' @param ncells (optional) Estimate of how many cells are inculed in sample.
#' A vertical line will be drawn at x = ncells.
#'
#' @param ncells_col Color for plotting the vertical line if ncells is
#' specified.
#'
#' @examples
#' library(dropseqr)
#'
#' # load example dataset
#' data(reads_per_cell_barcode)
#'
#' # cumulative fraction with default settings
#' plot_cumfrac_rpc(reads_per_cell_barcode, title = "Experiment 42")
#'
#' # zoom in and draw line at estimated number of cells
#' plot_cumfrac_rpc(reads_per_cell_barcode, title = "Experiment 42",
#' nbcs = 30000, ncells = 3500)
#'
#' @export
plot_cumfrac_rpc <- function(read_counts, nbcs = 50000, title = NULL, size = 1,
cumfrac_col = "steelblue", ncells = NULL,
ncells_col = "red"){
# calculate cumulative fraction
cumfrac <- cumfrac_rpc(read_counts)
# only retain nbcs number of barcodes
cumfrac <- cumfrac[1:nbcs, ]
# add index used for plotting
cumfrac$index <- 1:nrow(cumfrac)
# plot cumulative fraction vs cell barcodes
if (is.null(ncells)){
ggplot2::ggplot(cumfrac, ggplot2::aes_string(x = "index", y = "cumfrac")) +
ggplot2::geom_line(color = cumfrac_col, size = size) +
ggplot2::scale_x_continuous(name = "Cell barcodes sorted by number of reads [descending]") +
ggplot2::scale_y_continuous(name = "Cumulative fraction of reads",
limits = c(0, 1),
breaks = c(0, 0.25, 0.5, 0.75, 1)) +
ggplot2::ggtitle(title)
}else{
ggplot2::ggplot(cumfrac, ggplot2::aes_string(x = "index", y = "cumfrac")) +
ggplot2::geom_line(color = cumfrac_col, size = size) +
ggplot2::scale_x_continuous(name = "Cell barcodes sorted by number of reads [descending]") +
ggplot2::scale_y_continuous(name = "Cumulative fraction of reads",
limits = c(0, 1),
breaks = c(0, 0.25, 0.5, 0.75, 1)) +
ggplot2::ggtitle(title) +
ggplot2::geom_vline(xintercept = ncells, linetype = "dashed",
size = size, color = ncells_col)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.