# data-raw/reads_per_cell_barcode.R

# Create example data for a reads per cell barcode histogram file as generated
# by BAMTagHistogram from Drop-seq tools. Read counts are simulated based on
# the negative binomial distribution.

# Fix the state of the random number generator so that every run of this
# script produces the same simulated counts.
set.seed(42)

# True cells: draw one read count per cell from a negative binomial
# distribution parameterised by its mean (mu) and dispersion (size).
cells <- 5000
avg_reads_cells <- 1500
reads_cells <- rnbinom(cells, size = 1, mu = avg_reads_cells)

# Empty droplets: 1.5M draws with a much lower mean and a small size
# parameter.
empty <- 1.5e6
avg_reads_empty <- 6
reads_empty <- rnbinom(empty, size = 0.075, mu = avg_reads_empty)

# Pool both populations into a single vector ordered from the highest to the
# lowest read count.
reads <- rev(sort(c(reads_cells, reads_empty)))

# Generate random 12-nucleotide cell barcodes. 1e5 more barcodes than required
# are drawn because duplicates are removed afterwards. Each barcode is sampled
# by its own call to sample(), in sequence, so the order of random draws is
# the same as in a plain lapply() over the indices.
n_barcodes <- cells + empty
cell_barcode <- vapply(seq_len(n_barcodes + 1e5), FUN = function(i) {

  paste(sample(c("A", "T", "C", "G"), size = 12, replace = TRUE),
        collapse = "")

}, FUN.VALUE = character(1))

# Drop duplicated barcodes and make sure enough unique ones are left; indexing
# past the end of the vector would otherwise silently pad it with NA.
cell_barcode <- unique(cell_barcode)
if (length(cell_barcode) < n_barcodes) {
  stop("Only ", length(cell_barcode), " unique cell barcodes were generated, ",
       "but ", n_barcodes, " are required.", call. = FALSE)
}

# only retain the required amount of unique barcodes
cell_barcode <- cell_barcode[seq_len(n_barcodes)]

# Combine read counts and cell barcodes into a two-column data.frame (columns
# `reads` and `cell_barcode`); barcodes are kept as character strings.
reads_per_cell_barcode <- data.frame(reads, cell_barcode,
                                     stringsAsFactors = FALSE)

# Save the data as an .rda object for use in the package. use_data() was moved
# from devtools to usethis (deprecated in devtools 2.0.0), so use the usethis
# version when that package is installed and fall back to the devtools version
# on older setups.
if (requireNamespace("usethis", quietly = TRUE)) {
  usethis::use_data(reads_per_cell_barcode)
} else {
  devtools::use_data(reads_per_cell_barcode)
}
# argschwind/dropseqr documentation built on May 23, 2019, 4:24 p.m.