R/generate.clonal.data.R
In lymphclon: Accurate Estimation of Clonal Coincidences and Abundances from Biological Replicates

#!/usr/bin/Rscript

generate.clonal.data <- function(
n = 2e7, # typical number of B cell clones in an human -- typically 20 million
num.cells.taken.vector = c(2e3, 5e3, 1e4, 2e4, 5e4, 5e4),
read.count.per.replicate.vector = rep(2e4, length(num.cells.taken.vector)),
clonal.distribution.power = -sqrt(2),
pcr.noise.type = 'pareto',
pcr.pareto.location = 1,
pcr.pareto.shape = 1,
pcr.lognormal.meanlog = 0,
pcr.lognormal.sdlog = 1)
{
pcr.rpareto.func <- function(n, location, shape) {
  # taken from VGAM 0.92
  ans <- location / runif(n)^(1/shape)
  ans[location <= 0] <- NaN
  ans[shape    <= 0] <- NaN
  ans
}
unnorm.clone.prob <- (1:n) ^ clonal.distribution.power
true.clone.prob <- unnorm.clone.prob / sum(unnorm.clone.prob)
true.clonality <- sum(true.clone.prob * true.clone.prob)
num.replicates <- length(num.cells.taken.vector)
replicates <- matrix(0, n, num.replicates)
replicate.errs <- matrix(0, n, num.replicates)
replicate.squared.errs <- rep(NA, num.replicates)

if (pcr.noise.type == 'pareto') {
  get.readcount.given.cellcount <- function(x) {
    ifelse(x > 0, sum(abs(pcr.rpareto.func(n = x, location = pcr.pareto.location, shape = pcr.pareto.shape))), 0)
  }
} else if (pcr.noise.type == 'lognormal') { # lognormal
  get.readcount.given.cellcount <- function(x) {
    ifelse(x > 0, sum(abs(rlnorm(n = x, meanlog = pcr.lognormal.meanlog, sdlog = pcr.lognormal.sdlog))), 0)
  }
} else {
  print(sprintf('Unknown pcr noise type: %s', pcr.noise.type))
}

for (i in 1:num.replicates)
{

  read.count.per.replicate <- read.count.per.replicate.vector[i]
  num.cells.taken <- num.cells.taken.vector[i]

  num.obs.clones.frac <- num.cells.taken / n
  sample.of.cells <- rmultinom(n = 1, size = num.cells.taken, prob = true.clone.prob)
  raw.sample.of.reads <- sapply(X = sample.of.cells, FUN = get.readcount.given.cellcount)
  sample.of.reads <- rpois(
    n = length(sample.of.cells),
    lambda = raw.sample.of.reads * (read.count.per.replicate / sum(raw.sample.of.reads)))
  # table(sample.of.reads)
  # return the counts, rather than the abundance proportions
  replicates[, i] <- sample.of.reads # / sum(sample.of.reads)
  replicate.errs[, i] <- sample.of.reads / sum(sample.of.reads) - true.clone.prob
  replicate.squared.errs[i] <- sum(replicate.errs[, i] ^ 2)
  grahm.errs <- t(replicate.errs) %*% replicate.errs
}

simulated.data <- 
  list(read.count.matrix = replicates, 
       true.clone.prob = true.clone.prob, 
       true.clonality = true.clonality,
       replicate.squared.errs = replicate.squared.errs,
       grahm.errs = grahm.errs)

return(simulated.data)
}

Any scripts or data that you put into this service are public.

lymphclon documentation built on May 2, 2019, 7:23 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

lymphclon
Accurate Estimation of Clonal Coincidences and Abundances from Biological Replicates

R/generate.clonal.data.R
In lymphclon: Accurate Estimation of Clonal Coincidences and Abundances from Biological Replicates

Try the lymphclon package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

lymphclon Accurate Estimation of Clonal Coincidences and Abundances from Biological Replicates

R/generate.clonal.data.R In lymphclon: Accurate Estimation of Clonal Coincidences and Abundances from Biological Replicates

Try the lymphclon package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

lymphclon
Accurate Estimation of Clonal Coincidences and Abundances from Biological Replicates

R/generate.clonal.data.R
In lymphclon: Accurate Estimation of Clonal Coincidences and Abundances from Biological Replicates