Nothing
#' Add noise to count data
#'
#' Gaussian noise can be added to the simulated count matrix in multiple ways which can
#' be combined.
#'
#' - `add_noise` adds simple Gaussian noise to counts. This affects low
#' expressed genes and hardly affects highly expressed genes.
#' - With `log_noise`,
#' counts are converted using log2+1 and Gaussian noise added, followed by
#' conversion back to count scale. This affects all genes irrespective of
#' expression level.
#' - With `graded_log_noise`,
#' counts are converted to log2+1. A scaling factor is calculated for gene
#' expression level ranging from 0 to 1, which maps to 0 to the maximum number
#' of counts. This scaling factor is inverted from 1 to 0 (i.e. noise affects
#' low counts more than high counts) and then passed through the function
#' specified by `transform` (this controls how much the middle counts are
#' affected). Then the Gaussian noise is multiplied by the scaling factor and
#' added to the counts.
#' - With `sqrt_noise`, counts
#' are square root transformed before Gaussian noise is added, and then
#' transformed back. This still has a stronger effect on low expressed genes,
#' but the effect is more graduated with a more gradual fall off in effect on
#' genes with increasing expression.
#' - With `shift_noise`, whole gene rows are selected at random then each row is
#' multiplied by a random amount varying according to 2^rnorm. This simulates
#' shifted expression up/down due to differences in chemistry through which some
#' genes are more or less detectable.
#'
#' @param counts An integer count matrix with genes in rows and cell
#' subclasses typically generated by [simulate_bulk()].
#' @param sd Standard deviation of noise to be added.
#' @param transform Function for controlling amount of noise by expression level
#' in [graded_log_noise()].
#' @param p Proportion of genes affected by noise.
#' @returns A positive integer count matrix with genes in rows and cell
#' subclasses in columns.
#' @export
add_noise <- function(counts, sd = 100) {
scale_factor <- mean(colSums(counts)) / 1e9
sd <- sd * scale_factor
# simple Gaussian noise
rn <- rnorm(prod(dim(counts)), sd = sd)
rmat <- matrix(round(rn), nrow = nrow(counts))
counts <- counts + rmat
finish(counts)
}
#' @rdname add_noise
#' @export
log_noise <- function(counts, sd = 0.1) {
# Gaussian noise on log scale
rn <- rnorm(prod(dim(counts)), sd = sd)
rmat <- matrix(rn, nrow = nrow(counts))
log_sim <- log2(counts +1)
log_sim <- log_sim + rmat
counts <- 2^log_sim -1
finish(counts)
}
#' @rdname add_noise
#' @export
graded_log_noise <- function(counts, sd = 0.1,
transform = function(x) x^3) {
# Gaussian noise on log scale
rn <- rnorm(prod(dim(counts)), sd = sd)
rmat <- matrix(rn, nrow = nrow(counts))
log_sim <- log2(counts +1)
fac <- 1 - (log_sim / max(log_sim))
fac <- transform(fac)
log_sim <- log_sim + rmat * fac
counts <- 2^log_sim -1
finish(counts)
}
#' @rdname add_noise
#' @export
sqrt_noise <- function(counts, sd = 100) {
# sqrt transform
scale_factor <- mean(colSums(counts)) / 1e9
sd <- sqrt(sd * scale_factor) /2 # loose equivalence to count_sd
rn <- rnorm(prod(dim(counts)), sd = sd)
rmat <- matrix(rn, nrow = nrow(counts))
sx <- sqrt(counts)
sx <- sx + rmat
sx[sx < 0] <- 0
counts <- sx^2
finish(counts)
}
#' @rdname add_noise
#' @importFrom stats rbinom
#' @export
shift_noise <- function(counts, sd = 0.5, p = 0.5) {
# shift transform
rn <- rnorm(nrow(counts), sd = sd)
w <- rbinom(nrow(counts), 1, p)
rn[w == 0] <- 0
counts <- counts * 2^rn
out <- finish(counts)
attr(out, "shift") <- rn
out
}
finish <- function(counts) {
counts[counts < 0] <- 0
if (max(counts) <= .Machine$integer.max) {
mode(counts) <- "integer"
} else counts <- round(counts)
counts
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.