# OxoG artifact: G->T on read1 and C->A on read2
# FFPE artifact: C->T on read1 and T->A on read2
#' Simulate orientation bias read statistics.
#'
#' @param n number of reads
#' @param theta variant frequency
#' @param phi damage frequency
#' @param e.mean mean probability of read error
#' @param e.sd standard deviation of the probability of read error
#' @export
simulate_orient_bias_read_stats <- function(n, theta, phi, e.mean, e.sd) {
# 0: reference
# 1: alternative
# 2: third nucleotide
# 3: fourth nucleotide
nucleotides <- 0:3;
ref <- 0;
alt <- 1;
es <- anti_phred(rnorm(n, mean=e.mean, sd=e.sd));
probs <- c((1 - theta), theta);
# original bases
bases0 <- sample(nucleotides[1:2], n, replace=TRUE, prob=probs);
# observed bases
bases <- bases0;
# whether read-pair orientation is consistent with artifact mechanism
oriented <- sample(c(FALSE, TRUE), n, replace=TRUE);
damages <- rep(FALSE, n);
idx <- (bases == ref) & oriented;
m <- sum(idx);
damages[idx] <- sample(c(FALSE, TRUE), m, replace=TRUE, prob=c(1 - phi, phi));
bases[damages] <- alt;
# whether read-pair orientation and mutation is consistent with
# artifact mechanism
#orients <- (sample(c(FALSE, TRUE), n, replace=TRUE) & bases == alt) | damages;
d <- data.frame(
base = bases,
e = es,
oriented = oriented,
base0 = bases0,
damage = damages
);
d
}
#' Simulate orientation bias read statistics based on a hierarchical model.
#'
#' Simulates data for multiple loci as determined by the size of `ns`,
#' and each locus is indexed by `j`.
#'
#' @param ns number of reads at each locus
#' @param alpha_theta hyperparmaeter for theta_j (variant frequency)
#' @param beta_theta hyperparameter for theta_j (variant frequency)
#' @param alpha_phi hyperparameter for phi_j (damage frequency)
#' @param beta_phi hyperparameter for phi_j (damage frequency)
#' @param e.mean mean probability of read error
#' @param e.sd standard deviation of the probability of read error
#' @export
simulate_orient_bias_read_stats_hier <- function(ns, alpha_theta, beta_theta, alpha_phi, beta_phi, e.mean, e.sd) {
m <- length(ns);
thetas <- rbeta(m, alpha_theta, beta_theta);
phis <- rbeta(m, alpha_phi, beta_phi);
ys <- mapply(
function(j, n, theta, phi) {
s <- simulate_orient_bias_read_stats(n, theta, phi, e.mean, e.sd);
d <- data.frame(
locus = j,
s,
theta = theta,
phi = phi
);
d
},
1:m, ns, thetas, phis,
SIMPLIFY = FALSE
);
do.call(rbind, ys)
}
#' Simulate read counts for quantification model for orientation bias.
#'
#' @param ns number of reads at each locus
#' @param theta variant frequency
#' @param phi damage frequency
#' @export
simulate_orient_bias_read_counts <- function(ns, theta, phi) {
# number of loci
m <- length(ns);
ni <- floor(ns / 2);
nc <- ns - ni;
xi <- rbinom(m, ni, theta);
xr <- rbinom(m, nc, theta);
xd <- rbinom(m, nc - xr, phi);
xc <- xr + xd;
data.frame(xc, xi, nc, ni, xr, xd)
}
#' Simulate read counts for quantification model based on a hierarchical
#' model for orientation bias.
#'
#' @param ns number of reads each locus
#' @param alpha_theta hyperparameter for theta (variant frequency)
#' @param beta_theta hyperparameter for theta (variant frequency)
#' @param alpha_phi hyperparameter for phi (damage frequency)
#' @param beta_phi hyperparameter for phi (damage frequency)
#' @export
simulate_orient_bias_read_counts_hier <- function(ns, alpha_theta, beta_theta, alpha_phi, beta_phi) {
# number of loci
m <- length(ns);
thetas <- rbeta(m, alpha_theta, beta_theta);
phis <- rbeta(m, alpha_phi, beta_phi);
d <- simulate_orient_bias_read_counts(ns, thetas, phis);
d$theta <- thetas;
d$phi <- phis;
d
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.