p_ref_base_given_params <- function(theta, phi, e) {
theta * (e / 3) * (1 - phi) +
(1 - theta) * (1 - e) * (1 - phi) +
theta * (1 - e) * phi
}
p_alt_base_given_params <- function(theta, phi, e) {
theta * (1 - e) * (1 - phi) +
(1 - theta) * (e / 3) * (1 - phi) +
(1 - theta) * (1 - e) * phi
}
p_other_base_given_params <- function(theta, phi, e) {
# alt or ref mutated to other allele by sequencing error
(e / 3) * (1 - phi) +
# alt or ref mutated to other allele by damage
(1 - e) * phi
}
lp_ref_base_given_params <- function(theta, phi, e) {
log_sum_exp(c(
log( theta) + log(e)-log(3) + log(1 - phi),
log(1 - theta) + log(1 - e) + log(1 - phi),
log( theta) + log(1 - e) + log( phi)
))
}
lp_alt_base_given_params <- function(theta, phi, e) {
log_sum_exp(c(
log( theta) + log(1 - e) + log(1 - phi),
log(1 - theta) + log(e)-log(3) + log(1 - phi),
log(1 - theta) + log(1 - e) + log( phi)
))
}
lp_other_base_given_params <- function(theta, phi, e) {
log_sum_exp(c(
# alt or ref mutated to other allele by sequencing error
log(e)-log(3) + log(1 - phi),
# alt or ref mutated to other allele by damage
log(1 - e) + log( phi)
))
}
lp_bases_given_params <- function(d, theta, phi) {
sum(mapply(
function(base, e, orient) {
# read-pair orientation is consistent with artifact
if (orient) {
phi2 <- phi;
} else {
phi2 <- 0;
}
if (base == 0) {
lp_ref_base_given_params(theta, phi2, e)
} else if (base == 1) {
lp_alt_base_given_params(theta, phi2, e)
} else {
lp_other_base_given_params(theta, phi2, e)
}
},
d$base, d$e, d$oriented
))
}
# compare deviance of model1: theta = theta.mle and model0: theta = 0
deviance_theta <- function(d, theta, phi) {
2 * (
# alternative model: theta = theta.mle
lp_bases_given_params(d, theta, phi) -
# null model: theta = 0
lp_bases_given_params(d, 0, phi)
)
}
estimate_theta <- function(d, theta.0, phi, abstol=1e-6) {
theta.opt <- optim(
logit(theta.0),
function(theta.real) {
-lp_bases_given_params(d, logistic(theta.real), phi)
},
method = "Brent",
lower = -15,
upper = 15,
control = list(abstol=abstol)
);
theta.hat <- logistic(theta.opt$par);
# theta.opt <- optim(
# theta.0,
# function(theta.real) {
# -lp_bases_given_params(d, logistic(theta.real), phi)
# },
# #method = "BFGS"
# #method = "Nelder-Mead"
# method = "L-BFGS-B"
# );
# theta.hat <- logistic(theta.opt$par);
#
# theta.opt <- optimize(
# function(theta.real) {
# lp_bases_given_params(d, logistic(theta.real), phi)
# },
# interval = c(-100, 100),
# maximum = TRUE
# );
# theta.hat <- logistic(theta.opt$maximum);
#
# theta.opt <- optimize(
# function(theta) {
# lp_bases_given_params(d, theta, phi)
# },
# interval = c(0, 1),
# maximum = TRUE
# );
# theta.hat <- theta.opt$maximum
theta.hat
}
estimate_phi <- function(d, theta, phi.0, abstol=1e-6) {
phi.opt <- optim(
logit(phi.0),
function(phi.real) {
-lp_bases_given_params(d, theta, logistic(phi.real))
},
method = "Brent",
lower = -15,
upper = 15,
control = list(abstol=abstol)
);
logistic(phi.opt$par)
}
estimate_theta_phi <- function(d, theta, phi, eps=1e-6, max.iter=20) {
theta.old <- theta;
phi.old <- phi;
delta <- 1;
converged <- FALSE;
for (i in 1:max.iter) {
theta <- estimate_theta(d, theta, phi);
phi <- estimate_phi(d, theta, phi);
delta <- (abs(theta.old - theta) + abs(phi.old - phi)) / 2;
if (delta < eps) {
converged <- TRUE;
break;
}
theta.old <- theta;
phi.old <- phi;
}
list(theta = theta, phi = phi)
}
estimate_initial_theta <- function(d) {
counts <- table(d$base);
if (length(counts) >= 2) {
x <- counts[2];
} else {
x <- 0;
}
(x + 1) / (sum(counts) + 2)
}
#' Variant test with adjustment for orientation bias.
#'
#' Test whether variant allele frequency > 0,
#' with consideration for orientation bias, as specified
#' by the estimated orientation bias error rate.
#'
#' @param d \code{data.frame} with fields:
#' base, e (esimated base error), and oriented
#' (whether read-pair orientation is consistent with artifact mechanism)
#' @param phi.0 initial estimate of damage rate (causing orientation bias)
#' @return
orient_bias_adjusted_variant_test <- function(data, phi.0, ...) {
theta.0 <- estimate_initial_theta(data);
mle <- estimate_theta_phi(data, theta.0, phi.0, ...);
theta.hat <- mle$theta;
phi.hat <- mle$phi;
dev <- deviance_theta(data, mle$theta, mle$phi);
params <- c(df = 1);
p <- pchisq(dev, df = params[1], lower.tail=FALSE);
structure(
list(
method = "Orientation bias adjusted variant test with unknown extent of damage",
statistic = c(D=dev), parameter = params, p.value = p, estimate = unlist(mle)
),
class = "htest"
)
}
#' Variant test with adjustment for orientation bias.
#'
#' Test whether variant allele frequency > 0,
#' with consideration for orientation bias, as specified
#' by the estimated orientation bias error rate.
#'
#' @param d \code{data.frame} with fields:
#' base, e (esimated base error), and oriented
#' (whether read-pair orientation is consistent with artifact mechanism)
#' @param phi estimated damage rate (causing orientation bias)
#' @return
orient_bias_adjusted_variant_test_with_known_phi <- function(data, phi) {
theta.0 <- estimate_initial_theta(data);
theta.hat <- estimate_theta(data, theta.0, phi);
dev <- deviance_theta(data, theta.hat, phi);
params <- c(df = 1, phi = phi);
p <- pchisq(dev, df = params[1], lower.tail=FALSE);
structure(
list(
method = "Orientation bias adjusted variant test with known extent of damage",
statistic = c(D=dev), parameter = params, p.value = p, estimate = theta.hat
),
class = "htest"
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.