standard_integration <- function(f, ...) {
ff <- function(xs) {
unlist(lapply(xs, function(x) f(x)))
}
integrate(ff, ...)
}
midpoint_integration <- function(x, f, ...) {
n <- length(x);
d <- diff(x);
# compute midpoints
p <- (x[1:(n-1)] + x[2:n]) / 2;
# evaluate function at the midpoints
ps <- unlist(lapply(p, function(pp) f(pp, ...)));
sum(ps * d)
}
midpoint_integration_logit_x <- function(xr, f, ...) {
midpoint_integration(logistic(xr), f, ...)
}
log_midpoint_integration <- function(x, lf, ...) {
n <- length(x);
d <- diff(x);
# compute midpoints
p <- (x[1:(n-1)] + x[2:n]) / 2;
# evaluate function at the midpoints
lps <- unlist(lapply(p, function(pp) lf(pp, ...)));
log_sum_exp(lps + log(d))
}
trapezoid_integration <- function(x, f, ...) {
n <- length(x);
ps <- unlist(lapply(x, function(xi) f(xi, ...)));
d <- diff(x);
lefts <- ps[1:(n-1)] * d / 2;
rights <- ps[2:n] * d / 2;
sum(c(lefts, rights))
}
log_trapezoid_integration <- function(x, lf, ...) {
n <- length(x);
lps <- unlist(lapply(x, function(xi) lf(xi, ...)));
d <- diff(x);
lefts <- lps[1:(n-1)] - log(2) + log(d);
rights <- lps[2:n] - log(2) + log(d);
log_sum_exp(c(lefts, rights))
}
# Integrate over unit interval [0, 1]
#
# Univariate function does not need to support taking a vector of possible as
# input.
#
# @param f univariate function to integrate
integrate_over_unit <- function(f, ...) {
integrate(
function(xs) {
unlist(lapply(xs, f, ...))
},
# values that are too large to small round to 0 or 1, which may not
# be in the domain of the function
lower = 0, upper = 1,
...
)
}
#' Bayesian model for orientation bias adjusted test
#'
#' Compute the posterior probability of the alternative model with positive
#' variant allelic frequency (theta > 0) against the null model (theta = 0).
#'
#' @param d \code{data.frame} with fields:
#' base, e (esimated base error), and oriented
#' (whether read-pair orientation is consistent with artifact mechanism)
#' @param fast whether to use the fast integration method;
#' less precise but it will not choke on problematic joint
#' probability distributions [default: true]
#' @param alpha_phi hyperparameter for the prior beta distribution of phi
#' @param beta_phi hyperparameter for the prior beta distribution of phi
#' @return a Bayesian test object
#' @export
#'
orient_bias_adjusted_variant_test_with_variable_phi <- function(d, prior = c(0.5, 0.5), alpha_phi=1, beta_phi=1, fast=TRUE, ...) {
lprior <- log(prior);
if (fast) {
levs <- orient_bias_identify_bayes_model_midpoint_cgrid(d, alpha_phi, beta_phi, ...);
} else {
levs <- orient_bias_identify_bayes_model_standard(d, alpha_phi, beta_phi, ...);
}
structure(
list(
method = "Orientation bias adjusted variant test with variable extent of damage",
levidence =
list(
null = levs[1],
alternative = levs[2]
),
lposterior = (levs[2] + lprior[2]) - log_sum_exp(levs + lprior)
),
class = "btest"
)
}
orient_bias_identify_bayes_model_standard <- function(data, alpha_phi, beta_phi) {
null.model.evidence <- integrate_over_unit(
function(phi) {
exp(
lp_bases_given_params(data, 0, phi) +
dbeta(phi, alpha_phi, beta_phi, log=TRUE)
# dbeta(theta, 1, 1, log=TRUE)
)
}
)$value;
model.evidence <- integrate_over_unit(
function(theta) {
integrate_over_unit(
function(phi) {
exp(
lp_bases_given_params(data, theta, phi) +
dbeta(phi, alpha_phi, beta_phi, log=TRUE)
# dbeta(theta, 1, 1, log=TRUE)
)
}
)$value
}
)$value;
c(log(null.model.evidence), log(model.evidence))
}
# DO NOT USE
# Problematic: Ignores prior on parameters; therefore, model is not penalized
# automatically for having more parameters
# If the target model has more free parameters than the null
# model, then its evidence cannot be less than the null model!
orient_bias_identify_bayes_model_large_approx <- function(data, alpha_phi, beta_phi) {
# approximate p(x, phi) by replacing p(phi) with delta function centered at MLE of phi
phi.hat <- estimate_phi(data, 0, 0.5);
null.model.levidence <- lp_bases_given_params(data, 0, phi.hat);
print(phi.hat)
# p(x | theta) is probably sharply peaked at some value of theta
# approximate p(x, theta, phi) by replacing p(phi) and p(theta) with MLE
# of phi and theta
phat <- estimate_theta_phi(data, 0.5, 0.5);
model.levidence <- lp_bases_given_params(data, phat$theta, phat$phi);
c(null.model.levidence, model.levidence)
}
orient_bias_identify_bayes_model_log_midpoint <- function(data, alpha_phi, beta_phi) {
eps <- 1e-6;
phis <- seq(eps, 1-eps, 0.01);
#phis <- exp(seq(-10-eps, -eps, 0.5));
thetas <- seq(eps, 1-eps, 0.01);
#thetas <- exp(seq(-10-eps, -eps, 0.5))
lp_f <- function(phi, theta) {
lp_bases_given_params(data, theta, phi) +
dbeta(phi, alpha_phi, beta_phi, log=TRUE) +
dbeta(theta, 1, 1, log=TRUE)
}
model.levidence <- log_midpoint_integration(thetas, function(th) {
log_midpoint_integration(phis, lp_f, theta=th)
});
null.model.levidence <- log_midpoint_integration(phis, lp_f, theta=0);
c(null.model.levidence, model.levidence)
}
orient_bias_identify_bayes_model_midpoint <- function(data, alpha_phi, beta_phi) {
eps <- 1e-6;
phis <- seq(eps, 1-eps, 0.01);
#phis <- exp(seq(-10-eps, -eps, 0.5));
thetas <- seq(eps, 1-eps, 0.01);
#thetas <- exp(seq(-10-eps, -eps, 0.5))
p_f <- function(phi, theta) {
exp(
lp_bases_given_params(data, theta, phi) +
dbeta(phi, alpha_phi, beta_phi, log=TRUE)
# log p(theta) is omitted because it is 0:
# dbeta(theta, 1, 1, log=TRUE) == 0
)
}
model.evidence <- midpoint_integration(thetas, function(th) {
midpoint_integration(phis, p_f, theta=th)
});
null.model.evidence <- midpoint_integration(phis, p_f, theta=0);
c(log(null.model.evidence), log(model.evidence))
}
orient_bias_identify_bayes_model_midpoint_logit_x <- function(data, alpha_phi, beta_phi, lower=-upper, upper=10) {
lphis <- seq(lower, upper, 0.2);
lthetas <- seq(lower, upper, 0.2);
p_f <- function(phi, theta) {
exp(
lp_bases_given_params(data, theta, phi) +
dbeta(phi, alpha_phi, beta_phi, log=TRUE)
# log p(theta) is omitted because it is 0:
# dbeta(theta, 1, 1, log=TRUE) == 0
)
}
model.evidence <- midpoint_integration_logit_x(lthetas, function(th) {
midpoint_integration_logit_x(lphis, p_f, theta=th)
});
null.model.evidence <- midpoint_integration_logit_x(lphis, p_f, theta=0);
c(log(null.model.evidence), log(model.evidence))
}
# create grid over (0, 1) centered at x
# with exponentially increasing distance on either side
centered_grid <- function(x, eps=1e-6, step=0.005, base=1.4) {
# 100 is much higher than we need;
# in a loop, we should stop when d > 1
d <- step * base^seq(0, 100);
g <- c(eps, x - rev(d), x, x + d, 1 - eps);
g[g > 0 & g < 1]
}
orient_bias_identify_bayes_model_midpoint_cgrid <- function(data, alpha_phi, beta_phi) {
p_f <- function(phi, theta) {
exp(
lp_bases_given_params(data, theta, phi) +
dbeta(phi, alpha_phi, beta_phi, log=TRUE)
# log p(theta) is omitted because it is 0:
# dbeta(theta, 1, 1, log=TRUE) == 0
)
}
# create grid centered around approx maximum
phi.hat <- estimate_phi(data, 0, 0.5, abstol=1e-2);
phis <- centered_grid(phi.hat);
null.model.evidence <- midpoint_integration(phis, p_f, theta=0);
# create grid centered around approx maximum for phi = phi.hat
theta.hat <- estimate_theta(data, 0.5, phi.hat, abstol=1e-2);
thetas <- centered_grid(theta.hat);
model.evidence <- midpoint_integration(thetas, function(th) {
midpoint_integration(phis, p_f, theta=th)
});
c(log(null.model.evidence), log(model.evidence))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.