R/identification-model_bayes.R

Defines functions standard_integration midpoint_integration midpoint_integration_logit_x log_midpoint_integration trapezoid_integration log_trapezoid_integration integrate_over_unit orient_bias_adjusted_variant_test_with_variable_phi orient_bias_identify_bayes_model_standard orient_bias_identify_bayes_model_large_approx orient_bias_identify_bayes_model_log_midpoint orient_bias_identify_bayes_model_midpoint orient_bias_identify_bayes_model_midpoint_logit_x centered_grid orient_bias_identify_bayes_model_midpoint_cgrid

standard_integration <- function(f, ...) {
	ff <- function(xs) {
		unlist(lapply(xs, function(x) f(x)))
	}

	integrate(ff, ...)
}

midpoint_integration <- function(x, f, ...) {
	n <- length(x);
	d <- diff(x);

	# compute midpoints
	p <- (x[1:(n-1)] + x[2:n]) / 2;

	# evaluate function at the midpoints
	ps <- unlist(lapply(p, function(pp) f(pp, ...)));

	sum(ps * d)
}

midpoint_integration_logit_x <- function(xr, f, ...) {
	midpoint_integration(logistic(xr), f, ...)
}

log_midpoint_integration <- function(x, lf, ...) {
	n <- length(x);
	d <- diff(x);

	# compute midpoints
	p <- (x[1:(n-1)] + x[2:n]) / 2;

	# evaluate function at the midpoints
	lps <- unlist(lapply(p, function(pp) lf(pp, ...)));

	log_sum_exp(lps + log(d))
}

trapezoid_integration <- function(x, f, ...) {
	n <- length(x);
	ps <- unlist(lapply(x, function(xi) f(xi, ...)));
	d <- diff(x);

	lefts <- ps[1:(n-1)] * d / 2;
	rights <- ps[2:n] * d / 2;

	sum(c(lefts, rights))
}

log_trapezoid_integration <- function(x, lf, ...) {
	n <- length(x);
	lps <- unlist(lapply(x, function(xi) lf(xi, ...)));
	d <- diff(x);

	lefts <- lps[1:(n-1)] - log(2) + log(d);
	rights <- lps[2:n] - log(2) + log(d);

	log_sum_exp(c(lefts, rights))
}


# Integrate over unit interval [0, 1]
#
# Univariate function does not need to support taking a vector of possible as
# input.
#
# @param f  univariate function to integrate
integrate_over_unit <- function(f, ...) {
	integrate(
		function(xs) {
			unlist(lapply(xs, f, ...))
		},
		# values that are too large to small round to 0 or 1, which may not
		# be in the domain of the function
		lower = 0, upper = 1,
		...
	)
}

#' Bayesian model for orientation bias adjusted test
#'
#' Compute the posterior probability of the alternative model with positive
#' variant allelic frequency (theta > 0) against the null model (theta = 0).
#'
#' @param d  \code{data.frame} with fields:
#'           base, e (esimated base error), and oriented
#'           (whether read-pair orientation is consistent with artifact mechanism)
#' @param fast  whether to use the fast integration method;
#'              less precise but it will not choke on problematic joint 
#'              probability distributions [default: true]
#' @param alpha_phi  hyperparameter for the prior beta distribution of phi
#' @param beta_phi   hyperparameter for the prior beta distribution of phi
#' @return a Bayesian test object
#' @export
#' 
orient_bias_adjusted_variant_test_with_variable_phi <- function(d, prior = c(0.5, 0.5), alpha_phi=1, beta_phi=1, fast=TRUE, ...) {

	lprior <- log(prior);

	if (fast) {
		levs <- orient_bias_identify_bayes_model_midpoint_cgrid(d, alpha_phi, beta_phi, ...);
	} else {
		levs <- orient_bias_identify_bayes_model_standard(d, alpha_phi, beta_phi, ...);
	}
	
	structure(
		list(
			method = "Orientation bias adjusted variant test with variable extent of damage",
			levidence =
				list(
					null = levs[1],
					alternative = levs[2]
				),
			lposterior = (levs[2] + lprior[2]) - log_sum_exp(levs + lprior)
		),
		class = "btest"
	)
}

orient_bias_identify_bayes_model_standard <- function(data, alpha_phi, beta_phi) {
	null.model.evidence <- integrate_over_unit(
		function(phi) {
			exp(
				lp_bases_given_params(data, 0, phi) +
					dbeta(phi, alpha_phi, beta_phi, log=TRUE)
					# dbeta(theta, 1, 1, log=TRUE)
			)
		}
	)$value;

	model.evidence <- integrate_over_unit(
		function(theta) {
			integrate_over_unit(
				function(phi) {
					exp(
						lp_bases_given_params(data, theta, phi) +
							dbeta(phi, alpha_phi, beta_phi, log=TRUE)
							# dbeta(theta, 1, 1, log=TRUE)
					)
				}
			)$value
		}
	)$value;

	c(log(null.model.evidence), log(model.evidence))
}

# DO NOT USE
# Problematic: Ignores prior on parameters; therefore, model is not penalized
#              automatically for having more parameters
#              If the target model has more free parameters than the null
#              model, then its evidence cannot be less than the null model!
orient_bias_identify_bayes_model_large_approx <- function(data, alpha_phi, beta_phi) {
	# approximate p(x, phi) by replacing p(phi) with delta function centered at MLE of phi
	phi.hat <- estimate_phi(data, 0, 0.5);
	null.model.levidence <- lp_bases_given_params(data, 0, phi.hat);
	print(phi.hat)

	# p(x | theta) is probably sharply peaked at some value of theta
	# approximate p(x, theta, phi) by replacing p(phi) and p(theta) with MLE
	# of phi and theta
	phat <- estimate_theta_phi(data, 0.5, 0.5);
	model.levidence <- lp_bases_given_params(data, phat$theta, phat$phi);

	c(null.model.levidence, model.levidence)
}

orient_bias_identify_bayes_model_log_midpoint <- function(data, alpha_phi, beta_phi) {
	eps <- 1e-6;
	phis <- seq(eps, 1-eps, 0.01);
	#phis <- exp(seq(-10-eps, -eps, 0.5));
	thetas <- seq(eps, 1-eps, 0.01);
	#thetas <- exp(seq(-10-eps, -eps, 0.5))

	lp_f <- function(phi, theta) {
		lp_bases_given_params(data, theta, phi) +
			dbeta(phi, alpha_phi, beta_phi, log=TRUE) +
			dbeta(theta, 1, 1, log=TRUE)
	}

	model.levidence <- log_midpoint_integration(thetas, function(th) {
		log_midpoint_integration(phis, lp_f, theta=th)
	});

	null.model.levidence <- log_midpoint_integration(phis, lp_f, theta=0);

	c(null.model.levidence, model.levidence)
}

orient_bias_identify_bayes_model_midpoint <- function(data, alpha_phi, beta_phi) {
	eps <- 1e-6;
	phis <- seq(eps, 1-eps, 0.01);
	#phis <- exp(seq(-10-eps, -eps, 0.5));
	thetas <- seq(eps, 1-eps, 0.01);
	#thetas <- exp(seq(-10-eps, -eps, 0.5))

	p_f <- function(phi, theta) {
		exp(
			lp_bases_given_params(data, theta, phi) +
				dbeta(phi, alpha_phi, beta_phi, log=TRUE)
				# log p(theta) is omitted because it is 0:
				# dbeta(theta, 1, 1, log=TRUE) == 0
		)
	}

	model.evidence <- midpoint_integration(thetas, function(th) {
		midpoint_integration(phis, p_f, theta=th)
	});

	null.model.evidence <- midpoint_integration(phis, p_f, theta=0);

	c(log(null.model.evidence), log(model.evidence))
}

orient_bias_identify_bayes_model_midpoint_logit_x <- function(data, alpha_phi, beta_phi, lower=-upper, upper=10) {
	lphis <- seq(lower, upper, 0.2);
	lthetas <- seq(lower, upper, 0.2);

	p_f <- function(phi, theta) {
		exp(
			lp_bases_given_params(data, theta, phi) +
				dbeta(phi, alpha_phi, beta_phi, log=TRUE)
				# log p(theta) is omitted because it is 0:
				# dbeta(theta, 1, 1, log=TRUE) == 0
		)
	}

	model.evidence <- midpoint_integration_logit_x(lthetas, function(th) {
		midpoint_integration_logit_x(lphis, p_f, theta=th)
	});

	null.model.evidence <- midpoint_integration_logit_x(lphis, p_f, theta=0);

	c(log(null.model.evidence), log(model.evidence))
}

# create grid over (0, 1) centered at x
# with exponentially increasing distance on either side
centered_grid <- function(x, eps=1e-6, step=0.005, base=1.4) {
	# 100 is much higher than we need;
	# in a loop, we should stop when d > 1
	d <- step * base^seq(0, 100);
	g <- c(eps, x - rev(d), x, x + d, 1 - eps);
	g[g > 0 & g < 1]
}

orient_bias_identify_bayes_model_midpoint_cgrid <- function(data, alpha_phi, beta_phi) {
	p_f <- function(phi, theta) {
		exp(
			lp_bases_given_params(data, theta, phi) +
				dbeta(phi, alpha_phi, beta_phi, log=TRUE)
				# log p(theta) is omitted because it is 0:
				# dbeta(theta, 1, 1, log=TRUE) == 0
		)
	}

	# create grid centered around approx maximum
	phi.hat <- estimate_phi(data, 0, 0.5, abstol=1e-2);
	phis <- centered_grid(phi.hat);

	null.model.evidence <- midpoint_integration(phis, p_f, theta=0);

	# create grid centered around approx maximum for phi = phi.hat
	theta.hat <- estimate_theta(data, 0.5, phi.hat, abstol=1e-2);
	thetas <- centered_grid(theta.hat);

	model.evidence <- midpoint_integration(thetas, function(th) {
		midpoint_integration(phis, p_f, theta=th)
	});

	c(log(null.model.evidence), log(model.evidence))
}
djhshih/orient-bias-filter documentation built on Nov. 4, 2019, 10:54 a.m.