R/quantification-model_bayes.R

Defines functions quantify_variable_damage lp_xc_given_hparams lp_xcj_given_hparams dlp_xi_given_hparams_dalpha dlp_xij_given_hparams_dalpha dlp_xi_given_hparams_dbeta dlp_xij_given_hparams_dbeta lp_xi_given_hparams lp_xij_given_hparams quant_estimate_theta_hparams_optim quant_estimate_theta_hparams_ravine quant_estimate_phi_hparams quant_estimate_phi_hparams_ravine evaluate_grid idx_to_sub

#' Estimate hyperparameters of the hierarchical Bayesian model for damage quantification.
#'
#' Hyperparameters \alpha_{\theta}, \beta_{\theta}, \alpha_{\phi}, and \beta_{\phi} are
#' estimated by maximizing the marginal likelihood.
#'
#' Model allows for variable damage rate (\phi_j) across each locus j, and
#' it assumes negligible sequencing error.
#'
#' Observe x^c_j = x^r_j + x^d_j and x^i_j
#' j \in \{1 ... J\}
#'
#' \theta_j ~ Beta(\alpha_{\theta}, \beta_{\theta})
#' \phi_j   ~ Beta(\alpha_{\phi}, \beta_{\phi})
#' x^i_j ~ Binomial(n^i_j, \theta_j)
#' x^r_j ~ Binomial(n^c_j, \theta_j)
#' x^d_j ~ Binomial(n^c_j - x^r_j, \phi_j)
#'
#' @param xc   vector of alt counts of reads that are consistent with damage
#'             (e.g. for oxoG, G>T on read 1; C>A on read 2)
#' @param xi   vector of alt counts of reads that are inconsistent with damage
#'             (e.g. for oxoG, C>A on read 2; G>T on read 2)
#' @param nc   vector of counts of reads that are consistent
#'             (e.g. for oxoG, G>G and G>T on read 1; C>C and C>A on read 2)
#' @param ni   vector of counts of reads that are inconsistent
#'             (e.g. for oxoG, C>C and C>A on read 2; G>G and G>T on read 2)
#' @return estimated values of hyperparameters \code{alpha_phi} and
#'         \code{beta_phi} that parameterize the beta prior distribution of
#'         \code{phi_j}, as well as estimated values of hyperparameters for
#'         \code{theta_j} (nuisance variables).
#' @export
quantify_variable_damage <- function(xc, xi, nc, ni, alpha0_theta=1, beta0_theta=1, alpha0_phi=1, beta0_phi=1) {
	theta <- quant_estimate_theta_hparams(xi, ni, alpha0_theta, beta0_theta);

	phi <- quant_estimate_phi_hparams(xc, nc, theta$alpha_theta, theta$beta_theta, alpha0_phi, beta0_phi);

	list(alpha_theta=theta$alpha_theta, beta_theta=theta$beta_theta,
		alpha_phi=phi$alpha_phi, beta_phi=phi$beta_phi
	)
}

# xc and nc are vectors
lp_xc_given_hparams <- function(xc, nc, alpha_theta, beta_theta, alpha_phi, beta_phi) {
	J <- length(xc);	

	- sum(log(nc + 1)) -
	J * lbeta(alpha_theta, beta_theta) -
	J * lbeta(alpha_phi, beta_phi) +
	sum(mapply(
		function(xcj, ncj) {
			lp_xcj_given_hparams(xcj, ncj, alpha_theta, beta_theta, alpha_phi, beta_phi)
		},
		xc, nc
	))
}

lp_xcj_given_hparams <- function(xcj, ncj, alpha_theta, beta_theta, alpha_phi, beta_phi) {
	k <- 0:xcj;
	# omitted terms because they are included in the calling function:
	# - log(ncj + 1) 
	# - lbeta(alpha_phi, beta_phi)
	# - lbeta(alpha_theta, beta_theta)
	# +
	log_sum_exp(
		-log(ncj - k + 1) +
		lbeta(xcj - k + alpha_phi, ncj - xcj + beta_phi) -
		lbeta(xcj - k + 1,         ncj - xcj + 1) +
		lbeta(k + alpha_theta, ncj - k + beta_theta) -
		lbeta(k + 1,           ncj - k + 1)
	)
}

# xi and ni are vectors
dlp_xi_given_hparams_dalpha <- function(xi, ni, alpha_theta, beta_theta) {
	J <- length(xi);
	J * (digamma(alpha_theta + beta_theta) - digamma(alpha_theta)) +
	sum(
		dlp_xij_given_hparams_dalpha(xi, ni, alpha_theta, beta_theta)
	)
}

dlp_xij_given_hparams_dalpha <- function(xij, nij, alpha_theta, beta_theta) {
	# omitted terms because they are included in the calling function:
	# digamma(alpha_theta + beta_theta) - digamma(alpha_theta)
	# +
	digamma(xij + alpha_theta) - digamma(nij + alpha_theta + beta_theta)
}

# xi and ni are vectors
dlp_xi_given_hparams_dbeta <- function(xi, ni, alpha_theta, beta_theta) {
	J <- length(xi);
	J * (digamma(alpha_theta + beta_theta) - digamma(beta_theta)) +
	sum(
		dlp_xij_given_hparams_dbeta(xi, ni, alpha_theta, beta_theta)
	)
}

dlp_xij_given_hparams_dbeta <- function(xij, nij, alpha_theta, beta_theta) {
	# omitted terms because they are included in the calling function:
	# digamma(alpha_theta + beta_theta) - digamma(beta_theta)
	# +
	digamma(nij - xij + beta_theta) - digamma(nij + alpha_theta + beta_theta)
}

# xi and ni are vectors
lp_xi_given_hparams <- function(xi, ni, alpha_theta, beta_theta) {
	J <- length(xi);
	-J * lbeta(alpha_theta, beta_theta) +
	sum(lp_xij_given_hparams(xi, ni, alpha_theta, beta_theta))
}

lp_xij_given_hparams <- function(xij, nij, alpha_theta, beta_theta) {
	# omitted terms because they are included in the calling function:
	# - lbeta(alpha_theta, beta_theta)
	# +
	lbeta(xij + alpha_theta, nij - xij + beta_theta) +
	lchoose(nij, xij)
	# NB if xi and ni are fixed, the last term should ideally be memoized/pre-computed
}

# xi and ni are vectors
quant_estimate_theta_hparams_optim <- function(xi, ni, alpha0_theta=1, beta0_theta=1) {
	res <- optim(
		c(alpha0_theta, beta0_theta),
		function(x) {
			-lp_xi_given_hparams(xi, ni, x[1], x[2])
		},
		lower = 0.001,
		upper = 1000,
		method = "L-BFGS-B"
	);

	list(optim=res,
		alpha_theta=res$par[1], beta_theta=res$par[2]
	)
}

quant_estimate_theta_hparams_ravine <- function(xi, ni, alpha_theta=NA, beta_theta=NA) {
	if (is.na(alpha_theta)) {
		alpha_theta <- 1;
	}
	if (is.na(beta_theta)) {
		# use MLE estimator for E[theta]
		e_theta <- quant_estimate_global_theta(sum(xi), sum(ni));

		# E[theta] = alpha_theta / (alpha_theta + beta_theta)
		# which implies that beta_theta = alpha_theta (1 - E[theta]) / E[theta]
		beta_theta <- alpha_theta * (1/e_theta - 1);
	} else {
		e_theta <- alpha_theta / (alpha_theta + beta_theta);
	}
		
	res <- optim(
		alpha_theta,
		function(a) {
			b <- a * (1/e_theta - 1);
			-lp_xi_given_hparams(xi, ni, a, b)
		},
		lower = 0.001,
		upper = 1000,
		method = "Brent"
	);

	alpha <- res$par;
	beta <- alpha * (1/e_theta - 1);

	list(optim=res, alpha_theta=alpha, beta_theta=beta)
}

quant_estimate_theta_hparams <- quant_estimate_theta_hparams_optim

quant_estimate_phi_hparams <- function(xc, nc, alpha_theta, beta_theta, alpha0_phi=1, beta0_phi=1, improve=FALSE) {
	# We use a narrower range for the hyperparameters of phi, because
	# the estimates tend to bias towards more exteme values, leading to
	# underestimation of the variance in phi
	res <- optim(
		c(alpha0_phi, beta0_phi),
		function(x) {
			-lp_xc_given_hparams(xc, nc, alpha_theta, beta_theta, x[1], x[2])
		},
		lower = 0.01,
		upper = 100,
		method = "L-BFGS-B"
	);

	# the first search likely landed in a ravine
	# attempt to improve the estimate with a second search along this ravine
	if (improve) {
		quant_estimate_phi_hparams_ravine(xc, nc, alpha_theta, beta_theta, alpha_phi=res$par[1], beta_phi=res$par[2])
	} else {
		list(optim=res, alpha_phi=res$par[1], beta_phi=res$par[2])
	}
}

# Search the ravine by adding an additional constraint:
# E[phi] = alpha_phi / (alpha_phi + beta_phi)
# which implies that beta_phi = alpha_phi (1 - E[phi]) / E[phi]
# alpha_phi and beta_phi should be previous estimates at a local optimal
# However, this secondary search does not seem to find the ground truth
# values, suggesting that the understimation of the variance is likely
# caused by the sampled data, insufficient sample size, and the loss of
# information obtained from the data as learning moves to higher levels of the model
quant_estimate_phi_hparams_ravine <- function(xc, nc, alpha_theta, beta_theta, alpha_phi=NA, beta_phi=NA) {
	if (is.na(alpha_phi)) {
		alpha_phi <- 1;
	}
	if (is.na(beta_phi)) {
		e_theta <- alpha_theta / (alpha_theta + beta_theta);
		# use MLE estimator for E[theta]
		e_phi <- quant_estimate_global_phi(sum(xc), sum(nc), e_theta);

		# E[phi] = alpha_phi / (alpha_phi + beta_phi)
		# which implies that beta_phi = alpha_phi (1 - E[phi]) / E[phi]
		beta_phi <- alpha_phi * (1/e_phi - 1);
	} else {
		e_phi <- alpha_phi / (alpha_phi + beta_phi);
	}

	# define e_phi = E[phi]
	e_phi <- alpha_phi / (alpha_phi + beta_phi);

	res <- optim(
		alpha_phi,
		function(a) {
			b <- a * (1/e_phi - 1);
			-lp_xc_given_hparams(xc, nc, alpha_theta, beta_theta, a, b)
		},
		lower = 0.001,
		upper = 1000,
		method = "Brent"
	);

	alpha <- res$par;
	beta <- alpha * (1/e_phi - 1);

	list(optim=res, alpha_phi=alpha, beta_phi=beta)
}

evaluate_grid <- function(xs, ys, f, ...) {
	zs <- unlist(lapply(xs,
		function(x) {
			unlist(lapply(ys,
				function(y) {
					f(x, y, ...)
				}
			))
		}
	));
	matrix(zs, nrow=length(xs), ncol=length(ys), byrow=TRUE)
}

# Convert from linear index to row and column subscripts
# for a m by n matrix.
idx_to_sub <- function(k, m) {
	j <- ceiling(k / m);
	i <-k - (j - 1) * m;
	c(i, j)
}
djhshih/orient-bias-filter documentation built on Nov. 4, 2019, 10:54 a.m.