demo/try_quantification.R

library(io);
library(devtools);
library(microbenchmark);

load_all("../R");

rmse <- function(x, y) {
	sqrt(mean( (x - y)^2 ))
}

set.seed(1);

m <- 100;
# accurate hyperparameter estimation requires m > 10000 

alpha_phi <- 1;
beta_phi <- 10;

#alpha_phi <- 1;
#beta_phi <- 50;

#alpha_phi <- 10;
#beta_phi <- 100;

#alpha_theta <- 1;
#beta_theta <- 1;

#alpha_theta <- 1;
#beta_theta <- 2;

alpha_theta <- 1;
beta_theta <- 10;

truth_phi_moments <- list(
	mean = alpha_phi / (alpha_phi + beta_phi),
	var = alpha_phi * beta_phi / (alpha_phi + beta_phi)^2 / (alpha_phi + beta_phi + 1)
);

params <- list(
	ns = rpois(m, 50),
	theta = rbeta(m, alpha_theta, beta_theta),
	phi = rbeta(m, alpha_phi, beta_phi)
);

s <- with(params, simulate_orient_bias_read_counts(ns, theta, phi));

phi_g_hat <- with(s, quantify_global_damage(sum(xc), sum(xi), sum(nc), sum(ni)));
theta_g_hat <- with(s, estimate_global_theta(sum(xi), sum(ni)));

alpha_phi / (alpha_phi + beta_phi)
phi_g_hat

with(s, lp_xc_given_hparams(xc, nc, alpha_theta, beta_theta, alpha_phi, beta_phi))

with(s, quant_estimate_theta_hparams_optim(xi, ni, 1, 1))
with(s, quant_estimate_theta_hparams_uniroots(xi, ni, 1, 1))
with(s, quant_estimate_theta_hparams_multiroot(xi, ni, 1, 1))
with(s, quant_estimate_theta_hparams_optim_root(xi, ni, 1, 1))

with(s, quant_estimate_theta_hparams_ravine(xi, ni))


# multiroot of the derivative is not faster than just optimizing the function
# it is always more fragile and tend to find local minima
# all derivative methods tend to find local minima where a / (a + b) = E[phi]
microbenchmark(
	with(s, quant_estimate_theta_hparams_optim(xi, ni, 1, 1)),
	with(s, quant_estimate_theta_hparams_uniroots(xi, ni, 1, 1)),
	with(s, quant_estimate_theta_hparams_multiroot(xi, ni, 1, 1)),
	with(s, quant_estimate_theta_hparams_optim_root(xi, ni, 1, 1)),
	times=5
)

hparams.phi <- with(s, quant_estimate_phi_hparams(xc, nc, alpha_theta, beta_theta, 1, 1));
hparams.phi

hparams.phi <- with(s, quant_estimate_phi_hparams(xc, nc, alpha_theta, beta_theta, 1, (1/phi_g_hat - 1)));
hparams.phi

hparams.phi <- with(s, quant_estimate_phi_hparams_ravine(xc, nc, alpha_theta, beta_theta));
hparams.phi

microbenchmark(
	with(s, quant_estimate_phi_hparams(xc, nc, alpha_theta, beta_theta, 1, 1)),
	with(s, quant_estimate_phi_hparams(xc, nc, alpha_theta, beta_theta, 1, (1/phi_g_hat - 1))),
	with(s, quant_estimate_phi_hparams_ravine(xc, nc, alpha_theta, beta_theta)),
	times=5
);

plot(curve(dbeta(x, 0.5, 1), n=1000), type="l")
lines(curve(dbeta(x, 2, 4), n=1000, add=TRUE))
lines(curve(dbeta(x, 8, 16), n=1000, add=TRUE))
lines(curve(dbeta(x, 32, 64), n=1000, add=TRUE))

# very slow!
#hparams.phi.cd <- with(s, quant_estimate_phi_hparams_cd(xc, nc, alpha_theta, beta_theta, 1, 1));

hparams <- with(s, quantify_variable_damage(xc, xi, nc, ni));

plot(curve(dbeta(x, alpha_phi, beta_phi), xlim=c(0, min(1, 10*truth_phi_moments$mean))), col="red", type="l");
with(hparams, lines(curve(dbeta(x, alpha_phi, beta_phi), add=TRUE)));

# in the hyperparameter estimate for phi, the mean of the phi distribution is
# usually well estimated but not the variance

# even if we continue the search after fixing E[phi], we often do not recover
# the ground truth...
mu <- with(hparams, alpha_phi / (alpha_phi + beta_phi));
#alphas <- seq(1, 10, by=0.1);

alphas <- seq(0.01, 2, by=0.05);
betas <- alphas * (1 - mu) / mu;
lps <- mapply(function(a, b) with(s, lp_xc_given_hparams(xc, nc, alpha_theta, beta_theta, a, b)), alphas, betas, SIMPLIFY=FALSE);
plot(alphas, lps);
idx <- which.max(lps);
alphas[idx]
betas[idx]
hparams
c(alpha_phi, beta_phi)

# has true alpha_theta and beta_theta here but not when the alpha_phi and
# beta_phi were initially estimated (results are in hparams)
hparams2 <- with(s, quant_estimate_phi_hparams_ravine(xc, nc, alpha_theta, beta_theta, hparams$alpha_phi, hparams$beta_phi));

# has the truth alpha_theta and beta_theta provided
hparams3 <- with(s, quant_estimate_phi_hparams(xc, nc, alpha_theta, beta_theta, improve=TRUE));

hparams
hparams2
hparams3

plot(curve(dbeta(x, alpha_phi, beta_phi), xlim=c(0, min(1, 10*truth_phi_moments$mean))), col="red", type="l");
with(hparams, lines(curve(dbeta(x, alpha_phi, beta_phi), add=TRUE)));
with(hparams2, lines(curve(dbeta(x, alpha_phi, beta_phi), add=TRUE), col="blue"));


alphas <- seq(0.01, 4, by=0.1);
lps <- unlist(lapply(alphas, function(a) with(s, lp_xc_given_hparams(xc, nc, alpha_theta, beta_theta, a, beta_phi))));
plot(alphas, lps);
plot(alphas, lps, log="x");
alpha_hat <- alphas[which.max(lps)];


betas <- seq(5, 100, by=2);
lps <- unlist(lapply(betas, function(b) with(s, lp_xc_given_hparams(xc, nc, alpha_theta, beta_theta, alpha_hat, b))));
plot(betas, lps);
plot(betas, lps, log="x");
beta_hat <- betas[which.max(lps)];
max(lps)

lps.m <- with(s, evaluate_grid(alphas, betas, function(a, b) lp_xc_given_hparams(xc, nc, alpha_theta, beta_theta, a, b)));
dim(lps.m)

idx <- which.max(lps.m);
lps.m[idx]
max(lps.m)
subi <- idx_to_sub(idx, nrow(lps.m));
lps.m[subi[1], subi[2]]
alphas[subi[1]]
betas[subi[2]]
image(alphas, betas, lps.m, col=heat.colors(100))
#contour(alphas, betas, exp(lps.m), add=TRUE)
contour(alphas, betas, lps.m, add=TRUE)

image(alphas, betas, lps.m, col=heat.colors(100), log="y")
contour(alphas, betas, lps.m, add=TRUE, log="y")
djhshih/orient-bias-filter documentation built on Nov. 4, 2019, 10:54 a.m.