bvs_sample <- function(x,
y,
n,
p,
intercept,
family,
prior_model,
prior_coef,
rare,
hap,
region_ind,
num_regions,
forced,
p_forced,
inform,
cov,
p_cov,
which_ind,
iter)
{
# initialize glm parameters
control <- do.call("glm.control", list())
family_func <- do.call(family, list())
weights <- rep(1.0, n)
offset <- rep(0.0, n)
if (family == "binomial") {
family_func$mu.eta <- logit_mu_eta
mustart <- (weights * y + 0.5) / (weights + 1)
m <- weights * y
if (any(abs(m - round(m)) > 0.001))
warning("non-integer # of successes in a binomial glm!")
} else {
family_func$mu.eta <- identity_mu_eta
mustart <- y
}
# initialize model size beta-binomial parameters
alpha_bb <- prior_model[[1]]
beta_bb <- prior_model[[2]]
# compute null marginal log like
nullLogLike <- bvs_fit(z = NULL,
num_active = 0L,
y = y,
x = NULL,
n = n,
p = p,
intercept = intercept,
rare = rare,
hap = hap,
region_ind = region_ind,
forced = forced,
p_forced = p_forced,
family_func = family_func,
prior_coef = prior_coef,
control = control,
weights = weights,
offset = offset,
mustart = mustart,
m = m)$marg_ll
# initialize hash table to hold indices of all unique previous models
models_fit <- new_table(1)
iter <- iter + 1 # remove when done testing
# initialize results objects
logfitness <- rep(NA, iter)
logPrM <- rep(NA, iter)
model_path <- c(1, rep(NA, iter - 1))
alpha <- matrix(0, nrow = iter, ncol = max(1, p_cov))
active <- rep(NA, iter)
num_active <- rep(NA, iter)
coef <- matrix(0, nrow = iter, ncol = length(which_ind))
loglike <- rep(NA, iter)
# initialize vector that indicates currently active variables
z_current <- rep(FALSE, p)
z_current[sample(1:p, 5)] <- TRUE
set_element_in_table(key = active[1] <- paste0(which(z_current), collapse = "-"),
value = 1,
table = models_fit)
# fit initial model
num_active[1] <- sum(z_current)
fit_glm <- bvs_fit(z = z_current,
num_active = num_active[1],
y = y,
x = x,
n = n,
p = p,
intercept = intercept,
rare = rare,
hap = hap,
region_ind = region_ind,
num_regions = num_regions,
forced = forced,
p_forced = p_forced,
family_func = family_func,
prior_coef = prior_coef,
control = control,
weights = weights,
offset = offset,
mustart = mustart,
m = m)
# get intial coef vector
if (fit_glm$num_vars > 0) {
if (rare) {
coef[1, which_ind] <- fit_glm$coef
} else {
coef[1, z_current] <- fit_glm$coef
}
}
# get marginal log likelihood
loglike[1] <- fit_glm$marg_ll
# initialize t_current
lower <- rep(0, p)
upper <- rep(Inf, p)
lower[!z_current] <- -Inf
upper[!z_current] <- 0
t_current <- rtnorm(p, lower = lower, upper = upper)
# calculate prior on model
if (inform) {
a0 <- qnorm(1 - 2^(-1 / p))
v_hat <- solve(diag(1, p_cov) + crossprod(cov))
a1_current <- rep(0, p_cov)
mu <- a0 + cov %*% a1_current
lprob_inc <- pnorm(0, mean = mu, lower.tail = FALSE, log.p = TRUE)
lprob_ninc <- pnorm(0, mean = mu, lower.tail = TRUE, log.p = TRUE)
logPrM[1] <- logPrM_current <- sum(lprob_inc[z_current]) + sum(lprob_ninc[!z_current])
} else {
logPrM[1] <- logPrM_current <- computeModelPrior(num_active[1], p, alpha_bb, beta_bb)
}
# calculate log(fitness) = marginal_ll - logPrM
logfitness[1] <- fitness_current <- loglike[1] + logPrM_current
# loop through all trials, track progress with progress bar
i <- 2
idx <- 2
pb <- txtProgressBar(min = i, max = iter, style = 3)
while (i <= iter) {
# sample new a1 if using prior or keep a1 = 0
if (inform) {
alpha_hat <- v_hat %*% crossprod(cov, t_current - a0)
a1_current <- mvrnorm(1, mu = alpha_hat, Sigma = v_hat)
# sample latent variable | current a1 and current model
lower <- rep(0, p)
upper <- rep(Inf, p)
lower[!z_current] <- -Inf
upper[!z_current] <- 0
mu <- a0 + cov %*% a1_current
t_current <- rtnorm(p, mean = mu, lower = lower, upper = upper)
# calculate new prior
lprob_inc <- pnorm(0, mean = mu, lower.tail = FALSE, log.p = TRUE)
lprob_ninc <- pnorm(0, mean = mu, lower.tail = TRUE, log.p = TRUE)
logPrM_current <- sum(lprob_inc[z_current]) + sum(lprob_ninc[!z_current])
# update fitness | a1 for currently selected model
fitness_current <- fitness_current - logPrM[i - 1] + logPrM_current
}
# sample model
mutate_ind <- ceiling(runif(1) * p)
testing <- runif(1) # remove when done testing
z_current[mutate_ind] <- !z_current[mutate_ind]
num_active_new <- sum(z_current)
# check if model already fit
if (num_active_new > 0) {
active_new <- paste0(which(z_current), collapse = "-")
} else {
active_new <- "0"
}
model_ind <- get_element_from_table(active_new, models_fit)
if (model_ind != 0) {
model_propose <- model_ind
} else {
active[idx] <- active_new
num_active[idx] <- num_active_new
set_element_in_table(active_new, idx, models_fit)
model_propose <- idx
# fit model
fit_glm <- bvs_fit(z = z_current,
num_active = num_active_new,
y = y,
x = x,
n = n,
p = p,
intercept = intercept,
rare = rare,
hap = hap,
region_ind = region_ind,
num_regions = num_regions,
forced = forced,
p_forced = p_forced,
family_func = family_func,
prior_coef = prior_coef,
control = control,
weights = weights,
offset = offset,
mustart = mustart,
m = m)
# get coef vector
if (fit_glm$num_vars > 0) {
if (rare) {
coef[idx, which_ind] <- fit_glm$coef
} else {
coef[idx, z_current] <- fit_glm$coef
}
}
# get marginal log likelihood
loglike[idx] <- fit_glm$marg_ll
idx <- idx + 1
}
# calculate prior on model
if (inform) {
logPrM_new <- sum(lprob_inc[z_current]) + sum(lprob_ninc[!z_current])
} else {
logPrM_new <- computeModelPrior(num_active_new, p, alpha_bb, beta_bb)
}
# calculate logfitness = loglike + logPrM
fitness_new <- loglike[model_propose] + logPrM_new
#If fit ratio > runif(1), accept new model and z, else keep current model
accept <- runif(1) <= min(1, exp(fitness_new - fitness_current))
if (accept) {
model_path[i] <- model_propose
logfitness[i] <- fitness_current <- fitness_new
logPrM[i] <- logPrM_current <- logPrM_new
} else {
model_path[i] <- model_path[i - 1]
logfitness[i] <- fitness_current
logPrM[i] <- logPrM_current
z_current[mutate_ind] <- !z_current[mutate_ind]
}
if (inform) {
alpha[i, ] <- a1_current
}
# Update progress
i <- i + 1
setTxtProgressBar(pb, i)
}
cat("\nDone\n")
models_chosen <- unique(model_path)
models_accepted <- list(model_id = models_chosen,
active = active[models_chosen],
num_active = num_active[models_chosen],
loglike = loglike[models_chosen],
coef = coef[models_chosen, , drop = FALSE])
rownames(models_accepted$coef) <- models_chosen
results <- list(logfitness = logfitness,
logPrM = logPrM,
model_path = model_path,
alpha = alpha,
models_accepted = models_accepted,
nullLogLike = nullLogLike)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.