# Pajor Methods for logLik calcuations
indicator = function(bounds, samples){
inbounds = function(x){mean(x > bounds[1,] & x < bounds[2,]) == 1}
apply(samples, 1, inbounds)
}
indicator_1 = function(bounds, samples){
inbounds = function(x){mean(x > bounds[1,] & x < bounds[2,]) == 1}
apply(samples, 1, inbounds)
}
indicator_2 = function(bounds, samples){
inbounds = function(x){mean(x > bounds[1] & x < bounds[2]) == 1}
apply(samples, 1, inbounds)
}
# Pajor IS CAME Method for logLik calcuations
# article{pajor2017estimating,
# title={Estimating the marginal likelihood using the arithmetic mean identity},
# author={Pajor, Anna and others},
# journal={Bayesian Analysis},
# volume={12},
# number={1},
# pages={261--287},
# year={2017},
# publisher={International Society for Bayesian Analysis}
# }
#' @importFrom mvtnorm rmvnorm dmvnorm
#' @importFrom stats acf var dbinom dpois dnorm
CAME_IS = function(posterior, y, X, Z, group, coef, sigma, family, M,
gaus_sig = NULL, trace, progress){
if(progress == TRUE) message("Pajor Log-Likelihood Calculation")
family_info = family_export(family)
fam_fun = family_info$family_fun
link = family_info$link
link_int = family_info$link_int # Recoded link as integer, see "family_export.R" for details
family = family_info$family
# Define variables
d = nlevels(group)
num_var = ncol(Z) / d
Gam = t(chol(sigma[which(diag(sigma)!=0),which(diag(sigma)!=0)]))
eta_fef = X %*% coef[1:ncol(X)]
post_mean = colMeans(posterior[,])
# Calculate bounds of the posterior for each dimension
bounds = apply(posterior[,], 2, range)
# Thin posterior draws based on acf results
grp_names = as.numeric(levels(group))
d = nlevels(group)
var_names = 1:ncol(X) - 1 # 0 = intercept, 1 to p for p covariates
var_num = length(var_names)
grp_index = rep(grp_names, times = var_num)
var_index = rep(var_names, each = d)
for(j in 1:ncol(posterior)){
ACF = acf(posterior[,j], plot=F, lag.max = 30)
ACF_df = with(ACF, data.frame(lag,acf))
ACF_df$grp_names = grp_index[j]
ACF_df$var_names = var_index[j]
if(j == 1){
ACF_all = ACF_df
}else{
ACF_all = rbind(ACF_all, ACF_df)
}
}
# Want lag where acf < 0.1
df_lag = subset(ACF_all, acf > 0.01)
max_lag = max(max(df_lag$lag) + 1, 10) # At minimum, thin by 10
thin = seq(from = 1, to = nrow(posterior), by = max_lag)
post_thin = posterior[thin,]
# Initiate logLik
ll = 0
for(k in 1:d){
# Define group-specific individuals
ids = (group == k)
y_k = y[ids]
n_k = sum(ids)
# Identify columns for variables corresponding to group k
cols_all = seq(from = k, by = d, length.out = num_var)
# Restrict above colums to those belonging to variables with non-zero random effect variance
cols = cols_all[which(diag(sigma) != 0)]
Z_k = Z[ids,cols,drop=F]
# if((length(cols) == 1) | (length(ids) == 1)){
# Z_k = matrix(Z_k, nrow = length(ids), ncol = length(cols))
# }
# Sample M samples from the importance function
## Importance function: multivariate normal (multivariate t?)
if(length(cols) > 1){
post_cov = var(post_thin[,cols])
}else{
post_cov = matrix(var(post_thin[,cols]), nrow = 1, ncol = 1)
}
imp_samp = rmvnorm(M, mean = post_mean[cols], sigma = post_cov)
# imp_samp = rmvnorm(M, mean = post_mean[cols], sigma = sigma)
# imp_samp = rmvt(M, df = 10, delta = post_mean[cols], sigma = sigma)
# Determine Importance Weights - f/g
prior = dmvnorm(imp_samp) # Mean 0, sigma I
imp_dens = dmvnorm(imp_samp, post_mean[cols], sigma = post_cov)
# imp_dens = dmvt(imp_samp, delta = post_mean[cols], sigma = sigma, df = 10, log = F)
wt = prior / imp_dens
# Calculate density given importance samples
## Fixed-effects component of eta (linear predictor)
eta_fef_k = matrix(eta_fef[ids], nrow = 1) # 1 X n_k
## Random-effects component of eta (linear predictor)
## switched Gam to be transposed
eta_ref = imp_samp %*% t(Gam) %*% t(Z_k) # M x n_k
## Full eta - fixed + random effects component
eta = eta_fef_k[rep(1, M),] + eta_ref # M x n_k
# Calculate indicator function
if(length(cols) > 1){
indic = indicator_1(bounds[,cols], imp_samp)
}else{
indic = indicator_2(bounds[,cols], imp_samp)
}
if(trace >= 1){
cat("proportion of importance samples inside bounds:", mean(indic), "\n")
}
dens_k = rep(1, times = M)
# Estimate fitted values (see "utility_glm.cpp" for invlink() function details)
# Columns of mu correspond to individuals
mu = matrix(0, nrow = nrow(eta), ncol = ncol(eta))
for(i in 1:ncol(eta)){
mu[,i] = invlink(link_int, eta[,i])
}
if(family == "binomial"){
for(i in 1:n_k){
dens_i = dbinom(y_k[i], size = 1, prob = mu[,i], log = FALSE)
# Multiply together all elements belonging to same alpha_m
dens_k = dens_k * dens_i
} # End i loop
}else if(family == "poisson"){
for(i in 1:n_k){
dens_i = dpois(y_k[i], lambda = mu[,i], log = FALSE)
# Multiply together all elements belonging to same alpha_m
dens_k = dens_k * dens_i
} # End i loop
}else if(family == "gaussian"){
if(is.null(gaus_sig)){
stop("gaus_sig must be specified for gaussian family")
}
for(i in 1:n_k){
dens_i = dnorm(y_k[i], mean = mu[,i], sd = gaus_sig, log = FALSE)
# Multiply together all elements belonging to same alpha_m
dens_k = dens_k * dens_i
} # End i loop
}else{
stop("specified family not currently available")
}
# else if(family == "negbin"){
# for(i in 1:n_k){
# dens_i = dnbinom(y_k[i], size = 1/phi, mu = mu[,i], log = FALSE)
# # Multiply together all elements belonging to same alpha_m
# dens_k = dens_k * dens_i
# } # End i loop
lik_k = mean(dens_k*indic*wt)
ll_k = log(lik_k)
ll = ll + ll_k
} # End k loop
return(ll)
} # End CAME_IS function
# Pajor regular CAME Method for logLik calculations
# Sample from prior, no importance sampling
# CAME = function(posterior, y, X, Z, group, coef, sigma, family, M){
#
# # Define variables
# d = nlevels(group)
# num_var = ncol(Z) / d
# Gam = t(chol(sigma[which(diag(sigma)!=0),which(diag(sigma)!=0)]))
# eta_fef = X %*% coef[1:ncol(X)]
# # post_mean = colMeans(posterior)
# # post_cov = sigma
#
# # Calculate bounds of the posterior for each dimension
# bounds = apply(posterior[,], 2, range)
#
# ll = 0
#
# for(k in 1:d){
# # Define group-specific individuals
# ids = (group == k)
# y_k = y[ids]
# n_k = sum(ids)
#
# # Identify columns corresponding to group k
# cols_all = seq(from = k, by = d, length.out = num_var)
# # Restrict above colums to those belonging to variables with non-zero random effect variance
# cols = cols_all[which(diag(sigma) != 0)]
# Z_k = Z[ids,cols]
#
# # Sample M samples from the prior function
# zero = rep(0, num_var)
# I = diag(x = 1, nrow = num_var)
# prior = rmvnorm(M, zero, I) # Mean 0, sigma I
#
# # Calculate density given importance samples
#
# ## Fixed-effects component of eta (linear predictor)
# eta_fef_k = matrix(eta_fef[ids], nrow = 1)
# ## Random-effects component of eta (linear predictor)
# eta_ref = prior %*% t(Gam) %*% t(Z_k)
# ## Full eta - fixed + random effects component
# eta = eta_fef_k[rep(1, M),] + eta_ref
#
# # Calculate indicator function
# indic = indicator(bounds[,cols], prior)
# cat("proportion of samples inside bounds:", mean(indic), "\n")
#
# dens_k = rep(1, times = M)
#
# if(family == "binomial"){
# prob_mat = exp(eta) / (1+exp(eta))
# # Columns of prob_mat correspond to individuals
#
# for(i in 1:n_k){
# dens_i = dbinom(y_k[i], size = 1, prob = prob_mat[,i], log = F)
# # Multiply together all elements belonging to same alpha_m
# dens_k = dens_k * dens_i
# } # End i loop
# }else{
# stop("specified family not currently available")
# }
#
# lik_k = mean(dens_k*indic)
# ll_k = log(lik_k)
# ll = ll + ll_k
#
# } # End k loop
#
# return(ll)
# } # End CAME function
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.