Nothing
# Pajor Methods for logLik calcuations
indicator = function(bounds, samples){
inbounds = function(x){mean(x > bounds[1,] & x < bounds[2,]) == 1}
apply(samples, 1, inbounds)
}
indicator_1 = function(bounds, samples){
inbounds = function(x){mean(x > bounds[1,] & x < bounds[2,]) == 1}
apply(samples, 1, inbounds)
}
indicator_2 = function(bounds, samples){
inbounds = function(x){mean(x > bounds[1] & x < bounds[2]) == 1}
apply(samples, 1, inbounds)
}
# Pajor IS CAME Method for logLik calcuations
# article{pajor2017estimating,
# title={Estimating the marginal likelihood using the arithmetic mean identity},
# author={Pajor, Anna and others},
# journal={Bayesian Analysis},
# volume={12},
# number={1},
# pages={261--287},
# year={2017},
# publisher={International Society for Bayesian Analysis}
# }
#' @importFrom mvtnorm rmvnorm dmvnorm
#' @importFrom stats acf var dbinom dpois dnorm
CAME_IS = function(posterior, y, X, Z, group, coef, sigma, family, M,
gaus_sig = NULL, trace, progress){
if(progress == TRUE) message("Pajor Log-Likelihood Calculation")
family_info = family_export(family)
fam_fun = family_info$family_fun
link = family_info$link
link_int = family_info$link_int # Recoded link as integer, see "family_export.R" for details
family = family_info$family
# Define variables
d = nlevels(group)
num_var = ncol(Z) / d
Gam = t(chol(sigma[which(diag(sigma)!=0),which(diag(sigma)!=0)]))
eta_fef = X %*% coef[1:ncol(X)]
post_mean = colMeans(posterior[,])
# Calculate bounds of the posterior for each dimension
bounds = apply(posterior[,], 2, range)
# Thin posterior draws based on acf results
grp_names = as.numeric(levels(group))
d = nlevels(group)
var_names = 1:ncol(X) - 1 # 0 = intercept, 1 to p for p covariates
var_num = length(var_names)
grp_index = rep(grp_names, times = var_num)
var_index = rep(var_names, each = d)
for(j in 1:ncol(posterior)){
ACF = acf(posterior[,j], plot=F, lag.max = 30)
ACF_df = with(ACF, data.frame(lag,acf))
ACF_df$grp_names = grp_index[j]
ACF_df$var_names = var_index[j]
if(j == 1){
ACF_all = ACF_df
}else{
ACF_all = rbind(ACF_all, ACF_df)
}
}
# Want lag where acf < 0.1
df_lag = subset(ACF_all, acf > 0.01)
max_lag = max(max(df_lag$lag) + 1, 10) # At minimum, thin by 10
thin = seq(from = 1, to = nrow(posterior), by = max_lag)
post_thin = posterior[thin,]
# Initiate logLik
ll = 0
for(k in 1:d){
# Define group-specific individuals
ids = (group == k)
y_k = y[ids]
n_k = sum(ids)
# Identify columns for variables corresponding to group k
cols_all = seq(from = k, by = d, length.out = num_var)
# Restrict above colums to those belonging to variables with non-zero random effect variance
cols = cols_all[which(diag(sigma) != 0)]
Z_k = Z[ids,cols,drop=F]
# if((length(cols) == 1) | (length(ids) == 1)){
# Z_k = matrix(Z_k, nrow = length(ids), ncol = length(cols))
# }
# Sample M samples from the importance function
## Importance function: multivariate normal (multivariate t?)
if(length(cols) > 1){
post_cov = var(post_thin[,cols])
}else{
post_cov = matrix(var(post_thin[,cols]), nrow = 1, ncol = 1)
}
imp_samp = rmvnorm(M, mean = post_mean[cols], sigma = post_cov)
# imp_samp = rmvnorm(M, mean = post_mean[cols], sigma = sigma)
# imp_samp = rmvt(M, df = 10, delta = post_mean[cols], sigma = sigma)
# Determine Importance Weights - f/g
prior = dmvnorm(imp_samp) # Mean 0, sigma I
imp_dens = dmvnorm(imp_samp, post_mean[cols], sigma = post_cov)
# imp_dens = dmvt(imp_samp, delta = post_mean[cols], sigma = sigma, df = 10, log = F)
wt = prior / imp_dens
# Calculate density given importance samples
## Fixed-effects component of eta (linear predictor)
eta_fef_k = matrix(eta_fef[ids], nrow = 1) # 1 X n_k
## Random-effects component of eta (linear predictor)
## switched Gam to be transposed
eta_ref = imp_samp %*% t(Gam) %*% t(Z_k) # M x n_k
## Full eta - fixed + random effects component
eta = eta_fef_k[rep(1, M),] + eta_ref # M x n_k
# Calculate indicator function
if(length(cols) > 1){
indic = indicator_1(bounds[,cols], imp_samp)
}else{
indic = indicator_2(bounds[,cols], imp_samp)
}
if(trace >= 1){
cat("proportion of importance samples inside bounds:", mean(indic), "\n")
}
dens_k = rep(1, times = M)
# Estimate fitted values (see "utility_glm.cpp" for invlink() function details)
# Columns of mu correspond to individuals
mu = matrix(0, nrow = nrow(eta), ncol = ncol(eta))
for(i in 1:ncol(eta)){
mu[,i] = invlink(link_int, eta[,i])
}
if(family == "binomial"){
for(i in 1:n_k){
dens_i = dbinom(y_k[i], size = 1, prob = mu[,i], log = FALSE)
# Multiply together all elements belonging to same alpha_m
dens_k = dens_k * dens_i
} # End i loop
}else if(family == "poisson"){
for(i in 1:n_k){
dens_i = dpois(y_k[i], lambda = mu[,i], log = FALSE)
# Multiply together all elements belonging to same alpha_m
dens_k = dens_k * dens_i
} # End i loop
}else if(family == "gaussian"){
if(is.null(gaus_sig)){
stop("gaus_sig must be specified for gaussian family")
}
for(i in 1:n_k){
dens_i = dnorm(y_k[i], mean = mu[,i], sd = gaus_sig, log = FALSE)
# Multiply together all elements belonging to same alpha_m
dens_k = dens_k * dens_i
} # End i loop
}else{
stop("specified family not currently available")
}
# else if(family == "negbin"){
# for(i in 1:n_k){
# dens_i = dnbinom(y_k[i], size = 1/phi, mu = mu[,i], log = FALSE)
# # Multiply together all elements belonging to same alpha_m
# dens_k = dens_k * dens_i
# } # End i loop
lik_k = mean(dens_k*indic*wt)
ll_k = log(lik_k)
ll = ll + ll_k
} # End k loop
return(ll)
} # End CAME_IS function
# Pajor regular CAME Method for logLik calculations
# Sample from prior, no importance sampling
# CAME = function(posterior, y, X, Z, group, coef, sigma, family, M){
#
# # Define variables
# d = nlevels(group)
# num_var = ncol(Z) / d
# Gam = t(chol(sigma[which(diag(sigma)!=0),which(diag(sigma)!=0)]))
# eta_fef = X %*% coef[1:ncol(X)]
# # post_mean = colMeans(posterior)
# # post_cov = sigma
#
# # Calculate bounds of the posterior for each dimension
# bounds = apply(posterior[,], 2, range)
#
# ll = 0
#
# for(k in 1:d){
# # Define group-specific individuals
# ids = (group == k)
# y_k = y[ids]
# n_k = sum(ids)
#
# # Identify columns corresponding to group k
# cols_all = seq(from = k, by = d, length.out = num_var)
# # Restrict above colums to those belonging to variables with non-zero random effect variance
# cols = cols_all[which(diag(sigma) != 0)]
# Z_k = Z[ids,cols]
#
# # Sample M samples from the prior function
# zero = rep(0, num_var)
# I = diag(x = 1, nrow = num_var)
# prior = rmvnorm(M, zero, I) # Mean 0, sigma I
#
# # Calculate density given importance samples
#
# ## Fixed-effects component of eta (linear predictor)
# eta_fef_k = matrix(eta_fef[ids], nrow = 1)
# ## Random-effects component of eta (linear predictor)
# eta_ref = prior %*% t(Gam) %*% t(Z_k)
# ## Full eta - fixed + random effects component
# eta = eta_fef_k[rep(1, M),] + eta_ref
#
# # Calculate indicator function
# indic = indicator(bounds[,cols], prior)
# cat("proportion of samples inside bounds:", mean(indic), "\n")
#
# dens_k = rep(1, times = M)
#
# if(family == "binomial"){
# prob_mat = exp(eta) / (1+exp(eta))
# # Columns of prob_mat correspond to individuals
#
# for(i in 1:n_k){
# dens_i = dbinom(y_k[i], size = 1, prob = prob_mat[,i], log = F)
# # Multiply together all elements belonging to same alpha_m
# dens_k = dens_k * dens_i
# } # End i loop
# }else{
# stop("specified family not currently available")
# }
#
# lik_k = mean(dens_k*indic)
# ll_k = log(lik_k)
# ll = ll + ll_k
#
# } # End k loop
#
# return(ll)
# } # End CAME function
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.