# This script carries out HMC for Bayesian LOLOG.
#' @useDynLib Blolog
#' @importFrom Rcpp sourceCpp
#' @importFrom foreach foreach %do% %dopar%
#' @export
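#' @examples
#' \dontrun{
#' ## A minimal usage sketch, assuming the Florentine marriage data shipped with the
#' ## 'network' package and the lolog terms edges() and triangles(); with the defaults
#' ## this runs the random-walk proposal_step and a flat prior on [-10, 10]^p.
#' data(flo, package = "network")
#' flomarriage <- network::network(flo, directed = FALSE)
#' fit <- Blolog(formula_rhs = "edges() + triangles()",
#'               net = flomarriage,
#'               n_perms = 2,
#'               samples = 50)
#' length(fit$sample)  # n_perms chains of (samples + 1) draws each, concatenated
#' }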
Blolog <- function(formula_rhs,
net,
n_perms,
perm_samples = NULL,
proposal_step = function(theta,net,...){list(proposal = MASS::mvrnorm(1,theta,Sigma = diag(1,length(theta))),prob_factor=1)},
verbose = TRUE,
prior = function(theta,net,change_on){(1/(10**length(theta)))*(sum(theta>10)==0)*(sum(theta < -10)==0)},
prior_grad = NULL,
samples = 10, #Number of samples from each chain
start = NULL, #specify the start if want the same start for all chains
cores = NULL, #if is non null specifies a cluster for parallelisation
diff_perms = TRUE, #if FALSE, uses the same permutation for every chain - allows assessment of convergence
L_steps = 10, #number of HMC steps
epsilon = 0.1, #HMC leapfrog integrator step size
momentum_sigma = NULL, #momentum matrix if method is HMC
momentum_sims = NULL, #Number of simulated networks used to estimate the starting momentum (mass) matrix for HMC
HMC_grad_samples = NULL, #Number of samples to use in the HMC gradient calculation (to speed it up): values less than 1 are treated as a proportion of edges, values of 1 or more as the actual number
randomwalk_sigma = NULL, #covariance matrix if method is random walk
cov_start = NULL, #covariance matrix that can be used in start function
proposal_sims = 10, #number of simulations used if method is RMHMC
resamples = NULL, #number of resamples used if method is RMHMC_boot
method = "manual", # one of "manual" or "stan", if stan uses built in Bayesian GLM with default prior as now,
export_list = c("acceptance_prob"), #vector of extra variables to be exported to cluster
...
){
t <- proc.time()
#define network size
n <- network.size(net)
e_obs <- length(net$mel)
if(get.network.attribute(net,"directed")){
e <- n*(n-1)
}else{
e <- n*(n-1)/2
}
#Make a sample
posterior_samples <- list()
length(posterior_samples) <- samples*n_perms
if(is.null(perm_samples)){
s <- sample(1:e,e,replace = FALSE)
}else{
s <- sample(perm_samples,1)[[1]]
}
#From s get the permutation heads and tails:
edges <- combn(seq(1,n),2)
#include both directions for directed networks so that s (which has length e) indexes valid columns
if(get.network.attribute(net,"directed")){
edges <- cbind(edges,edges[c(2,1),])
}
tails <- edges[1,][s]-1
heads <- edges[2,][s]-1
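#(the -1 converts node labels to 0-based indices, apparently for the lolog C++ backend
#used by generateNetworkWithEdgeOrder() below; the permutation s is the dyad ordering
#that the LOLOG likelihood conditions on)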
#Make the lolog formula
formula <- as.formula(paste("net ~",formula_rhs,sep = ""))
#If momentum_sims is given, estimate the HMC momentum (mass) matrix before sampling
if(!is.null(momentum_sims)){
t_m <- proc.time()
#Get a starting value
if(is.function(start)){
start_tmp <- start()
}else{
start_tmp <- start
}
#Use the gradient to estimate Fisher information:
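#(q_grad below returns minus the gradient of the log posterior given the edge ordering:
#the prior score prior'/prior, plus the observed statistics, minus what appears to be
#the sum of expected change statistics over dyads from bottom_deriv_helper_cpp; the
#sign is flipped so it can serve as the potential-energy gradient in HMC)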
q_grad <- function(q,change_on,network){
if(is.null(prior_grad)){
prior_deriv <- (numDeriv::grad(prior,q,net=net,change_on = change_on)/prior(q,net=net,change_on = change_on))
}
else{prior_deriv <- prior_grad(q,net=net,change_on = change_on)/prior(q,net=net,change_on = change_on)}
if(sum(is.na(prior_deriv)) != 0){
prior_deriv <- rep(0,length(q))
}
#derivative of change statistics on top
#is just graph statistics
top_deriv <- lolog::calculateStatistics(as.formula(paste("network ~ ",formula_rhs)))
bottom_deriv <- bottom_deriv_helper_cpp(change_on,q,weights = NULL)
#return their sum with the correct signs
return(-prior_deriv - top_deriv + bottom_deriv)
}
#Simulate networks - uses a different permutation each time - i.e. unconditional on edge ordering:
#Just use to get an idea of the scale of the posterior dimensions
model <- lolog::createLatentOrderLikelihood(formula,theta = start_tmp)
momentum_sigma <- lapply(1:momentum_sims,function(i){
tmp = model$generateNetworkWithEdgeOrder(heads,tails)
grad = tryCatch({q_grad(start_tmp,change_on = tmp$changeStats, network = lolog::as.network(tmp$network))},error = function(e){return(rep(0,length(start_tmp)))})
return(outer(grad,grad))
})
momentum_sigma <- Reduce("+",momentum_sigma)/length(momentum_sigma)
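#momentum_sigma is the average outer product of the simulated score vectors, i.e. an
#empirical (outer-product) estimate of the Fisher information, used as the HMC mass matrix.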
#check whether the matrix is (nearly) singular; if so, add to its smallest singular value until it is not
if(abs(det(momentum_sigma)) < 10**(-6)){
for(i in 10**seq(-6,20,1)){
if(abs(det(momentum_sigma))>10**(-6)){
break
}
else{
if(verbose){
print(paste("Adding ", i," to the singular values",sep=""))
}
tmp = svd(momentum_sigma)
values = tmp$d
values[which(values == min(values))] <- values[which(values == min(values))] + i
momentum_sigma <- tmp$u%*%diag(values)%*%t(tmp$v)
#momentum_sigma <- momentum_sigma + diag(min(diag(momentum_sigma))*i,dim(momentum_sigma)[1])
}
}
}
if(verbose){
print(paste("Assigning the initial momentum took ", (proc.time()-t)[3]))
}
}
sample_func <- function(start){
#Get the variational estimates as a starting point for the sampling
if(verbose){print("Calculating start point")}
if(is.null(start)){start <- lolog::lologVariational(as.formula(paste("net ~",formula_rhs,sep = "")))$theta}
if(is.function(start)){
start <- start()
}
if(verbose){print("Completed calculating start point")}
if(diff_perms){
if(is.null(perm_samples)){
s <- sample(1:e,e,replace = FALSE)
}else{
s <- sample(perm_samples,1)[[1]]
}
}
#Calculate the change stats to speed things up:
if(verbose){print("Calculating LOLOG change stats")}
t_1 <- proc.time()
change_on <- lolog_change_stats(net,s,formula_rhs)
#change_off: a zero change-statistic vector for each dyad
change_off <- lapply(change_on,function(x){rep(0,length(x))})
if(verbose){print("Completed calculating LOLOG change stats")
print(paste("Change Stat Calc took ",(proc.time() - t_1)[3],"seconds"))
}
#calculate the edges present to speed up
tmp <- combn(seq(1,n),2)
if(get.network.attribute(net,"directed")){
tmp <- cbind(tmp,tmp[c(2,1),])
}
edges <- lapply(1:dim(tmp)[2],function(i){tmp[,i]})
net_tmp <- as.BinaryNet(net)
edges <- edges[s]
t_1 <- proc.time()
edge_present <- sapply(edges,function(x){net_tmp$getDyads(x[1],x[2])})
if(verbose){print(paste("Calculating edges_present took ",(proc.time() - t_1)[3],"seconds"))}
#carry out MCMC
#make the start
theta_old <- start
#need to store results
sample_results <- list(start)
sample_proposals <- list(start)
sample_probs <- c(1)
samples_accept <- runif((samples+1),0,1)
length(sample_results) <- (samples + 1)
length(sample_proposals) <- (samples + 1)
if(verbose){print("Beginning sampling, printing acceptance probabilties")}
for(i in 2:(samples+1)){
#allow for specified proposal distribution
if(verbose){print("Calculating proposal")}
t_1 <- proc.time()
tmp <- proposal_step(theta_old,
net,
s =s,
formula_rhs=formula_rhs,
prior = prior,
prior_grad = prior_grad,
L_steps = L_steps,
epsilon = epsilon,
momentum_sigma = momentum_sigma,
change_off = change_off,
change_on = change_on,
proposal_sims = proposal_sims,
resamples = resamples,
HMC_grad_samples = HMC_grad_samples)
if(verbose){print(paste("Proposal Calc took ",(proc.time() - t_1)[3],"seconds"))}
theta_prop = tmp$proposal
sample_proposals[[i]] <- theta_prop
if(verbose){
print("The proposed step was")
print(theta_prop)
}
t_1 <- proc.time()
prob <- acceptance_prob(s,
theta_old,
theta_prop,
prior,
net,
formula_rhs,
change_off = change_off,
change_on = change_on,
edge_present = edge_present)
prob <- prob*tmp$prob_factor
if(verbose){print(paste("Acceptance Prob took ",(proc.time() - t_1)[3],"seconds"))}
sample_probs[i] <- prob
if(is.na(prob) | is.nan(prob)){prob <- 0}
if(verbose){
print("The sample number is")
print(i)
print("The acceptance prob is;")
print(prob)
print("prior contribution to acceptance prob is")
print(prior(theta_prop,net,change_on)/prior(theta_old,net,change_on))
}
accept <- (samples_accept[i] <= prob)
if(accept){
sample_results[[i]] <- theta_prop
theta_old <- theta_prop}
else{sample_results[[i]] <- theta_old}
}
return(list(sample_results = sample_results,
sample_probs = sample_probs,
sample_proposals = sample_proposals))
}
sample_func_stan <- function(start){
#Get the variational estimates as a starting point for the sampling
if(verbose){print("Calculating start point")}
if(is.null(start)){start <- lolog::lologVariational(as.formula(paste("net ~",formula_rhs,sep = "")))$theta}
if(is.function(start)){
start <- start()
}
if(verbose){print("Completed calculating start point")}
if(diff_perms){
if(is.null(perm_samples)){
s <- sample(1:e,e,replace = FALSE)
}else{
s <- sample(perm_samples,1)[[1]]
}}
#Calculate the change stats to speed things up:
if(verbose){print("Calculating LOLOG change stats")}
t_1 <- proc.time()
change_on <- lolog_change_stats(net,s,formula_rhs)
#change_off: a zero change-statistic vector for each dyad
change_off <- lapply(change_on,function(x){rep(0,length(x))})
if(verbose){print("Completed calculating LOLOG change stats")
print(paste("Change Stat Calc took ",(proc.time() - t_1)[3],"seconds"))
}
#calculate the edges present to speed up
tmp <- combn(seq(1,n),2)
if(get.network.attribute(net,"directed")){
tmp <- cbind(tmp,tmp[c(2,1),])
}
edges <- lapply(1:dim(tmp)[2],function(i){tmp[,i]})
net_tmp <- as.BinaryNet(net)
edges <- edges[s]
t_1 <- proc.time()
edge_present <- sapply(edges,function(x){net_tmp$getDyads(x[1],x[2])})
if(verbose){print(paste("Calculating edges_present took ",(proc.time() - t_1)[3],"seconds"))}
#Carry out the Bayesian GLM fit:
data = cbind(edge_present,do.call(rbind,change_on))
colnames(data) <- c("edge_present",names(start))
data <- as.data.frame(data)
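#Conditional on the edge ordering, the LOLOG likelihood is a product of Bernoulli terms
#whose logits are theta %*% (dyad change statistics), i.e. a logistic regression of
#edge_present on the change statistics, so brms can sample the corresponding posterior.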
prior_tmp <- function(theta){
return(prior(theta,net,change_on))
}
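#Note that prior_tmp is not passed to brm() below (the prior argument is commented out),
#so the "stan" method uses brms's default priors, as stated for the method argument.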
t <- proc.time()
model <- brms::brm(
as.formula(paste('edge_present ~ ', paste(names(start)[-1],collapse="+"),sep="")),
data = data,
#prior = prior_tmp,
family = brms::bernoulli(link = "logit"),
seed = 123 # Adding a seed makes results reproducible.
)
if(verbose){
print(paste("STAN model took ",(proc.time()-t)[3], " seconds to run"),sep="")
}
#put the samples in the sample_posterior object:
sample_results = brms::posterior_samples(model)
#remove the model to avoid crashing issues:
rm(list=c("model"))
#remove last column and rename
sample_results <- sample_results[,(1:(dim(sample_results)[2]-1))]
names(sample_results) <- names(start)
sample_results <- lapply(1:dim(sample_results)[1],function(x){sample_results[x,]})
#get the correct number
sample_results <- sample_results[sample(1:length(sample_results),samples)]
return(list(sample_results = sample_results,
sample_probs = NULL,
sample_proposals = NULL))
}
if(method == "stan"){
sample_func <- sample_func_stan
}
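#Run one chain per permutation: in parallel on a cluster if cores is given (with the
#needed packages and objects exported to each worker), otherwise sequentially via %do%.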
if(!is.null(cores)){
cluster <- parallel::makeCluster(cores)
parallel::clusterEvalQ(cl=cluster,library("lolog"))
parallel::clusterEvalQ(cl=cluster,library("statnet"))
parallel::clusterEvalQ(cl=cluster,library("MASS"))
parallel::clusterEvalQ(cl=cluster,library("brms"))
parallel::clusterExport(cl = cluster, varlist = c("acceptance_prob","lolog_change_stats"),envir = globalenv())
parallel::clusterExport(cl = cluster, varlist = c("proposal_step","perm_samples","start","net","formula_rhs","n","e","prior","prior_grad","verbose","sample_func","s","randomwalk_sigma","cov_start","resamples"),envir = environment())
parallel::clusterExport(cl = cluster, export_list)
doParallel::registerDoParallel(cluster)
prob_medians <- c()
posterior_samples <- foreach(j=1:n_perms)%dopar%{
tmp <- sample_func(start)
Sys.sleep(1)
tmp}
parallel::stopCluster(cl=cluster)
}
else{posterior_samples <- foreach(j=1:n_perms)%do%{
tmp <- sample_func(start)
Sys.sleep(1)
tmp}}
sample <- do.call(c,lapply(posterior_samples,function(x){x$sample_results}))
t <- proc.time() - t
return(list(sample = sample, full_results = posterior_samples,time = t[3]))
}