R/Blolog_var.R

Defines functions Blolog_var

#This function does variational bayesian inference for the LOLOG model:

Blolog_var <- function(formula_rhs,
                       net,
                       prior,
                       n_perms,
                       start = NULL,
                       samples,
                       expect_sims, #number of simulations used in the variational Bayes proceedure
                       verbose = TRUE,
                       cores = NULL, #if is non null specifies a cluster for parallelisation
                       ...
){
  t <- proc.time()
  #define network size
  n <- network.size(net)
  e_obs <- length(net$mel)
  if(get.network.attribute(net,"directed")){e<-n*(n-1)
  }else{e <- (n)*(n-1)/2}
  
  #calculate the network statistics
  net_stats <- lolog::calculateStatistics(as.formula(paste("net ~ ",formula_rhs)))
  
  posterior_samples <- list()
  length(posterior_samples) <- samples*n_perms
  s <- sample(1:e,e,replace = FALSE)
  
  var_Bayes_func <- function(start){
    
    if(verbose){print("Calculating start point")}
    if(is.null(start)){start <- lologVariational(as.formula(paste("net ~",formula_rhs,sep = "")))}
    if(is.function(start)){
      start <- start()
    }
    if(verbose){print("Completed calculating start point")}
    
    if(diff_perms){s <- sample(1:e,e,replace = FALSE)}
    
    #Calculate the change stats to speed things up:
    if(verbose){print("Calculating LOLOG change stats")}
    t_1 <- proc.time()
    change_on <- lolog_change_stats(net,s,formula_rhs)
    change_off <- list()
    change_off[1:length(change_on)] = rep(0,length(change_on[[1]]))
    if(verbose){print("Completed calculating LOLOG change stats")
      print(paste("Change Stat Calc took ",(proc.time() - t_1)[3],"seconds"))
    }
    
    #calculate the edges present to speed up
    tmp <- combn(seq(1,n),2)
    if(get.network.attribute(net,"directed")){
      tmp <- cbind(tmp,tmp[c(2,1),])
      edges <- lapply(1:dim(tmp)[2],function(i){tmp[,i]})
    
    }else{
      edges <- lapply(1:dim(tmp)[2],function(i){tmp[,i]})
    }
    net_tmp <- as.BinaryNet(net)
    edges <- edges[s]
    t_1 <- proc.time()
    edge_present <- sapply(edges,function(x){net_tmp$getDyads(x[1],x[2])})
    if(verbose){print(paste("Calculating edges_present took ",(proc.time() - t_1)[3],"seconds"))}
    
    #Do the optimaisation proceedure:
    #par function are the natural parameters for a multivariate normal
    # length(par) = d + (d(d+1)/2)
    d = length(net_stats)
    expect_func <- function(par,sims = 100){
      dup = matrixcalc::duplication.matrix(n=d)
      sigma = matrix(dup%*%par[(d+1):length(par)],byrow = F,ncol = d)
      mu = par[1:d]
      
      #define the argument then simulate and take the sample mean
      arg = function(theta){
        tmp <- sum(theta*net_stats) - 
        sum(log( 1 + exp(sapply(change_on,function(x){sum(theta*x)})))) + 
        log(prior(theta)) -
        log(mvtnorm::dmvnorm(theta,mean = mu,sigma = sigma))
        return(tmp)
      }
      
      #simulate from multivariate normal and take mean
      tmp = sapply(1:sims,function(x){
        result = arg(mvtnorm::rmvnorm(1,mean = mu,sigma=sigma))
        return(result)})
      tmp <- tmp[tmp != Inf & tmp != -Inf]
      tmp <- mean(tmp)
      
      return(tmp)
    }
    
    #maximise the expectation function using optim:
    dup = matrixcalc::duplication.matrix(n=d)
    mu_init = start$theta
    mu_init = rep(0,length(start$theta))
    sigma_init = start$vcov
    sigma_init = sigma_init[lower.tri(sigma_init,diag = T)]
    opt <- optim(par = c(mu_init,sigma_init),
                 fn = expect_func,
                 method = "Nelder-Mead",
                 control = list(fnscale = -1,trace = 3),
                 sims = 1000)
    
    return(list(mu = opt$par[1:d],
                sigma = matrix(dup%*%opt$par[(d+1):length(opt$par)],nrow = d)))
  }
  
  
  if(!is.null(cores)){
    cluster <- makeCluster(cores)
    clusterEvalQ(cl=cluster,library("lolog"))
    clusterEvalQ(cl=cluster,library("statnet"))
    clusterEvalQ(cl=cluster,library("MASS"))
    clusterExport(cl = cluster, varlist = c("acceptance_prob","lolog_change_stats"),envir = globalenv())
    clusterExport(cl = cluster, varlist = c("proposal_step","start","net","formula_rhs","n","e","prior","prior_grad","verbose","sample_func","s","randomwalk_sigma","cov_start","resamples"),envir = environment()) 
    doParallel::registerDoParallel(cluster)
    prob_medians <- c()
    posterior_fits <- foreach(j=1:n_perms)%dopar%{
      var_Bayes_func(start)}
    stopCluster(cl=cluster)
  }else{posterior_fits <- foreach(j=1:n_perms)%do%{var_Bayes_func(start)}}
  
  mu = do.call(rbind,lapply(posterior_fits,function(x){x$mu}))
  mu = apply(mu,2,mean)
  sigma = lapply(posterior_fits,function(x){x$sigma})
  sigma = Reduce('+',sigma)/(length(posterior_samples)**2)
  
  
  t <- proc.time() - t
  
  return(list(sample = sample, full_results = posterior_samples,time = t[3]))
}
duncan-clark/Blolog documentation built on June 22, 2022, 7:57 a.m.