mcmc_2sampleDAG0: Implementation of the parallel-tempered MCMC for one-sample...

View source: R/2sampleDAG0.R

mcmc_2sampleDAG0R Documentation

Implementation of the parallel-tempered MCMC for one-sample DAG0

Description

Implementation of the parallel-tempered MCMC for one-sample DAG0

Usage

mcmc_2sampleDAG0(
  data0,
  data1,
  starting,
  tuning,
  priors,
  n_sample = 5000,
  n_burnin = 2500,
  n_chain = 10,
  prob_swap = 0.1,
  temperature = NULL,
  do_par = FALSE,
  n_core = n_chain,
  verbose = TRUE,
  n_report = 100
)

Arguments

data0

a matrix containing data for the control group.

data1

a matrix containing data for the case/treatment group.

starting

a list with each tag corresponding to a parameter name. Valid tags are 'E0', 'D', 'E1', 'U', 'zeta', 'eta' 'alpha0', 'alpha1', 'beta0', 'beta1', 'delta0', 'delta1', 'gamma0', 'gamma1', 'psi0', 'psi1', 'tau', and 'rho'. The value portion of each tag is the parameters' starting values for MCMC.

tuning

a list with each tag corresponding to a parameter name. Valid tags are 'E', 'E_rev', 'U', 'zeta', 'eta', 'alpha0', 'alpha1', 'beta0', 'beta1', delta0', 'delta1', 'gamma0', 'gamma1', 'psi0', and 'psi1'. The value portion of each tag defines the standard deviations of Normal proposal distributions for Metropolis sampler for each parameter. The tag 'E' is for the birth or death update schemes for E = {E_0, E_1}, while 'E_rev' is for the reversal schemes. The value portion of both tags should consist of the standard deviations of Gaussian proposals for alpha, beta, delta, gamma, zeta, and eta for an update of E. Also, the value portion of the tag 'U' should include the standard deviations of Gaussian proposals for zeta and eta for an update of U.

priors

a list with each tag corresponding to a parameter name. Valid tags are 'psi', 'tau', and 'rho'. The value portion of each tag defines the hyperparameters for two-sample DAG0.

n_sample

the number of MCMC iterations.

n_burnin

the number of burn-in samples.

n_chain

the number of Markov chains for the parallel tempering.

prob_swap

the probability of a swapping step.

temperature

a sequence of increasing temperatures. Should have length of n_chain. Default value is T_l = (1.5)^{(l - 1) / (n_chain - 1)} for l = 1, ..., n_chain.

do_par

if TRUE, the parallel computation is used to update n_chain Markov chains in parallel. The parallel computation is only available on Unix-alike platforms as we create the worker process by forking.

n_core

the number of cores to be used for the parallel computation when do_par = TRUE.

verbose

if TRUE, progress of the sampler is printed to the screen. Otherwise, nothing is printed to the screen.

n_report

the interval to report MCMC progress.

Value

An object of class 1sampleDAG0, which is a list with the tags 'samples' and 'acceptance'. The value portion of the tag 'samples' gives MCMC samples from the posterior distribution of the parameters for two-sample DAG0. The value portion of the tag 'acceptance' shows the Metropolis sampling acceptance percents for each parameter.

Examples

library(BayesDAG0)
library(igraph)
library(pscl)

# set random seed
set.seed(1)

# generate E_0, D, and E1
p    = 10      # the number of variables
n_e0 = 10      # the number of edges in the graph for the control group
n_d  = 4       # the number of differences between two edge sets of both group
E0_true = E1_true = D_true = matrix(0, p, p)
while (sum(E0_true) < n_e0)
{
   id_A0 = matrix(sample(1 : p, 2), ncol = 2)
   E0_true[id_A0] = 1
   G0_true = graph_from_adjacency_matrix(t(E0_true))
   if (!(is_dag(G0_true)))
      E0_true[id_A0] = 0
}

E0_eq_1 = which(E0_true == 1, arr.ind = TRUE)
E0_eq_0 = which(E0_true != 1, arr.ind = TRUE)
while (sum(D_true) < n_d)
{
   # consider addition of an edge to E_0 or deletion of an edge from E_0 with equal probabilities
   if (runif(1) > 0.5)           
      id_D = matrix(E0_eq_1[sample(nrow(E0_eq_1), 1), ], ncol = 2)
   else
      id_D = matrix(E0_eq_0[sample(nrow(E0_eq_0), 1), ], ncol = 2)
   D_true[id_D] = 1
   E1_true = E0_true * (1 - D_true) + (1 - E0_true) * D_true 
   G1_true = graph_from_adjacency_matrix(t(E1_true))
   if (!(is_dag(G1_true)))
      D_true[id_D] = 0
}

E1_true = E0_true * (1 - D_true) + (1 - E0_true) * D_true 

# generate U, zeta, and eta
n_u = 2     # the number of shared edges having different strengths across groups
U_true = matrix(NA, p, p)
U_true[E0_true == 1 & E1_true == 1] = 0
shared = which(E0_true == 1 & E1_true == 1, arr.ind = TRUE)
while (sum(U_true, na.rm = TRUE) < n_u)
{
   id_U = matrix(shared[sample(nrow(shared), 1), ], ncol = 2)
   U_true[id_U] = 1
}

zeta_true = eta_true = matrix(NA, p, p)
zeta_true[E0_true == 1 & E1_true == 1] = 0
eta_true[E0_true == 1 & E1_true == 1]  = 0
zeta_true[U_true == 1 & !is.na(U_true)] = runif(sum(U_true, na.rm = TRUE), 0.7, 1)
eta_true[U_true == 1 & !is.na(U_true)]  = runif(sum(U_true, na.rm = TRUE), -1, -0.7)

# generate model parameters for two-sample DAG0
alpha0_true = alpha1_true = matrix(0, p, p)
beta0_true  = beta1_true  = matrix(0, p, p)
alpha0_true[E0_true == 1] = runif(sum(E0_true), 0.3, 1)
beta0_true[E0_true == 1]  = runif(sum(E0_true), -1, -0.3)
alpha1_true[E0_true == 0 & E1_true == 1] = runif(sum((1 - E0_true) * E1_true), 0.3, 1)
beta1_true[E0_true == 0 & E1_true == 1]  = runif(sum((1 - E0_true) * E1_true), -1, -0.3)
alpha1_true[E0_true == 1 & E1_true == 1] = alpha0_true[E0_true == 1 & E1_true == 1] + zeta_true[E0_true == 1 & E1_true == 1]
beta1_true[E0_true == 1 & E1_true == 1]  = beta0_true[E0_true == 1 & E1_true == 1] + eta_true[E0_true == 1 & E1_true == 1]
delta0_true = runif(p, -2, -1)
delta1_true = runif(p, -2, -1)
gamma0_true = runif(p, 1, 2)
gamma1_true = runif(p, 1, 2)
psi0_true   = 1 / runif(p, 1, 5)
psi1_true   = 1 / runif(p, 1, 5)

# generate data from two-sample DAG0 (dat0: control group, dat1: case/treatment group)
dat0 = generate_data_DAG0(n = 100, E0_true, alpha0_true, beta0_true, delta0_true, gamma0_true, psi0_true)
dat1 = generate_data_DAG0(n = 100, E1_true, alpha1_true, beta1_true, delta1_true, gamma1_true, psi1_true)

### apply one-sample DAG0 to each group to get starting values for MCMC for two-sample DAG0
### this technique saves a lot of iterations for convergence of our MCMC sampler for two-sample DAG0
# starting values for the control group
starting0 = list()
starting0$E     = matrix(0, ncol(dat0), ncol(dat0))
starting0$alpha = matrix(0, ncol(dat0), ncol(dat0))
starting0$beta  = matrix(0, ncol(dat0), ncol(dat0))
starting0$delta = rep(0, ncol(dat0))
starting0$gamma = rep(0, ncol(dat0))
starting0$psi   = rep(0, ncol(dat0))
for (j in 1 : ncol(dat0))
{
   mle0 = zeroinfl(dat0[ , j] ~ 1 | 1, dist = "negbin")
   starting0$delta[j] = mle0$coefficients$zero
   starting0$gamma[j] = mle0$coefficients$count
   starting0$psi[j]   = 1 / mle0$theta
}

starting0$tau = c(1, 1, 1, 1)
starting0$rho = 0.5

# starting values for the case/treatment group
starting1 = list()
starting1$E     = matrix(0, ncol(dat1), ncol(dat1))
starting1$alpha = matrix(0, ncol(dat1), ncol(dat1))
starting1$beta  = matrix(0, ncol(dat1), ncol(dat1))
starting1$delta = rep(0, ncol(dat1))
starting1$gamma = rep(0, ncol(dat1))
starting1$psi   = rep(0, ncol(dat1))
for (j in 1 : ncol(dat1))
{
   mle1 = zeroinfl(dat1[ , j] ~ 1 | 1, dist = "negbin")
   starting1$delta[j] = mle1$coefficients$zero
   starting1$gamma[j] = mle1$coefficients$count
   starting1$psi[j]   = 1 / mle1$theta
}

starting1$tau = c(1, 1, 1, 1)
starting1$rho = 0.5

# sd's of the Metropolis sampler Normal proposal distribution for MCMC for one-sample DAG0
tuning = list()
tuning$E     = c(0.05, 0.05, 1.2, 0.3)
tuning$E_rev = c(0.5, 0.5, 1.2, 0.3) 
tuning$alpha = 1.2  
tuning$beta  = 0.4  
tuning$delta = 1.2  
tuning$gamma = 0.3 
tuning$psi   = 1.0  

# hyperparameters for one-sample DAG0
priors = list()
priors$psi = c(1, 1)
priors$tau = c(1, 1)
priors$rho = c(1, 1)

# apply one-sample DAG0 approach to each group
# the parallel computation (do_par = TRUE) is only available on Unix-alike platforms
# Windows users should use do_par = FALSE not to allow the parallel computation
out0 = mcmc_1sampleDAG0(dat0, starting0, tuning, priors, n_sample = 500, n_burnin = 499, do_par = TRUE)
out1 = mcmc_1sampleDAG0(dat1, starting1, tuning, priors, n_sample = 500, n_burnin = 499, do_par = TRUE)

### conduct posterior inference for two-sample DAG0
# set starting values at the last iteration of MCMC for one-sample DAG0
starting = list()
starting$E0     = out0$samples$E
starting$E1     = out1$samples$E
starting$D      = abs(starting$E1 - starting$E0)
starting$alpha0 = out0$samples$alpha
starting$alpha1 = out1$samples$alpha
starting$beta0  = out0$samples$beta
starting$beta1  = out1$samples$beta
starting$delta0 = out0$samples$delta
starting$delta1 = out1$samples$delta
starting$gamma0 = out0$samples$gamma
starting$gamma1 = out1$samples$gamma
starting$psi0   = out0$samples$psi
starting$psi1   = out1$samples$psi
starting$U      = matrix(NA, ncol(dat0), ncol(dat0))
starting$U[starting$E0 == 1 & starting$E1 == 1] = 1
starting$zeta   = (starting$alpha1 - starting$alpha0) * starting$U
starting$eta    = (starting$beta1 - starting$beta0) * starting$U
starting$tau    = c(1, 1, 1, 1, 1, 1)
starting$rho    = c(0.5, 0.5, 0.5)

# sd's of the Metropolis sampler Normal proposal distribution for MCMC for two-sample DAG0
tuning = list()
tuning$E      = c(0.05, 0.05, 1.2, 0.3, 0.05, 0.05)
tuning$E_rev  = c(0.5, 0.5, 1.2, 0.3, 0.5, 0.5)
tuning$U      = c(0.05, 0.05)
tuning$zeta   = 1.6  
tuning$eta    = 0.8  
tuning$alpha0 = 1.2  
tuning$alpha1 = 1.2  
tuning$beta0  = 0.4 
tuning$beta1  = 0.4 
tuning$delta0 = 1.2  
tuning$delta1 = 1.2  
tuning$gamma0 = 0.3 
tuning$gamma1 = 0.3 
tuning$psi0   = 1.0  
tuning$psi1   = 1.0  

# hyperparameters for two-sample DAG0
priors = list()
priors$psi = c(1, 1)
priors$tau = c(1, 1)
priors$rho = c(1, 1)

# run the parallel-tempered MCMC for two-sample DAG0
# the parallel computation (do_par = TRUE) is only available on Unix-alike platforms
# Windows users should use do_par = FALSE not to allow the parallel computation
out = mcmc_2sampleDAG0(dat0, dat1, starting, tuning, priors, do_par = TRUE)

# report Metropolis sampling acceptance rates
out$acceptance

# estimate graph structure for each group based on median probability model
E0_est  = (apply(out$samples$E0, c(1, 2), mean) > 0.5) + 0
E1_est  = (apply(out$samples$E1, c(1, 2), mean) > 0.5) + 0

# estimate differential edge strengths based on median probability model
U_est  = (apply(out$samples$U, c(1, 2), function(z) mean(z, na.rm = TRUE)) > 0.5) + 0

# report contingency table for edge selection for each group
table(E0_est, E0_true)
table(E1_est, E1_true)

# report contingency table for estimation of differential edge strengths 
table(U_est, U_true)

junsoukchoi/BayesDAG0 documentation built on Feb. 26, 2025, 2:37 p.m.