tests/regression_tests/hojsgaard_model_tests/triangle_model/triangle-model_bayes-loglin_Notes-example.R

# Triangle model log linear
# Bayes Poisson direct via features model matrix and Stan

library(rstan)
library(shinystan)
library(CRFutil)

# Triangle model
grphf <- ~X.1:X.2+X.1:X.3+X.2:X.3
adj   <- ug(grphf, result="matrix")


# Load samps generated by a true triangle model:
#fpth <- "C:/Users/aliso/codes/CRFutil/tests/regression_tests/hojsgaard_model_tests/triangle_model/"
#fpth <- "/home/npetraco/codes/R/CRFutil/tests/regression_tests/hojsgaard_model_tests/triangle_model/triangle_data/"
#load(paste0(fpth,"triangle_samp.RData"))
#head(samps)
# Use the same "true" theta and seeds as with previous triange examples:
trit     <- make.empty.field(adj.mat = adj, parameterization.typ = "standard")
set.seed(6)
trit$par <- runif(6,-1.5,1.1)
trit$par
# "true" theta should be:
#0.07629757  0.93786913 -0.81268462 -0.51175581  0.59945681  1.04299689
# instantiate potentials
out.pot  <- make.pots(parms = trit$par,  crf = trit,  rescaleQ = F, replaceQ = T)

# Now sample
set.seed(1)
num.samps <- 25
samps     <- sample.exact(trit, num.samps)
mrf.sample.plot(samps)


# Make state count freq table from samples and compute frequencies of all possible state configs.
# State configs not observed will have 0 freq
ftab  <- data.frame(ftable(data.frame(samps)))
X.all <- ftab[,1:ncol(samps)]
freqs <- ftab[,ncol(ftab)]

# Model Matrix with respect to graph
f0 <- function(y){ as.numeric(c((y==1),(y==2)))}
M  <- compute.model.matrix(configs=X.all, edges.mat=trit$edges, node.par = trit$node.par, edge.par = trit$edge.par, ff = f0)


# Fit log(lam) = alp + M theta
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)

model.c <- stanc(file = "/home/npetraco/codes/R/CRFutil/inst/poisson_model.stan", model_name = 'model')
sm      <- stan_model(stanc_ret = model.c, verbose = T)

dat <- list(
  p = ncol(M),
  N = nrow(M),
  y = freqs,
  Mmodl = M
)

bfit <- sampling(sm, data = dat, iter=20000, thin = 4, chains = 4)

# Get the parameter estimtes:
theta <- extract(bfit, "theta")[[1]]
alpha <- extract(bfit, "alpha")[[1]]

# Extract the graph's partition function:
N    <- nrow(samps)
logZ <- log(N) - alpha
hist(logZ)

# Now using the (medians) posterior theta, estimate partition
# function with Belief propegation
llm2     <- make.empty.field(adj.mat = adj, parameterization.typ = "standard")
llm2$par <- apply(theta, MARGIN = 2, FUN = median)
out.pot  <- make.pots(parms = llm2$par,  crf = llm2,  rescaleQ = F, replaceQ = T)

pot.info.llm2.model     <- make.gRbase.potentials(llm2, node.names = c("X.1", "X.2", "X.3"), state.nmes = c("1","2"))
gR.dist.info.llm2.model <- distribution.from.potentials(pot.info.llm2.model$node.potentials, pot.info.llm2.model$edge.potentials)
logZ.llm2.model         <- gR.dist.info.llm2.model$logZ

logZ.llm2.model # log partition function from BP with a theta (from regression)
mean(logZ)      # log partition function direct from regression
median(logZ)

joint.dist.info.llm2.model <- as.data.frame(as.table(gR.dist.info.llm2.model$state.probs))
joint.dist.info.llm2.model <- joint.dist.info.llm2.model[,c(2,3,1,4)]
joint.dist.info.llm2.model[,4] <- joint.dist.info.llm2.model[,4] * 100
joint.dist.info.llm2.model
#    X1 X2 X3 bayes.lrm.cp mle.lrm.cp bayes.llm.cp* bayes.llm2 bayes.llm2.lams true.model.cp
# 1  1  1  1         10.3       10.5          16.6        19.9            19.9        18.4
# 2  1  1  2          1.1        1.5          10.7         8.3             8.3         8.0
# 3  2  1  1         22.0       21.5          14.3        14.8            14.8        15.6
# 4  2  1  2         23.1       22.5          17.1        21.0            21.0        22.6
# 5  1  2  1          9.1        9.5           7.9         4.5             4.5         4.2
# 6  1  2  2         10.3       10.5          14.1        14.3            14.3        14.9
# 7  2  2  1          2.0        2.5           4.5         1.5             1.5         1.3
# 8  2  2  2         22.0       21.5          14.8        15.8            15.8        15.0

# X.1 X.2 X.3   Poisson (log linear)
# 1   1   1   1 10.0327618
# 2   1   1   2  0.9185033
# 3   2   1   1 22.5293519
# 4   2   1   2 23.7361927
# 5   1   2   1  8.8969562
# 6   1   2   2 10.0371836
# 7   2   2   1  1.7055839
# 8   2   2   2 22.1434666


# Check maximum likelihood result too:
fit <- make.empty.field(adj.mat = adj, parameterization.typ = "standard")

# First compute the sufficient stats needed by the likelihood and its’ grad
fit$par.stat <- mrf.stat(fit, samps)

# Auxiliary, gradient convenience function. Follows train.mrf in CRF:
gradient <- function(par, crf, ...) { crf$gradient }

infr.meth <- infer.exact      # inference method needed for Z and marginals calcs
opt.info  <- stats::optim(    # optimize parameters
  par          = fit$par,       # theta
  fn           = negloglik,     # objective function
  gr           = gradient,      # grad of obj func
  crf          = fit,           # passed to fn/gr
  samples      = samps,         # passed to fn/gr
  infer.method = infr.meth,     # passed to fn/gr
  update.crfQ  = TRUE,          # passed to fn/gr
  method       = "L-BFGS-B",
  control      = list(trace = 1, REPORT=1))
opt.info$convergence
opt.info$message
fit$gradient
fit$nll

fit$par

out.pot  <- make.pots(parms = fit$par,  crf = fit,  rescaleQ = F, replaceQ = T)

pot.info.fit.model     <- make.gRbase.potentials(fit, node.names = c("X.1", "X.2", "X.3"), state.nmes = c("1","2"))
gR.dist.info.fit.model <- distribution.from.potentials(pot.info.fit.model$node.potentials, pot.info.fit.model$edge.potentials)
logZ.fit.model         <- gR.dist.info.fit.model$logZ

logZ.fit.model # log partition function from BP with a theta (from regression)
mean(logZ)      # log partition function direct from regression
median(logZ)

joint.dist.info.fit.model <- as.data.frame(as.table(gR.dist.info.fit.model$state.probs))
joint.dist.info.fit.model <- joint.dist.info.fit.model[,c(2,3,1,4)]
joint.dist.info.fit.model[,4] <- joint.dist.info.fit.model[,4] * 100
joint.dist.info.fit.model
npetraco/CRFutil documentation built on Nov. 23, 2023, 11:30 a.m.