# Triangle model log linear
# Bayes Poisson direct via features model matrix and Stan
library(rstan)
library(shinystan)
library(CRFutil)
# Triangle model
grphf <- ~X.1:X.2+X.1:X.3+X.2:X.3
adj <- ug(grphf, result="matrix")
# Load samps generated by a true triangle model:
#fpth <- "C:/Users/aliso/codes/CRFutil/tests/regression_tests/hojsgaard_model_tests/triangle_model/"
#fpth <- "/home/npetraco/codes/R/CRFutil/tests/regression_tests/hojsgaard_model_tests/triangle_model/triangle_data/"
#load(paste0(fpth,"triangle_samp.RData"))
#head(samps)
# Use the same "true" theta and seeds as with previous triange examples:
trit <- make.empty.field(adj.mat = adj, parameterization.typ = "standard")
set.seed(6)
trit$par <- runif(6,-1.5,1.1)
trit$par
# "true" theta should be:
#0.07629757 0.93786913 -0.81268462 -0.51175581 0.59945681 1.04299689
# instantiate potentials
out.pot <- make.pots(parms = trit$par, crf = trit, rescaleQ = F, replaceQ = T)
# Now sample
set.seed(1)
num.samps <- 25
samps <- sample.exact(trit, num.samps)
mrf.sample.plot(samps)
# Make state count freq table from samples and compute frequencies of all possible state configs.
# State configs not observed will have 0 freq
ftab <- data.frame(ftable(data.frame(samps)))
X.all <- ftab[,1:ncol(samps)]
freqs <- ftab[,ncol(ftab)]
# Model Matrix with respect to graph
f0 <- function(y){ as.numeric(c((y==1),(y==2)))}
M <- compute.model.matrix(configs=X.all, edges.mat=trit$edges, node.par = trit$node.par, edge.par = trit$edge.par, ff = f0)
# Fit log(lam) = alp + M theta
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)
model.c <- stanc(file = "/home/npetraco/codes/R/CRFutil/inst/poisson_model.stan", model_name = 'model')
sm <- stan_model(stanc_ret = model.c, verbose = T)
dat <- list(
p = ncol(M),
N = nrow(M),
y = freqs,
Mmodl = M
)
bfit <- sampling(sm, data = dat, iter=20000, thin = 4, chains = 4)
# Get the parameter estimtes:
theta <- extract(bfit, "theta")[[1]]
alpha <- extract(bfit, "alpha")[[1]]
# Extract the graph's partition function:
N <- nrow(samps)
logZ <- log(N) - alpha
hist(logZ)
# Now using the (medians) posterior theta, estimate partition
# function with Belief propegation
llm2 <- make.empty.field(adj.mat = adj, parameterization.typ = "standard")
llm2$par <- apply(theta, MARGIN = 2, FUN = median)
out.pot <- make.pots(parms = llm2$par, crf = llm2, rescaleQ = F, replaceQ = T)
pot.info.llm2.model <- make.gRbase.potentials(llm2, node.names = c("X.1", "X.2", "X.3"), state.nmes = c("1","2"))
gR.dist.info.llm2.model <- distribution.from.potentials(pot.info.llm2.model$node.potentials, pot.info.llm2.model$edge.potentials)
logZ.llm2.model <- gR.dist.info.llm2.model$logZ
logZ.llm2.model # log partition function from BP with a theta (from regression)
mean(logZ) # log partition function direct from regression
median(logZ)
joint.dist.info.llm2.model <- as.data.frame(as.table(gR.dist.info.llm2.model$state.probs))
joint.dist.info.llm2.model <- joint.dist.info.llm2.model[,c(2,3,1,4)]
joint.dist.info.llm2.model[,4] <- joint.dist.info.llm2.model[,4] * 100
joint.dist.info.llm2.model
# X1 X2 X3 bayes.lrm.cp mle.lrm.cp bayes.llm.cp* bayes.llm2 bayes.llm2.lams true.model.cp
# 1 1 1 1 10.3 10.5 16.6 19.9 19.9 18.4
# 2 1 1 2 1.1 1.5 10.7 8.3 8.3 8.0
# 3 2 1 1 22.0 21.5 14.3 14.8 14.8 15.6
# 4 2 1 2 23.1 22.5 17.1 21.0 21.0 22.6
# 5 1 2 1 9.1 9.5 7.9 4.5 4.5 4.2
# 6 1 2 2 10.3 10.5 14.1 14.3 14.3 14.9
# 7 2 2 1 2.0 2.5 4.5 1.5 1.5 1.3
# 8 2 2 2 22.0 21.5 14.8 15.8 15.8 15.0
# X.1 X.2 X.3 Poisson (log linear)
# 1 1 1 1 10.0327618
# 2 1 1 2 0.9185033
# 3 2 1 1 22.5293519
# 4 2 1 2 23.7361927
# 5 1 2 1 8.8969562
# 6 1 2 2 10.0371836
# 7 2 2 1 1.7055839
# 8 2 2 2 22.1434666
# Check maximum likelihood result too:
fit <- make.empty.field(adj.mat = adj, parameterization.typ = "standard")
# First compute the sufficient stats needed by the likelihood and its’ grad
fit$par.stat <- mrf.stat(fit, samps)
# Auxiliary, gradient convenience function. Follows train.mrf in CRF:
gradient <- function(par, crf, ...) { crf$gradient }
infr.meth <- infer.exact # inference method needed for Z and marginals calcs
opt.info <- stats::optim( # optimize parameters
par = fit$par, # theta
fn = negloglik, # objective function
gr = gradient, # grad of obj func
crf = fit, # passed to fn/gr
samples = samps, # passed to fn/gr
infer.method = infr.meth, # passed to fn/gr
update.crfQ = TRUE, # passed to fn/gr
method = "L-BFGS-B",
control = list(trace = 1, REPORT=1))
opt.info$convergence
opt.info$message
fit$gradient
fit$nll
fit$par
out.pot <- make.pots(parms = fit$par, crf = fit, rescaleQ = F, replaceQ = T)
pot.info.fit.model <- make.gRbase.potentials(fit, node.names = c("X.1", "X.2", "X.3"), state.nmes = c("1","2"))
gR.dist.info.fit.model <- distribution.from.potentials(pot.info.fit.model$node.potentials, pot.info.fit.model$edge.potentials)
logZ.fit.model <- gR.dist.info.fit.model$logZ
logZ.fit.model # log partition function from BP with a theta (from regression)
mean(logZ) # log partition function direct from regression
median(logZ)
joint.dist.info.fit.model <- as.data.frame(as.table(gR.dist.info.fit.model$state.probs))
joint.dist.info.fit.model <- joint.dist.info.fit.model[,c(2,3,1,4)]
joint.dist.info.fit.model[,4] <- joint.dist.info.fit.model[,4] * 100
joint.dist.info.fit.model
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.