tests/regression_tests/hojsgaard_model_tests/triangle_model/triangle-model_bayes-loglin1.R

# Triangle model log linear
# Bayes Poisson vs GLM poisson
# Using the output coefficients, does CRF give logZ commensurate with intercept??

library(MASS)
library(rstan)
library(shinystan)
library(CRFutil)

# Load samps generated by a true triangle model:
fpth <- "/home/npetraco/codes/R/CRFutil/tests/regression_tests/hojsgaard_model_tests/triangle_model/triangle_data/"
load(paste0(fpth,"triangle_samp.RData"))
head(samps)

# Make state count freq table from samples
X.freq <- data.frame(ftable(data.frame(samps)))
X.freq

# Bayes Poisson Log linear will need with explicit model matrix
Xm <- model.matrix(~X.1 + X.2 + X.3 + X.1:X.2 + X.1:X.3 + X.2:X.3,
                   contrasts = list(X.1 = "contr.sum",
                                    X.2 = "contr.sum",
                                    X.3 = "contr.sum"), data = X.freq)
as.matrix(Xm)

model.c <- stanc(file = "/home/npetraco/codes/R/CRFutil/tests/regression_tests/hojsgaard_model_tests/triangle_model/vanalla.poisson.regression2.stan", model_name = 'model')
sm <- stan_model(stanc_ret = model.c, verbose = T)

dat <- list(
  p = ncol(Xm),
  N = nrow(Xm),
  y = X.freq[,4],
  Xmodl = Xm
)
#dat

bfit <- sampling(sm, data = dat, iter=10000, thin = 4, chains = 4)
bfit

pois.reg.glm.fit <- glm(X.freq[,4] ~ Xm - 1, family = poisson())
coef(pois.reg.glm.fit)
    # bayes llm
# beta[1]     5.82 # TRUE (to scale)??
# beta[2]     0.05  0.07629757
# beta[3]     0.44  0.93786913
# beta[4]    -0.38 -0.81268462
# beta[5]    -0.21 -0.51175581
# beta[6]     0.31  0.59945681
# beta[7]     0.51  1.04299689


nrow(Xm)
beta <- extract(bfit, "beta")[[1]]

# Partition function from the intercept:
hist(beta[,1])
# alpha = beta1 = log(N) - log(Z)
logZ <- nrow(Xm) - beta[,1]       # WRONG??? N = #samps, not num all possible configs....????
hist(logZ)
mean(logZ)

# Exmamine the theta from the regression and see how cole to the true conguig probs it yields
theta <- colMeans(beta[,2:ncol(beta)])
greq  <- ~X.1 + X.2 + X.3 + X.1:X.2 + X.1:X.3 + X.2:X.3
bllm   <- make.empty.field(graph.eq=greq, parameterization.typ="standard", plotQ=T)
bllm$par <- theta
bllm$par

out.pot <- make.pots(parms = bllm$par,  crf = bllm,  rescaleQ = F, replaceQ = T)
log(bllm$node.pot)
bllm$edge.pot

pot.info.bllm.model        <- make.gRbase.potentials(bllm, node.names = c("X.1", "X.2", "X.3"), state.nmes = c("1","2"))
gR.dist.info.bllm.model    <- distribution.from.potentials(pot.info.bllm.model$node.potentials, pot.info.bllm.model$edge.potentials)
logZ.bllm.model            <- gR.dist.info.bllm.model$logZ

# How does the partion function found from inference compare to the one obtained from the regression?
logZ.bllm.model
mean(logZ)
exp(logZ.bllm.model)
exp(mean(logZ))
# TRUE (to scale)???
# $logZ
# [1] 3.025716


joint.dist.info.bllm.model <- as.data.frame(as.table(gR.dist.info.bllm.model$state.probs))
joint.dist.info.bllm.model <- joint.dist.info.bllm.model[,c(2,3,1,4)]
joint.dist.info.bllm.model[,4] <- joint.dist.info.bllm.model[,4] * 100
joint.dist.info.bllm.model
#    X1 X2 X3 bayes.lrm.cp mle.lrm.cp bayes.llm.cp* true.model.cp
# 1  1  1  1         10.3       10.5          16.6           18.4
# 2  1  1  2          1.1        1.5          10.7            8.0
# 3  2  1  1         22.0       21.5          14.3           15.6
# 4  2  1  2         23.1       22.5          17.1           22.6
# 5  1  2  1          9.1        9.5           7.9            4.2
# 6  1  2  2         10.3       10.5          14.1           14.9
# 7  2  2  1          2.0        2.5           4.5            1.3
# 8  2  2  2         22.0       21.5          14.8           15.0
npetraco/CRFutil documentation built on Nov. 23, 2023, 11:30 a.m.