tests/regression_tests/hojsgaard_model_tests/triangle_model/triangle-model_loglin-modelmatrix.R

library(MASS)

# Log linear with explicit model matrix

# Load samps generated by true model:
fpth <- "/home/npetraco/codes/R/CRFutil/tests/regression_tests/hojsgaard_model_tests/triangle_model/triangle_data/"
load(paste0(fpth,"triangle_samp.RData"))
head(samps)

# Make state count freq table from samples
X.freq <- data.frame(ftable(data.frame(samps)))
X.freq

# Get contrasts to build model matrix:
X1.dc <- X.freq[,1]
contrasts(X1.dc)
contrasts(X1.dc) <- contr.sum(2)
contrasts(X1.dc) # Only really need this????

X2.dc <- X.freq[,2]
contrasts(X2.dc)
contrasts(X2.dc) <- contr.sum(2)
contrasts(X2.dc) # Only really need this????

X3.dc <- X.freq[,3]
contrasts(X3.dc)
contrasts(X3.dc) <- contr.sum(2)
contrasts(X3.dc) # Only really need this????

# Use Contrasts to build Model matrix
Xm <- model.matrix(~X1.dc + X2.dc + X3.dc + X1.dc:X2.dc + X1.dc:X3.dc + X2.dc:X3.dc,
                   contrasts = list(X1.dc = "contr.sum",
                                    X2.dc = "contr.sum",
                                    X3.dc = "contr.sum"))
Xm

# IS THERE A BETTER WAY TO BUILS THE MODEL MATRIX?????????
# YES! It's this:
# Xm <- model.matrix(~X.1 + X.2 + X.3 + X.1:X.2 + X.1:X.3 + X.2:X.3,
#                    contrasts = list(X.1 = "contr.sum",
#                                     X.2 = "contr.sum",
#                                     X.3 = "contr.sum"), data = X.freq)
# Xm

# Fit the loglinear model with contrasts used to build the explicit model matrix
freqs <- X.freq[,4]
loglin.mm <- loglm(freqs~X1.dc + X2.dc + X3.dc + X1.dc:X2.dc + X1.dc:X3.dc + X2.dc:X3.dc)
coef(loglin.mm)

# Fit contingency table:
X.cont.fitted <- fitted(loglin.mm)

# Flatten fit contingency table into matrix-table form
X.counts.fitted <- data.frame(ftable(X.cont.fitted))
X.counts.fitted
sum(X.counts.fitted[,4]) # Equal to the sample size?

# Estimate state probabilities is by the loglin fitted relative frquencies:
X.freq.fitted <- X.counts.fitted
X.freq.fitted[,4] <- X.freq.fitted[,4]/sum(X.counts.fitted[,4])
X.freq.fitted <- cbind(X.freq.fitted, X.counts.fitted[,4]) # Carry along the fit counts the computation is based on
colnames(X.freq.fitted)[c(4,5)] <- c("LogLin.MM.Freq","LogLin.MM.Counts")
loglin.MM.dist.info <- X.freq.fitted
save(loglin.MM.dist.info, file = paste0(fpth,"triangle_loglin_MM_dist.RData"))


# NOTE: These give the same fit:

#loglm(freqs~Xm[,2]+Xm[,3]+Xm[,4]+Xm[,5]+Xm[,6]+Xm[,7]) # Doesn't work
coef(loglm(freqs~X1.dc + X2.dc + X3.dc + X1.dc:X2.dc + X1.dc:X3.dc + X2.dc:X3.dc))
glm(freqs ~ Xm[,2] + Xm[,3] + Xm[,4] + Xm[,5] + Xm[,6] + Xm[,7], family = poisson())
glm(freqs ~ Xm[,2:7], family = poisson())
glm(freqs ~ Xm - 1, family = poisson())

# Coefs:
#5.82351 0.05025 0.44144 -0.37742 -0.20829 0.30693 0.50749
npetraco/CRFutil documentation built on Nov. 23, 2023, 11:30 a.m.