In Jinsong-Chen/LAWBL: Latent (Variable) Analysis with Bayesian Learning

knitr::opts_chunk$set(echo = TRUE)

Note: the estimation process can be time consuming depending on the computing power. You can same some time by reducing the length of the chains.

Categorical Data with Missingness but no Local Dependence:

1) Load the package, obtain the data, and check the true loading pattern (qlam) and local dependence.

library(LAWBL)
dat <- sim18ccfa40$dat
dim(dat)
summary(dat) #10% missingness at random
J <- ncol(dat) # no. of items
K <- 3 # no. of factors
sim18ccfa40$qlam
sim18ccfa40$LD

2) E-step: Estimate with the GPCFA-LI model (E-step) by setting LD=F and the design matrix Q. Only a few loadings need to be specified in Q (e.g., 2 per factor). Longer chain is suggested for more accurate and stable estimation.

Q<-matrix(-1,J,K); # -1 for unspecified items
Q[1:2,1]<-Q[7:8,2]<-Q[13:14,3]<-1 # 1 for specified items
Q

m0 <- pcfa(dat = dat, Q = Q,LD = FALSE, cati = -1,burn = 5000, iter = 5000)

# summarize basic information
summary(m0)

#summarize significant loadings in pattern/Q-matrix format
summary(m0, what = 'qlambda') 

#factorial eigenvalue
summary(m0,what='eigen') 

#thresholds for categorical items
summary(m0,what='thd')

#plotting factorial eigenvalue
plot_lawbl(m0) # trace
plot_lawbl(m0, what='density') #density
plot_lawbl(m0, what='EPSR') #EPSR

3) C-step: Reconfigure the Q matrix for the C-step with one specified loading per item based on results from the E-step. Estimate with the GPCFA model by setting LD=TRUE (by default). Longer chain is suggested for more accurate and stable estimation.

Q<-matrix(-1,J,K);
tmp<-summary(m0, what="qlambda")
cind<-apply(tmp,1,which.max)
Q[cbind(c(1:J),cind)]<-1
#alternatively
#Q[1:6,1]<-Q[7:12,2]<-Q[13:18,3]<-1 # 1 for specified items

m1 <- pcfa(dat = dat, Q = Q, cati = -1)
summary(m1)
summary(m1, what = 'qlambda')
summary(m1, what = 'offpsx') #summarize significant LD terms
summary(m1,what='eigen')
summary(m1,what='thd')

#plotting factorial eigenvalue
plot_lawbl(m1) # trace
plot_lawbl(m1, what='density') #density
plot_lawbl(m1, what='EPSR') #EPSR

4) CFA-LD: One can also configure the Q matrix for a CCFA model with local dependence (i.e. without any unspecified loading) based on results from the C-step.

Q<-summary(m1, what="qlambda")
Q[Q!=0]<-1
Q

m2 <- pcfa(dat = dat, Q = Q, cati = -1,burn = 5000, iter = 5000)
summary(m2)
summary(m2, what = 'qlambda') 
summary(m2, what = 'offpsx')
summary(m2,what='eigen')
summary(m2,what='thd')

plot_lawbl(m2) # Eigens' traces are excellent without regularization of the loadings

Categorical Data with Missingness and Local Dependence:

1) Obtain the data and check the true loading pattern (qlam) and local dependence.

dat <- sim18ccfa41$dat
summary(dat) #10% missingness at random
J <- ncol(dat) # no. of items
K <- 3 # no. of factors
sim18ccfa41$qlam
sim18ccfa41$LD # effect size = .3

2) E-step: Estimate with the GPCFA-LI model (E-step) by setting LD=FALSE and the design matrix Q. Only a few loadings need to be specified in Q (e.g., 2 per factor). Some loading estimates are biased due to ignoring the LD. So do the eigenvalues.

Q<-matrix(-1,J,K); # -1 for unspecified items
Q[1:2,1]<-Q[7:8,2]<-Q[13:14,3]<-1 # 1 for specified items
Q

m0 <- pcfa(dat = dat, Q = Q,LD = FALSE, cati = -1,burn = 5000, iter = 5000)
summary(m0)
summary(m0, what = 'qlambda')
summary(m0,what='eigen')
summary(m0,what='thd')

plot_lawbl(m0) # trace
plot_lawbl(m0, what='EPSR') #EPSR

3) C-step: Reconfigure the Q matrix for the C-step with one specified loading per item based on results from the E-step. Estimate with the GPCFA model by setting LD=TRUE (by default). The estimates are more accurate, and the LD terms can be largely recovered.

Q<-matrix(-1,J,K);
tmp<-summary(m0, what="qlambda")
cind<-apply(tmp,1,which.max)
Q[cbind(c(1:J),cind)]<-1
Q

m1 <- pcfa(dat = dat, Q = Q, cati = -1,burn = 5000, iter = 5000)
summary(m1)
summary(m1, what = 'qlambda')
summary(m1,what='eigen')
summary(m1, what = 'offpsx')
summary(m1,what='thd')

4) CFA-LD: Configure the Q matrix for a CCFA model with local dependence (i.e. without any unspecified loading) based on results from the C-step. Results are better than, but similar to the C-step.

Q<-summary(m1, what="qlambda")
Q[Q!=0]<-1
Q

m2 <- pcfa(dat = dat, Q = Q, cati = -1,burn = 5000, iter = 5000)
summary(m2)
summary(m2, what = 'qlambda') 
summary(m2,what='eigen')
summary(m2, what = 'offpsx')
summary(m2,what='thd')

Mixed-type Data with Missingness and Local Dependence:

1) Obtain the data and check the true loading pattern (qlam) and local dependence.

dat <- sim18mcfa41$dat
summary(dat) #10% missingness at random
J <- ncol(dat) # no. of items
K <- 3 # no. of factors
sim18mcfa41$qlam
sim18mcfa41$LD # effect size = .3

2) E-step: Estimate with the GPCFA-LI model (E-step) by setting LD=FALSE and the design matrix Q. Only a few loadings need to be specified in Q (e.g., 2 per factor). The first 6 items are categorical and need to be specified with cati.

Q<-matrix(-1,J,K); # -1 for unspecified items
Q[1:2,1]<-Q[7:8,2]<-Q[13:14,3]<-1 # 1 for specified items
Q

m0 <- pcfa(dat = dat, Q = Q,LD = FALSE, cati = c(1:6),burn = 5000, iter = 5000)
summary(m0)
summary(m0, what = 'qlambda')
summary(m0,what='eigen')
summary(m0,what='thd') # only for 12 items 

plot_lawbl(m0) # trace
plot_lawbl(m0, what='density')
plot_lawbl(m0, what='EPSR') #EPSR

3) C-step: Reconfigure the Q matrix for the C-step with one specified loading per item based on results from the E-step. Estimate with the GPCFA model by setting LD=TRUE (by default). The estimates are more accurate, and the LD terms can be largely recovered.

Q<-matrix(-1,J,K);
tmp<-summary(m0, what="qlambda")
cind<-apply(tmp,1,which.max)
Q[cbind(c(1:J),cind)]<-1
Q

m1 <- pcfa(dat = dat, Q = Q, cati = c(1:6),burn = 5000, iter = 5000)
summary(m1)
summary(m1, what = 'qlambda')
summary(m1,what='eigen')
summary(m1, what = 'offpsx')
summary(m1,what='thd')

4) CFA-LD: Configure the Q matrix for a mix of CFA and CCFA model with local dependence (i.e. without any unspecified loading) based on results from the C-step. Results are better than, but similar to the C-step.

Q<-summary(m1, what="qlambda")
Q[Q!=0]<-1
Q

m2 <- pcfa(dat = dat, Q = Q, cati = c(1:6),burn = 5000, iter = 5000)
summary(m2)
summary(m2, what = 'qlambda') 
summary(m2,what='eigen')
summary(m2, what = 'offpsx')
summary(m2,what='thd')