knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(lavaan)
library(R2spa)

The example is from https://lavaan.ugent.be/tutorial/sem.html.

Factor score

For a CFA model with multiple latent factors, even when each indicator only loads on one factor, the resulting factor scores generally are weighted composites of ALL indicators. Consider the regression score, which has the form

$$\tilde{\boldsymbol \eta} = \mathbf{A}(\mathbf{y} - \hat{\boldsymbol \mu}) + \boldsymbol{\alpha}$$

where $\mathbf{A} = \boldsymbol{\Psi}\boldsymbol{\Lambda}^\top \hat{\boldsymbol{\Sigma}}^{-1}$ is a $q$ $\times$ $p$ matrix, $\hat{\boldsymbol \mu} = \boldsymbol{\nu} + \boldsymbol{\Lambda} \boldsymbol{\alpha}$ and $\boldsymbol{\Sigma} = \boldsymbol{\Lambda} \boldsymbol{\Psi}\boldsymbol{\Lambda}^\top + \boldsymbol{\Theta}$ are the model-implied means and covariances of the indicators $\mathbf{y}$, and $\boldsymbol{\alpha}$ and $\boldsymbol{\Psi}$ are the latent means and latent covariances.

Therefore, assuming that the model is correctly specified such that $\mathbf{y} = \boldsymbol{\nu} + \boldsymbol{\Lambda} \boldsymbol{\eta} + \boldsymbol{\varepsilon}$,

$$ \begin{aligned} \tilde{\boldsymbol \eta} & = \mathbf{A}(\boldsymbol{\Lambda} \boldsymbol{\eta} + \boldsymbol{\varepsilon} - \boldsymbol{\Lambda} \boldsymbol{\alpha}) + \boldsymbol{\alpha} \ & = (\mathbf{I} - \mathbf{A}\boldsymbol{\Lambda}) \boldsymbol{\alpha} + \mathbf{A}\boldsymbol{\Lambda} \boldsymbol{\eta} + \mathbf{A}\boldsymbol{\varepsilon}. \end{aligned} $$

If we consider $\tilde{\boldsymbol \eta}$ as indicators of $\boldsymbol \eta$, we can see that $\boldsymbol{\nu}\tilde{\boldsymbol{\eta}} = (\mathbf{I} - \mathbf{A}\boldsymbol{\Lambda}) \boldsymbol{\alpha}$ is the intercept, $\boldsymbol{\Lambda}\tilde{\boldsymbol{\eta}} = \mathbf{A}\boldsymbol{\Lambda}$ is the $q$ $\times$ $q$ loading matrix, and $\boldsymbol{\Theta}_\tilde{\boldsymbol{\eta}} = \mathbf{A}\boldsymbol{\varepsilon}$ is the error covariance matrix.

We can see that $\boldsymbol{\Lambda}_\tilde{\boldsymbol{\eta}}$ is generally not diagonal, as the following shows:

# CFA
my_cfa <- "
  # latent variables
    ind60 =~ x1 + x2 + x3
    dem60 =~ y1 + y2 + y3 + y4
"
cfa_fit <- cfa(model = my_cfa,
               data  = PoliticalDemocracy,
               std.lv = TRUE)
# A matrix
pars <- lavInspect(cfa_fit, what = "est")
lambda_mat <- pars$lambda
psi_mat <- pars$psi
sigma_mat <- cfa_fit@implied$cov[[1]]
ginvsigma <- MASS::ginv(sigma_mat)
alambda <- psi_mat %*% crossprod(lambda_mat, ginvsigma %*% lambda_mat)
alambda

We can also use R2spa::get_fs():

(fs_dat <- get_fs(PoliticalDemocracy, model = my_cfa, std.lv = TRUE)) |> head()

Therefore, we need to specify cross-loadings when using 2S-PA.

tspa_fit <- tspa(model = "dem60 ~ ind60", data = fs_dat,
                 fsT = attr(fs_dat, "fsT"), 
                 fsL = attr(fs_dat, "fsL"))
cat(attr(tspa_fit, "tspaModel"))
parameterestimates(tspa_fit)
# Compute factor scores using lavPredict
fs_dat <- lavPredict(cfa_fit, acov = "standard")
colnames(fs_dat) <- c("fs_ind60", "fs_dem60")
# alambda %*% attr(fs_dat, "acov")[[1]] %*% t(alambda)
my2spa <- "
  # latent variables (indicated by factor scores)
    ind60 =~ 0.95538579 * fs_ind60 + 0.05816994 * fs_dem60
    dem60 =~ 0.01834111 * fs_ind60 + 0.86888887 * fs_dem60
  # constrain the errors
    fs_ind60 ~~ 0.034595518 * fs_ind60
    fs_dem60 ~~ 0.090775708 * fs_dem60
    fs_ind60 ~~ 0.004017388 * fs_dem60
  # latent variances
    ind60 ~~ ind60
    dem60 ~~ dem60
  # regressions
    dem60 ~ ind60
"
tspa_fit <- sem(model = my2spa, data  = fs_dat)
standardizedSolution(tspa_fit)

which is more consistent with the SEM results.

Three-factor model example

# CFA
cfa_3fac <-  "
  # latent variables
    ind60 =~ x1 + x2 + x3
    dem60 =~ y1 + y2 + y3 + y4
    dem65 =~ y5 + y6 + y7 + y8
"
cfa_3fac_fit <- cfa(model = cfa_3fac,
                    data  = PoliticalDemocracy,
                    std.lv = TRUE)
# A matrix
pars <- lavInspect(cfa_3fac_fit, what = "est")
lambda_mat <- pars$lambda
psi_mat <- pars$psi
sigma_mat <- cfa_3fac_fit@implied$cov[[1]]
ginvsigma <- MASS::ginv(sigma_mat)
alambda <- psi_mat %*% crossprod(lambda_mat, ginvsigma %*% lambda_mat)
alambda

We can also use R2spa::get_fs():

(fs_dat_3fac <- get_fs(PoliticalDemocracy, model = cfa_3fac, std.lv = TRUE)) |>
  head()

Therefore, we need to specify cross-loadings when using 2S-PA.

tspa_fit_3fac <- tspa(model = "dem60 ~ ind60
              dem65 ~ ind60 + dem60",
              data = fs_dat_3fac,
              fsT = attr(fs_dat_3fac, "fsT"),
              fsL = attr(fs_dat_3fac, "fsL"))
cat(attr(tspa_fit_3fac, "tspaModel"))
parameterestimates(tspa_fit_3fac)
# Compute factor scores using lavPredict
fs_3fac_dat <- lavPredict(cfa_3fac_fit, acov = "standard")
colnames(fs_3fac_dat) <- c("fs_ind60", "fs_dem60", "fs_dem65")
# alambda %*% attr(fs_dat, "acov")[[1]] %*% t(alambda)
mod_2spa_3fac <- "
  # latent variables (indicated by factor scores)
    ind60 =~ 0.95064774 * fs_ind60 + (-0.02069724) * fs_dem60 + 
             0.09868528 * fs_dem65
    dem60 =~ (-0.005967124) * fs_ind60 + 0.533020047 * fs_dem60 +
             0.401095944 * fs_dem65
    dem65 =~ 0.02951139 * fs_ind60 + 0.41603787 * fs_dem60 +
             0.49133377 * fs_dem65
  # constrain the errors
    fs_ind60 ~~ 0.0340104952 * fs_ind60 + 0.0003881641 * fs_dem60 + 
                0.005026024 * fs_dem65
    fs_dem60 ~~ 0.0586870314 * fs_dem60 + 0.0533752458 * fs_dem65
    fs_dem65 ~~ 0.051573299 * fs_dem65
  # latent variances
    ind60 ~~ ind60
    dem60 ~~ dem60
    dem65 ~~ dem65
  # regressions
    dem60 ~ ind60
    dem65 ~ ind60 + dem60
"
tspa_fit_3fac <- sem(model = mod_2spa_3fac, data = fs_3fac_dat)
standardizedSolution(tspa_fit_3fac)

Compare to SEM:

sem_3fac <- sem("
  # latent variables
    ind60 =~ x1 + x2 + x3
    dem60 =~ y1 + y2 + y3 + y4
    dem65 =~ y5 + y6 + y7 + y8
  # structural model
    dem60 ~ ind60
    dem65 ~ ind60 + dem60
  ",
  data = PoliticalDemocracy
)
standardizedSolution(sem_3fac)


Gengrui-Zhang/R2spa documentation built on Sept. 6, 2024, 5:01 p.m.