knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
library(lavaan) library(R2spa)
The example is from https://lavaan.ugent.be/tutorial/sem.html.
For a CFA model with multiple latent factors, even when each indicator only loads on one factor, the resulting factor scores generally are weighted composites of ALL indicators. Consider the regression score, which has the form
$$\tilde{\boldsymbol \eta} = \mathbf{A}(\mathbf{y} - \hat{\boldsymbol \mu}) + \boldsymbol{\alpha}$$
where $\mathbf{A} = \boldsymbol{\Psi}\boldsymbol{\Lambda}^\top \hat{\boldsymbol{\Sigma}}^{-1}$ is a $q$ $\times$ $p$ matrix, $\hat{\boldsymbol \mu} = \boldsymbol{\nu} + \boldsymbol{\Lambda} \boldsymbol{\alpha}$ and $\boldsymbol{\Sigma} = \boldsymbol{\Lambda} \boldsymbol{\Psi}\boldsymbol{\Lambda}^\top + \boldsymbol{\Theta}$ are the model-implied means and covariances of the indicators $\mathbf{y}$, and $\boldsymbol{\alpha}$ and $\boldsymbol{\Psi}$ are the latent means and latent covariances.
Therefore, assuming that the model is correctly specified such that $\mathbf{y} = \boldsymbol{\nu} + \boldsymbol{\Lambda} \boldsymbol{\eta} + \boldsymbol{\varepsilon}$,
$$ \begin{aligned} \tilde{\boldsymbol \eta} & = \mathbf{A}(\boldsymbol{\Lambda} \boldsymbol{\eta} + \boldsymbol{\varepsilon} - \boldsymbol{\Lambda} \boldsymbol{\alpha}) + \boldsymbol{\alpha} \ & = (\mathbf{I} - \mathbf{A}\boldsymbol{\Lambda}) \boldsymbol{\alpha} + \mathbf{A}\boldsymbol{\Lambda} \boldsymbol{\eta} + \mathbf{A}\boldsymbol{\varepsilon}. \end{aligned} $$
If we consider $\tilde{\boldsymbol \eta}$ as indicators of $\boldsymbol \eta$, we can see that $\boldsymbol{\nu}\tilde{\boldsymbol{\eta}} = (\mathbf{I} - \mathbf{A}\boldsymbol{\Lambda}) \boldsymbol{\alpha}$ is the intercept, $\boldsymbol{\Lambda}\tilde{\boldsymbol{\eta}} = \mathbf{A}\boldsymbol{\Lambda}$ is the $q$ $\times$ $q$ loading matrix, and $\boldsymbol{\Theta}_\tilde{\boldsymbol{\eta}} = \mathbf{A}\boldsymbol{\varepsilon}$ is the error covariance matrix.
We can see that $\boldsymbol{\Lambda}_\tilde{\boldsymbol{\eta}}$ is generally not diagonal, as the following shows:
# CFA my_cfa <- " # latent variables ind60 =~ x1 + x2 + x3 dem60 =~ y1 + y2 + y3 + y4 " cfa_fit <- cfa(model = my_cfa, data = PoliticalDemocracy, std.lv = TRUE) # A matrix pars <- lavInspect(cfa_fit, what = "est") lambda_mat <- pars$lambda psi_mat <- pars$psi sigma_mat <- cfa_fit@implied$cov[[1]] ginvsigma <- MASS::ginv(sigma_mat) alambda <- psi_mat %*% crossprod(lambda_mat, ginvsigma %*% lambda_mat) alambda
We can also use R2spa::get_fs()
:
(fs_dat <- get_fs(PoliticalDemocracy, model = my_cfa, std.lv = TRUE)) |> head()
Therefore, we need to specify cross-loadings when using 2S-PA.
tspa_fit <- tspa(model = "dem60 ~ ind60", data = fs_dat, fsT = attr(fs_dat, "fsT"), fsL = attr(fs_dat, "fsL")) cat(attr(tspa_fit, "tspaModel")) parameterestimates(tspa_fit)
# Compute factor scores using lavPredict fs_dat <- lavPredict(cfa_fit, acov = "standard") colnames(fs_dat) <- c("fs_ind60", "fs_dem60") # alambda %*% attr(fs_dat, "acov")[[1]] %*% t(alambda) my2spa <- " # latent variables (indicated by factor scores) ind60 =~ 0.95538579 * fs_ind60 + 0.05816994 * fs_dem60 dem60 =~ 0.01834111 * fs_ind60 + 0.86888887 * fs_dem60 # constrain the errors fs_ind60 ~~ 0.034595518 * fs_ind60 fs_dem60 ~~ 0.090775708 * fs_dem60 fs_ind60 ~~ 0.004017388 * fs_dem60 # latent variances ind60 ~~ ind60 dem60 ~~ dem60 # regressions dem60 ~ ind60 " tspa_fit <- sem(model = my2spa, data = fs_dat) standardizedSolution(tspa_fit)
which is more consistent with the SEM results.
# CFA cfa_3fac <- " # latent variables ind60 =~ x1 + x2 + x3 dem60 =~ y1 + y2 + y3 + y4 dem65 =~ y5 + y6 + y7 + y8 " cfa_3fac_fit <- cfa(model = cfa_3fac, data = PoliticalDemocracy, std.lv = TRUE) # A matrix pars <- lavInspect(cfa_3fac_fit, what = "est") lambda_mat <- pars$lambda psi_mat <- pars$psi sigma_mat <- cfa_3fac_fit@implied$cov[[1]] ginvsigma <- MASS::ginv(sigma_mat) alambda <- psi_mat %*% crossprod(lambda_mat, ginvsigma %*% lambda_mat) alambda
We can also use R2spa::get_fs()
:
(fs_dat_3fac <- get_fs(PoliticalDemocracy, model = cfa_3fac, std.lv = TRUE)) |> head()
Therefore, we need to specify cross-loadings when using 2S-PA.
tspa_fit_3fac <- tspa(model = "dem60 ~ ind60 dem65 ~ ind60 + dem60", data = fs_dat_3fac, fsT = attr(fs_dat_3fac, "fsT"), fsL = attr(fs_dat_3fac, "fsL")) cat(attr(tspa_fit_3fac, "tspaModel")) parameterestimates(tspa_fit_3fac)
# Compute factor scores using lavPredict fs_3fac_dat <- lavPredict(cfa_3fac_fit, acov = "standard") colnames(fs_3fac_dat) <- c("fs_ind60", "fs_dem60", "fs_dem65") # alambda %*% attr(fs_dat, "acov")[[1]] %*% t(alambda) mod_2spa_3fac <- " # latent variables (indicated by factor scores) ind60 =~ 0.95064774 * fs_ind60 + (-0.02069724) * fs_dem60 + 0.09868528 * fs_dem65 dem60 =~ (-0.005967124) * fs_ind60 + 0.533020047 * fs_dem60 + 0.401095944 * fs_dem65 dem65 =~ 0.02951139 * fs_ind60 + 0.41603787 * fs_dem60 + 0.49133377 * fs_dem65 # constrain the errors fs_ind60 ~~ 0.0340104952 * fs_ind60 + 0.0003881641 * fs_dem60 + 0.005026024 * fs_dem65 fs_dem60 ~~ 0.0586870314 * fs_dem60 + 0.0533752458 * fs_dem65 fs_dem65 ~~ 0.051573299 * fs_dem65 # latent variances ind60 ~~ ind60 dem60 ~~ dem60 dem65 ~~ dem65 # regressions dem60 ~ ind60 dem65 ~ ind60 + dem60 " tspa_fit_3fac <- sem(model = mod_2spa_3fac, data = fs_3fac_dat) standardizedSolution(tspa_fit_3fac)
Compare to SEM:
sem_3fac <- sem(" # latent variables ind60 =~ x1 + x2 + x3 dem60 =~ y1 + y2 + y3 + y4 dem65 =~ y5 + y6 + y7 + y8 # structural model dem60 ~ ind60 dem65 ~ ind60 + dem60 ", data = PoliticalDemocracy ) standardizedSolution(sem_3fac)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.