In Gengrui-Zhang/R2spa: An R package for two-stage path analysis (2S-PA) to adjust for measurement errors

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

library(mirt)  # for IRT
library(OpenMx)
library(umx)
library(R2spa)
set.seed(591028)

Simulate Data

#' Population parameters
num_obs <- 5000  # relatively large sample
cov_xm <- .45
betas <- c(.2, .3, -.4)
# Results seem more accurate with more items
lambdax <- 1.7 * c(.6, .7, .8, .5, .5, .3, .9, .7, .6)  # on normal ogive metric
lambdam <- 1.7 * c(.8, .7, .7, .6, .5)  # on normal ogive metric
lambday <- c(.5, .7, .9)
thresx <- matrix(c(1, 1.5, 1.3, 0.5, 2, -0.5, -1, 0.5, 0.8), nrow = 1)  # binary
thresm <- matrix(c(-2, -2, -0.4, -0.5, 0,
                   -1.5, -0.5, 0, 0, 0.5,
                   -1, 1, 1.5, 0.8, 1), nrow = 3,
                 byrow = TRUE)  # ordinal with 4 categories
inty <- 1.5  # intercept of y
# error variance of y
r2y <- crossprod(
    betas,
    matrix(c(1, cov_xm, 0, cov_xm, 1, 0, 0, 0, 1 + cov_xm^2), nrow = 3)
    ) %*% betas
evar <- as.numeric(1 - r2y)

#' Simulate latent variables X and M with variances of 1, and the disturbance of
#' Y
cov_xm_ey <- matrix(c(1, cov_xm, 0,
                      cov_xm, 1, 0,
                      0, 0, evar), nrow = 3)
eta <- MASS::mvrnorm(num_obs, mu = rep(0, 3), Sigma = cov_xm_ey,
                     empirical = TRUE)
# Add product term
eta <- cbind(eta, eta[, 1] * eta[, 2])

#' Compute latent y (standardized)
etay <- inty + eta[, -3] %*% betas + eta[, 3]
# Verify the structural coefficients with eta known
lm(etay ~ eta[, -3])

#' Simulate y (continuous)
y <- t(
    lambday %*% t(etay) +
        rnorm(num_obs * length(lambday), sd = sqrt(1 - lambday^2))
)

#' Simulate latent continuous responses for X and M
xstar <- eta[, 1] %*% t(lambdax) + rnorm(num_obs * length(lambdax))
mstar <- eta[, 2] %*% t(lambdam) + rnorm(num_obs * length(lambdam))
#' Obtain categorical items
x <- vapply(
    seq_along(lambdax),
    FUN = \(i) {
        findInterval(xstar[, i], thresx[, i])
    },
    FUN.VALUE = numeric(num_obs))
m <- vapply(
    seq_along(lambdam),
    FUN = \(i) {
        findInterval(mstar[, i], thresm[, i])
    },
    FUN.VALUE = numeric(num_obs))

Compute Factor Scores

Y

fsy <- get_fs(data = y, std.lv = TRUE)

X

In this example, we use EAP scores, which are conceptually similar to the regression scores for continuous data.

irtx <- mirt(data.frame(x), itemtype = "2PL",
             verbose = FALSE)  # IRT (2-PL) for x
marginal_rxx(irtx)  # marginal reliability
fsx <- fscores(irtx, full.scores.SE = TRUE)
empirical_rxx(fsx)  # empirical reliability

M

irtm <- mirt(data.frame(m), itemtype = "graded",
             verbose = FALSE)  # IRT (GRM) for m
marginal_rxx(irtm)  # marginal reliability
fsm <- fscores(irtm, full.scores.SE = TRUE)
empirical_rxx(fsm)  # empirical reliability

Loading and Error Variances for Product of Factor scores

Let $\tilde \eta_X$ and $\tilde \eta_M$ be factor scores of the predictor and the moderator. Then

$$\tilde \eta_X \tilde \eta_M = (\lambda_X \eta_X + e_{\tilde X}) (\lambda_M \eta_M + e_{\tilde M}) = (\lambda_X \lambda_M \eta_X \eta_M) + \lambda_X \eta_X e_M + \lambda_M \eta_M e_X + e_X e_M$$

So the loading is $\lambda_X \lambda_M$, and the error variance for the product indicator is

$$\lambda_X^2 V(\eta_X) V(e_M) + \lambda_M^2 V(\eta_M) V(e_X) + V(e_X) V(e_M)$$

Directly Using Factor Scores

# Assemble data
fs_dat <- data.frame(fsy[c(1, 3, 4)], fsx, fsm) |>
    setNames(c(
        "fsy", "rel_fsy", "ev_fsy",
        "fsx", "se_fsx", "fsm", "se_fsm"
    )) |>
    # Compute reliability; only needed for 2S-PA
    within(expr = {
        rel_fsx <- 1 - se_fsx^2
        rel_fsm <- 1 - se_fsm^2
        ev_fsx <- se_fsx^2 * (1 - se_fsx^2)
        ev_fsm <- se_fsm^2 * (1 - se_fsm^2)
    }) |>
    # Add interaction
    within(expr = {
        fsxm <- fsx * fsm
        ld_fsxm <- rel_fsx * rel_fsm
        ev_fsxm <- rel_fsx^2 * ev_fsm + rel_fsm^2 * ev_fsx + ev_fsm * ev_fsx
    })

lm(fsy ~ fsx * fsm, data = fs_dat)  # some bias

Two-Stage Path Analysis

Create OpenMx model without latent variables

fsreg_mx <- mxModel("str",
    type = "RAM",
    manifestVars = c("fsy", "fsx", "fsm", "fsxm"),
    # Path coefficients
    mxPath(
        from = c("fsx", "fsm", "fsxm"), to = "fsy",
        values = 0
    ),
    # Variances
    mxPath(
        from = c("fsy", "fsx", "fsm", "fsxm"),
        to = c("fsy", "fsx", "fsm", "fsxm"),
        arrows = 2,
        values = c(0.6, 1, 1, 1)
    ),
    # Covariances
    mxPath(
        from = c("fsx", "fsm", "fsxm"),
        to = c("fsx", "fsm", "fsxm"),
        arrows = 2, connect = "unique.pairs",
        values = 0
    ),
    mxPath(
        from = "one", to = c("fsy", "fsx", "fsm", "fsxm"),
        values = 0
    )
)
fsreg_umx <- umxLav2RAM(
    "
      fsy ~ b1 * fsx + b2 * fsm + b3 * fsxm
      fsy + fsx + fsm + fsxm ~ 1
      fsx ~~ vx * fsx + fsm + fsxm
      fsm ~~ vm * fsm + fsxm
    ",
    printTab = FALSE)

DiagrammeR::grViz(omxGraphviz(fsreg_mx))

Create loading and error covariance matrices

# Loading
matL <- mxMatrix(
    type = "Diag", nrow = 4, ncol = 4,
    free = FALSE,
    labels = c("data.rel_fsy", "data.rel_fsx", "data.rel_fsm", "data.ld_fsxm"),
    name = "L"
)
# Error
matE <- mxMatrix(
    type = "Diag", nrow = 4, ncol = 4,
    free = FALSE,
    labels = c("data.ev_fsy", "data.ev_fsx", "data.ev_fsm", "data.ev_fsxm"),
    name = "E"
)

Combine (1) and (2), and run 2S-PA

tspa_mx <-
    tspa_mx_model(
        mxModel(fsreg_umx,
                mxAlgebra(b1 * sqrt(vx), name = "stdx_b1"),
                mxAlgebra(b2 * sqrt(vm), name = "stdx_b2"),
                mxAlgebra(b3 * sqrt(vx) * sqrt(vm), name = "stdx_b3"),
                mxCI(c("stdx_b1", "stdx_b2", "stdx_b3"))),
        data = fs_dat,
        mat_ld = matL, mat_ev = matE)
# Run OpenMx
tspa_mx_fit <- mxRun(tspa_mx, intervals = TRUE)
# Summarize the results (takes a minute or so)
# Also include the X-Standardized coefficients
summary(tspa_mx_fit)
# Standard errors for X-Standardized coefficients
mxSE(m1.stdx_b1, tspa_mx_fit)
mxSE(m1.stdx_b2, tspa_mx_fit)
mxSE(m1.stdx_b3, tspa_mx_fit)

Gengrui-Zhang/R2spa documentation built on Sept. 6, 2024, 5:01 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

Gengrui-Zhang/R2spa
An R package for two-stage path analysis (2S-PA) to adjust for measurement errors

In Gengrui-Zhang/R2spa: An R package for two-stage path analysis (2S-PA) to adjust for measurement errors

Simulate Data

Compute Factor Scores

Y

X

M

Loading and Error Variances for Product of Factor scores

Directly Using Factor Scores

Two-Stage Path Analysis

R Package Documentation

Browse R Packages

We want your feedback!

Gengrui-Zhang/R2spa An R package for two-stage path analysis (2S-PA) to adjust for measurement errors

In Gengrui-Zhang/R2spa: An R package for two-stage path analysis (2S-PA) to adjust for measurement errors

Simulate Data

Compute Factor Scores

Y

X

M

Loading and Error Variances for Product of Factor scores

Directly Using Factor Scores

Two-Stage Path Analysis

R Package Documentation

Browse R Packages

We want your feedback!

Gengrui-Zhang/R2spa
An R package for two-stage path analysis (2S-PA) to adjust for measurement errors