In trbrick/mxregsem: Regularization of models in OpenMx

This vignette follows the regsem package vignette. We'll compare raw and covariance models in both RAM and LISREL against the regsem lavaan model to test out our raw data reweighting scheme.

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

# install.packages("regsem") # lavaan is a dependency
# install.packages("semPlot")
library(semPlot) # for plotting the model
library(lavaan)
library(regsem)
library(mxregsem)

Data generation

Data generation from the regsem vignette, created with lavaan because of historical reasons.

sim.mod <- "
f1 =~ 1*y1 + 1*y2 + 1*y3+ 1*y4 + 1*y5
f1 ~ 0*x1 + 0*x2 + 0*x3 + 0*x4 + 0*x5 + 0.2*x6 + 0.5*x7 + 0.8*x8
f1~~1*f1"
dat.sim = simulateData(sim.mod,sample.nobs=100,seed=12)

Lavaan Run

Model run identically with lavaan:

run.mod <- "
f1 =~ NA*y1 + y2 + y3+ y4 + y5
f1 ~ c1*x1 + c2*x2 + c3*x3 + c4*x4 + c5*x5 + c6*x6 + c7*x7 + c8*x8
f1~~1*f1
"
lav.out <- sem(run.mod,dat.sim,fixed.x=FALSE)
parameterestimates(lav.out)[6:13,] # just look at regressions

Plot with semPaths

semPaths(lav.out)

Regularization is over a somewhat customized subset of the hyperparameter space.

reg.out <- cv_regsem(lav.out,n.lambda=30,type="lasso",jump=0.04,
                     pars_pen=c("c1","c2","c3","c4","c5","c6","c7","c8"))

Manual and plotted summary:

summary(reg.out)
plot(reg.out)

OpenMx setup

Overall, we'll want some helpers to build our OpenMx models. We'll add in some extras because manifest variables must have measurement models in "pure" LISREL format.

nOutcomes <- 5
nPreds <- 8
outcomes <- paste0("y", 1:nOutcomes)             # Factor indicators
predictors <- paste0("x", 1:nPreds)           # Predictors
regLabels <- paste0("c", 1:nPreds)          # The elements we're regularizing
loadLabels <- paste0("l", 1:nOutcomes)      # Other estimated parameters
factors <- paste0("f", 1)                # The factor
exolats <- paste0("l", predictors)       # LISREL Exogenous latents

covData <- cov(dat.sim)                 # Covariance data
nObs <- nrow(dat.sim)

RAM Covariance model

The OpenMx RAM model is the most straightforward. To start with, we'll just build the model and fit it to the covariance data directly.

ramCov <- mxModel("RAMCov", type="RAM", manifestVars =c(outcomes, predictors), latentVars =factors,

                  # Factor loadings:
                  mxPath("f1", outcomes, free=TRUE, values=1, labels=loadLabels),

                  # Predictor paths:
                  mxPath(predictors, factors, free=TRUE, values=1, labels=regLabels),

                  # Manifest Residuals
                  mxPath(c(outcomes, predictors), arrows=2, values=.5),

                  # Factor Residual:
                  mxPath("f1", arrows=2, free=FALSE, values=1),

                  # Data
                  mxData(type="cov", observed = covData, numObs = nObs)
)

ramCov <- mxModel(ramCov,                   # Regularization
                  mxRegularizeLASSO(regLabels, "LASSO", lambda=0, lambda.min=0, lambda.step=.04, lambda.max=10)
)

Let's run and plot, just like above.

ramCovFit <- mxPenaltySearchExternal(ramCov, ebicGamma = 1, epsilon = .01, returnConstrained = TRUE)
summary(ramCovFit)
plotReg(ramCovFit)

Luckily, the models output the results in the right places for a direct comparison:

mxRAMcov.est <- round(sapply(c(loadLabels, regLabels), mxEvalByName, ramCovFit), 3)
lav.est <- reg.out$final_pars[1:13]
omxCheckCloseEnough(mxRAMcov.est, lav.est, .1)
cbind(mxRAMcov.est, lav.est)

Not terrible considering these are drastically different approaches to fitting.

RAM FIML model

We'll try it again with the same model, except this time with raw data.

ramFIML <- mxModel("RAMFIML", type="RAM", manifestVars =c(outcomes, predictors), latentVars =factors,

                  # Factor loadings:
                  mxPath("f1", outcomes, free=TRUE, values=1, labels=loadLabels),

                  # Predictor paths:
                  mxPath(predictors, factors, free=TRUE, values=1, labels=regLabels),

                  # Manifest Residuals
                  mxPath(c(outcomes, predictors), arrows=2, values=.5),

                  # Factor Residual:
                  mxPath("f1", arrows=2, free=FALSE, values=1),

                  # Means needed for FIML
                  mxPath("one", c(outcomes, predictors), arrows=1, free=TRUE, values=colMeans(dat.sim)),

                  # Data
                  mxData(type="raw", observed = dat.sim)
)

ramFIML <- mxModel(ramFIML,                   # Regularization
                  mxRegularizeLASSO(regLabels, "LASSO", lambda=0, lambda.min=0, lambda.step=.04, lambda.max=10)
)

Let's run and plot, just like above.

ramFIMLFit <- mxPenaltySearchExternal(ramFIML, ebicGamma = 1, epsilon = .05, returnConstrained = TRUE)
summary(ramFIMLFit)
plotReg(ramFIMLFit)

Luckily, the models output the results in the right places for a direct comparison:

mxRAMFIML.est <- round(sapply(c(loadLabels, regLabels), mxEvalByName, ramFIMLFit), 3)
lav.est <- reg.out$final_pars[1:13]
omxCheckCloseEnough(mxRAMFIML.est, lav.est, .1)
cbind(mxRAMFIML.est, mxRAMcov.est, lav.est)