Disclaimer

This is a test case for regularization of SEM in OpenMx. Expect regular changes to the interface until we manage to get things right.

Regularized MIMIC model in mxRegSEM

We need to load the appropriate libraries and a data set from lavaan. We'll want to switch to a different example just to avoid the dependency, but for now we can assume everybody comfortable with OpenMx is comfortable installing lavaan as well. We'll also be using regsem with lavaan as the test case, so the dependency is there regardless.

We load the example from the HolzingerSwineford data set.

library(mxregsem)
library(regsem)
mimicHS <- as.matrix(HolzingerSwineford1939[,c(2,3,6,7:15)])
covMIMIC <- cov(mimicHS, use="pairwise.complete")

The OpenMx model looks like this:

# Specify variables
indicators <- paste0("x", 1:9)
covariates <- c("sex","ageyr","grade")
manifests <- c(indicators, covariates)
latents <- c("f1") # latent variable name
colats <- paste0("var", indicators)
pen.seq <- c(0,(seq(1,sqrt(nrow(mimicHS)),length.out=50)**2))
pen.val <- pen.seq[40]

# Build the model
mimicModel <- mxModel("MIMIC", 
                      type="RAM", manifestVars=manifests, latentVars=latents,
                      # type="LISREL", manifestVars=list(exo=covariates, endo=indicators), latentVars=list(exo=colats, endo=latents),

                      # Saturated Predictor Covariances:
                      mxPath(covariates, arrows=2, connect="unique.bivariate", free=TRUE, values=-.2, labels=c("C_sex_ageyr", "C_sex_grade", "C_ageyr_grade")),

                      # Error variances:
                      mxPath(from=c(manifests, latents), arrows=2, free=TRUE, connect="single", values=1, labels=paste0("V_", c(manifests, latents))),

                      # Means (saturated means model):
                      # mxPath(from="one", to=manifests, values=c(rep(5, length(indicators)), rep(1, length(covariates))), free=FALSE),

                      # Loadings:
                      mxPath(from="f1", to=indicators, values=1, free=c(FALSE, rep(TRUE, length(indicators)-1)), labels=paste0("F_to_", indicators)),

                      # Fix Factor variance?

                      # Covariate paths
                      mxPath(from=covariates, to="f1",  values=.5, labels=paste0("reg", 1:length(covariates)), free=TRUE),

                      # Data
                      mxData(observed = covMIMIC,
                             type = "cov",
                             # means=colMeans(mimicHS, na.rm=TRUE),
                             numObs=nrow(mimicHS))
                      # mxFitFunctionWLS(),
                      # mxData(observed = data.frame(na.omit(mimicHS)), type = "raw")

                      )
regMimicModel <- mxModel(mimicModel,
                      mxRegularizeLASSO(paste0("reg", 1:length(covariates)), name="LASSO", lambda = 0, lambda.max=100, lambda.step = 1)
                      )
regMIMIC <- mxRun(mimicModel)

For simplicity, we'll avoid running tryhard.

regMIMIC <- mxTryHard(regMimicModel)
cvMIMIC <- mxPenaltySearchExternal(regMimicModel, epsilon=.001) # seq(0,309,by=1)), ebicGamma = 0) # pen.seq), ebicGamma=0, epsilon=1e-3) # 

Comparison model in regsem:

# regsem
mimic.lavmod <- "
f1 =~ x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9
f1 ~ sex + ageyr +grade
sex ~~ ageyr
ageyr ~~ grade
sex ~~ grade
"
mimic.lavout <- sem(mimic.lavmod,sample.cov=covMIMIC, sample.nobs=nrow(mimicHS))
summary(mimic.lavout)

mimic.cvout <- cv_regsem(mimic.lavout, mult.start = TRUE, jump=.001, n.lambda=70)

There's some annoyance in trying to line up the names for easy comparison, but here we go:

# Organize names
params <- summary(regMIMIC)$parameters
oneway <- grep("A[\\[]", params$name)
twoway <- grep("S[\\[]", params$name)
means <- grep("M[\\[]", params$name)
named <- startsWith(params$name, prefix = "reg")
estimates <- round(params$Estimate, 3)
names(estimates)[named] <- c("sex -> f1", "ageyr -> f1", "grade -> f1")
names(estimates)[oneway] <- paste(params$col[oneway], '->', params$row[oneway])
names(estimates)[twoway] <- paste(params$col[twoway], '~~', params$row[twoway])
names(estimates)[means] <- paste(params$col[means], '~~', 1)
matches <- intersect(names(mimic.cvout$parameters[19,]), names(estimates))

And finally we test the results:

omxCheckCloseEnough(mimic.cvout$parameters[19,matches], estimates[matches], .02) # First approximation.
cbind(regsem=mimic.cvout$parameters[19,matches],mxregsem=round(estimates[matches], 3), pcterror=round((mimic.cvout$parameters[19,matches]-estimates[matches])/max(mimic.cvout$parameters[19,matches],.01)*100, 1))
# Plotting checks
plot(mimic.cvout)
plotReg(cvMIMIC)


trbrick/mxregsem documentation built on Nov. 18, 2020, 7:30 p.m.