Nothing
# How does lavaan work?
library(lavaan)
# PART 1: from syntax to matrices
model <- 'f =~ x1 + a*x2 + 3*x3'
# parsing the syntax
lavParseModelString(model, as.data.frame. = TRUE)
# creates a 'FLAT' initial parameter table
FLAT <- lavParseModelString(model)
lavNames(FLAT)
lavNames(FLAT, "lv")
# lavaanify()
# - first creates FLAT
# - needs information from the data (eg number of groups)
# - builds the initial parameter table
# - the parameter table is the core representation of the model in lavaan
lavaanify(model)
# if sem/cfa is used, more parameters are set free
lavaanify(model, auto = TRUE)
# example: equality constraints using labels
model <- 'f =~ x1 + a*x2 + a*x3'
lavaanify(model, auto = TRUE)
# alternative for 'simple' equality constraints
# (will become the default soon)
lavaanify(model, auto = TRUE, ceq.simple = TRUE)
# explicit equality constraints
model <- 'f =~ x1 + a*x2 + b*x3; a == b'
lavaanify(model, auto = TRUE, ceq.simple = TRUE)
# multiple groups/blocks
model <- 'f =~ x1 + c(a1,a2)*x2 + c(b1, b2)*x3'
lavaanify(model, auto = TRUE, ngroups = 2)
# matrix representation: LISREL-all-y
model <- 'f =~ x1 + x2 + x3'
PT <- lavaanify(model, auto = TRUE, as.data.frame. = TRUE)
PT
# map every parameter to a matrix element
MAT <- as.data.frame(lavaan:::lav_lisrel(PT))
cbind(PT, MAT)
# alternative matrix representation: RAM
MAT <- as.data.frame(lavaan:::lav_ram(PT))
cbind(PT, MAT)
# first create default lavoptions
fit <- sem(model = 'f =~x1 + x2 + x3', data = HolzingerSwineford1939)
lavoptions <- fit@Options
# create 'Model' object (S4 class)
# (often called lavmodel in internal functions)
Model <- lavaan:::lav_model(PT, lavoptions = lavoptions)
# another representation of the model, more suitable for computations
Model@nmat
Model@isSymmetric
Model@dimNames
# the matrix representation is in GLIST (group list)
# GLIST may contain the matrices of multiple groups, levels, blocks, ...
MLIST <- Model@GLIST
# MLIST is the matrix representation of a single block
# MLIST is used to compute model-based statistics, e.g.,
# the model-implied variance-covariance matrix
lavaan:::computeSigmaHat.LISREL(MLIST)
# PART 2: lavaan() workflow:
# 18 steps in total (many of them can be skipped)
# 1. lavParseModelString + ov.names + ngroups + nlevels ...
# 2. set/check options (eg meanstructure = TRUE)
# lav_options_set() # lav_options.R
# 3. check the (raw) data, or the sample statistics
# lavData()
lavdata <- lavaan:::lavData(data = HolzingerSwineford1939,
ov.names = c("x1", "x2", "x3"))
slotNames(lavdata)
lavdata@ov.names
lavdata@ngroups
lavdata@X[[1]] # the raw data in group/block 1
# 4. lavpartable: create the parameter table
# needs to know: how many groups, how many categories, ...
lavpartable <- lavaanify(model = FLAT, auto = TRUE)
# 4b. compute and store partable attributes (ov.names, ov.names.x, ...)
lavpta <- lav_partable_attributes(lavpartable)
lavpta$vnames$ov
# 5. lavsamplestats
# compute sample statistics (cov, mean, Gamma, ...)
lavsamplestats <- lavaan:::lav_samplestats_from_data(lavdata,
lavoptions = lavoptions)
slotNames(lavsamplestats)
lavsamplestats@cov[[1]] # observed covariance matrix first group/block
lavsamplestats@icov[[1]] # inverse observed covariance matrix
lavsamplestats@cov.log.det[[1]] # log determinant covariance matrix
# 6. lavh1
# summary statistics of the 'unrestricted' (h1) model
# with complete data, this is trivial (cov, mean)
# when data is missing, we need to estimate cov/mean using EM
lavh1 <- lavaan:::lav_h1_implied_logl(lavdata = lavdata,
lavsamplestats = lavsamplestats,
lavpta = lavpta,
lavoptions = lavoptions)
lavh1$implied
# 7. parameter bounds (needs lavh1$implied)
# only used if bounded estimation is requested
lavoptions$bounds <- "standard"
lavoptions$optim.bounds <- list(lower = c("ov.var", "loadings"),
upper = c("ov.var", "loadings"),
min.reliability.marker = 0.1)
lavpartable <- lavaan:::lav_partable_add_bounds(partable = lavpartable,
lavh1 = lavh1, lavdata = lavdata, lavsamplestats = lavsamplestats,
lavoptions = lavoptions)
# remove bounds again to save space
lavpartable$lower <- NULL
lavpartable$upper <- NULL
# 8. compute starting values
lavpartable$start <- lavaan:::lav_start(start.method = lavoptions$start,
lavpartable = lavpartable,
lavsamplestats = lavsamplestats)
lavpartable
# 9. lavmodel: create internal model representation (with GLIST)
lavmodel <- lavaan:::lav_model(lavpartable = lavpartable,
lavpta = lavpta,
lavoptions = lavoptions)
lavmodel@GLIST
# 10. lavcache: compute some additional summary statistis
# only used when estimator = "PML", and "MML" (for now)
lavcache <- list()
# 11. estimation
# - default: lav_model_estimate() + nlminb() (quasi-Newton optimization)
# - lav_optim_gn(): Gauss-Newton optimization
# - lav_optim_noniter(): non-iterative procedures
# - lav_mvnorm_cluster_em_h0: EM for multilevel models
current.verbose <- lav_verbose()
if (lav_verbose(TRUE)) on.exit(lav_verbose(current.verbose), TRUE)
x <- try(lavaan:::lav_model_estimate(lavmodel = lavmodel,
lavpartable = lavpartable,
lavsamplestats = lavsamplestats,
lavdata = lavdata,
lavoptions = lavoptions,
lavcache = lavcache))
# store parameters in lavmodel
lavmodel <- lav_model_set_parameters(lavmodel, x = as.numeric(x))
# store parameters in @ParTable$est
lavpartable$est <- lav_model_get_parameters(lavmodel = lavmodel,
type = "user", extra = TRUE)
lavpartable
# 12. lavimplied + lavloglik
# store model implied statistics in @implied
# if likelihood-based method, store also loglik in @loglik
lavimplied <- lav_model_implied(lavmodel)
lavloglik <- lavaan:::lav_model_loglik(lavdata = lavdata,
lavsamplestats = lavsamplestats,
lavimplied = lavimplied,
lavmodel = lavmodel,
lavoptions = lavoptions)
# 13. compute 'vcov': the variance matrix of the (free) parameters
# this is needed to compute standard errors
VCOV <- lavaan:::lav_model_vcov(lavmodel = lavmodel,
lavsamplestats = lavsamplestats,
lavoptions = lavoptions,
lavdata = lavdata,
lavpartable = lavpartable,
lavcache = lavcache,
lavimplied = lavimplied,
lavh1 = lavh1)
VCOV
# prepare lavvcov slot
lavvcov <- list(se = lavoptions$se, information = lavoptions$information,
vcov = VCOV)
# store standard errors in parameter table
lavpartable$se <- lavaan:::lav_model_vcov_se(lavmodel = lavmodel,
lavpartable = lavpartable,
VCOV = VCOV)
# 14. compute global test statistic (chi-square)
# trivial for standard test (=N * F_ML)
# more work for 'robust' test statistics (eg test = "satorra-bentler")
TEST <- lavaan:::lav_model_test(lavmodel = lavmodel,
lavpartable = lavpartable,
lavpta = lavpta,
lavsamplestats = lavsamplestats,
lavimplied = lavimplied,
lavh1 = lavh1,
lavoptions = lavoptions,
x = x,
VCOV = VCOV,
lavdata = lavdata,
lavcache = lavcache,
lavloglik = lavloglik)
lavtest <- TEST
# 14bis. lavfit
# store 'fit'information
# no longer used, but if I remove it, a dozen (old) packages break...
# 15. fit baseline model
fit.indep <- try(lavaan:::lav_object_independence(object = NULL,
lavsamplestats = lavsamplestats,
lavdata = lavdata,
lavcache = lavcache,
lavoptions = lavoptions,
lavpta = lavpta,
lavh1 = lavh1), silent = TRUE)
# 16. rotation
# only needed if efa() blocks are defined
# lavmodel <- lav_model_efa_rotate(lavmodel = lavmodel,
# lavoptions = lavoptions)
# 17. create lavaan object
# don't run, as some pieces have not been created...
# lavaan <- new("lavaan",
# version = as.character(packageVersion("lavaan")),
# call = mc, # match.call
# timing = timing, # list
# Options = lavoptions, # list
# ParTable = lavpartable, # list
# pta = lavpta, # list
# Data = lavdata, # S4 class
# SampleStats = lavsamplestats, # S4 class
# Model = lavmodel, # S4 class
# Cache = lavcache, # list
# Fit = lavfit, # S4 class
# boot = lavboot, # list
# optim = lavoptim, # list
# implied = lavimplied, # list
# loglik = lavloglik, # list
# vcov = lavvcov, # list
# test = lavtest, # list
# h1 = lavh1, # list
# baseline = lavbaseline, # list
# internal = list(), # empty list
# external = list() # empty list
# )
# 18. post-fitting check of parameters
# lavInspect(lavaan, "post.check")
# the sem/cfa/growth function just set some
# options to user-friendly settings:
# default options for sem/cfa call
# mc$int.ov.free = TRUE
# mc$int.lv.free = FALSE
# #mc$auto.fix.first = !std.lv
# mc$auto.fix.first = TRUE # (re)set in lav_options_set
# mc$auto.fix.single = TRUE
# mc$auto.var = TRUE
# mc$auto.cov.lv.x = TRUE
# mc$auto.cov.y = TRUE
# mc$auto.th = TRUE
# mc$auto.delta = TRUE
# mc$auto.efa = TRUE
# PART 3: extractor functions
fit <- sem(model = 'f =~ x1 + x2 + x3 + x4', data = HolzingerSwineford1939)
parameterEstimates(fit)
# = subset of parTable(fit), but with additional columsn (z, pvalues, ...)
parameterEstimates(fit, output = "text")
# this is a big part of the summary() output
# summary()
# first creates summary output as a list:
out <- summary(fit)
names(out)
class(out)
# with a specific print function
out
# fit indices
fitMeasures(fit)
fitMeasures(fit, output = "matrix")
fitMeasures(fit, output = "text")
# lavInspect/lavTech
lavInspect(fit, "est")
lavTech(fit, "est")
# PART 4: lavaan slots
fit <- sem(model = 'f =~ x1 + x2 + x3 + x4', data = HolzingerSwineford1939)
class(fit)
slotNames(fit)
# 1. lavaan version used to create this object
fit@version
# 2. user-specified call
fit@call
# 3. timings of several substeps
unlist(fit@timing)
# 4. options used for this object
unlist(fit@Options)
# 5. the parameter table
fit@ParTable # list
parTable(fit) # return as data.frame
# 6. the parameter table attributes
names(fit@pta)
fit@pta$vnames$ov
fit@pta$vidx$ov
fit@pta$nfac
fit@pta$nblocks
# 7. Data slot (S4)
fit@Data # has its own print function
slotNames(fit@Data)
as.data.frame(fit@Data@ov)
str(fit@Data)
# 8. SampleStats (S4)
fit@SampleStats
slotNames(fit@SampleStats)
fit@SampleStats@cov[[1]] # list with element per group
# 9. Model (S4)
slotNames(fit@Model)
fit@Model@x.free.idx # parameter index in parameter table
fit@Model@m.free.idx # parameter index in model matrix
# 10. Cache (list)
# cached information, only used for estimator PML and MML (for now)
# 11. Fit
# deprecated, only kept to avoid breaking some (old) packages (eg rsem)
# 12. boot (list)
# only used if bootstrapping was used
fitb <- sem(model = 'f =~ x1 + x2 + x3 + x4', data = HolzingerSwineford1939,
se = "bootstrap", bootstrap = 100L, verbose = TRUE)
head(fitb@boot$coef)
# 13. optim -- info about (iterative) optimization process
str(fit@optim)
# 14. loglik -- loglikelihood information (ML estimation only)
unlist(fit@loglik)
# 15. implied -- implied sample statistics (per group)
fit@implied
# 16. vcov -- variance-covariance matrix of the (free) model parameters
fit@vcov
# 17. test -- global test statistics
fit2 <- sem(model = 'f =~ x1 + x2 + x3 + x4', data = HolzingerSwineford1939,
test = "satorra-bentler")
names(fit2@test)
fit2@test$satorra.bentler
# 18. h1 -- sample statistics + logl unrestricted/saturated model
# often similar to info in @SampleStats, but not if missing data,
# multilevel data, ...
fit@h1$implied # this is what is used for lavInspect(fit, "implied")
fit@h1$logl
# 19. baseline -- information about the baseline model (needed for CFI/TLI)
names(fit@baseline)
as.data.frame(fit@baseline$partable)
fit@baseline$test$standard
# 20. internal -- list to store specials flags/info, for internal use only
# 21. external -- list to store specials flags/info if you are an external
# developer
# PART 5: source code structure
# 5 main functions:
# 1) xxx_lavaan.R
# 2) xxx_lavaanList.R
# 3) xxx_efa.R (new in 0.6-13)
# 4) xxx_sam.R
# 5) xxx_fsr.R (only for repro reasons; access via lavaan:::fsr())
# mixture of new, old, and very old code
# very old code: functions do not start with lav_ prefix
# for example:
lavaan:::computeSigmaHat
# files that start with ctr_ contain contributed code
# written by others
# for example: ctr_pml_plrt.R (written by Myrsini Katsikatsou)
# (with only minor edits by YR)
# 00class.R contains S4 class definitions
# 00generic.R defines 'generic' functions (that can be specialized)
# fitMeasures(), lavInspect(), lavTech()
# zzz_OLDNAMES.R contains aliases for (very old) function names
# that are still used by some packages
# eg computeExpectedInformation <- lav_model_information_expected
# zzz.R traditionally contains the (in)famous startup message
# most files start with the lav_ prefix
# the second term often refers the type of object for which the file
# contains functions, for example
# lav_matrix.R
# lav_partable_subset.R
# lav_model_estimate.R
# lav_object_post_check.R
# but sometimes, it refers to what is created, or what is done
# lav_test.R # creates @test slot
# lav_print.R # prints various objects
# for ML, an important set of functions are:
# lav_mvnorm.R
# lav_mvnorm_h1.R
# lav_mvnorm_missing.R
# lav_mvnorm_missing_h1.R
# lav_mvnorm_cluster.R
# lav_mvnorm_cluster_missing.R
# the standard ML discrepancy function is found at the top of
# lav_objective.R
# sometimes, lavaan needs to do trivial things (like regression), but
# needs a bit more info than what we get from standard functions (like lm):
# lav_uvreg.R # univariate regression
# lav_uvord.R # univariate probit regression
# lav_mvreg.R # multivariate regression
# lav_mvreg_cluster.R # multivariate twolevel regression
# during iterative estimation, we need to compute the value of the
# objective function (i.e., the discrepancy function) many times
# lav_model_estimate.R # runs the iterations
# and defines the following objective function:
# function to be minimized
objective_function <- function(x, verbose = FALSE, infToMax = FALSE,
debug = FALSE) {
# 2. unpack
if(lavmodel@eq.constraints) {
x <- as.numeric(lavmodel@eq.constraints.K %*% x) +
lavmodel@eq.constraints.k0
}
# 1. unscale
x <- x / parscale
# update GLIST (change `state') and make a COPY!
GLIST <- lav_model_x2GLIST(lavmodel, x = x)
fx <- lav_model_objective(lavmodel = lavmodel,
GLIST = GLIST,
lavsamplestats = lavsamplestats,
lavdata = lavdata,
lavcache = lavcache,
verbose = verbose)
# only for PML: divide by N (to speed up convergence)
if(estimator == "PML") {
fx <- fx / lavsamplestats@ntotal
}
if(debug || verbose) {
cat(" objective function = ",
sprintf("%18.16f", fx), "\n", sep="")
}
if(debug) {
cat("Current free parameter values =\n"); print(x); cat("\n")
}
if(lavoptions$optim.partrace) {
PENV$PARTRACE <- rbind(PENV$PARTRACE, c(fx, x))
}
# for L-BFGS-B
#if(infToMax && is.infinite(fx)) fx <- 1e20
if(!is.finite(fx)) {
fx.group <- attr(fx, "fx.group")
fx <- 1e20
attr(fx, "fx.group") <- fx.group # only for lav_model_fit()
}
fx
}
# lav_model_objective() can be found in lav_model_objective.R
# 1) compute model implied summary statistics (for each group)
# using eg computeSigmaHat()
# 2) compute value discrepancy function
# eg estimator.GLS() or estimator.ML() # see lav_objective.R
# 3) if multiple groups, combine the values using group weights
# 4) return value (fx)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.