Nothing
# Complex sampling analysis of SEM models
# Daniel Oberski, 2015-11-03
lavaan.survey <-
function(lavaan.fit, survey.design,
estimator=c("MLM", "MLMV", "MLMVS", "WLS", "DWLS", "ML"),
estimator.gamma=c("default","Yuan-Bentler")) {
# Not all estimators in lavaan make sense to use here, therefore matching args
estimator <- match.arg(estimator)
if(estimator=="ML") warning("Estimator 'ML' will not correct standard errors and chi-square statistic.")
estimator.gamma <- match.arg(estimator.gamma) # Smoothing Gamma or not
# Names of the observed variables (same for each group)
ov.names <- lavaanNames(lavaan.fit, type="ov", group=1)
# The MP-inverse duplication matrix is handy for removing redundancy
Dplus <- lavaan::lav_matrix_duplication_ginv( length(ov.names) )
# Create a formula that includes all observed variables for svymean
ov.formula <- as.formula(paste("~", paste(ov.names, collapse="+")))
# <no. group>-sized lists that will contain the asy. covariance matrix,
# and sample covariance matrix and mean vector
ngroups <- lavInspect(lavaan.fit, "ngroups")
Gamma <- vector("list", ngroups)
sample.cov <- vector("list", ngroups)
sample.mean <- vector("list", ngroups)
sample.nobs <- vector("list", ngroups)
for(g in seq(ngroups)) {
if(ngroups > 1) {
# Use survey::subset to create data groups
survey.design.g <-
subset(survey.design, eval(parse(text=sprintf("%s == '%s'",
lavInspect(lavaan.fit, "group"),
lavInspect(lavaan.fit, "group.label")[[g]]))))
}
else { # In case of no groups, just use the original survey design object.
survey.design.g <- survey.design
}
# Function that takes survey design and returns the Gamma & observed moments
get.stats.design <- function(survey.design.g, sample.nobs.g) {
sample.cov.g <- as.matrix(svyvar(ov.formula, design=survey.design.g, na.rm=TRUE))
# survey package returns the variance matrix of the (co)variances as attr:
Gamma.cov.g <- attr(sample.cov.g, "var")
# Remove (co)variances wrt redundant elements of covs; not used by lavaan.
Gamma.cov.g <- Dplus %*% Gamma.cov.g %*% t(Dplus)
# Same for mean vector
sample.mean.g <- svymean(ov.formula, design=survey.design.g, na.rm=TRUE)
Gamma.mean.g <- attr(sample.mean.g, "var")
# Join asy. variance matrices for means and covariances
# TODO add offdiag
Gamma.g <- lavaan::lav_matrix_bdiag(Gamma.mean.g, Gamma.cov.g)
Gamma.g <- Gamma.g * sample.nobs.g # lavaan wants nobs * Gamma.
# Since the above nonparametric estimate of Gamma can be unstable, Yuan
# and Bentler suggested a model-smoothed estimate of it, optional here:
if(estimator.gamma == "Yuan-Bentler") {
r <- get.residuals(lavaan.fit) # Iff these asy = 0, all will be well...
Gamma.g <- Gamma.g + (sample.nobs.g/(sample.nobs.g - 1)) * (r %*% t(r))
}
# Get rid of attribute, preventing errors caused by lazy evaluation
# (This has to be at the end or lazy evaluation mayhem will ensue)
attr(sample.cov.g, "var") <- NULL
tmp <- as.vector(sample.mean.g)
names(tmp) <- names(sample.mean.g)
sample.mean.g <- tmp
list(Gamma.g=Gamma.g, sample.cov.g=sample.cov.g, sample.mean.g=sample.mean.g)
}
# The data may be a list of multiply imputed datasets
if(!any(class(survey.design.g) == "svyimputationList")) {
# If no imputations, just use usual no. observations and asy variance
sample.nobs.g <- lavInspect(lavaan.fit, "nobs")[[g]]
stats <- get.stats.design(survey.design.g, sample.nobs.g)
}
else { # In case of multiply imputed data
# Not only can nobs differ from lavaan.fit, but also per imputation
sample.nobs.g <- get.sample.nobs(survey.design.g, lavInspect(lavaan.fit, "group"))
# Retrieve point and variance estimates per imputation,
# [TODO: this line will not be correct when nobs differs over imputations]
stats.list <- lapply(survey.design.g[[1]], get.stats.design, sample.nobs=sample.nobs.g)
m <- length(stats.list) # no. imputation
# Point estimates are average over imputations
sample.cov.list <- lapply(stats.list, `[[`, 'sample.cov.g')
sample.cov.g <- Reduce(`+`, sample.cov.list) / m
cov.df <- Reduce(`rbind`, lapply(sample.cov.list, lavaan::lav_matrix_vech))
sample.mean.list <- lapply(stats.list, `[[`, 'sample.mean.g')
sample.mean.g <- Reduce(`+`, sample.mean.list) / m
mean.df <- Reduce(`rbind`, sample.mean.list)
# Variance estimates depend on within- and between-imputation variance:
Gamma.within <- Reduce(`+`, lapply(stats.list, `[[`, 'Gamma.g')) / m
Gamma.between <- cov(cbind(mean.df, cov.df))
Gamma.g <- Gamma.within + ((m + 1)/m) * Gamma.between
# set stats with multiple imputation point and variance estimates
stats <- list(Gamma.g=Gamma.g, sample.cov.g=sample.cov.g, sample.mean.g=sample.mean.g)
}
# Augment the list for this group
Gamma[[g]] <- stats$Gamma.g
sample.cov[[g]] <- stats$sample.cov.g
sample.mean[[g]] <- stats$sample.mean.g
sample.nobs[[g]] <- sample.nobs.g
} # End of loop over groups
new.call <- lavInspect(lavaan.fit, "call")
new.call$data <- NULL # Remove any data argument
new.call$sample.cov <- sample.cov # Set survey covariances
new.call$sample.mean <- sample.mean # Set survey means
new.call$sample.nobs <- sample.nobs
new.call$estimator <- estimator # Always use Satorra-Bentler or WLS estimator
if(substr(estimator, 1, 2) == "ML") { # ML, robust options
# Set asymptotic covariance matrix of sample means and covariances
new.call$NACOV <- Gamma
}
if(estimator %in% c("WLS", "DWLS")) {
# Weighted Least Squares, adjust the weight matrix: MP inverse of Gamma
# Note that Gamma may be singular.
new.call$WLS.V <- lapply(Gamma, ginv)
}
new.fit <- eval(as.call(new.call)) # Run lavaan with the new arguments
if(estimator %in% c("WLS", "DWLS")) return(new.fit) # We are done for WLS
# For ML with robust se's, check that a possibly singular Gamma has not
# created dependencies in the parameter estimates.
# (Code below should really be implemented in lavaan...)
evs.too.small <- sapply(Gamma, function(Gamma.g) {
any(eigen(Gamma.g, only.values=TRUE)$values < (.Machine$double.eps*100))
})
if(any(evs.too.small)) {
V.est <- lavaan::vcov(new.fit)
if(any(Re(eigen(V.est, only.values=TRUE)$values) < (.Machine$double.eps*100))) {
long.string <- sprintf("Some of the standard errors may not be trustworthy.
Some of the observed covariances or means are
collinear, and this has generated collinearity in your
parameter estimates. This may be a sample size issue,
missing data problem, or due to having too few
clusters relative to the number of parameters. Problem
encountered in group(s) %s",
paste(which(evs.too.small), collapse=", "))
warning(strwrap(long.string, width=9999, simplify=TRUE))#gotta love it
}
}
new.fit
}
# Obtain residuals from a lavaan fit object, concatenating means w/ covariances
# (used in Yuan-Bentler correction)
get.residuals <- function(fit) {
r <- lavaan::residuals(fit)
c(r$mean, lavaan::lav_matrix_vech(r$cov))
}
# Obtain sample size from multiply imputed svydesign object.
# In case sample size differs over imputations, takes median over imputations.
# TODO: Does not work with multiple group yet.
get.sample.nobs <- function(svy.imp.design, group=NULL) {
nobs.imp <- lapply(svy.imp.design[[1]], function(des) {nrow(des$variables)})
return(median(unlist(nobs.imp)))
}
# Use the pFsum function from the survey package to obtain p value for the
# overall model fit using an F reference distribution where the
# denominator degrees of freedom is the design degrees of freedom.
# An anonymous reviewer for J Stat Software suggested that
# "in surveys with small numbers of primary sampling units this sort of
# correction has often improved the
# behavior of tests in other contexts."
# The eigenvalues of the U.Gamma matrix will be the coefficients in the
# mixture of F's distribution (Skinner, Holt & Smith, pp. 86-87).
pval.pFsum <- function(lavaan.fit, survey.design, method = "saddlepoint") {
# Check that Satorra-Bentler or Satterthwaite adjustment is present
if(!lavInspect(lavaan.fit, "options")$test %in%
c("satorra.bentler", "mean.var.adjusted", "Satterthwaite")) {
stop("Please refit the model with Satorra-Bentler (MLM) or Satterthwaite (MLMVS) adjustment.")
}
UGamma <- lavTech(lavaan.fit, "ugamma")
real.eigen.values <- Re(eigen(lavTech(lavaan.fit, "ugamma"), only.values = TRUE)$values)
return(survey::pFsum(x=fitMeasures(lavaan.fit, "chisq"), df=rep(1, length(real.eigen.values)),
a=real.eigen.values, ddf=survey::degf(survey.design), lower.tail=FALSE,
method=method))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.