## R code to run LCA/LPA models on datasets generated from a 3-class LCA (see GenData_MixSim.R)
## This script runs latent class models using an external Monte Carlo analysis implemented by Mplus,
## as well as the individual replications. The script then stores the results of readModels() for
## more detailed model diagnostics, plots, etc.
## Author: Michael Hallquist
## Last Updated: 31 Oct 2017
## It is necessary to set the working directory if you are not working from the Rstudio project
#setwd("/path/to/directory/containing/simulation/R/scripts")
#source functions for syntax generation and temp file cleanup
source("helper_functions.R")
## Define directory to store all results, both Mplus outputs and RData structures containing parsed results
inputdir <- "~/mplus_tmp/data" #location to "data" folder, the root of replication files created by GenData_MixSim.R
## outputdir <- "data/cfa_montecarlo_results" #can be relative to working directory
outputdir <- "~/mplus_tmp/fmm_outputs" #or can be absolute
## ensure that the destination directory is created
dir.create(outputdir, showWarnings = FALSE, recursive=TRUE)
## location of Mplus executable to be called by runModels
Mplus_command <- "/Users/mnh5174/Applications/Mplus/Mplus"
## Mplus_command <- "/Applications/Mplus/mplus"
library(MplusAutomation)
library(foreach)
library(doSNOW)
## You can control how many replications are estimated here.
## By default, GenData_MixSim.R generates 1000 replications per condition.
## This variable defines how many replications are fit below
replications <- 1:10
##Load a list of all models to be run (generated by GenData_MixSim.R)
load(file.path(inputdir, "lcaMasterList.RData"))
njobs <- 8
setDefaultClusterOptions(master="localhost")
clusterobj <- makeSOCKcluster(njobs)
registerDoSNOW(clusterobj)
## Use foreach to estimate models in parallel
## Note that we estimate models within tempdir(), which should be a unique directory for each R worker process
fmm <- foreach(m=iter(lcalist), .inorder=FALSE, .multicombine=TRUE, .packages=c("MplusAutomation")) %dopar% {
## do not re-estimate replications that have already completed
if (file.exists(file.path(outputdir, m$fname_resultsRData))) {
cat("Already processed replications:", m$fname_resultsRData, "\n")
return(NULL) #next
}
## verify that replications object exists (generated by GenData_MixSim.R)
if (!file.exists(file.path(inputdir, m$fname_sourceReps))) {
cat("Cannot open replications file: ", file.path(inputdir, m$fname_sourceReps), "\n")
return(NULL) #next
}
if (m$fc!=3) {
#only running 3-class model for now (just to speed up simulation). Remove this if you want 2:5
return(NULL) #next
}
load(file=file.path(inputdir, m$fname_sourceReps))
##setup model syntax for means across classes
if (m$nc==3) {
means <- list(
"%c#1%\n",
paste0(" [x1-x", m$nv, "*", -m$sep, "];\n"),
"%c#2%\n",
paste0(" [x1-x", m$nv, "*0];\n"),
"%c#3%\n",
paste0(" [x1-x", m$nv, "*", m$sep, "];\n"),
"%c#4%\n", #for misspecified models above 3 true classes
paste0(" [x1-x", m$nv, "*0];\n"),
"%c#5%\n",
paste0(" [x1-x", m$nv, "*0];\n")
)
propMeans <- list(
" !default to equal sizes\n",
" [c#1*0];\n",
" [c#2*0];\n",
" [c#3*0];\n",
" [c#4*0];\n"
)
} else if (m$nc==5) {
means <- list(
"%c#1%\n",
paste0(" [x1-x", m$nv, "*", -2*m$sep, "];\n"),
"%c#2%\n",
paste0(" [x1-x", m$nv, "*", -m$sep, "];\n"),
"%c#3%\n",
paste0(" [x1-x", m$nv, "*0];\n"),
"%c#4%\n",
paste0(" [x1-x", m$nv, "*", m$sep, "];\n"),
"%c#5%\n",
paste0(" [x1-x", m$nv, "*", 2*m$sep, "];\n")
)
propMeans <- list(
" !default to equal sizes\n",
" [c#1*0];\n",
" [c#2*0];\n",
" [c#3*0];\n",
" [c#4*0];\n"
)
}
##restrict means and proportions to the number of classes being fit (as opposed to true structure)
if (m$fc == 2) {
means <- do.call(paste0, means[1:4])
propMeans <- do.call(paste0, propMeans[1:2])
} else if (m$fc == 3) {
means <- do.call(paste0, means[1:6])
propMeans <- do.call(paste0, propMeans[1:3])
} else if (m$fc == 4) {
means <- do.call(paste0, means[1:8])
propMeans <- do.call(paste0, propMeans[1:4])
} else if (m$fc == 5) {
means <- do.call(paste0, means[1:10])
propMeans <- do.call(paste0, propMeans[1:5])
} else { stop("Cannot fit ", m$fc, "classes.") }
##define data section for external Monte Carlo
dataSection <- paste0(
"DATA:\n",
" FILE=external_montecarlo_list.dat;\n",
" TYPE=MONTECARLO;\n"
)
outputSection <- paste0(
"OUTPUT:\n",
" TECH1 TECH9;\n"
)
##write Monte Carlo syntax to file
cat(getFMMSyntax(m$fc, m$nc, m$nv, m$ss, m$sep, m$np, dataSection=dataSection,
propMeans=propMeans, means=means, output=outputSection), file=file.path(tempdir(), "external_montecarlo.inp"))
## write each replication dataset and the list of datasets to file
datList <- c()
for (i in 1:min(length(fmm_sim), max(replications))) {
dat <- cbind(fmm_sim[[i]]$X, fmm_sim[[i]]$id)
write.table(dat, file=file.path(tempdir(), paste0("external_montecarlo_rep", sprintf("%04d", i), ".dat")), row.names=FALSE, col.names=FALSE)
datList <- c(datList, paste0("external_montecarlo_rep", sprintf("%04d", i), ".dat"))
}
write.table(datList, file=file.path(tempdir(), "external_montecarlo_list.dat"), row.names=FALSE, col.names=FALSE, quote=FALSE)
## run external monte carlo through Mplus
runModels(file.path(tempdir(), "external_montecarlo.inp"), recursive=FALSE, logFile=NULL, Mplus_command = Mplus_command)
## read in monte carlo results
monteCarloCombined <- readModels(file.path(tempdir(), "external_montecarlo.out"))
## move input and output files to results directory. rename uniquely based on cell in simulation design
file.rename(
file.path(tempdir(), "external_montecarlo.inp"),
file.path(outputdir, paste0("fmm", m$fc, "_n", m$ss, "_p", gsub(".", "_", m$np*100, fixed=TRUE), "_s", gsub(".", "_", m$sep, fixed=TRUE), "_c", m$nc, "_v", m$nv, ".inp"))
)
file.rename(
file.path(tempdir(), "external_montecarlo.out"),
file.path(outputdir, paste0("fmm", m$fc, "_n", m$ss, "_p", gsub(".", "_", m$np*100, fixed=TRUE), "_s", gsub(".", "_", m$sep, fixed=TRUE), "_c", m$nc, "_v", m$nv, ".out"))
)
## 2) now run models for each replication individually and collate the results into a list
## save class probabilities for each run
savedata <- paste0("SAVEDATA:\n",
" FILE=cprobs.dat;\n",
" SAVE=CPROBABILITIES;\n")
## Add tech output to each replication
outputSection <- paste0(
"OUTPUT:\n",
" TECH1 TECH7 TECH12;\n"
)
## loop over individual replications and run each using runModels and readModels
repResults <- list()
for (i in 1:min(length(fmm_sim), max(replications))) {
##specify the replication dataset to be used
dataSection <- paste0(
"DATA:\n",
" FILE=external_montecarlo_rep", sprintf("%04d", i), ".dat;\n"
)
cat(getFMMSyntax(m$fc, m$nc, m$nv, m$ss, m$sep, m$np, dataSection=dataSection, propMeans=propMeans, means=means, output=outputSection, savedata=savedata), file=file.path(tempdir(), "individual_rep.inp"))
runModels(file.path(tempdir(), "individual_rep.inp"), recursive=FALSE, logFile=NULL, Mplus_command = Mplus_command) #run replication
repResults[[i]] <- readModels(file.path(tempdir(), "individual_rep.out")) #read replication results
}
## add simulation parameters to object for clarity
attr(repResults, "f") <- m$fc #number of classes fit
attr(repResults, "n") <- m$ss #sample size
attr(repResults, "p") <- m$np #noise proportion
attr(repResults, "s") <- m$sep #latent separation
attr(repResults, "c") <- m$nc #number of true classes
attr(repResults, "v") <- m$nv #number of variables
## 3) save all results for this cell in the simulation design to an .RData file
save(repResults, monteCarloCombined, file=file.path(outputdir, m$fname_resultsRData), compress="xz", compression_level=9)
cleanupTempFiles() ##cleanup temporary files generated by Mplus
return(m$fname_sourceReps)
}
## Shutdown parallel cluster
stopCluster(clusterobj)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.