examples/monte_carlo/RunFMM_MCReps.R

## R code to run LCA/LPA models on datasets generated from a 3-class LCA (see GenData_MixSim.R)
## This script runs latent class models using an external Monte Carlo analysis implemented by Mplus,
## as well as the individual replications. The script then stores the results of readModels() for 
## more detailed model diagnostics, plots, etc.
## Author: Michael Hallquist
## Last Updated: 31 Oct 2017

## It is necessary to set the working directory if you are not working from the Rstudio project
#setwd("/path/to/directory/containing/simulation/R/scripts")

#source functions for syntax generation and temp file cleanup
source("helper_functions.R")

## Define directory to store all results, both Mplus outputs and RData structures containing parsed results
inputdir <- "~/mplus_tmp/data" #location to "data" folder, the root of replication files created by GenData_MixSim.R

## outputdir <- "data/cfa_montecarlo_results" #can be relative to working directory
outputdir <- "~/mplus_tmp/fmm_outputs" #or can be absolute

## ensure that the destination directory is created
dir.create(outputdir, showWarnings = FALSE, recursive=TRUE)

## location of Mplus executable to be called by runModels
Mplus_command <- "/Users/mnh5174/Applications/Mplus/Mplus"
## Mplus_command <- "/Applications/Mplus/mplus"

library(MplusAutomation)
library(foreach)
library(doSNOW)

## You can control how many replications are estimated here.
## By default, GenData_MixSim.R generates 1000 replications per condition.
## This variable defines how many replications are fit below
replications <- 1:10

##Load a list of all models to be run (generated by GenData_MixSim.R)
load(file.path(inputdir, "lcaMasterList.RData"))

njobs <- 8
setDefaultClusterOptions(master="localhost")
clusterobj <- makeSOCKcluster(njobs)
registerDoSNOW(clusterobj)

## Use foreach to estimate models in parallel
## Note that we estimate models within tempdir(), which should be a unique directory for each R worker process
fmm <- foreach(m=iter(lcalist), .inorder=FALSE, .multicombine=TRUE, .packages=c("MplusAutomation")) %dopar% {
  ## do not re-estimate replications that have already completed
  if (file.exists(file.path(outputdir, m$fname_resultsRData))) {
    cat("Already processed replications:", m$fname_resultsRData, "\n")
    return(NULL) #next
  }
  
  ## verify that replications object exists (generated by GenData_MixSim.R)
  if (!file.exists(file.path(inputdir, m$fname_sourceReps))) {
    cat("Cannot open replications file: ", file.path(inputdir, m$fname_sourceReps), "\n")
    return(NULL) #next
  }
  
  if (m$fc!=3) {
    #only running 3-class model for now (just to speed up simulation). Remove this if you want 2:5
    return(NULL) #next
  }
    
  load(file=file.path(inputdir, m$fname_sourceReps))
  
  ##setup model syntax for means across classes
  if (m$nc==3) {
    means <- list(
      "%c#1%\n",
      paste0("  [x1-x", m$nv, "*", -m$sep, "];\n"),
      "%c#2%\n",
      paste0("  [x1-x", m$nv, "*0];\n"),
      "%c#3%\n",
      paste0("  [x1-x", m$nv, "*", m$sep, "];\n"),
      "%c#4%\n", #for misspecified models above 3 true classes
      paste0("  [x1-x", m$nv, "*0];\n"),
      "%c#5%\n",
      paste0("  [x1-x", m$nv, "*0];\n")
    )
    propMeans <- list(
      "  !default to equal sizes\n",
      "  [c#1*0];\n",
      "  [c#2*0];\n",
      "  [c#3*0];\n",
      "  [c#4*0];\n"
    )
  } else if (m$nc==5) {
    means <- list(
      "%c#1%\n",
      paste0("  [x1-x", m$nv, "*", -2*m$sep, "];\n"),
      "%c#2%\n",
      paste0("  [x1-x", m$nv, "*", -m$sep, "];\n"),
      "%c#3%\n",
      paste0("  [x1-x", m$nv, "*0];\n"),
      "%c#4%\n",
      paste0("  [x1-x", m$nv, "*", m$sep, "];\n"),
      "%c#5%\n",
      paste0("  [x1-x", m$nv, "*", 2*m$sep, "];\n")                    
    )
    propMeans <- list(
      "  !default to equal sizes\n",
      "  [c#1*0];\n",
      "  [c#2*0];\n",
      "  [c#3*0];\n",
      "  [c#4*0];\n"
    )              
  }
  
  ##restrict means and proportions to the number of classes being fit (as opposed to true structure)
  if (m$fc == 2) {
    means <- do.call(paste0, means[1:4])
    propMeans <- do.call(paste0, propMeans[1:2])
  } else if (m$fc == 3) {
    means <- do.call(paste0, means[1:6])
    propMeans <- do.call(paste0, propMeans[1:3])
  } else if (m$fc == 4) {
    means <- do.call(paste0, means[1:8])
    propMeans <- do.call(paste0, propMeans[1:4])
  } else if (m$fc == 5) {
    means <- do.call(paste0, means[1:10])
    propMeans <- do.call(paste0, propMeans[1:5])
  } else { stop("Cannot fit ", m$fc, "classes.") }
  
  ##define data section for external Monte Carlo
  dataSection <- paste0(
    "DATA:\n",
    "  FILE=external_montecarlo_list.dat;\n",
    "  TYPE=MONTECARLO;\n"
  )
  outputSection <- paste0(
    "OUTPUT:\n",
    "  TECH1 TECH9;\n"
  )   
  
  ##write Monte Carlo syntax to file
  cat(getFMMSyntax(m$fc, m$nc, m$nv, m$ss, m$sep, m$np, dataSection=dataSection, 
      propMeans=propMeans, means=means, output=outputSection), file=file.path(tempdir(), "external_montecarlo.inp"))
  
  ## write each replication dataset and the list of datasets to file
  datList <- c()
  for (i in 1:min(length(fmm_sim), max(replications))) {
    dat <- cbind(fmm_sim[[i]]$X, fmm_sim[[i]]$id)
    write.table(dat, file=file.path(tempdir(), paste0("external_montecarlo_rep", sprintf("%04d", i), ".dat")), row.names=FALSE, col.names=FALSE)
    datList <- c(datList, paste0("external_montecarlo_rep", sprintf("%04d", i), ".dat"))
  }
  write.table(datList, file=file.path(tempdir(), "external_montecarlo_list.dat"), row.names=FALSE, col.names=FALSE, quote=FALSE)
  
  ## run external monte carlo through Mplus
  runModels(file.path(tempdir(), "external_montecarlo.inp"), recursive=FALSE, logFile=NULL, Mplus_command = Mplus_command)
  
  ## read in monte carlo results
  monteCarloCombined <- readModels(file.path(tempdir(), "external_montecarlo.out"))
  
  ## move input and output files to results directory. rename uniquely based on cell in simulation design
  file.rename(
    file.path(tempdir(), "external_montecarlo.inp"), 
    file.path(outputdir, paste0("fmm", m$fc, "_n", m$ss, "_p", gsub(".", "_", m$np*100, fixed=TRUE), "_s", gsub(".", "_", m$sep, fixed=TRUE), "_c", m$nc, "_v", m$nv, ".inp"))
  )
  
  file.rename(
    file.path(tempdir(), "external_montecarlo.out"), 
    file.path(outputdir, paste0("fmm", m$fc, "_n", m$ss, "_p", gsub(".", "_", m$np*100, fixed=TRUE), "_s", gsub(".", "_", m$sep, fixed=TRUE), "_c", m$nc, "_v", m$nv, ".out"))
  )
  
  ## 2) now run models for each replication individually and collate the results into a list
  ## save class probabilities for each run
  savedata <- paste0("SAVEDATA:\n",
    "  FILE=cprobs.dat;\n",
    "  SAVE=CPROBABILITIES;\n")
  
  ## Add tech output to each replication
  outputSection <- paste0(
    "OUTPUT:\n",
    "  TECH1 TECH7 TECH12;\n"
  )
  
  ## loop over individual replications and run each using runModels and readModels
  repResults <- list()
  for (i in 1:min(length(fmm_sim), max(replications))) {
    ##specify the replication dataset to be used
    dataSection <- paste0(
      "DATA:\n",
      "  FILE=external_montecarlo_rep", sprintf("%04d", i), ".dat;\n"
    )
    
    cat(getFMMSyntax(m$fc, m$nc, m$nv, m$ss, m$sep, m$np, dataSection=dataSection, propMeans=propMeans, means=means, output=outputSection, savedata=savedata), file=file.path(tempdir(), "individual_rep.inp"))
    runModels(file.path(tempdir(), "individual_rep.inp"), recursive=FALSE, logFile=NULL, Mplus_command = Mplus_command) #run replication
    repResults[[i]] <- readModels(file.path(tempdir(), "individual_rep.out")) #read replication results
  }
  
  ## add simulation parameters to object for clarity
  attr(repResults, "f") <- m$fc  #number of classes fit
  attr(repResults, "n") <- m$ss  #sample size
  attr(repResults, "p") <- m$np  #noise proportion
  attr(repResults, "s") <- m$sep #latent separation
  attr(repResults, "c") <- m$nc  #number of true classes
  attr(repResults, "v") <- m$nv  #number of variables
  
  ## 3) save all results for this cell in the simulation design to an .RData file
  save(repResults, monteCarloCombined, file=file.path(outputdir, m$fname_resultsRData), compress="xz", compression_level=9)
  
  cleanupTempFiles()  ##cleanup temporary files generated by Mplus
  
  return(m$fname_sourceReps)
}

## Shutdown parallel cluster
stopCluster(clusterobj)
michaelhallquist/MplusAutomation documentation built on May 9, 2024, 11:37 p.m.