#'A Function for Analyzing Monte Carlo Data Sets Created by mplus_montecarlo_data() or simdata_grm().
#'
#' This package contains the mplus_montecarlo_analysis_grm() function,
#' which will analyze Monte Carlo data sets using the script files for various estimators. The data sets and the script files are generated by the mplus_montecarlo_data() function.
#' @param model_object A set of model specifications that can be passed from the function mplus_montecarlo_data() or simdata_grm().
#' @param use_new_dir If NULL, the recorded directory given by the model_object is used.
#' Otherwise, a new main directory is needed for writing Mplus input scripts. The scripts
#' will use the new directory to access the data sets. A new directory is useful
#' when the folder containing conditions and data sets are analyzed on different computers
#' having different directories for data sets.
#' @param estimators A list of estimators. Available estimators are: c('ML', 'MLR', 'MLF',
#' 'ML_logit',
#' 'ML_probit',
#' 'MLR_logit',
#' 'MLR_probit',
#' 'MLF_logit',
#' 'MLF_probit',
#' 'WLS_delta',
#' 'WLS_theta',
#' 'WLSM_delta',
#' 'WLSM_theta',
#' 'WLSMV_delta',
#' 'WLSMV_theta',
#' 'ULS_delta',
#' 'ULS_theta',
#' 'ULSMV_delta',
#' 'ULSMV_theta')
#' @param rep a number of replication of data set if the analysis is done on each replicated data set separately. Null if type_montecarlo = TRUE.
#' @param type_montecarlo if TRUE, the analysis is done using the type of MONTECARLO, which analyze all replications and give average parameters based on the number of replications.
#' @param run_files if TRUE, it will execute the Mplus script files based on the selected estimators.
#' @return mplus_montecarlo_analysis will analyze Monte Carlo data sets using the selected estimators. It uses
#' MplusAutomation::runModels().
#' @export
#'
#' @examples
#' library(MplusAutomation)
#' data.2F = mplus_montecarlo_data(
#' # list your model in increasing order
#' model = list(c(1,2,3,4), #factor 1
#' c(5,6,7,8)) # factor 2
#'eloadval = NULL,
#'vloadings = c(0.4, 0.4, 0.4, 0.4, 0.5, 0.5, 0.5, 0.5), # a vector of factor loadings
#'thresholds = c(-2.478, -1.818, -0.865, 0.425), # threshold values per item for 1,2,3,4
#'factor.cor = NULL,# factor correlation
#'vfactor.cor = c(0.31), # length(vfactor.cor)
#'N = 300, # sample size
#'R = 5,# n of replications
#'seed_mplus = 4567,# seed number for Mplus
#'naming_data_files = 'data4_rep*.dat;',
#'file_dir = getwd(),
#'file_name = "generate_data4.inp",
#'run_files = TRUE
#')
#'
#' # running analysis as Monte Carlo type
#' setwd("C:/Users/Dell/mplus files")
#' mplus_montecarlo_analysis(
#' model_object = data.2F,
#' estimators = c('ULS_delta',
#' 'ULS_theta',
#' 'WLSMV_theta',
#' 'WLSMV_delta'),
#' rep = NULL,
#' type_montecarlo = TRUE,
#' run_files = TRUE)
#'
#'# running each replicate separately to get theta values
#'#' setwd("C:/Users/Dell/mplus files")
#' mplus_montecarlo_analysis(
#' model_object = data.2F,
#' estimators = c('ULS_delta',
#' 'ULS_theta',
#' 'WLSMV_theta',
#' 'WLSMV_delta'),
#' rep = 1,
#' type_montecarlo = FALSE,
#' run_files = TRUE)
#'
#' # running replicated data sets separately using loop
#'#' setwd("C:/Users/Dell/mplus files")
#' for (i in 1:10){
#' mplus_montecarlo_analysis_grm2(
#' model_object = data.2F,
#' estimators = c('ULS_delta',
#' 'ULS_theta',
#' 'WLSMV_theta',
#' 'WLSMV_delta'),
#' rep = i,
#' type_montecarlo = FALSE,
#' run_files = TRUE)
#' }
#' @references
#' \insertRef{Asparaouhov2020}{MplusMontecarlo}
#' \insertRef{Hallquist2018}{MplusMontecarlo}
mplus_montecarlo_analysis_grm2 <- function(
model_object,
use_new_dir = NULL,
estimators,
rep,
type_montecarlo,
run_files
){
# declare variables ####
model <- model_object$model_spec$model
thresholds <- model_object$model_spec$thresholds
vfactor.cor <- model_object$model_spec$vfactor.cor
R <- model_object$model_spec$R
N <- model_object$model_spec$N
vloadings <- model_object$model_spec$vloadings
naming_data_files <- model_object$model_spec$naming_data_files
# file_name <- model_object$model_spec$file_name
# model directory or a new directory for creating mplus scripts
if(is.null(use_new_dir)){
file_dir <- model_object$model_spec$file_dir
} else {
file_dir <- use_new_dir
}
# Check: model ####
# Ensure that each item is unique to each factor
# Check: one item in no more than one factor and integer ####
items <- unlist(model)
items_rank <- rank(items)
for (i in 1:length(items_rank)){
if(items_rank[i]%%1 != 0){
stop("Currently, this function does not support having one item in more than one dimension. Or The numbers in model must integer, not fraction.")
} else if (items[i]%%1 !=0){
stop("Currently, this function does not support having one item in more than one dimension. Or The numbers in model must integer, not fraction.")
}
}
# Check order of items in model ####
a <- unlist(model) # original input
b <- rank(unlist(model)) # ranks of input
for (i in 1:length(unlist(model))){
if (a[i] - b[i] !=0){
stop("In specifying your model, you must input the whole numbers, which represent items, in a consecutive, increasing order from 1, 2, ..j, where j is the last item and represents the the total number of items.")
}
}
# check: order of items in model ####
for (i in 1:(length(unlist(model))-1)){
if (a[i+1]-a[i] != 1) {
stop("In specifying your model, you must input the whole numbers, which represent items, in a consecutive, increasing order from 1, 2, ..j, where j is the last item and represents the the total number of items.")
}
}
# Check: nFs, n of factor ####
nFs <- length(model)
# n of factors (nFs)
if (nFs > 0 & nFs <= 10 & nFs%%1==0 ){
nFs = nFs
} else {
stop("The number of factors (nFs) must be a positive integer between 0 and 10.")
}
# Check: nYs, n of items ####
nYs <- max(unlist(model))
# n of observed indicators (nYs)
if (nYs > 0 & nYs%%1==0 ){
nYs = nYs
} else {
stop("The number of observed indicators (nYs) must be a positive integer.")
}
# Check: threshold values ####
if (is.null(thresholds)){
stop("A vector of thresholds is needed.")
} else {
thresholds = thresholds
nTs <- length(thresholds)
}
# check: n of factor correlations ####
# iterate pair-wise factor correlations
Fcor.tb <- NULL
for (i in 1:(nFs-1)){
for (j in (i+1):nFs){
Fcor.tb <- rbind(Fcor.tb, c(i, j))
}
}
# Check: n of factor correlations given n of factors ####
# factor correlation less than 1
for (i in 1:length(vfactor.cor)){
if(vfactor.cor[i]>=1){
stop(paste("Factor correlation values must be less than 1. Check value number", i, '.', sep=' '))
}
}
if (is.null(vfactor.cor)){
stop("A vector of vfactor.cor is needed.")
} else if (nrow(Fcor.tb)!=length(vfactor.cor)){
stop(paste("The length of factor correlation vector (vfactor.cor) must be ",
nrow(Fcor.tb), ",(", nFs, "*", (nFs-1), "/2)",
",given the number of factors:",nFs, sep=' '))
} else {
vfactor.cor = vfactor.cor
}
# Check: n of vloadings = n of items ####
if (length(vloadings)!=length(unlist(model))){
stop("The number of factor loadings must equal the number of items in the model.")
}
# Check: N, sample size ####
if (is.null(N)) {
stop("A sample size that is greater than 0 is needed.")
} else if (N <= 0) {
stop("A sample size that is greater than 0 is needed.")
} else {
N <- N
}
# Check: R, n of replications ####
if (is.null(R)) {
stop("A replication number (R) must be a positive integer.")
} else if (R <= 0) {
stop("A replication number (R) must be a positive integer.")
} else {
R <- R
}
# Check: file directory ####
if (file.exists(file_dir)) {
file_dir <- file_dir
} else {
stop("The directory is not valid. Please provide a valid directory.")
}
# Check: Naming data files ####
if(unlist(strsplit(naming_data_files, "\\*"))[2]== ".dat;"){
naming_files <- naming_data_files
} else {
stop("Naming data files for replicated data sets must end with '*.dat;'; e.g., 'mplus_data*.dat;'")
}
# Check: n of Ys per factor ####
FCTR <- factor(c(1:nFs), labels = 'F')
Y <- factor(c(1:nYs), labels = 'y')
if ((length(Y)/length(FCTR))<3){
stop("There are too few observed indicators per factor.There should be 3 items per factor.")
}
L0 <- "! Author: Bo Klauth"
L3 <- paste0('NAMES = ', Y[1], '-', Y[nYs], ';')
L10 <- paste0('CATEGORICAL= ', Y[1], '-', Y[nYs], ';')
# formating into y1*loading1 y2*loading2 ...etc
Yloadings <- paste(Y, vloadings, sep='*')
# Write lines for factor loadings: FYlines ####
# Convert input model into Mplus Syntax
# unpack the factors
D <- NULL
F_label <- NULL
FYlines<-NULL
for (i in 1:length(model)){
F_label[i] <- paste0('F', i)
D[i] <- paste(F_label[i], 'BY', sep=' ')
for (k in min(unlist(model[i])): max(unlist(model[i]))){
D[i] <- paste(D[i], Yloadings[k], sep=' ') # iterate all loading lines per dimension
}
FYlines[i] <- paste0(D[i], ';')
}
FYlines
# Write lines for factor variances: FVarlines ####
L13 <- '! Factor variance'
FVarlines <- NULL
for (i in 1:length(model)){
FVarlines[i] <- paste0('F', i, '@1;')
}
L14 <- '! Thresholds'
# Write lines for thresholds ####
Ythresholds <- paste0(
rep(Y, each=nTs), # make Yj to match N thresholds
rep('$',each = nTs*nYs),
rep(1:nTs, times= nYs),
rep('*',each = nTs*nYs),
rep(thresholds, nYs))
Ythresholds <- paste(
paste(rep(Y, each=nTs), # make Yj to match N thresholds
rep(1:nTs, times= nYs), sep='$'),
rep(thresholds, nYs), sep='*')
# turning into matrix with columns of 4
Ythresholds <- matrix(Ythresholds, byrow = TRUE, ncol=nTs)
# Lines of thresholds: Thlines
Thlines <- NULL
for (i in 1:nYs){
Thlines[i] <- paste0(c('[', Ythresholds[i,], '];'), collapse = ' ')
}
# Lines of Error Variance: Yerrvarlines ####
L15 <- '! Error variance'
# Interfactor corr lines: ####
L16 <- '! Interfactor correlations'
# references: vfactor.cor, Fcor.tb
FCorlines <- NULL
for (i in 1:nrow(Fcor.tb)){
# use Fcor.tb to get labels of factors and match factor correlation
FCorlines[i] <-paste0('F', Fcor.tb[i,1], ' WITH ', 'F', Fcor.tb[i,2], '*',
vfactor.cor[i], ';', sep=' ')
}
# Extract the model for analysis ####
# titles
MS1 <- paste0('TITLE: ', 'Analyze data in an MS model, N = ', N, ', R = ', R, ';')
MS1.2 <- paste0('TITLE: ', 'Analyze data in an MS model, N = ', N, ', R ', rep, ' of ', R, ';')
usedata <- paste0('DATA: FILE= ', file_dir, "/",
unlist(strsplit(naming_data_files, "\\*"))[1], 'list', '.dat;')
MS2 <- 'TYPE = MONTECARLO;'
MS3 <- 'VARIABLE:'
MS4 <- paste0('USEVARIABLES ARE ', Y[1], '-', Y[nYs], ';')
MS6 <- 'ANALYSIS:'
mplus_estimators <- c('ML', 'MLR', 'MLF', 'WLS','WLSM', 'WLSMV', 'ULS', 'ULSMV')
chosen_estimators <- mplus_estimators
parameterizations <- c('DELTA', 'THETA')
link_functions <- c('!TREAT CATEGORICAL VARS AS CONTINUOUS VARS', 'LINK IS LOGIT', 'LINK IS PROBIT')
# iterate estimator, parameterization, and link functions
# Note: only parameterization is applied to WLSMV and ULSMV.
est_para_linkf <- NULL
estimator_lines <- NULL
for (i in 1:length(chosen_estimators)){
est <- paste0('ESTIMATOR = ', chosen_estimators[i], ';') # ESTIMATOR
if(chosen_estimators[i] %in% c('WLSMV', 'ULSMV', 'ULS', 'WLSM', 'WLS')){
for (j in 1: length(parameterizations)){
para <- paste0('PARAMETERIZATION = ', parameterizations[j], ';') # parameterization for WLSMS/ULSMV (limited info)
est_para_linkf[i] <- paste(est, para)
estimator_lines <- rbind (estimator_lines, est_para_linkf[i])
}
} # end j iterations
if(chosen_estimators[i] %in% c('ML', 'MLR', 'MLF')){
for (k in 1: length(link_functions)){
linkf <- paste0(link_functions[k], ';')
est_para_linkf[i] <- paste(est, linkf)
estimator_lines <- rbind (estimator_lines, est_para_linkf[i])
}
} # end k iterations
} # end i iterations
MS7 <- 'MODEL:'
# model specifications
D <- NULL # dimensions and loadings
F_label <- NULL
model_spec<-NULL
for (i in 1:length(model)){
F_label[i] <- paste0('F', i)
# writing F_j BY
D[i] <- paste(F_label[i], 'BY ', sep=' ')
# iterate all loading lines per dimension
D[i] <- paste0(D[i], 'Y', min(unlist(model[i])), '-', 'Y', max(unlist(model[i])),
# add factor loadings to be estimated
'*','(',
'Load', min(unlist(model[i])), '-', 'Load', max(unlist(model[i])),
')',
sep=' ')
model_spec[i] <- paste0(D[i], ';')
}
# model specification
# model_spec
# Threholds
#nTs
#nYs
tau <- NULL
for (i in 1:nYs){
tau[i] <- paste0('[', Y[i], '$', 1, '-', Y[i], '$', nTs,'*', ']',
'(', Y[i], 'T',1, '-', Y[i], 'T', nTs, ')', ';')
}
# tau
MS8 <- '! Factor Mean'
# Factor mean
factor_means <-paste0('[', 'F', 1, '-', 'F', length(model), '*]',
'(', 'Fmean', 1, '-', 'Fmean', length(model), ');')
# Factor variance
factor_variances <-paste0('F', 1, '-', 'F', length(model),
'*(', 'Fvariance', 1, '-', 'Fvariance', length(model), ');')
# Model constraint
MS9 <- 'MODEL CONSTRAINT:'
model_constraint_m <- NULL
# model constraint
for (i in 1: length(model)){
model_constraint_m[i] <- paste0('Fmean', i, '= 0;')
}
model_constraint_v <- NULL
# model constraint
for (i in 1: length(model)){
model_constraint_v[i] <- paste0('Fvariance', i, '= 1;')
}
# interfactor correlation
# Factor correlations
FCor <- NULL
for (i in 1:nrow(Fcor.tb)){
# use Fcor.tb to get labels of factors and match factor correlation
FCor[i] <-paste0('F', Fcor.tb[i,1], ' WITH ', 'F', Fcor.tb[i,2],
';', sep=' ')
}
MS10 <- 'OUTPUT: STDYX;! Standardized solution'
# Saving output thetas syntax
MS11 <- 'SAVEDATA: SAVE = FSCORES; ! Saving factor scores'
# writing file name for factor scores
MS12 <- paste0('FILE = ',
gsub("\\*",
paste0(rep, "_fscores"),
naming_data_files))
usedata2 <- gsub("list", rep, usedata)
# need to iterate for all estimators
# Iterate scripts through estimators: estimator_lines
est_names <- c('ML',
'ML_logit',
'ML_probit',
'MLR',
'MLR_logit',
'MLR_probit',
'MLF',
'MLF_logit',
'MLF_probit',
'WLS_delta',
'WLS_theta',
'WLSM_delta',
'WLSM_theta',
'WLSMV_delta',
'WLSMV_theta',
'ULS_delta',
'ULS_theta',
'ULSMV_delta',
'ULSMV_theta')
# add monte carlo conditions
if (isTRUE(type_montecarlo)){
script_container <- NULL
for (i in 1:length(estimator_lines)){
if(i %in% c(1,4,7)){ # continuous estimators
script_container <- capture.output(
cat(L0, # Author info
MS1, # title
usedata, # put data file for analysis
MS2, # TYPE = MONTECARLO
MS3, # VARIABLE:
L3, # names are y variables
MS4, # usevariables are y variables
#L10, # categorical are
MS6, # ANALYSIS
# select estimator, parameterization, and link function
estimator_lines[i],
MS7, # MODEL:
model_spec,
#L14, # thresholds title
#tau, # threshold lines
FCor, # interactor correlations
MS8, # Factor mean title
factor_means, # Factor mean
L13,# factor variance title
factor_variances, # factor variances
MS9, # model constraint
model_constraint_m, # constraining factor means
model_constraint_v, # constraining factor variance
MS10, # output, standardized solution
sep='\n')
)
} else {
script_container <- capture.output(
cat(L0, # Author info
MS1, # title
usedata, # put data file for analysis
MS2, # TYPE = MONTECARLO
MS3, # VARIABLE:
L3, # names are y variables
MS4, # usevariables are y variables
L10, # categorical are
MS6, # ANALYSIS
# select estimator, parameterization, and link function
estimator_lines[i],
MS7, # MODEL:
model_spec,
#L14, # thresholds title
#tau, # threshold lines
FCor, # interactor correlations
MS8, # Factor mean title
factor_means, # Factor mean
L13,# factor variance title
factor_variances, # factor variances
MS9, # model constraint
model_constraint_m, # constraining factor means
model_constraint_v, # constraining factor variance
MS10, # output, standardized solution
sep='\n')
)
} # end categorical estimators
# assign estimator name to script
assign(est_names[i], script_container)
script_container <- NULL
} # end iterations for all estimators
# creating folders
folder <- NULL
for (j in 1:length(estimators)){
# create a new directory to store script of different estimator
if(isFALSE(dir.exists(paste0(file_dir, "/", estimators[j])))){ # not exist, create
dir.create(paste0(file_dir, "/", estimators[j]))
folder[j] <- paste0(file_dir, "/", estimators[j])
} else { # if exist, remove, then create
folder[j] <- paste0(file_dir, "/", estimators[j])
}
# writing an mplus script data analysis to file
# add condition from ther user: if the user's estimator, then write
# otherwise, don't write
for (i in 1:length(est_names)){
if (est_names[i] %in% estimators[j]){ # matching user's estimator
filename_obj <- paste0(folder[j], "/", est_names[i],"_replist", '.inp')
message(paste0("creating:", filename_obj))
# write file
write.table(get(est_names[i]),# get the assigned object
file = filename_obj,
row.names = FALSE,
col.names = FALSE,
quote=FALSE) # use quote = FALSE to eliminate quotes in text
if (isTRUE(run_files)){
message(paste0("running: ", filename_obj))
MplusAutomation::runModels(filename_obj)
message(paste0("done: ", filename_obj))
}
}
}
}
mplus_script <- list('ML' = ML,
'MLR' = MLR,
'MLF' = MLF,
'ML_logit' = ML_logit,
'ML_probit' = ML_probit,
'MLR_logit' = MLR_logit,
'MLR_probit' = MLR_probit,
'MLF_logit' = MLF_logit,
'MLF_probit' = MLF_probit,
'WLS_delta' = WLS_delta,
'WLS_theta' = WLS_theta,
'WLSM_delta' = WLSM_delta,
'WLSM_theta' = WLSM_theta,
'WLSMV_delta' = WLSMV_delta,
'WLSMV_theta' = WLSMV_theta,
'ULS_delta' = ULS_delta,
'ULS_theta' = ULS_theta,
'ULSMV_delta' = ULSMV_delta,
'ULSMV_theta' = ULSMV_theta,
'script_list' = c('montecarlo_data', est_names))
} # end type_montecarlo = TRUE
if (isFALSE(type_montecarlo)){
script_container <- NULL
for (i in 1:length(estimator_lines)){
if(i %in% c(1, 4, 7)){ # continuous estimators
script_container <- capture.output(
cat(L0, # Author info
MS1.2, # changed title from MS1 to MS1.2
usedata2, # put data file for a specific rep for analysis
#MS2, # TYPE = MONTECARLO
MS3, # VARIABLE:
L3, # names are y variables
MS4, # usevariables are y variables
#L10, # categorical are
MS6, # ANALYSIS
# select estimator, parameterization, and link function
estimator_lines[i],
MS7, # MODEL:
model_spec,
#L14, # thresholds title
#tau, # threshold lines
FCor, # interactor correlations
MS8, # Factor mean title
factor_means, # Factor mean
L13,# factor variance title
factor_variances, # factor variances
MS9, # model constraint
model_constraint_m, # constraining factor means
model_constraint_v, # constraining factor variance
MS10, # output, standardized solution
MS11, # SAVEDATA, FSCORES
MS12, # naming the factor score file
sep='\n')
)
} else {
script_container <- capture.output(
cat(L0, # Author info
MS1.2, # changed title from MS1 to MS1.2
usedata2, # put data file for a specific rep for analysis
#MS2, # TYPE = MONTECARLO
MS3, # VARIABLE:
L3, # names are y variables
MS4, # usevariables are y variables
L10, # categorical are
MS6, # ANALYSIS
# select estimator, parameterization, and link function
estimator_lines[i],
MS7, # MODEL:
model_spec,
#L14, # thresholds title
#tau, # threshold lines
FCor, # interactor correlations
MS8, # Factor mean title
factor_means, # Factor mean
L13,# factor variance title
factor_variances, # factor variances
MS9, # model constraint
model_constraint_m, # constraining factor means
model_constraint_v, # constraining factor variance
MS10, # output, standardized solution
MS11, # SAVEDATA, FSCORES
MS12, # naming the factor score file
sep='\n')
)
} # end categorical estimators
# assign estimator name to script
assign(est_names[i], script_container)
script_container <- NULL
}
# writing an mplus script data analysis to file
# adding user's estimators
folder <- NULL
for (j in 1:length(estimators)){
# create a new directory to store script of different estimator
if(isFALSE(dir.exists(paste0(file_dir, "/", estimators[j])))){ # not exist, create
dir.create(paste0(file_dir, "/", estimators[j]))
folder[j] <- paste0(file_dir, "/", estimators[j])
} else { # if exist, remove, then create
folder[j] <- paste0(file_dir, "/", estimators[j])
}
for (i in 1:length(est_names)){
if (est_names[i] %in% estimators[j]){ # matching user's estimator
filename_obj <- paste0(folder[j], "/", est_names[i],"_rep", rep, '.inp')
# message(paste0("creating:", filename_obj))
# write file
write.table(get(est_names[i]),# get the assigned object
file = filename_obj,
row.names = FALSE,
col.names = FALSE,
quote=FALSE) # use quote = FALSE to eliminate quotes in text
if (isTRUE(run_files)){ # run the input file
message(paste0("Executing: ", filename_obj))
MplusAutomation::runModels(filename_obj)
message(paste0("Done"))
}
}
}
}
mplus_script <- list('ML' = ML,
'MLR' = MLR,
'MLF' = MLF,
'ML_logit' = ML_logit,
'ML_probit' = ML_probit,
'MLR_logit' = MLR_logit,
'MLR_probit' = MLR_probit,
'MLF_logit' = MLF_logit,
'MLF_probit' = MLF_probit,
'WLS_delta' = WLS_delta,
'WLS_theta' = WLS_theta,
'WLSM_delta' = WLSM_delta,
'WLSM_theta' = WLSM_theta,
'WLSMV_delta' = WLSMV_delta,
'WLSMV_theta' = WLSMV_theta,
'ULS_delta' = ULS_delta,
'ULS_theta' = ULS_theta,
'ULSMV_delta' = ULSMV_delta,
'ULSMV_theta' = ULSMV_theta,
'script_list' = c('montecarlo_data', est_names))
} # end type_montecarlo = FALSE
} # function
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.