Nothing
#' Read Variable Names, Formats, and Widths from data generated by the SAVEDATA Command
#'
#' This function reads the SAVEDATA INFORMATION section from an Mplus output file that used
#' the SAVEDATA command, and it returns a list with the filename, variable names, variable formats,
#' and variable widths of the SAVEDATA file. If present, the function also parses information about the
#' Bayesian Parameters (BPARAMETERS) file.
#'
#' @param outfile required. The name of the Mplus output file to read. Can be an absolute or relative path.
#' If \code{outfile} is a relative path or just the filename, then it is assumed that the file resides in
#' the working directory \code{getwd()}.
#' @return Returns a list of SAVEDATA file information that includes:
#' \item{fileName}{The name of the file containing the analysis dataset created by the Mplus SAVEDATA command.}
#' \item{fileVarNames}{A character vector containing the names of variables in the dataset.}
#' \item{fileVarFormats}{A character vector containing the Fortran-style formats of variables in the dataset.}
#' \item{fileVarWidths}{A numeric vector containing the widths of variables in the dataset (which is stored in fixed-width format).}
#' \item{bayesFile}{The name of the BPARAMETERS file containing draws from the posterior distribution created by
#' the Mplus SAVEDATA BPARAMETERS command.}
#' \item{bayesVarNames}{A character vector containing the names of variables in the BPARAMETERS dataset.}
#' \item{tech3File}{A character vector of the tech 3 output.}
#' \item{tech4File}{A character vector of the tech 4 output.}
#' @export
#' @author Michael Hallquist
#' @seealso \code{\link{getSavedata_Data}}
#' @examples
#' \dontrun{
#' fileInfo <- getSavedata_Fileinfo("C:/Program Files/Mplus/Test Output.out")
#' }
#' @keywords interface
getSavedata_Fileinfo <- function(outfile) {
#wraps l_getSavedata_Fileinfo by checking for the outfile and reading it as a character vector.
#helper function to parse output from savedata command
#if returnData is true, the data file created will be read in as an R data frame
#if returnData is false, just the variable names are returned
#considering using addHeader to prepend a header row
if(!file.exists(outfile)) {
stop("Cannot locate outfile: ", outfile)
}
outfiletext <- scan(outfile, what="character", sep="\n", strip.white=FALSE, blank.lines.skip=FALSE, quiet=TRUE)
if (length(outfiletext) == 0) {
warning("Empty outfile")
return(NULL)
}
return(l_getSavedata_Fileinfo(outfile, outfiletext))
}
#' local function that does the work of getSaveData_Fileinfo
#'
#' This function is split out so that \code{getSaveData_Fileinfo} is
#' exposed to the user, but the parsing function can be used by
#' \code{readModels}
#'
#' @param outfile A character string giving the name of the Mplus
#' output file.
#' @param outfiletext The contents of the output file, for example as read by \code{scan}
#' @return A list that includes:
#' \item{fileName}{The name of the file containing the analysis dataset created by the Mplus SAVEDATA command.}
#' \item{fileVarNames}{A character vector containing the names of variables in the dataset.}
#' \item{fileVarFormats}{A character vector containing the Fortran-style formats of variables in the dataset.}
#' \item{fileVarWidths}{A numeric vector containing the widths of variables in the dataset (which is stored in fixed-width format).}
#' \item{bayesFile}{The name of the BPARAMETERS file containing draws from the posterior distribution created by
#' the Mplus SAVEDATA BPARAMETERS command.}
#' \item{bayesVarNames}{A character vector containing the names of variables in the BPARAMETERS dataset.}
#' \item{tech3File}{A character vector of the tech 3 output.}
#' \item{tech4File}{A character vector of the tech 4 output.}
#' @importFrom gsubfn strapply
#' @seealso \code{\link{getSavedata_Data}}
#' @examples
#' # make me!
#' @keywords internal
l_getSavedata_Fileinfo <- function(outfile, outfiletext, summaries) {
sectionStarts <- c("Estimates", #estimates
"Estimated Covariance Matrix for the Parameter Estimates", #tech3
"Estimated Means and Covariance Matrix for the Latent Variables", #tech4
"Sample/H1/Pooled-Within Matrix", #sample
"Bayesian Parameters", #bparameters
"Within and between sample statistics with Weight matrix", #swmatrix
"Model Estimated Covariance Matrix" #covariance
)
#extract entire savedata section
savedataSection <- getSection("^\\s*SAVEDATA INFORMATION\\s*$", outfiletext)
#need to have SAVEDATA section to proceed
if (is.null(savedataSection)) {
#omit warning -- probably more common for section to be missing
#warning("Unable to locate a SAVEDATA section in output file: ", outfile)
return(NULL)
}
#initialize these variables to empty character strings so that list return value is complete
#important in cases where some savedata output available, but other sections unused
listVars <- c("saveFile", "fileVarNames", "fileVarFormats", "fileVarWidths", "bayesFile", "bayesVarNames",
"tech3File", "tech4File", "covFile", "sampleFile", "estimatesFile")
l_ply(listVars, assign, value=NA_character_, envir=environment())
# NULL bparameters iterations if not available (needs to be NULL instead of NA since the return is a data.frame)
bayesIterDetails <- NULL
#in Mplus v7, "Save file" comes before "Order and format of variables"
#new approach: process the savedata output more sequentially
#In general, the savedata section is formatted with the unlabeled stuff first.
#TECH3, TECH4, and BPARAMATERS are all labeled below
#Thus, the logic here is to parse all labeled sections, then remove these from consideration.
linesParsed <- c()
#model covariance matrix (COVARIANCE) -- only available when all DVs are continuous
### Model Estimated Covariance Matrix
###
### Save file
### ex3.1.cov
### Save format Free
covSection <- getSection("^\\s*Model Estimated Covariance Matrix\\s*$", savedataSection, sectionStarts)
if (!is.null(covSection)) {
covFileStart <- grep("^\\s*Save file\\s*$", covSection, ignore.case=TRUE, perl=TRUE)
if (length(covFileStart) > 0L) {
covFile <- trimSpace(covSection[covFileStart+1])
}
linesParsed <- c(linesParsed, attr(covSection, "lines"))
}
#parameter estimates (ESTIMATES)
### Estimates
###
### Save file
### ex3.1.est
### Save format Free
estSection <- getSection("^\\s*Estimates\\s*$", savedataSection, sectionStarts)
if (!is.null(estSection)) {
estFileStart <- grep("^\\s*Save file\\s*$", estSection, ignore.case=TRUE, perl=TRUE)
if (length(estFileStart) > 0L) {
estimatesFile <- trimSpace(estSection[estFileStart+1])
}
linesParsed <- c(linesParsed, attr(estSection, "lines"))
}
#sample/h1/pooled-within matrix (SAMPLE)
### Sample/H1/Pooled-Within Matrix
###
### Save file
### ex3.12.sample
### Save type CORRELATION
### Save format Free
sampleSection <- getSection("^\\s*Sample/H1/Pooled-Within Matrix\\s*$", savedataSection, sectionStarts)
if (!is.null(sampleSection)) {
sampleFileStart <- grep("^\\s*Save file\\s*$", sampleSection, ignore.case=TRUE, perl=TRUE)
if (length(sampleFileStart) > 0L) {
sampleFile <- trimSpace(sampleSection[sampleFileStart+1])
}
linesParsed <- c(linesParsed, attr(sampleSection, "lines"))
}
#tech3 output (TECH3)
tech3Section <- getSection("^\\s*Estimated Covariance Matrix for the Parameter Estimates\\s*$", savedataSection, sectionStarts)
if (!is.null(tech3Section)) {
tech3FileStart <- grep("^\\s*Save file\\s*$", tech3Section, ignore.case=TRUE, perl=TRUE)
if (length(tech3FileStart) > 0L) {
tech3File <- trimSpace(tech3Section[tech3FileStart+1])
}
linesParsed <- c(linesParsed, attr(tech3Section, "lines"))
}
#tech4 output (TECH4)
tech4Section <- getSection("^\\s*Estimated Means and Covariance Matrix for the Latent Variables\\s*$", savedataSection, sectionStarts)
if (!is.null(tech4Section)) {
tech4FileStart <- grep("^\\s*Save file\\s*$", tech4Section, ignore.case=TRUE, perl=TRUE)
if (length(tech4FileStart) > 0L) {
tech4File <- trimSpace(tech4Section[tech4FileStart+1])
}
linesParsed <- c(linesParsed, attr(tech4Section, "lines"))
}
#Bayesian parameters section (BPARAMETERS)
bparametersSection <- getSection("^\\s*Bayesian Parameters\\s*$", savedataSection, sectionStarts)
if (!is.null(bparametersSection)) {
bayesFileStart <- grep("^\\s*Save file\\s*$", bparametersSection, ignore.case=TRUE, perl=TRUE)
if (length(bayesFileStart > 0)) {
bayesFile <- trimSpace(bparametersSection[bayesFileStart+1])
paramOrderStart <- grep("^\\s*Order of parameters saved\\s*$", bparametersSection, ignore.case=TRUE, perl=TRUE)
paramOrderSection <- sapply(bparametersSection[(paramOrderStart+2):length(bparametersSection)], trimSpace, USE.NAMES=FALSE) #trim leading/trailing spaces and skip "Order of parameters" line and the subsequent blank line
#parameters list ends with a blank line, so truncate section at next newline
#or in case where section ends after last param, then no blank line is present (just end of vector), so do nothing
nextBlankLine <- which(paramOrderSection == "")
if (length(nextBlankLine) > 0) paramOrderSection <- paramOrderSection[1:(nextBlankLine[1]-1)]
#rather than split into two columns, concatenate so that these are workable as column names
#paramOrderSection <- strsplit(paramOrderSection, "\\s*,\\s*", perl=TRUE)
#bayesVarTypes <- gsub("\\s+", "\\.", sapply(paramOrderSection, "[", 1), perl=TRUE)
#bayesVarNames <- gsub("\\s+", "\\.", sapply(paramOrderSection, "[", 2), perl=TRUE)
#15Mar2012: Workaround for bug: means for categorical latent variables are in output file listing
#but these are not actually present in bparameters. Filter these out.
#12Oct2015: This appears to have been fixed in Mplus 7 and beyond, so check version if available.
if (missing(summaries) || is.null(summaries$Mplus.version) || as.numeric(summaries$Mplus.version) < 7) {
paramOrderSection <- paramOrderSection[!grepl("Parameter\\s+\\d+, \\[ [^#]#\\d+ \\]", paramOrderSection, perl=TRUE)]
}
bayesVarNames <- gsub("\\s*,\\s*", "_", paramOrderSection, perl=TRUE)
bayesVarNames <- gsub("\\[", "MEAN", bayesVarNames, perl=TRUE)
bayesVarNames <- gsub("\\s*\\]\\s*", "", bayesVarNames, perl=TRUE)
bayesVarNames <- gsub("\\s+", ".", bayesVarNames, perl=TRUE)
# figure out if there additional draws from the posterior, typically for factor score estimation
convergenceLine <- grep("Convergence iterations", bparametersSection)
if (length(convergenceLine) == 1L) {
nextBlankLine <- which(bparametersSection[convergenceLine:length(bparametersSection)] == "")
iterLines <- bparametersSection[convergenceLine:(convergenceLine + nextBlankLine - 2)]
bayesIterDetails <- data.frame()
for (ii in seq_along(iterLines)) {
iterType <- sub("\\s*(.*) iterations .*", "\\1", iterLines[ii])
iterType <- tolower(gsub("\\s", ".", iterType))
iterStart <- as.numeric(sub(".*iterations\\s*([0-9]+)-[0-9]+", "\\1", iterLines[ii]))
iterEnd <- as.numeric(sub(".*iterations\\s*[0-9]+-([0-9]+)", "\\1", iterLines[ii]))
bayesIterDetails <- rbind(bayesIterDetails, data.frame(type=iterType, start=iterStart, end=iterEnd))
}
}
}
linesParsed <- c(linesParsed, attr(bparametersSection, "lines"))
}
#remove all lines already parsed within named sections
#this should just leave basic savedata output containing save file info, MC replication info, etc.
if (length(linesParsed) > 0L) savedataSection <- savedataSection[linesParsed*-1]
#process FILE= section. Also applies to Monte Carlo and multiple imputation output
#savedata filename
saveFile.text <- grep("^\\s*Save file\\s*$", savedataSection, perl=TRUE)
if (length(saveFile.text) > 0L) {
saveFile <- trimSpace(savedataSection[(saveFile.text[1L] + 1)])
}
#save file format (sometimes on same line, sometimes on next line)
# saveFileFormat.text <- getSection("^\\s*Save file format", savedataSection, sectionStarts)
#
# #actually, sometimes the format is "Free" and falls on the same line...
# if (!is.null(saveFileFormat.text)) {
# saveFileFormat <- trimSpace(saveFileFormat.text[1]) #first line contains format
# }
#
# #file record length
# savefile.recordlength <- getSection("^\\s*Save file record length\\s+\\d+\\s*$", savedataSection, sectionStarts)
# #Skip for now
orderFormat.text <- getMultilineSection("Order and format of variables", savedataSection, outfile, allowMultiple=FALSE)
if (!is.na(orderFormat.text[1L])) {
variablesToParse <- orderFormat.text[orderFormat.text != ""]
#Mplus v8: remove + sign in front of variables that plausible values from SAVE=FSCORES (XX) in BSEM.
if (length(nimp_line <- grep("\\+ variables that have a value for each of the \\d+ imputations", savedataSection)) > 0L) {
nimp <- as.numeric(sub(".*\\+ variables that have a value for each of the (\\d+) imputations.*", "\\1", savedataSection[nimp_line]))
which_imp <- grep("^\\s*\\+.*", variablesToParse)
variablesToParse <- sub("+", "", variablesToParse, fixed=TRUE)
} else { which_imp <- integer(0) } #no imputations to unpack
#Mplus v8: because variables are now allowed to have spaces in the output, switch to a string split on spaces.
#In this approach, the last element will be the format.
varsSplit <- strsplit(trimSpace(variablesToParse), "\\s+")
# Use rlang::flatten to remove the nested lists that result from the inner lapply over imputations
# 2024: removed rlang::flatten in favor of inner unlist. Cannot find case where this was important
varsSplit <- lapply(1:length(varsSplit), function(x) {
vname <- varsSplit[[x]]
if (x %in% which_imp) {
#replicate the variable for each imputation
return(unlist(lapply(1:nimp, function(i) { c(vname[1:(length(vname)-1)], sprintf("I_%03d", i), vname[length(vname)]) })))
} else {
return(vname)
}
})
fileVarNames <- sapply(varsSplit, function(x) { paste(x[1:(length(x) - 1)], collapse=".") })
fileVarFormats <- sapply(varsSplit, function(x) { x[length(x)] }) #last element
fileVarWidths <- strapply(fileVarFormats, "[IEFG]+(\\d+)(\\.\\d+)*", as.numeric, perl=TRUE, simplify=TRUE)
}
#Monte carlo and multiple imputation output: contains only order of variables, not their format
order.text <- getMultilineSection("Order of variables", savedataSection, outfile, allowMultiple=FALSE)
if (!is.na(order.text[1L])) {
#dump any blank fields because they will cause nulls in the names, formats, widths.
#This is handled by blank.lines.skip=TRUE in wrappers, but readModels needs to retain blank lines
#for other functions, so strip here.
variablesToParse <- order.text[order.text != ""]
#just have variable names, no formats
fileVarNames <- trimSpace(variablesToParse)
}
#future: plausible values output from Bayesian runs
#PLAUSIBLE VALUE MEAN, MEDIAN, SD, AND PERCENTILES FOR EACH OBSERVATION
#return the file information as a list
#N.B. Would like to shift return to "saveFile", but need to update everywhere and note deprecation in changelog
#bayesVarTypes=bayesVarTypes,
return(list(fileName=saveFile, fileVarNames=fileVarNames, fileVarFormats=fileVarFormats, fileVarWidths=fileVarWidths,
bayesFile=bayesFile, bayesVarNames=bayesVarNames, bayesIterDetails=bayesIterDetails, tech3File=tech3File, tech4File=tech4File))
}
#' Load an analysis dataset from the SAVEDATA command into an R data.frame
#'
#' This function reads an analysis dataset generated by the Mplus SAVEDATA command
#' and returns an R \code{data.frame} object.
#'
#' @note Note that the \code{outfile} parameter should refer to the Mplus output file (.out extension), not the
#' actual dataset generated by SAVEDATA. This function reads information about the dataset from the .out file
#' and loads the dataset accordingly.
#'
#' @param outfile Required. The name of the Mplus output file to read. Can be an absolute or relative path.
#' If \code{outfile} is a relative path or just the filename, then it is assumed that the file resides in
#' the working directory \code{getwd()}.
#' @return A \code{data.frame} containing the analysis dataset generated by the SAVEDATA command.
#' @export
#' @author Michael Hallquist
#' @seealso \code{\link{getSavedata_Fileinfo}}
#' @examples
#' \dontrun{
#' savedat <- getSavedata_Data("C:/Program Files/Mplus/Test Output.out")
#' }
#' @keywords interface
getSavedata_Data <- function(outfile) {
#exposed wrapper for l_getSavedata_Data, which pulls saveData data into data.frame
message("getSavedata_Data has been deprecated. Please use readModels(\"nameofMplusoutfile.out\", what=\"savedata\")$savedata to replicate the old functionality.")
#
# if(!file.exists(outfile)) {
# stop("Cannot locate outfile: ", outfile)
# }
#
# outfiletext <- scan(outfile, what="character", sep="\n", strip.white=FALSE, blank.lines.skip=FALSE, quiet=TRUE)
#
# if (length(outfiletext) == 0) {
# warning("Empty outfile")
# return(NULL)
# }
#
# fileInfo <- l_getSavedata_Fileinfo(outfile, outfiletext)
#
# if (is.null(fileInfo) || all(is.na(fileInfo))) return(NULL)
# else if (is.na(fileInfo[["fileVarWidths"]][1])) return(l_getSavedata_readRawFile(outfile, outfiletext, format="free", fileName=fileInfo[["fileName"]],
# varNames=fileInfo[["fileVarNames"]]))
# else return(l_getSavedata_readRawFile(outfile, outfiletext, format="fixed", fileName=fileInfo[["fileName"]],
# varNames=fileInfo[["fileVarNames"]], varWidths=fileInfo[["fileVarWidths"]]))
}
#' Load the draws from the Bayesian model posterior distribution (SAVEDATA BPARAMETERS) command into an R data.frame
#'
#' This function reads a the BPARAMETERS output file from the Mplus SAVEDATA BPARAMETERS command
#' and returns an R \code{data.frame} object.
#'
#' @note Note that the \code{outfile} parameter should refer to the Mplus output file (.out extension), not the
#' actual dataset generated by SAVEDATA. This function reads information about the dataset from the .out file
#' and loads the dataset accordingly.
#'
#' @param outfile Required. The name of the Mplus output file to read. Can be an absolute or relative path.
#' If \code{outfile} is a relative path or just the filename, then it is assumed that the file resides in
#' the working directory \code{getwd()}.
#' @param discardBurnin Optional. Whether to discard the burn-in phase of each MCMC chain (i.e., the first half).
#'
#' @return A \code{list} containing the draws from the MCMC chains for a Bayesian model that uses the
#' SAVEDATA BPARAMETERS command. Each list element corresponds to a single MCMC chain, as specified by
#' the ANALYSIS: CHAINS syntax in \code{Mplus}. If discardBurnin is \code{FALSE}, then a superordinate list is
#' provided that divides output in terms of burn-in versus valid draw halves of the MCMC chains. For documentation
#' of how \code{Mplus} implements chain convergence checks and MCMC draws, see here: \url{http://www.statmodel.com/download/Bayes3.pdf}.
#'
#' @author Michael Hallquist, Florian Boeing-Messing
#' @export
#' @seealso \code{\link{getSavedata_Fileinfo}}, \code{\link{getSavedata_Data}}
#' @references \url{http://www.statmodel.com/download/Bayes3.pdf}
#' @examples
#' \dontrun{
#' fileInfo <- getSavedata_Data("C:/Program Files/Mplus/Test Output.out")
#' }
#' @keywords interface
getSavedata_Bparams <- function(outfile, discardBurnin=TRUE) {
#exposed wrapper for l_getSavedata_readRawFile, which pulls bayesian parameters into a data.frame
message("getSavedata_Bparams has been deprecated. Please use readModels(\"nameofMplusoutfile.out\", what=\"bparameters\")$bparameters to replicate the old functionality.")
# if(!file.exists(outfile)) {
# stop("Cannot locate outfile: ", outfile)
# }
#
# outfiletext <- scan(outfile, what="character", sep="\n", strip.white=FALSE, blank.lines.skip=FALSE, quiet=TRUE)
#
# if (length(outfiletext) == 0) {
# warning("Empty outfile")
# return(NULL)
# }
#
# fileInfo <- l_getSavedata_Fileinfo(outfile, outfiletext)
#
# if (is.null(fileInfo) || all(is.na(fileInfo))) return(NULL)
# else return(l_getSavedata_Bparams(outfile, outfiletext, fileInfo, discardBurnin=discardBurnin))
}
#' Internal function to load the draws from the Bayesian model posterior distribution
#'
#' To do: add details.
#'
#' @param outfile The Mplus output file to be used.
#' @param outfiletext The contents of the Mplus output file
#' @param fileInfo The file info
#' @param discardBurnin Logical value whether to discard the burnin iterations or not. Defaults to \code{TRUE}.
#' @return A list of class \code{mcmc} and \code{mplus.bparameters}
#' @import coda
#' @keywords internal
l_getSavedata_Bparams <- function(outfile, outfiletext, fileInfo, discardBurnin=TRUE) {
#missing fileInfo
if (is.null(fileInfo)) return(NULL)
bp <- l_getSavedata_readRawFile(outfile, outfiletext, format="free", fileName=fileInfo[["bayesFile"]], varNames=fileInfo[["bayesVarNames"]])
if (is.null(bp)) return(NULL)
#for(i in 1:nrow(bp)) {
# if (i > 1 && bp$Chain.number[i] != bp$Chain.number[i-1]) cat(i, " ", bp$Chain.number[i], "\n")
#}
#logic and code adapted from Florian Boeing-Messing.
#Mplus runs 100 iterations for each chain, and then computes R-hat for each parameter.
#If R-hat drops below a critical value for Potential Scale Reduction ~ 1.1, the chains are treated as converged (the values generated among chains are indistinguishable)
#Then draws from the converged chains are obtained.
#Thus, the second half of each chain contains valid draws, whereas the first half is burn-in
if (!"Chain.number" %in% names(bp)) {
warning("Chain number not in bparameters file. Assuming 1 chain.")
bp$Chain.number <- 1
}
#number of chains
nchains <- max(bp[,"Chain.number"])
# separate out the convergence iterations from other iterations (e.g., factor score computation)
additional_iterations <- list() # tagged on after splitting and parsing convergence iterations
if (!is.null(fileInfo[["bayesIterDetails"]])) {
c_row <- which(fileInfo$bayesIterDetails == "convergence")
if (length(c_row) == 1L) {
# the length of convergence iterations will be nchains * ndraws
offset <- fileInfo$bayesIterDetails$end[c_row]*(nchains - 1)
# since we have nchains * ndraws for convergence, we need to add the offset to cover the difference
other_iter <- fileInfo$bayesIterDetails[-c_row,,drop=FALSE]
other_iter$start <- other_iter$start + offset
other_iter$end <- other_iter$end + offset
if (nrow(other_iter) > 0L) {
for (ii in seq_len(nrow(other_iter))) {
additional_iterations[[other_iter$type[ii]]] <- bp[other_iter$start[ii]:other_iter$end[ii],,drop=FALSE]
}
}
# subset the main object down to just convergence draws
bp <- bp[fileInfo$bayesIterDetails$start[c_row]:(fileInfo$bayesIterDetails$end[c_row]*nchains),,drop=FALSE]
}
}
#sort by chain number for separate burn-in and valid draws
bp <- bp[sort.list(bp[,"Chain.number"]),]
#Each section contains two halves (burn-in and valid draws)
nsections <- 2*nchains
lengthsec <- nrow(bp)/nsections
ind <- rep(rep(c(FALSE, TRUE), each=lengthsec), nchains)
bp$Burnin <- factor(ind, levels=c(FALSE, TRUE), labels=c("burn_in", "valid_draw"))
#convert to mcmc.list object to be usable with coda
if (nchains > 1) {
#only keep numeric columns (required by mcmc objects)
#divide into burn-in versus draw
bp.burnin <- split(bp[,sapply(bp, is.numeric)], list(bp$Burnin))
#divide the draws by chain and convert to mcmc list
bp.split <- lapply(bp.burnin, function(bpsub) {
mcmc.list(lapply(split(bpsub[,sapply(bpsub, is.numeric)], list(bpsub$Chain.number)), mcmc))
})
} else {
#just one chain, so divide into burn-in versus draw
bp.split <- lapply(split(bp[,sapply(bp, is.numeric)], list(bp$Burnin)), mcmc)
}
# Small logical inconsistency introduced here with additional iterations (e.g., factor scores)
# Even if we discard the burn-in, should we also discard any other iterations? The discard is intended
# to drop the return to a data.frame/mcmc object, not a list, so I'm sticking with that for now.
if (discardBurnin) {
bp.split <- bp.split[["valid_draw"]]
} else {
bp.split <- c(bp.split, additional_iterations)
}
class(bp.split) <- c(class(bp.split), "mplus.bparameters")
return(bp.split)
}
#' Internal function to load the draws from the Bayesian model posterior distribution
#'
#' This function is used by other extractor functions that read particular types of
#' saved data such as factor scores or iterations from an MCMC chain.
#'
#' @param outfile The Mplus output file to be used.
#' @param outfiletext The contents of the Mplus output file
#' @param format A character string indicating the format. Defaults to \dQuote{fixed}.
#' @param fileName The name of the file
#' @param varNames The names of the variables to extract, comes from the fileInfo
#' @param varWidths The widths of the variables to extract, comes from the fileInfo
#' @param input list of parsed Mplus input section extracted upstream in readModels
#' @return A data frame of the extracted data.
#' @keywords internal
#' @importFrom utils read.table read.fwf
l_getSavedata_readRawFile <- function(outfile, outfiletext, format="fixed", fileName, varNames, varWidths, input) {
outfileDirectory <- splitFilePath(outfile)$directory
#if file requested is missing, then abort data pull
if (isEmpty(fileName) || isEmpty(varNames)) return(NULL)
savedataSplit <- splitFilePath(fileName)
#if outfile target directory is non-empty, but savedataFile is without directory, then append
#outfile directory to savedataFile. This ensures that R need not be in the working directory
#to read the savedataFile. But if savedataFile has an absolute directory, don't append
#if savedata directory is present and absolute, or if no directory in outfile, just use filename as is
if (!is.na(savedataSplit$directory) && savedataSplit$absolute) {
savedataFile <- fileName #just use savedata filename if has absolute path
} else if (is.na(outfileDirectory)) {
savedataFile <- fileName #just use savedata filename if outfile is missing path (working dir)
} else {
savedataFile <- file.path(outfileDirectory, fileName) #savedata path is relative or absent and outfile dir is present: append
}
#cat("Outfile dir: ", outfileDirectory, "\n")
#cat("Savedata directory: ", savedataSplit$directory, "\n")
#cat("concat result: ", savedataFile, "\n")
#need to read as fixed width format given the way Mplus left-aligns missing vals (*)
#dataset <- read.table(file=file.path(path, fileInfo$fileName), header=FALSE,
# na.strings="*", col.names=fileInfo$varNames)
if (format == "free") {
#handle case where filename contains * indicating Monte Carlo (MC) or multiple imputation (MI) dataset with reps
if (grepl("\\*", savedataSplit$filename, perl=TRUE)) {
#N.B. Mplus does not output the directory to which the MC/MI replications are saved.
#Thus, the file name for the "Save file" section in "SAVEDATA INFORMATION" is always just the filename alone.
#Using MONTECARLO: SAVE = or DATA IMPUTATION: SAVE = in the input instructions, one can specify a relative or
#absolute path to the saved files. If this is set, then the above logic for determining
#where to look for saved data should be overridden. In particular, we should use
#the path from MONTECARLO: SAVE or DATA IMPUTATION: SAVE = and ignore where the output file is located.
#if save command is present in montecarlo section and there is a directory specified, then use that.
#resplit now that outfileDir and savedataDir are assembled to setup wildcard matching using list.files.
split.above <- splitFilePath(savedataFile) #get info the directory for the output file, as determined above
if (!is.null(input$montecarlo$save)) {
mcmi.savesplit <- splitFilePath(input$montecarlo$save)
} else if (!is.null(input$data.imputation$save)) {
mcmi.savesplit <- splitFilePath(input$data.imputation$save)
} else {
mcmi.savesplit <- NULL
}
if (!is.null(mcmi.savesplit) &&
!is.na(mcmi.savesplit$directory)) {
if (mcmi.savesplit$absolute) {
resplit <- splitFilePath(file.path(mcmi.savesplit$directory, split.above$filename)) #concat absolute montecarlo: save dir with rep filename
} else {
#mc save directory is present, but relative
#thus, need to combine it with directory above
if (!is.na(split.above$directory)) {
resplit <- splitFilePath(file.path(split.above$directory, mcmi.savesplit$directory, split.above$filename))
}
}
} else {
resplit <- splitFilePath(savedataFile)
}
#patch file pattern to match perl syntax
pat <- gsub("\\.", "\\\\.", resplit$filename, perl=TRUE)
pat <- gsub("\\*", "\\\\d+", pat, perl=TRUE)
fileNames <- list.files(path=resplit$directory, pattern=pat, full.names=FALSE) #very klunky way to handle this
fileList <- list.files(path=resplit$directory, pattern=pat, full.names=TRUE)
dataset <- list()
if (length(fileList) == 0L) {
#function was generating error when files could not be found
warning("Unable to locate SAVEDATA files for filename: ", resplit$filename)
} else {
for (f in 1:length(fileList)) {
dataset[[make.names(fileNames[f])]] <- read.table(file=fileList[f], header=FALSE,
na.strings="*", col.names=varNames, strip.white=TRUE)
}
}
} else {
dataset <- read.table(file=savedataFile, header=FALSE, na.strings="*", strip.white=TRUE)
if (length(varNames) > ncol(dataset)) {
warning("Number of variable names for Bayesian Parameters section exceeds number of columns in: ", savedataFile)
varNames <- varNames[1:ncol(dataset)] #heuristically, just using columns names up to the last column in the data
}
names(dataset) <- varNames
}
}
else if (format == "fixed") {
if (!length(varWidths) > 0) stop("Fixed width file specified, but no widths obtained.")
#strip.white is necessary for na.strings to work effectively with fixed width fields
#otherwise would need something like "* " for na.strings
dataset <- read.fwf(file=savedataFile, widths=varWidths, header=FALSE,
na.strings="*", col.names=varNames, strip.white=TRUE)
}
return(dataset)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.