R/parseOutput.R
In MplusAutomation: An R Package for Facilitating Large-Scale Latent Variable Analyses in Mplus

Documented in divideIntoFields extractClassCounts extractDataSummary extractFacScoreStats extractFreeFile extractInput_1file extractModelSummaries extractResiduals extractResiduals_1section extractSampstat extractSummaries_1file extractSummaries_1plan extractSummaries_1section extractTech1 extractTech10 extractTech12 extractTech15 extractTech3 extractTech4 extractTech7 extractTech8 extractTech9 extractValue extractWarningsErrors_1file matrixExtract unlabeledMatrixExtract

#' Extract values from Mplus output
#' An internal function used by extractSummaries_1file to extract
#' parameters from the output file using regular expressions.
#'
#' @param pattern the exact text to be matched in the outfile that identifies the parameter of interest
#' @param textToScan the chunk of Mplus output to be parsed, passed as a vector of character strings (from the scan command).
#' @param filename the name of the file containing textToScan. Used to make more intelligible warning messages.
#' @param type the data type of the parameter, which determines the regexp used. Currently can be \dQuote{int}, \dQuote{dec}, \dQuote{str}, or \dQuote{calc}. Defaults to \dQuote{int}.
#' @return A string or numeric vector
#' @keywords internal
#' @examples
#' #make me!!!
extractValue <- function(pattern, textToScan, filename, type="int") {
  #regex pattern now allows for specification to search for value on some line before or after match
  #example: +2:the Observed and the Replicated Chi-Square Values
  
  offset <- 0
  if (grepl("^[+-]+\\d+:.*$", pattern, perl=TRUE)) {
    offset <- as.numeric(sub("^([+-]+\\d+):.*$", "\\1", pattern, perl=TRUE))
    pattern <- sub("^[+-]+\\d+:(.*)$", "\\1", pattern, perl=TRUE) #chop offset
  }
  
  #locate the matching line in the output file
  matchpos <- grep(pattern, textToScan, ignore.case=TRUE)
  matchlines <- textToScan[(matchpos+offset)]
  
  if (length(matchlines) > 1) {
    stop("More than one match found for parameter: ", pattern, "\n  ", filename)
    #return(matchlines) #not sure what I was thinking here... seems better to stop than warn and return lines
  }
  else if (length(matchlines) == 0) {
    #if the parameter of interest not found in this file, then return NA
    #warning(paste("Parameter not found: ", pattern, "\n  ", filename, sep=""))
    if (type == "int") return(NA_integer_)
    else if (type == "dec") return(NA_real_)
    else if (type == "str") return(NA_character_)
  }
  
  #different idea: concatenate pattern with var type and match on that
  #then sub just the pattern part from the larger line
  
  typePrefix <- substr(type, 1, 3)
  
  if (typePrefix == "int") {
    regexp <- "-*\\d+" #optional negative sign in front
  }
  else if (typePrefix == "dec") {
    #regexpr: -*\\d+\\.\\d+ : -* optional negative sign, \\d+ match at least one digit \\. match decimal sign \\d+ match decimal digits
    regexp <- "-*\\d+\\.\\d+"
  }
  else if (typePrefix == "str") {
    regexp <- paste(pattern, ".*", sep="")
  }
  
  #locate the match
  valueMatches <- gregexpr(regexp, matchlines[1], perl=TRUE)[[1]]
  
  if (type == "str") {
    #remove the tag portion of the string (e.g., "title:"), retaining rest of line
    returnVal <- as.character(sub(pattern, "", matchlines[1], ignore.case=TRUE))
  }
  else {
    #excessively tight syntax: replace dec[15] with 15, if number at end of type. Otherwise return just "dec".
    #then grep result for only numeric characters (\\d+). If grep is false (i.e., no numerals in substitution,
    #then no index was specified in type, so type must be simply "dec", "int", or "str" (as opposed to "int[15]"), so set as 1
    if (!grepl("^\\d+$", whichMatch <- sub("^.*\\[(\\d+)\\]$", "\\1", type, perl=TRUE), perl=TRUE)) whichMatch <- 1
    else whichMatch <- as.numeric(whichMatch)
    
    #pull from the start of the match through match.length, which is the length of characters that matched
    #need to subtract one from the start + length offset to grab the correct number of characters
    #(e.g., if the match runs from 40-44, the start will be 40, with length 5, but 40 + 5 would be 6 characters, hence -1
    returnVal <- as.numeric(substr(matchlines[1], valueMatches[whichMatch], valueMatches[whichMatch] + attr(valueMatches, "match.length")[whichMatch] - 1))
    
  }
  
  return(returnVal)
}

#' Worker function used in extractSummaries_1section
#'
#' @param arglist The argument list
#' @param sectionHeaders A character vector with headers for each section of interest
#' @param sectionFields is a list of data.frames where each data.frame specifies the fields to be extracted for that section
#' @param textToParse The text to parse
#' @param filename The filename
#' @return A list
#' @keywords internal
#' @examples
#' # make me!!!
extractSummaries_1plan <- function(arglist, sectionHeaders, sectionFields, textToParse, filename) {
  #make this a more generic function that accepts headers and fields in case it is useful outside the MODEL FIT section
  if (length(sectionHeaders) < 1) stop("No section headers provided.")
  if (length(sectionHeaders) != length(sectionFields)) stop("Section headers and section fields have different lengths.")
  
  #multiple sections
  for (header in 1:length(sectionHeaders)) {
    #a blank section header indicates to match anywhere in the textToParse
    if (sectionHeaders[header] == "") {
      sectionText <- textToParse
    } else {
      #could be pretty inefficient if the same section header is repeated several times.
      #could build a list with divided output and check whether a section is present in the list before extracting
      sectionText <- getMultilineSection(sectionHeaders[header], textToParse, filename)
    }
    
    #process all fields for this section
    sectionFieldDF <- sectionFields[[header]]
    
    for (i in 1:nrow(sectionFieldDF)) {
      thisField <- sectionFieldDF[i,]
      
      #Check whether this field already exists and is not missing. If so, skip the extraction.
      #This was initially setup because of Tech 14 section changes where the number of final stage optimizations is different from v6 to v7.
      if (!thisField$varName %in% names(arglist) || is.na(arglist[[ thisField$varName ]])) {
        arglist[[ thisField$varName ]] <- extractValue(pattern=thisField$regexPattern, sectionText, filename, type=thisField$varType)
      }
    }
  }
  
  return(arglist)
  
}

#' Extract summary information for one section from Mplus output
#'
#' Function to extract model fit statistics from a section, wrapped to allow for multiple fit sections, as in EFA files.
#' Calls \code{extractSummaries_1plan}
#'
#' @param modelFitSection The fit information section
#' @param arglist The argument list
#' @param filename The file name
#' @return The argument list
#' @keywords internal
#' @examples
#' # make me!!!
extractSummaries_1section <- function(modelFitSection, arglist, filename, input=list()) {
  
  #DATA IMPUTATION outputs sometimes use the Mean/SD output (I believe in Mplus v6.12 and perhaps v7)
  #In Mplus v8, Model fit statistics are output as usual (e.g., ex11.6.out).
  #This is confusing, so we should just test for the Mean/SD output here and use the MI-type output if found
  useMIHeadings <- FALSE
  if (!is.null(input$data.imputation)) {
    header <- "Chi-Square Test of Model Fit"
    fields <- list(data.frame(
        varName=c("ChiSqM_DF", "ChiSqM_Mean", "ChiSqM_SD", "ChiSqM_NumComputations"),
        regexPattern=c("Degrees of Freedom", "Mean", "Std Dev", "Number of successful computations"),
        varType=c("int", "dec", "dec", "int"), stringsAsFactors=FALSE))
    
    test <- extractSummaries_1plan(arglist, header, fields, modelFitSection, filename)
    if (!is.na(test$ChiSqM_Mean)) {
      useMIHeadings <- TRUE
    }
  }
  
  #MI and Montecarlo data types have fundamentally different output (means and sds per fit stat)
  if (useMIHeadings || grepl("imputation", arglist$DataType, ignore.case=TRUE) || grepl("montecarlo", arglist$DataType, ignore.case=TRUE)) {
    modelFitSectionHeaders <- c(
      "", #section-nonspecific parameters
      "Chi-Square Test of Model Fit",
#      "Chi-Square Test of Model Fit for the Baseline Model",
      "Loglikelihood::H0 Value",
      "Loglikelihood::H1 Value",
      "CFI/TLI::CFI",
      "CFI/TLI::TLI",
      "Bayesian Posterior Predictive Checking using Chi-Square::Posterior Predictive P-Value",
      "Bayesian Prior Posterior Predictive Checking using Chi-Square::Prior Posterior Predictive P-Value",
      "Information Criteria( Including the Auxiliary Part)*::Akaike \\(AIC\\)",
      "Information Criteria( Including the Auxiliary Part)*::Bayesian \\(BIC\\)",
      "Information Criteria( Including the Auxiliary Part)*::Sample-Size Adjusted BIC \\(n\\* = \\(n \\+ 2\\) / 24\\)",
      "RMSEA \\(Root Mean Square Error Of Approximation\\)",
      "WRMR \\(Weighted Root Mean Square Residual\\)",
      "Information Criteri(a|on)::Deviance \\(DIC\\)", #
      "Information Criteri(a|on)::Estimated Number of Parameters \\(pD\\)",
      "Information Criteri(a|on)::Bayesian \\(BIC\\)"
    )
    modelFitSectionFields <- list(
      data.frame(
        varName=c("Parameters"), #defined outside of information criteria section for non-ML estimators
        regexPattern=c("^Number of Free Parameters"),
        varType=c("int"), stringsAsFactors=FALSE
      ),
      data.frame(
        varName=c("ChiSqM_DF", "ChiSqM_Mean", "ChiSqM_SD", "ChiSqM_NumComputations"),
        regexPattern=c("Degrees of Freedom", "Mean", "Std Dev", "Number of successful computations"),
        varType=c("int", "dec", "dec", "int"), stringsAsFactors=FALSE
      ),
#        data.frame(
#            varName=c("ChiSqBaseline_Value", "ChiSqBaseline_DF", "ChiSqBaseline_PValue"),
#            regexPattern=c("Value", "Degrees of Freedom", "^P-Value"),
#            varType=c("dec", "int", "dec"), stringsAsFactors=FALSE
#        ),
      data.frame(
        varName=c("LL_Mean", "LL_SD", "LL_NumComputations"),
        regexPattern=c("Mean", "Std Dev", "Number of successful computations"),
        varType=c("dec", "dec", "int"), stringsAsFactors=FALSE
      ),
      data.frame(
        varName=c("UnrestrictedLL_Mean", "UnrestrictedLL_SD", "UnrestrictedLL_NumComputations"),
        regexPattern=c("Mean", "Std Dev", "Number of successful computations"),
        varType=c("dec", "dec", "int"), stringsAsFactors=FALSE
      ),
      data.frame(
        varName=c("CFI_Mean", "CFI_SD", "CFI_NumComputations"),
        regexPattern=c("Mean", "Std Dev", "Number of successful computations"),
        varType=c("dec", "dec", "int"), stringsAsFactors=FALSE
      ),
      data.frame(
        varName=c("TLI_Mean", "TLI_SD", "TLI_NumComputations"),
        regexPattern=c("Mean", "Std Dev", "Number of successful computations"),
        varType=c("dec", "dec", "int"), stringsAsFactors=FALSE
      ),
      data.frame(
        varName=c("PostPred_PValue_Mean", "PostPred_PValue_SD", "PostPred_PValue_NumComputations"),
        regexPattern=c("Mean", "Std Dev", "Number of successful computations"),
        varType=c("dec", "dec", "int"), stringsAsFactors=FALSE
      ),
      data.frame(
        varName=c("PriorPostPred_PValue_Mean", "PriorPostPred_PValue_SD", "PriorPostPred_PValue_NumComputations"),
        regexPattern=c("Mean", "Std Dev", "Number of successful computations"),
        varType=c("dec", "dec", "int"), stringsAsFactors=FALSE
      ),
      data.frame(
        varName=c("AIC_Mean", "AIC_SD", "AIC_NumComputations"),
        regexPattern=c("Mean", "Std Dev", "Number of successful computations"),
        varType=c("dec", "dec", "int"), stringsAsFactors=FALSE
      ),
      data.frame(
        varName=c("BIC_Mean", "BIC_SD", "BIC_NumComputations"),
        regexPattern=c("Mean", "Std Dev", "Number of successful computations"),
        varType=c("dec", "dec", "int"), stringsAsFactors=FALSE
      ),
      data.frame(
        varName=c("aBIC_Mean", "aBIC_SD", "aBIC_NumComputations"),
        regexPattern=c("Mean", "Std Dev", "Number of successful computations"),
        varType=c("dec", "dec", "int"), stringsAsFactors=FALSE
      ),
      data.frame(
        varName=c("RMSEA_Mean", "RMSEA_SD", "RMSEA_NumComputations"),
        regexPattern=c("Mean", "Std Dev", "Number of successful computations"),
        varType=c("dec", "dec", "int"), stringsAsFactors=FALSE
      ),
      data.frame(
        varName=c("WRMR_Mean", "WRMR_SD", "WRMR_NumComputations"),
        regexPattern=c("Mean", "Std Dev", "Number of successful computations"),
        varType=c("dec", "dec", "int"), stringsAsFactors=FALSE
      ),
      data.frame( #Information Criterion:: DIC
        varName=c("DIC_Mean", "DIC_SD", "DIC_NumComputations"),
        regexPattern=c("Mean", "Std Dev", "Number of successful computations"),
        varType=c("dec", "dec", "int"), stringsAsFactors=FALSE
      ),
      data.frame( #Information Criterion:: Estimated number of parameters (pD)
        varName=c("pD_Mean", "pD_SD", "pD_NumComputations"),
        regexPattern=c("Mean", "Std Dev", "Number of successful computations"),
        varType=c("dec", "dec", "int"), stringsAsFactors=FALSE
      ),
      data.frame( #Information Criterion:: Bayesian (BIC) -- sometimes within Information Criterion, sometimes Information Criteria (above)...
        varName=c("BIC_Mean", "BIC_SD", "BIC_NumComputations"),
        regexPattern=c("Mean", "Std Dev", "Number of successful computations"),
        varType=c("dec", "dec", "int"), stringsAsFactors=FALSE
      )
    )
    
    #handle two-level models, which return separate srmr for between vs. within
    if (grepl("twolevel", arglist$AnalysisType, ignore.case=TRUE)) {
      modelFitSectionHeaders <- append(modelFitSectionHeaders, c(
          "SRMR \\(Standardized Root Mean Square Residual\\) for the WITHIN level",
          "SRMR \\(Standardized Root Mean Square Residual\\) for the BETWEEN level"))
      
      modelFitSectionFields <- c(modelFitSectionFields,
        list(data.frame(
            varName=c("SRMR.Within_Mean", "SRMR.Within_SD", "SRMR.Within_NumComputations"),
            regexPattern=c("Mean", "Std Dev", "Number of successful computations"),
            varType=c("dec", "dec", "int"), stringsAsFactors=FALSE
          ),
          data.frame(
            varName=c("SRMR.Between_Mean", "SRMR.Between_SD", "SRMR.Between_NumComputations"),
            regexPattern=c("Mean", "Std Dev", "Number of successful computations"),
            varType=c("dec", "dec", "int"), stringsAsFactors=FALSE
          ))
      )
    } else {
      modelFitSectionHeaders <- append(modelFitSectionHeaders, "SRMR \\(Standardized Root Mean Square Residual\\)")
      modelFitSectionFields <- c(modelFitSectionFields,
        list(data.frame(
            varName=c("SRMR_Mean", "SRMR_SD", "SRMR_NumComputations"),
            regexPattern=c("Mean", "Std Dev", "Number of successful computations"),
            varType=c("dec", "dec", "int"), stringsAsFactors=FALSE
          ))
      )
      
    }
    
  }
  else { #not imputation or monte carlo output
    modelFitSectionHeaders <- c(
      "", #section-inspecific parameters
      "Chi-Square Test of Model Fit",
      "Chi-Square Test of Model Fit for the Baseline Model",
      "{+3i}Chi-Square Test of Model Fit for the Binary and Ordered Categorical::{+2b}Pearson Chi-Square", #chi-square header spans two lines, so +3i
      "{+3i}Chi-Square Test of Model Fit for the Binary and Ordered Categorical::{+2b}Likelihood Ratio Chi-Square",
      "Chi-Square Test for MCAR under the Unrestricted Latent Class Indicator Model::{+2b}Pearson Chi-Square", #use blank line to find pearson within section
      "Chi-Square Test for MCAR under the Unrestricted Latent Class Indicator Model::{+2b}Likelihood Ratio Chi-Square",
      "Chi-Square Test for Difference Testing",
      "Loglikelihood( Including the Auxiliary Part)*",
      "CFI/TLI",
      "Information Criteria( Including the Auxiliary Part)*",
      "Information Criteria Including the Auxiliary Part",
      "RMSEA \\(Root Mean Square Error Of Approximation\\)",
      "WRMR \\(Weighted Root Mean Square Residual\\)",
      "Bayesian Posterior Predictive Checking using Chi-Square",
      "Information Criterion", #somehow singular for bayes output?
      "Wald Test of Parameter Constraints"
    )
    modelFitSectionFields <- list(
      data.frame(
        varName=c("Parameters"), #defined outside of information criteria section for non-ML estimators
        regexPattern=c("^Number of Free Parameters"), #only match beginning of line (aux section has its own indented variant)
        varType=c("int"), stringsAsFactors=FALSE
      ),
      data.frame(
        varName=c("ChiSqM_Value", "ChiSqM_DF", "ChiSqM_PValue", "ChiSqM_ScalingCorrection"),
        regexPattern=c("^\\s*Value", "Degrees of Freedom", "^\\s*P-Value", "Scaling Correction Factor"),
        varType=c("dec", "int", "dec", "dec"), stringsAsFactors=FALSE
      ),
      data.frame(
        varName=c("ChiSqBaseline_Value", "ChiSqBaseline_DF", "ChiSqBaseline_PValue"),
        regexPattern=c("^\\s*Value", "Degrees of Freedom", "^\\s*P-Value"),
        varType=c("dec", "int", "dec"), stringsAsFactors=FALSE
      ),
      data.frame(
        varName=c("ChiSqCategoricalPearson_Value", "ChiSqCategoricalPearson_DF", "ChiSqCategoricalPearson_PValue"),
        regexPattern=c("^\\s*Value", "Degrees of Freedom", "^\\s*P-Value"),
        varType=c("dec", "int", "dec"), stringsAsFactors=FALSE
      ),
      data.frame(
        varName=c("ChiSqCategoricalLRT_Value", "ChiSqCategoricalLRT_DF", "ChiSqCategoricalLRT_PValue"),
        regexPattern=c("^\\s*Value", "Degrees of Freedom", "^\\s*P-Value"),
        varType=c("dec", "int", "dec"), stringsAsFactors=FALSE
      ),
      data.frame(
        varName=c("ChiSqMCARUnrestrictedPearson_Value", "ChiSqMCARUnrestrictedPearson_DF", "ChiSqMCARUnrestrictedPearson_PValue"),
        regexPattern=c("^\\s*Value", "Degrees of Freedom", "^\\s*P-Value"),
        varType=c("dec", "int", "dec"), stringsAsFactors=FALSE
      ),
      data.frame(
        varName=c("ChiSqMCARUnrestrictedLRT_Value", "ChiSqMCARUnrestrictedLRT_DF", "ChiSqMCARUnrestrictedLRT_PValue"),
        regexPattern=c("^\\s*Value", "Degrees of Freedom", "^\\s*P-Value"),
        varType=c("dec", "int", "dec"), stringsAsFactors=FALSE
      ),
      data.frame(
        varName=c("ChiSqDiffTest_Value", "ChiSqDiffTest_DF", "ChiSqDiffTest_PValue"),
        regexPattern=c("^\\s*Value", "Degrees of Freedom", "^\\s*P-Value"),
        varType=c("dec", "int", "dec"), stringsAsFactors=FALSE
      ),
      data.frame(
        varName=c("LL", "UnrestrictedLL", "LLCorrectionFactor", "UnrestrictedLLCorrectionFactor"),
        regexPattern=c("H0 Value", "H1 Value", "H0 Scaling Correction Factor", "H1 Scaling Correction Factor"),
        varType=c("dec", "dec", "dec", "dec"), stringsAsFactors=FALSE
      ),
      data.frame(
        varName=c("CFI", "TLI"),
        regexPattern=c("CFI", "TLI"),
        varType=c("dec", "dec"), stringsAsFactors=FALSE
      ),
      data.frame( #Information Criteria (v8 now includes DIC and pD here)
        varName=c("AIC", "BIC", "aBIC", "DIC", "pD"),
        regexPattern=c("Akaike \\(AIC\\)", "Bayesian \\(BIC\\)", "Sample-Size Adjusted BIC", "Deviance \\(DIC\\)", "Estimated Number of Parameters \\(pD\\)"),
        varType=c("dec", "dec", "dec", "dec", "dec"), stringsAsFactors=FALSE
      ),
      data.frame(
        varName=c("ParametersWithAux"),
        regexPattern=c("Number of Free Parameters"),
        varType=c("int"), stringsAsFactors=FALSE
      ),
      data.frame(
        varName=c("RMSEA_Estimate", "RMSEA_90CI_LB", "RMSEA_90CI_UB", "RMSEA_pLT05"),
        regexPattern=c("Estimate", "90 Percent C.I.", "90 Percent C.I.", "Probability RMSEA <= .05"),
        varType=c("dec", "dec[1]", "dec[2]", "dec"), stringsAsFactors=FALSE
      ),
      data.frame(
        varName=c("WRMR"),
        regexPattern=c("Value"),
        varType=c("dec"), stringsAsFactors=FALSE
      ),
      data.frame( #Bayesian Posterior Predictive Checking using Chi-Square
        varName=c("ObsRepChiSqDiff_95CI_LB", "ObsRepChiSqDiff_95CI_UB", "PostPred_PValue", "PriorPostPred_PValue"),
        regexPattern=c("+2:the Observed and the Replicated Chi-Square Values", "+2:the Observed and the Replicated Chi-Square Values", "^\\s*Posterior Predictive P-Value", "Prior Posterior Predictive P-Value"),
        varType=c("dec[1]", "dec[2]", "dec", "dec"), stringsAsFactors=FALSE
      ),
      data.frame( #Information Criterion (singular name under Mplus Bayes v7. Corrected to "Criteria" in v8)
        varName=c("DIC", "pD", "BIC"),
        regexPattern=c("Deviance \\(DIC\\)", "Estimated Number of Parameters \\(pD\\)", "Bayesian \\(BIC\\)"), #sometimes BIC is listed here (e.g., MI Bayes output)
        varType=c("dec", "dec", "dec"), stringsAsFactors=FALSE
      ),
      data.frame( #Wald Test of Parameter Constraints
        varName=c("WaldChiSq_Value", "WaldChiSq_DF", "WaldChiSq_PValue"),
        regexPattern=c("^\\s*Value", "Degrees of Freedom", "^\\s*P-Value"),
        varType=c("dec", "int", "dec"), stringsAsFactors=FALSE
      )
    )
    
    if (grepl("twolevel", arglist$AnalysisType, ignore.case=TRUE)) {
      modelFitSectionHeaders <- append(modelFitSectionHeaders, "SRMR \\(Standardized Root Mean Square Residual\\)")
      
      modelFitSectionFields <- c(modelFitSectionFields,
        list(data.frame(
            varName=c("SRMR.Within", "SRMR.Between"),
            regexPattern=c("Value for Within", "Value for Between"),
            varType=c("dec", "dec"), stringsAsFactors=FALSE
          ))
      )
    } else if (grepl("threelevel", arglist$AnalysisType, ignore.case=TRUE)) {
      modelFitSectionHeaders <- append(modelFitSectionHeaders, "SRMR \\(Standardized Root Mean Square Residual\\)")
      
      modelFitSectionFields <- c(modelFitSectionFields,
        list(data.frame(
            varName=c("SRMR.Within", "SRMR.Between.L2", "SRMR.Between.L3"),
            regexPattern=c("Value for Within", "Value for Between Level 2", "Value for Between Level 3"),
            varType=c("dec", "dec", "dec"), stringsAsFactors=FALSE
          ))
      )
    } else {
      
      modelFitSectionHeaders <- append(modelFitSectionHeaders, "SRMR \\(Standardized Root Mean Square Residual\\)")
      
      #append two lists together
      modelFitSectionFields <- c(modelFitSectionFields,
        list(data.frame(
            varName=c("SRMR"),
            regexPattern=c("Value"),
            varType=c("dec"), stringsAsFactors=FALSE
          )
        ))
    }
  }
  
  arglist <- extractSummaries_1plan(arglist, modelFitSectionHeaders, modelFitSectionFields, modelFitSection, filename)
  return(arglist)
}


#' Divide text into fields
#'
#' Helper function to divide an input section into key-value pair list taken from mplus2lavaan
#'
#' @param section.text The section text
#' @param required Required sections
#' @return Divided sections
#' @keywords internal
#' @examples
#' # make me!!!
divideIntoFields <- function(section.text, required) {
  
  if (is.null(section.text)) { return(NULL) }
  section.split <- strsplit(paste(section.text, collapse=" "), ";", fixed=TRUE)[[1]]
  
  section.divide <- list()
  
  for (cmd in section.split) {
    if (grepl("^\\s*!.*", cmd, perl=TRUE)) next #skip comment lines
    if (grepl("^\\s+$", cmd, perl=TRUE)) next #skip blank lines
    
    #mplus is apparently tolerant of specifications that don't include IS/ARE/=
    #example: usevariables x1-x10;
    #thus, split on spaces and assume that first element is lhs, drop second element if IS/ARE/=, and assume remainder is rhs
    
    #but if user uses equals sign, then spaces will not always be present (e.g., usevariables=x1-x10)
    if ( (leadingEquals <- regexpr("^\\s*[A-Za-z]+[A-Za-z_\\-\\d]*\\s*(=)", cmd[1L], perl=TRUE))[1L] > 0) {
      cmdName <- trimSpace(substr(cmd[1L], 1, attr(leadingEquals, "capture.start") - 1))
      cmdArgs <- trimSpace(substr(cmd[1L], attr(leadingEquals, "capture.start") + 1, nchar(cmd[1L])))
    } else {
      cmd.spacesplit <- strsplit(trimSpace(cmd[1L]), "\\s+", perl=TRUE)[[1L]]
      
      if (length(cmd.spacesplit) < 2L) {
        #for future: make room for this function to parse things like just TECH13 (no rhs)
      } else {
        cmdName <- trimSpace(cmd.spacesplit[1L])
        if (length(cmd.spacesplit) > 2L && tolower(cmd.spacesplit[2L]) %in% c("is", "are")) {
          cmdArgs <- paste(cmd.spacesplit[3L:length(cmd.spacesplit)], collapse=" ") #remainder, removing is/are
        } else {
          cmdArgs <- paste(cmd.spacesplit[2L:length(cmd.spacesplit)], collapse=" ") #is/are not used, so just join rhs
        }
      }
      
    }
    
    section.divide[[make.names(tolower(cmdName))]] <- cmdArgs
    
  }
  
  if (!missing(required)) { stopifnot(all(required %in% names(section.divide))) }
  return(section.divide)
}

#' Extract warnings and errors from 1 mplus file
#'
#' Helper function
#'
#' @param outfiletext The text of the output file
#' @param filename The filename
#' @param input The input
#' @return A list with two elements
#'   \item{errors}{Mplus Errors}
#'   \item{warnings}{Mplus Warnings}
#' @keywords internal
#' @examples
#' # make me!!!
extractWarningsErrors_1file <- function(outfiletext, filename, input) {
  warnerr <- list(warnings = list(), errors = list())
  class(warnerr$errors) <- c("mplus.errors", "list")
  class(warnerr$warnings) <- c("mplus.warnings", "list")
  if (!inherits(input, "mplus.inp")) {
    warning("Could not identify warnings and errors; input is not of class mplus.inp")
    return(warnerr)
  }
  if (is.null(attr(input, "start.line")) || is.null(attr(input, "end.line")) ||
    attr(input, "start.line") < 0L || attr(input, "end.line") < 0L) {
    warning("Could not identify bounds of input section: ", filename)
    return(warnerr)
  }
  
  #handle input warnings and errors first
  startInputWarnErr <- attr(input, "end.line") + 1L #first eligible line is after input section
  endInputWarnErr <- grep("^\\s*(INPUT READING TERMINATED NORMALLY|\\*\\*\\* WARNING.*|\\d+ (?:ERROR|WARNING)\\(S\\) FOUND IN THE INPUT INSTRUCTIONS|\\*\\*\\* ERROR.*)\\s*$", outfiletext, ignore.case=TRUE, perl=TRUE)
  

# Check for fatal errors --------------------------------------------------

  fatalerror <- grep("FATAL ERROR", outfiletext, fixed = T)
  if(isTRUE(length(fatalerror) > 0)){
    if(fatalerror > tail(endInputWarnErr, 1)){
      endInputWarnErr <- c(endInputWarnErr, length(outfiletext))
    }
  }
  w <- 1 #counters for warnings and errors lists
  e <- 1
  
  #only process section if end was identified properly
  if (length(endInputWarnErr) > 0L) {
    #The above will match all of the possible relevant lines.
    #To identify input warnings/errors section, need to go to first blank line after the final warning or error. (look in next 100 lines)
    lastWarn <- endInputWarnErr[length(endInputWarnErr)]
    blank <- which(outfiletext[lastWarn:(lastWarn + 100 )] == "")[1L] + lastWarn - 1
    if(is.na(blank)) blank <- length(outfiletext)+1
    warnerrtext <- outfiletext[startInputWarnErr[1L]:(blank-1)]
    
    lines <- friendlyGregexpr("^\\s*(\\*\\*\\* WARNING|\\*\\*\\* ERROR|\\*\\*\\* FATAL ERROR).*\\s*$", warnerrtext, perl=TRUE)
    
    if (!is.null(lines)) {
      for (l in 1:nrow(lines)) {
        if (l < nrow(lines)) {
          warn.err.body <- trimSpace(warnerrtext[(lines[l,"element"] + 1):(lines[l+1,"element"] - 1)])
        } else {
          warn.err.body <- trimSpace(warnerrtext[(lines[l,"element"] + 1):length(warnerrtext)])
        }
        
        if (substr(lines[l,"tag"], 1, 11) == "*** WARNING") {
          warnerr$warnings[[w]] <- warn.err.body
          w <- w + 1
        } else if ((substr(lines[l,"tag"], 1, 9) == "*** ERROR") | (grepl("FATAL ERROR", lines[l,"tag"], fixed = TRUE))) {
          warnerr$errors[[e]] <- warn.err.body
          splittag <- strsplit(lines[l,"tag"], "\\s+", perl=TRUE)[[1L]]
          if (length(splittag) > 3L && splittag[3L] == "in") {
            attr(warnerr$errors[[e]], "section") <- tolower(paste(splittag[4L:(which(splittag == "command") - 1L)], collapse="."))
          }
          e <- e + 1
        } else { stop ("Cannot discern warning/error type: ", lines[l, "tag"]) }
      }
    }
  }
  
  #now handle estimation errors and warnings
  #these fall above either
  #  1) MODEL FIT INFORMATION: model converged with warnings
  #  2) MODEL RESULTS: model did not converge, so no fit statistics produced
  #  3) FINAL CLASS COUNTS (occurs for some mixture models, which report class counts before model results)
  #  4) TESTS OF MODEL FIT (older versions of Mplus)
  #
  # It's harder to determine where the section begins, however, because there is no clear boundary
  # with the preceding section, which is heterogeneous (e.g., sample stats).
  #
  # In the case of warnings only, the estimation warnings section is demarcated by
  # THE MODEL ESTIMATION TERMINATED NORMALLY above and MODEL FIT INFORMATION below.
  #
  # In other cases (maybe dependent on Mplus version), warnings are printed above THE MODEL ESTIMATION TERMINATED NORMALLY.
  # Allow for the possibility that the estimation warnings/errors section begins with WARNING:
  #
  # For failed models, the section likely begins with one of three possibilities:
  #  1) THE MODEL ESTIMATION DID NOT TERMINATE NORMALLY
  #  2) THE LOGLIKELIHOOD DECREASED
  #  3) NO CONVERGENCE
  #
  # Warnings that can potentially be ignored are prefixed by "WARNING: "
  # whereas more serious estimation problems (errors) typically have no prefix.
  #
  # Blank lines indicate a boundary in each message.
  
  #the end sections are more well behaved (esp. if there is Tech 9 output). Identify end first, then constrain start to precede end
  endEstWarnErr <- grep("^\\s*(MODEL FIT INFORMATION|FINAL CLASS COUNTS|MODEL RESULTS|TESTS OF MODEL FIT)\\s*$", outfiletext, ignore.case=TRUE, perl=TRUE)
  
  if (length(endEstWarnErr) == 0L) { return(warnerr) } #unable to find section properly
  startEstWarnErr <- grep("^\\s*(WARNING:.*|THE MODEL ESTIMATION DID NOT TERMINATE NORMALLY.*|THE LOGLIKELIHOOD DECREASED.*|THE MODEL ESTIMATION TERMINATED NORMALLY|NO CONVERGENCE\\.\\s+NUMBER OF ITERATIONS EXCEEDED\\..*)\\s*$",
    outfiletext[1:endEstWarnErr[1L]], ignore.case=TRUE, perl=TRUE)
  
  if (length(startEstWarnErr) > 0L && length(endEstWarnErr) > 0L) {
    warnerrtext <- outfiletext[startEstWarnErr[1L]:(endEstWarnErr[1L] - 1)]
    
    #if the model estimation terminated normally, delete this line from the text to parse (whereas the other start flags indicate a meaningful message)
    if (length(normexit <- grep("^\\s*THE MODEL ESTIMATION TERMINATED NORMALLY\\s*$", warnerrtext, perl=TRUE, ignore.case=TRUE)) > 0L) {
      warnerrtext <- warnerrtext[-normexit]
    }
    
    if (!any(warnerrtext != "")) {
      return(warnerr) #no non-blank lines -- just exit function as is
    }
    
    #trim blank lines from beginning and end of section
    warnerrtext <- warnerrtext[min(which(warnerrtext != "")):max(which(warnerrtext != ""))]
    
    #estimation warnings and errors are separated by blank lines.
    blanks <- which(warnerrtext == "")
    
    #trim consecutive blank lines (throw off blanks-based parsing below)
    consec <- which(diff(blanks) == 1)
    if (length(consec) > 0L) {
      warnerrtext <- warnerrtext[-1*blanks[consec]]
      blanks <- which(warnerrtext == "") #clunky
    }
    
    #for loop is probably clunky here, but works for now
    startMsg <- 1 #first line of a message
    for (line in 1:length(warnerrtext)) {
      if ((line %in% blanks && ! (line-1) %in% blanks) || line == length(warnerrtext)) {
        msg <- trimSpace(warnerrtext[startMsg:ifelse(line %in% blanks, line - 1, line)])
        
        if (grepl("^\\s*WARNING:", msg[1L], ignore.case=TRUE, perl=TRUE)) {
          warnerr$warnings[[w]] <- msg
          w <- w+1
        } else {
          warnerr$errors[[e]] <- msg #if not prefixed by WARNING:, treat as error
          e <- e + 1
        }
        
        startMsg <- line + 1
      }
    }
    
  } else { } #warning("Unable to identify estimation warnings and errors section.") }
  
  return(warnerr)
}



#' Extract and parse Mplus input file
#'
#' Function to extract and parse mplus input syntax from the output file
#'
#' @param outfiletext The text of the output file
#' @param filename The filename
#' @return The parsed input file
#' @keywords internal
#' @examples
#' # make me!!!
extractInput_1file <- function(outfiletext, filename) {
  input <- list()
  class(input) <- c("mplus.inp", "list")
  
  startInput <- grep("^\\s*INPUT INSTRUCTIONS\\s*$", outfiletext, ignore.case=TRUE, perl=TRUE)
  if (length(startInput) == 0L) {
    warning("Could not find beginning of input for: ", filename)
    attr(input, "start.line") <- attr(input, "end.line") <- -1L
    return(input)
  } else { startInput <- startInput[1L] + 1L } #skip input instructions line itself
  
  endInput <- grep("^\\s*(INPUT READING TERMINATED NORMALLY|\\*\\*\\* WARNING.*|\\d+ (?:ERROR|WARNING)\\(S\\) FOUND IN THE INPUT INSTRUCTIONS|\\*\\*\\* ERROR.*)\\s*$", outfiletext, ignore.case=TRUE, perl=TRUE)
  if (length(endInput) == 0L) {
    #In Mplus v6.12 (and perhaps at some other point in the evolution), the input parser output was not included.
    #In such cases, try to fall back to the first line of the TITLE: XXX line, which is reprinted after input
    title1 <- grep("\\s*TITLE:\\s*(.*)$", outfiletext[1:100], perl=TRUE) #assume it lives in first 100 lines
    if (length(title1)==1L && length((endinputTitle <- grep(sub("\\s*TITLE:\\s*(.*)$", "^\\\\s*\\1\\\\s*$", outfiletext[title1], perl=TRUE), outfiletext)) == 1L)) {
      endInput <- endinputTitle - 1L
    } else {
      warning("Could not find end of input for: ", filename)
      attr(input, "start.line") <- attr(input, "end.line") <- -1
      return(input)
    }
  } else { endInput <- endInput[1L] - 1L } #one line before first warning or end of instructions
  
  input.text <- outfiletext[startInput[1L]:endInput[1L]] #explicit first element because there could be both warnings and errors.
  
  #some code adapted from mplus2lavaan prototype
  inputHeaders <- grep("^\\s*(title:|data.*:|variable:|define:|analysis:|model.*:|output:|savedata:|plot:|montecarlo:)", input.text, ignore.case=TRUE, perl=TRUE)
  
  stopifnot(length(inputHeaders) > 0L)
  
  for (h in 1:length(inputHeaders)) {
    sectionEnd <- ifelse(h < length(inputHeaders), inputHeaders[h+1] - 1, length(input.text))
    section <- input.text[inputHeaders[h]:sectionEnd]
    sectionName <- trimSpace(sub("^([^:]+):.*$", "\\1", section[1L], perl=TRUE)) #obtain text before the colon
    
    #dump section name from input syntax
    section[1L] <- sub("^[^:]+:(.*)$", "\\1", section[1L], perl=TRUE)
    
    input[[make.names(tolower(sectionName))]] <- section
  }
  
  #divide some input sections into fields
  #need to do a better job here of handling blank lines and such
  input$title <- paste(trimSpace(input$title), collapse=" ")
  input$data <- divideIntoFields(input$data)
  input$data.imputation <- divideIntoFields(input$data.imputation)
  input$variable <- divideIntoFields(input$variable)
  input$analysis <- divideIntoFields(input$analysis)
  input$montecarlo <- divideIntoFields(input$montecarlo)
  input$savedata <- divideIntoFields(input$savedata)
  
  
  attr(input, "start.line") <- startInput
  attr(input, "end.line") <- endInput
  
  return(input)
}

#' Extract the summaries from one file
#'
#' Description: This function parses an output file for specific model details. It returns a list of model details for a single output file.
#'
#' @param outfiletext This is the output file in string form to be parsed. Passed in from extractModelSummaries.
#' @param filename Name of the file being parsed. Used in case of bad model, prints a warning.
#' @return A list of the summaries
#' @keywords internal
#' @examples
#' # make me!!!
extractSummaries_1file <- function(outfiletext, filename, input)
{
  #preallocates list
  arglist <- list()
  
  #obtain mplus software version
  if ((mplus.version <- regexpr("\\s*Mplus VERSION ([\\d\\.]+)\\s*", outfiletext[1L], perl=TRUE)) > 0L) {
    arglist$Mplus.version <- substr(outfiletext[1L], attr(mplus.version, "capture.start")[1L], attr(mplus.version, "capture.start")[1L] + attr(mplus.version, "capture.length")[1L] - 1)
  }
  
  ###Copy some elements of the input instructions into the summaries
  
  #copy title into arglist
  if (!is.null(input$title)) {
    arglist$Title <- input$title
  } else {
    #warning("Unable to locate title field. Returning missing") #Warning doesn't seem very useful
    arglist$Title <- NA_character_
  }
  
  #extract the analysis type, which is important for setting other parameters.
  if (!is.null(input$analysis$type)) {
    arglist$AnalysisType <- input$analysis$type
  } else {
    arglist$AnalysisType <- "GENERAL" #Analysis type not specified, default to general
  }
  
  #extract the data type (important for detecting imputation datasets)
  if (!is.null(input$data$type)) {
    arglist$DataType <- input$data$type
  }  else if (any(c("montecarlo", "model.population") %in% names(input))) {
    arglist$DataType <- "MONTECARLO"
  } else {
    arglist$DataType <- "INDIVIDUAL" #Data type not specified, default to individual
  }
  
  if (!is.null(input$data.imputation)) {
    arglist$NImputedDatasets <- input$data.imputation$ndatasets #number of imputed datasets
  }
  #End input instructions processing
  
  #BEGIN ANALYSIS SUMMARY PROCESSING
  analysisSummarySection <- getSection("^\\s*SUMMARY OF ANALYSIS\\s*$", outfiletext)
  
  arglist$Estimator <- extractValue(pattern="^\\s*Estimator\\s*", analysisSummarySection, filename, type="str")
  arglist$Observations <- extractValue(pattern="^\\s*(Average n|N)umber of observations\\s*", analysisSummarySection, filename, type="int")
  # Fix for multigroup models, where Observations were not parsed correctly
  if(is.na(arglist$Observations)){
    arglist$Observations <- extractValue(pattern="^\\s*Total sample size\\s*", analysisSummarySection, filename, type="int")
  }
  arglist$NGroups <- extractValue(pattern="^\\s*Number of groups\\s*", analysisSummarySection, filename, type="int")
  arglist$NDependentVars <- extractValue(pattern="^\\s*Number of dependent variables\\s*", analysisSummarySection, filename, type="int")
  arglist$NIndependentVars <- extractValue(pattern="^\\s*Number of independent variables\\s*", analysisSummarySection, filename, type="int")
  arglist$NContinuousLatentVars <- extractValue(pattern="^\\s*Number of continuous latent variables\\s*", analysisSummarySection, filename, type="int")
  arglist$NCategoricalLatentVars <- extractValue(pattern="^\\s*Number of categorical latent variables\\s*", analysisSummarySection, filename, type="int")
  arglist$InformationMatrix <- extractValue(pattern="^\\s*Information matrix\\s*", analysisSummarySection, filename, type="int")
  
  #END ANALYSIS SUMMARY PROCESSING
  
  #BEGIN MODEL FIT STATISTICS PROCESSING
  #handle EFA output, which has separate model fit sections within each file
  #do this by extracting model fit sections for each and using an rbind call
  if (grepl("(?!MIXTURE|TWOLEVEL)\\s*EFA\\s+", arglist$AnalysisType, ignore.case=TRUE, perl=TRUE)) {

    EFASections <- grep(paste(c(
      "^\\s*(EXPLORATORY FACTOR ANALYSIS WITH \\d+ FACTOR\\(S\\):|",
      "EXPLORATORY FACTOR ANALYSIS WITH \\d+ WITHIN FACTOR\\(S\\) AND \\d+ BETWEEN FACTOR\\(S\\):|",
      "EXPLORATORY FACTOR ANALYSIS WITH \\d+ WITHIN FACTOR\\(S\\) AND UNRESTRICTED BETWEEN COVARIANCE:|",
      "EXPLORATORY FACTOR ANALYSIS WITH UNRESTRICTED WITHIN COVARIANCE AND \\d+ BETWEEN FACTOR\\(S\\):",
      ")\\s*$"), collapse=""), outfiletext, perl=TRUE)
    
    efa_nfac <- get_efa_nfactors(outfiletext[EFASections])

    mlefa <- nrow(efa_nfac) == 2L #a little error prone if we later build out the helper function, but okay for now
    
    if (!length(EFASections) > 0) { stop("Unable to locate section headers for EFA model fit statistics") }
    
    #need to convert from list to data.frame format to allow for proper handling of rbind below
    arglistBase <- as.data.frame(arglist, stringsAsFactors=FALSE)
    
    efaList <- list()
    for (thisFactor in 1:length(EFASections)) {
      #subset output by starting text to be searched at the point where factor output begins
      s_end <- ifelse(thisFactor==length(EFASections), length(outfiletext), EFASections[thisFactor+1])
      modelFitSection <- getSection_Blanklines("^(TESTS OF MODEL FIT|MODEL FIT INFORMATION)$", outfiletext[EFASections[thisFactor]:s_end])
      
      #N.B. Multilevel EFA outputs produce two top-level section headers for each combination of wi + bw factors
      #  But, MODEL FIT INFORMATION is only produced in the first section. Thus, the second section will not have this
      #  information, returning NULL for modelFitSection. This is no problem since we don't want to duplicate
      #  global fit information. Thus, a NULL indicates that we may safely skip to the next iteration.
      if (is.null(modelFitSection)) { next }
      
      efaList[[thisFactor]] <- extractSummaries_1section(modelFitSection, arglistBase, filename)
      if (isTRUE(mlefa)) {
        efaList[[thisFactor]]$NumFactors_Between <- efa_nfac["bw", thisFactor]
        efaList[[thisFactor]]$NumFactors_Within <- efa_nfac["wi", thisFactor]
      } else {
        efaList[[thisFactor]]$NumFactors <- efa_nfac["f", thisFactor]
      }
      
    }
    
    arglist <- do.call(rbind, efaList)
  } else if (length(multisectionMatches <- grep("^\\s*MODEL FIT INFORMATION FOR (?!THE LATENT CLASS INDICATOR MODEL PART).*", outfiletext, perl=TRUE, value=TRUE)) > 0L) {
    #use negative lookahead to ensure we don't grab the TECH10 output for LCA where it lists model fit info for latent class part
    #support Mplus v8 invariance testing outputs with one model fit section per variant (MODEL FIT INFORMATION FOR THE SCALAR MODEL etc.)
    #need to convert from list to data.frame format to allow for proper handling of rbind below
    arglistBase <- as.data.frame(arglist, stringsAsFactors=FALSE)
    multiList <- list()
    sectionNames <- sub("^\\s*MODEL FIT INFORMATION FOR\\s+(?:THE)*\\s*([\\w\\.]+)", "\\1", multisectionMatches, perl=TRUE)
    for (s in 1:length(multisectionMatches)) {
      fitinfo <- getSection(multisectionMatches[s], outfiletext)
      if (!is.null(fitinfo)) {
        multiList[[s]] <- extractSummaries_1section(fitinfo, arglistBase, filename, input)
      }      
    }
    arglist <- do.call(rbind, multiList)
    arglist$Model <- sectionNames #add model info
  } else {
    modelFitSection <- getSection("^(TESTS OF MODEL FIT|MODEL FIT INFORMATION)$", outfiletext)
    arglist <- extractSummaries_1section(modelFitSection, arglist, filename, input)
  }
  
  
  #CLASSIFICATION QUALITY
  classificationQuality <- getSection("^CLASSIFICATION QUALITY$", outfiletext)
  
  if (!is.null(classificationQuality))
    arglist$Entropy <- extractValue(pattern="^\\s*Entropy\\s*", classificationQuality, filename, type="dec")
  #overkill
  #arglist <- extractSummaries_1plan(arglist, "", list(data.frame(varName="Entropy", regexPattern="Entropy", varType=c("dec"), stringsAsFactors=FALSE)), classificationQuality, filename)
  else
    arglist$Entropy <- NA_real_ #maybe try to avoid the is null logic and just have extractModelSummary correctly handle null sections
  
  #TECH11 OUTPUT: LMR LRT
  tech11Output <- getSection("^\\s*TECHNICAL 11 OUTPUT\\s*$", outfiletext)
  
  if (!is.null(tech11Output)) {
    tech11headers <- c(
      "Random Starts Specifications for the k-1 Class Analysis Model",
      "VUONG-LO-MENDELL-RUBIN LIKELIHOOD RATIO TEST FOR \\d+ \\(H0\\) VERSUS \\d+ CLASSES",
      "LO-MENDELL-RUBIN ADJUSTED LRT TEST"
    )
    tech11fields <- list(
      data.frame(
        varName=c("T11_KM1Starts", "T11_KM1Final"),
        regexPattern=c("Number of initial stage random starts", "Number of final stage optimizations"),
        varType=c("int", "int"), stringsAsFactors=FALSE
      ),
      data.frame(
        varName=c("T11_KM1LL", "T11_VLMR_2xLLDiff", "T11_VLMR_ParamDiff", "T11_VLMR_Mean", "T11_VLMR_SD", "T11_VLMR_PValue"),
        regexPattern=c("H0 Loglikelihood Value", "2 Times the Loglikelihood Difference", "Difference in the Number of Parameters", "Mean", "Standard Deviation", "P-Value"),
        varType=c("dec", "dec", "int", "dec", "dec", "dec"), stringsAsFactors=FALSE
      ),
      data.frame(
        varName=c("T11_LMR_Value", "T11_LMR_PValue"),
        regexPattern=c("^\\s*Value", "^\\s*P-Value"),
        varType=c("dec", "dec"), stringsAsFactors=FALSE
      )
    )
    
    arglist <- extractSummaries_1plan(arglist, tech11headers, tech11fields, tech11Output, filename)
  }
  
  
  tech14Output <- getSection("^\\s*TECHNICAL 14 OUTPUT\\s*$", outfiletext)
  
  if (!is.null(tech14Output)) {
    tech14headers <- c(
      "", #section-inspecific parameters
      "Random Starts Specifications for the k-1 Class Analysis Model",
      "Random Starts Specification for the k-1 Class Model for Generated Data",
      "Random Starts Specification for the k Class Model for Generated Data",
      "PARAMETRIC BOOTSTRAPPED LIKELIHOOD RATIO TEST FOR \\d+ \\(H0\\) VERSUS \\d+ CLASSES"
    )
    tech14fields <- list(
      #top-level (no section)
      data.frame(
        varName=c("BLRT_RequestedDraws"),
        regexPattern=c("Number of bootstrap draws requested"),
        varType=c("str"), stringsAsFactors=FALSE
      ),
      #Random Starts Specifications for the k-1 Class Analysis Model
      data.frame(
        varName=c("BLRT_KM1AnalysisStarts", "BLRT_KM1AnalysisFinal"),
        regexPattern=c("Number of initial stage random starts", "Number of final stage optimizations"),
        varType=c("int", "int"), stringsAsFactors=FALSE
      ),
      #Random Starts Specification for the k-1 Class Model for Generated Data
      #v7 format: Number of final stage optimizations for the\n initial stage random starts  <N>
      #v6 format: Number of final stage optimizations <N>
      #Thus, include the genfinal twice here to catch both circumstances
      data.frame(
        varName=c("BLRT_KM1GenStarts", "BLRT_KM1GenFinal", "BLRT_KM1GenFinal"),
        regexPattern=c("Number of initial stage random starts", "+1:Number of final stage optimizations for the", "Number of final stage optimizations"),
        varType=c("int", "int", "int"), stringsAsFactors=FALSE
      ),
      data.frame(
        varName=c("BLRT_KGenStarts", "BLRT_KGenFinal"),
        regexPattern=c("Number of initial stage random starts", "Number of final stage optimizations"),
        varType=c("int", "int"), stringsAsFactors=FALSE
      ),
      data.frame(
        varName=c("BLRT_KM1LL", "BLRT_2xLLDiff", "BLRT_ParamDiff", "BLRT_PValue", "BLRT_SuccessfulDraws"),
        regexPattern=c("H0 Loglikelihood Value", "2 Times the Loglikelihood Difference", "Difference in the Number of Parameters", "Approximate P-Value", "Successful Bootstrap Draws"),
        varType=c("dec", "dec", "int", "dec", "int"), stringsAsFactors=FALSE
      )
    )
    
    arglist <- extractSummaries_1plan(arglist, tech14headers, tech14fields, tech14Output, filename)
    
  }
  
  #calculate adjusted AIC per Burnham & Anderson(2004), which is better than AIC for non-nested model selection
  #handle AICC calculation, requires AIC, Parameters, and observations
  if (!is.null(arglist$Parameters) && !all(is.na(arglist$Parameters)) &&
    !is.null(arglist$AIC) && !all(is.na(arglist$AIC)) &&
    !is.null(arglist$Observations) && !all(is.na(arglist$Observations))) {
    arglist$AICC <- arglist$AIC + (2*arglist$Parameters*(arglist$Parameters+1))/(arglist$Observations-arglist$Parameters-1)
  } else {
    arglist$AICC <- NA_real_
  }
  
  #Only warn about missing LL for ML-based estimators
#too convoluted to maintain (and not so useful), generating errors I don't want to debug
#  if ("Estimator" %in% extract && "LL" %in% extract
#			&& !is.na(arglist$Estimator) && arglist$Estimator %in% c("ML", "MLR", "MLM", "MLMV", "MLF")
#			&& ((grepl("imputation", arglist$DataType, ignore.case=TRUE) && is.na(arglist$LL_Mean))
#			|| (!grepl("imputation", arglist$DataType, ignore.case=TRUE) && is.na(arglist$LL))))
#    warning("Model missing LL value, despite use of ML-based estimator. Likely a failed run.\n  ", filename)
#
  
  #for now, skip including input instructions in the returned data.frame. Makes the output too cluttered.
  #arglist$InputInstructions <- paste((outfiletext[(startInput+1):(endInput-1)]), collapse="\n")
  arglist$Filename <- splitFilePath(filename)$filename #only retain filename, not path
  
  arglist <- as.data.frame(arglist, stringsAsFactors=FALSE)
  class(arglist) <- c("mplus.summaries", "data.frame")
  attr(arglist, "filename") <- arglist$Filename
  
  return(arglist)
}

#' Extract summary statistics from a single output file or from a group of Mplus models within a directory
#'
#' Parses a group of Mplus model output files (.out extension) for model fit statistics.
#' At this time, the details extracted are fixed and include: \code{Filename, InputInstructions, Title, Estimator,
#' LL, BIC, aBIC, AIC, AICC, Parameters, Observations, CFI, TLI, RMSEA_Estimate, RMSEA_90CI_LB, RMSEA_90CI_UB,
#' RMSEA_pLT05, ChiSqM_Value, ChiSqM_DF, ChiSq_PValue, BLRT_KM1LL, BLRT_PValue, BLRT_Numdraws)}. The
#' infrastructure is in place to allow for user-specified selection of summary statistics in future versions.
#'
#' @param target the directory containing Mplus output files (.out) to parse OR the
#'   single output file to be parsed. Defaults to the current working directory.
#'   Example: "C:/Users/Michael/Mplus Runs"
#' @param recursive optional. If \code{TRUE}, parse all models nested in
#'   subdirectories within \code{directory}. Defaults to \code{FALSE}.
#' @param filefilter a Perl regular expression (PCRE-compatible) specifying particular
#'   output files to be parsed within \code{directory}. See \code{regex} or
#'   \url{http://www.pcre.org/pcre.txt} for details about regular expression syntax.
#'
#' @return Returns a \code{data.frame} containing model fit statistics for all output files within \code{directory}.
#' The \code{data.frame} contains some of the following variables (depends on model type):
#' \item{Title}{Title for the model, specified by the TITLE: command}
#' \item{Filename}{Filename of the output file}
#' \item{Estimator}{Estimator used for the model (e.g., ML, MLR, WLSMV, etc.)}
#' \item{LL}{Log-likelihood of the model}
#' \item{BIC}{Bayesian Information Criterion}
#' \item{aBIC}{Sample-Size-Adjusted BIC (Sclove, 1987)}
#' \item{AIC}{Akaike's Information Criterion}
#' \item{AICC}{Corrected AIC, based on Sugiura (1978) and recommended by Burnham & Anderson (2002)}
#' \item{DIC}{Deviance Information Criterion. Available in ESTIMATOR=BAYES output.}
#' \item{Parameters}{Number of parameters estimated by the model}
#' \item{pD}{Estimated number of parameters in Bayesian output}
#' \item{Observations}{The number of observations for the model (does not suppport multiple-groups analysis at this time)}
#' \item{CFI}{Confirmatory Fit Index}
#' \item{TLI}{Tucker-Lewis Index}
#' \item{RMSEA_Estimate}{Point estimate of root mean squared error of approximation}
#' \item{RMSEA_90CI_LB}{Lower bound of the 90\% Confidence Interval around the RMSEA estimate.}
#' \item{RMSEA_90CI_UB}{Upper bound of the 90\% Confidence Interval around the RMSEA estimate.}
#' \item{RMSEA_pLT05}{Probability that the RMSEA estimate falls below .05, indicating good fit.}
#' \item{ChiSqM_Value}{Model chi-squared value}
#' \item{ChiSqM_DF}{Model chi-squared degrees of freedom}
#' \item{ChiSqM_PValue}{Model chi-squared p value}
#' \item{ChiSqM_ScalingCorrection}{H0 Scaling Correction Factor}
#' \item{ObsRepChiSqDiff_95CI_LB}{Lower bound of 95\% confidence interval for the difference between observed and replicated chi-square values}
#' \item{ObsRepChiSqDiff_95CI_UB}{Upper bound of 95\% confidence interval for the difference between observed and replicated chi-square values}
#' \item{PostPred_PValue}{Posterior predictive p-value}
#' \item{PriorPostPred_PValue}{Prior Posterior Predictive P-Value}
#' \item{BLRT_RequestedDraws}{Number of requested bootstrap draws for TECH14.}
#' \item{BLRT_KM1LL}{Log-likelihood of the K-1 model (one less class) for the Bootstrapped Likelihood Ratio Test (TECH14).}
#' \item{BLRT_2xLLDiff}{Two times the log-likelihood difference of the models with K and K-1 classes (TECH14).}
#' \item{BLRT_ParamDiff}{Difference in the number of parameters for models with K and K-1 classes (TECH14).}
#' \item{BLRT_PValue}{P-value of the Bootstrapped Likelihood Ratio Test (TECH14) testing whether the K class model is significantly better than K-1}
#' \item{BLRT_SuccessfulDraws}{The number of successful bootstrapped samples used in the Bootstrapped Likelihood Ratio Test}
#' \item{SRMR}{Standardized root mean square residual}
#' \item{SRMR.Between}{For TYPE=TWOLEVEL output, standardized root mean square residual for between level}
#' \item{SRMR.Within}{For TYPE=TWOLEVEL output, standardized root mean square residual for within level}
#' \item{WRMR}{Weighted root mean square residual}
#' \item{ChiSqBaseline_Value}{Baseline (unstructured) chi-squared value}
#' \item{ChiSqBaseline_DF}{Baseline (unstructured) chi-squared degrees of freedom}
#' \item{ChiSqBaseline_PValue}{Baseline (unstructured) chi-squared p value}
#' \item{NumFactors}{For TYPE=EFA output, the number of factors}
#' \item{T11_KM1Starts}{TECH11: Number of initial stage random starts for k-1 model}
#' \item{T11_KM1Final}{TECH11: Number of final stage optimizations for k-1 model}
#' \item{T11_KM1LL}{TECH11: Log-likelihood of the K-1 model used for the Vuong-Lo-Mendell-Rubin LRT}
#' \item{T11_VLMR_2xLLDiff}{TECH11: 2 * Log-likelihood Difference of K-class vs. K-1-class model for the Vuong-Lo-Mendell-Rubin LRT}
#' \item{T11_VLMR_ParamDiff}{TECH11: Difference in number of parameters between K-class and K-1-class model for the Vuong-Lo-Mendell-Rubin LRT}
#' \item{T11_VLMR_Mean}{TECH11: Vuong-Lo-Mendell-Rubin LRT mean}
#' \item{T11_VLMR_SD}{TECH11: Vuong-Lo-Mendell-Rubin LRT standard deviation}
#' \item{T11_VLMR_PValue}{TECH11: Vuong-Lo-Mendell-Rubin LRT p-value}
#' \item{T11_LMR_Value}{TECH11: Lo-Mendell-Rubin Adjusted LRT value}
#' \item{T11_LMR_PValue}{TECH11: Lo-Mendell-Rubin Adjusted LRT p-value}
#'
#' @author Michael Hallquist
#' @seealso \code{\link{regex}}, \code{\link{runModels}}, \code{\link{readModels}}
#' @keywords interface
#' @export
#' @examples
#' \dontrun{
#'   allExamples <- extractModelSummaries(
#'     "C:/Program Files/Mplus/Mplus Examples/User's Guide Examples")
#' }
extractModelSummaries <- function(target=getwd(), recursive=FALSE, filefilter) {
  #message("This function is deprecated and will be removed from future versions of MplusAutomation. Please use readModels() instead.")
  message("extractModelSummaries has been deprecated. Please use readModels(\"nameofMplusoutfile.out\", what=\"summaries\")$summaries to replicate the old functionality.")
  
  return(invisible(NULL))
}


#' Helper subfunction to extract one section of OUTPUT: RESIDUALS
#' Can be called multiple times, as in the case of invariance testing
#' 
#' @param residSection The character vector containing strings for the residual section to be parsed
#' @param filename Character string containing the current output filename being parsed
#' @keywords internal
extractResiduals_1section <- function(residSection, filename) {
  #allow for multiple groups
  residSubsections <- getMultilineSection("ESTIMATED MODEL AND RESIDUALS \\(OBSERVED - ESTIMATED\\)( FOR .+)*",
                                          residSection, filename, allowMultiple=TRUE)
  
  matchlines <- attr(residSubsections, "matchlines")
  
  if (length(residSubsections) == 0) {
    warning("No sections found within residuals output.")
    return(list())
  } else if (length(residSubsections) > 1) {
    groupNames <- make.names(gsub("^\\s*ESTIMATED MODEL AND RESIDUALS \\(OBSERVED - ESTIMATED\\)( FOR (.+))*\\s*$", "\\2", residSection[matchlines], perl=TRUE))
  }
  
  residList <- list()
  #multiple groups possible
  for (g in 1:length(residSubsections)) {
    targetList <- list()
    
    targetList[["meanEst"]] <- matrixExtract(residSubsections[[g]], "Model Estimated Means(/Intercepts/Thresholds)*", filename)
    targetList[["meanResid"]] <- matrixExtract(residSubsections[[g]], "Residuals for Means(/Intercepts/Thresholds)*", filename)
    targetList[["meanResid.std"]] <- matrixExtract(residSubsections[[g]], "Standardized Residuals \\(z-scores\\) for Means(/Intercepts/Thresholds)*", filename)
    targetList[["meanResid.norm"]] <- matrixExtract(residSubsections[[g]], "Normalized Residuals for Means(/Intercepts/Thresholds)*", filename)
    targetList[["covarianceEst"]] <- matrixExtract(residSubsections[[g]], "Model Estimated Covariances(/Correlations/Residual Correlations)*", filename)
    targetList[["covarianceResid"]] <- matrixExtract(residSubsections[[g]], "Residuals for Covariances(/Correlations/Residual Correlations)*", filename)
    targetList[["covarianceResid.std"]] <- matrixExtract(residSubsections[[g]], "Standardized Residuals \\(z-scores\\) for Covariances(/Correlations/Residual Corr)*", filename)
    targetList[["covarianceResid.norm"]] <- matrixExtract(residSubsections[[g]], "Normalized Residuals for Covariances(/Correlations/Residual Correlations)*", filename)
    targetList[["correlationEst"]] <- matrixExtract(residSubsections[[g]], "Model Estimated Correlations", filename)
    targetList[["correlationResid"]] <- matrixExtract(residSubsections[[g]], "Residuals for Correlations", filename)
    targetList[["slopeEst"]] <- matrixExtract(residSubsections[[g]], "Model Estimated Slopes", filename)
    targetList[["slopeResid"]] <- matrixExtract(residSubsections[[g]], "Residuals for Slopes", filename)
    
    if (length(residSubsections) > 1) {
      class(targetList) <- c("mplus.residuals", "list")
      residList[[groupNames[g]]] <- targetList
    } else{ 
      residList <- targetList
    }
    
  }
  
  class(residList) <- c("mplus.residuals", "list")
  if (length(residSubsections) > 1) { attr(residList, "group.names") <- groupNames }
  
  return(residList)
}



#' Extract residual matrices
#'
#' Function that extracts the residual matrices including standardized ones
#'
#' @param outfiletext the text of the output file
#' @param filename The name of the file
#' @return A list of the residual matrices
#' @keywords internal
#' @seealso \code{\link{matrixExtract}}
#' @examples
#' # make me!!!
extractResiduals <- function(outfiletext, filename) {
  #allow multiple model residual sections
  #mimic extractParameters_1file
  if (length(multisectionMatches <- grep("^\\s*RESIDUAL OUTPUT FOR .*", outfiletext, perl=TRUE, value=TRUE)) > 0L) {
    sectionNames <- make.names(sub("^\\s*RESIDUAL OUTPUT FOR\\s+(?:THE)*\\s*([\\w\\.]+)", "\\1", multisectionMatches, perl=TRUE))
    
    residList <- list()
    for (s in 1:length(sectionNames)) {
      residSection <- getSection(multisectionMatches[s], outfiletext)
      if (!is.null(residSection)) {
        residList[[ sectionNames[s] ]] <- extractResiduals_1section(residSection, filename) #[[1]]
      }
    }
  } else {
    residSection <- getSection("^RESIDUAL OUTPUT$", outfiletext)
    if (is.null(residSection)) { return(list()) } #no residuals output
    
    residList <- extractResiduals_1section(residSection) #[[1]]
  }
  
  # class(residList) <- c("mplus.residuals", "list")
  # if (length(residSubsections) > 1) { attr(residList, "group.names") <- groupNames }
  
  return(residList)
}

#' Extract Technical 1 matrix from Mplus
#'
#' Function that extracts the Tech1 matrix
#'
#' @param outfiletext the text of the output file
#' @param filename The name of the file
#' @return A list of class \dQuote{mplus.tech1}
#' @keywords internal
#' @seealso \code{\link{matrixExtract}}
#' @examples
#' # make me!!!
extractTech1 <- function(outfiletext, filename) {
  tech1Section <- getSection("^TECHNICAL 1 OUTPUT$", outfiletext)
  if (is.null(tech1Section)) return(list()) #no tech1 output
  
  tech1List <- list()
  
  paramSpecSubsections <- getMultilineSection("PARAMETER SPECIFICATION( FOR [\\w\\d\\s\\.,_]+)*",
    tech1Section, filename, allowMultiple=TRUE)
  
  matchlines <- attr(paramSpecSubsections, "matchlines")
  
  paramSpecList <- list()
  if (length(paramSpecSubsections) == 0)
    warning ("No parameter specfication sections found within TECH1 output.")
  else if (length(paramSpecSubsections) > 1)
    groupNames <- make.names(gsub("^\\s*PARAMETER SPECIFICATION( FOR ([\\w\\d\\s\\.,_]+))*\\s*$", "\\2", tech1Section[matchlines], perl=TRUE))
  else #just one section, no groups
    groupNames <- ""
  
  for (g in 1:length(paramSpecSubsections)) {
    targetList <- list()
    
    targetList[["tau"]] <- matrixExtract(paramSpecSubsections[[g]], "TAU", filename)
    targetList[["nu"]] <- matrixExtract(paramSpecSubsections[[g]], "NU", filename)
    targetList[["lambda"]] <- matrixExtract(paramSpecSubsections[[g]], "LAMBDA", filename)
    targetList[["theta"]] <- matrixExtract(paramSpecSubsections[[g]], "THETA", filename)
    targetList[["alpha"]] <- matrixExtract(paramSpecSubsections[[g]], "ALPHA", filename)
    targetList[["beta"]] <- matrixExtract(paramSpecSubsections[[g]], "BETA", filename)
    targetList[["gamma"]] <- matrixExtract(paramSpecSubsections[[g]], "GAMMA", filename)
    targetList[["psi"]] <- matrixExtract(paramSpecSubsections[[g]], "PSI", filename)
    targetList[["delta"]] <- matrixExtract(paramSpecSubsections[[g]], "DELTA", filename)
    targetList[["gamma.c"]] <- matrixExtract(paramSpecSubsections[[g]], "GAMMA\\(C\\)", filename)
    targetList[["alpha.c"]] <- matrixExtract(paramSpecSubsections[[g]], "ALPHA\\(C\\)", filename)
    targetList[["new_additional"]] <- matrixExtract(paramSpecSubsections[[g]], "NEW/ADDITIONAL PARAMETERS", filename)
    
    #latent class indicator part includes subsections for each latent class, such as class-varying thresholds
    if (groupNames[g] == "LATENT.CLASS.INDICATOR.MODEL.PART") {
      tauLines <- grep("TAU\\(U\\) FOR LATENT CLASS \\d+", paramSpecSubsections[[g]], perl=TRUE, value=TRUE)
      uniqueLC <- unique(gsub("^\\s*TAU\\(U\\) FOR LATENT CLASS (\\d+)\\s*$", "\\1", tauLines, perl=TRUE))
      for (lc in uniqueLC) {
        targetList[[paste0("tau.u.lc", lc)]] <- matrixExtract(paramSpecSubsections[[g]], paste0("TAU\\(U\\) FOR LATENT CLASS ", lc), filename)
      }
    }
    
    if (length(paramSpecSubsections) > 1) {
      class(targetList) <- c("mplus.parameterSpecification", "list")
      paramSpecList[[groupNames[g]]] <- targetList
    }
    else
      paramSpecList <- targetList
  }
  
  class(paramSpecList) <- c("mplus.parameterSpecification", "list")
  if (length(paramSpecSubsections) > 1) attr(paramSpecList, "group.names") <- groupNames
  
  startValSubsections <- getMultilineSection("STARTING VALUES( FOR [\\w\\d\\s\\.,_]+)*",
    tech1Section, filename, allowMultiple=TRUE)
  
  matchlines <- attr(startValSubsections, "matchlines")
  
  startValList <- list()
  if (length(startValSubsections) == 0)
    warning ("No starting value sections found within TECH1 output.")
  else if (length(startValSubsections) > 1)
    groupNames <- make.names(gsub("^\\s*STARTING VALUES( FOR ([\\w\\d\\s\\.,_]+))*\\s*$", "\\2", tech1Section[matchlines], perl=TRUE))
  else
    groupNames <- ""
  
  for (g in 1:length(startValSubsections)) {
    targetList <- list()
    
    targetList[["tau"]] <- matrixExtract(startValSubsections[[g]], "TAU", filename)
    targetList[["nu"]] <- matrixExtract(startValSubsections[[g]], "NU", filename)
    targetList[["lambda"]] <- matrixExtract(startValSubsections[[g]], "LAMBDA", filename)
    targetList[["theta"]] <- matrixExtract(startValSubsections[[g]], "THETA", filename)
    targetList[["alpha"]] <- matrixExtract(startValSubsections[[g]], "ALPHA", filename)
    targetList[["beta"]] <- matrixExtract(startValSubsections[[g]], "BETA", filename)
    targetList[["gamma"]] <- matrixExtract(startValSubsections[[g]], "GAMMA", filename)
    targetList[["psi"]] <- matrixExtract(startValSubsections[[g]], "PSI", filename)
    targetList[["delta"]] <- matrixExtract(startValSubsections[[g]], "DELTA", filename)
    targetList[["gamma.c"]] <- matrixExtract(startValSubsections[[g]], "GAMMA\\(C\\)", filename)
    targetList[["alpha.c"]] <- matrixExtract(startValSubsections[[g]], "ALPHA\\(C\\)", filename)
    targetList[["new_additional"]] <- matrixExtract(startValSubsections[[g]], "NEW/ADDITIONAL PARAMETERS", filename)
    
    #latent class indicator part includes subsections for each latent class, such as class-varying thresholds
    if (groupNames[g] == "LATENT.CLASS.INDICATOR.MODEL.PART") {
      tauLines <- grep("TAU\\(U\\) FOR LATENT CLASS \\d+", startValSubsections[[g]], perl=TRUE, value=TRUE)
      uniqueLC <- unique(gsub("^\\s*TAU\\(U\\) FOR LATENT CLASS (\\d+)\\s*$", "\\1", tauLines, perl=TRUE))
      for (lc in uniqueLC) {
        targetList[[paste0("tau.u.lc", lc)]] <- matrixExtract(startValSubsections[[g]], paste0("TAU\\(U\\) FOR LATENT CLASS ", lc), filename)
      }
    }
    
    if (length(startValSubsections) > 1) {
      class(targetList) <- c("mplus.startingValues", "list")
      startValList[[groupNames[g]]] <- targetList
    }
    else
      startValList <- targetList
  }
  
  class(startValList) <- c("mplus.startingValues", "list")
  if (length(startValSubsections) > 1) attr(startValList, "group.names") <- groupNames
  
  tech1List <- list(parameterSpecification=paramSpecList, startingValues=startValList)
  class(tech1List) <- c("mplus.tech1", "list")
  
  return(tech1List)
  
}

#' Helper function to extract the sample statistics from Mplus output
#' Depends on OUTPUT: SAMPSTAT
#' 
#' @param outfiletext The character vector containing all strings from output file
#' @param filename The current out file being parsed
#' @importFrom utils tail
#' @keywords internal
extractSampstat <- function(outfiletext, filename) {
  sampstatSection <- getSection("^SAMPLE STATISTICS$", outfiletext)
  if (is.null(sampstatSection)) {
    #try output from TYPE=BASIC, which places these in a section of a different name
    sampstatSection <- getSection("^RESULTS FOR BASIC ANALYSIS$", outfiletext)
  }
  
  if(!is.null(sampstatSection) & all(sampstatSection == "")){
    first_line <- (attr(outfiletext, "headerlines")[attr(outfiletext, "headerlines") > tail(attr(sampstatSection, "lines"), 1)][1]+1)
    final_line <- (attr(outfiletext, "headerlines")[attr(outfiletext, "headerlines") > tail(attr(sampstatSection, "lines"), 1)][2]-1)
    sampstatSection <- outfiletext[first_line:final_line]
  }
  
  sampstatList <- list()
  sampstatSubsections <- getMultilineSection("(ESTIMATED )*SAMPLE STATISTICS( FOR [\\w\\d\\s\\.,_]+)*",
    sampstatSection, filename, allowMultiple=TRUE)
  
  matchlines <- attr(sampstatSubsections, "matchlines")
  
  if(is.na(sampstatSubsections[1])){
    sampstatSubsections <- list(sampstatSection)
    matchlines <- attr(sampstatSubsections, "lines")
  }
  
  if (length(sampstatSubsections) == 0) {
    warning ("No sample statistics sections found within SAMPSTAT output.")
  } else if (length(sampstatSubsections) > 1) {
    groupNames <- make.names(gsub("^\\s*(?:ESTIMATED )*SAMPLE STATISTICS( FOR ([\\w\\d\\s\\.,_]+))*\\s*$", "\\2", sampstatSection[matchlines], perl=TRUE))
  } else { #just one section, no groups
    groupNames <- ""
  }
  
  for (g in 1:length(sampstatSubsections)) {
    targetList <- list()
    
    targetList[["means"]] <- matrixExtract(sampstatSubsections[[g]], "Means", filename)
    targetList[["covariances"]] <- matrixExtract(sampstatSubsections[[g]], "Covariances", filename)
    targetList[["correlations"]] <- matrixExtract(sampstatSubsections[[g]], "Correlations", filename)
    targetList[["correlations.vardiag"]] <- matrixExtract(sampstatSubsections[[g]], "CORRELATION MATRIX \\(WITH VARIANCES ON THE DIAGONAL\\)", filename, ignore.case=TRUE)
    
    #these seem to show up in DATA: TYPE=IMPUTATION outputs (e.g., ex11.8part2.out)
    targetList[["means.intercepts.thresholds"]] <- matrixExtract(sampstatSubsections[[g]], "Means/Intercepts/Thresholds", filename, ignore.case=TRUE)
    targetList[["within.level.variance.covariance"]] <- matrixExtract(sampstatSubsections[[g]], "WITHIN LEVEL VARIANCE/COVARIANCE", filename, ignore.case=TRUE)
    targetList[["within.level.correlation"]] <- matrixExtract(sampstatSubsections[[g]], "WITHIN LEVEL CORRELATION", filename, ignore.case=TRUE)
    targetList[["between.level.variance.covariance"]] <- matrixExtract(sampstatSubsections[[g]], "BETWEEN LEVEL VARIANCE/COVARIANCE", filename, ignore.case=TRUE)
    targetList[["between.level.correlation"]] <- matrixExtract(sampstatSubsections[[g]], "BETWEEN LEVEL CORRELATION", filename, ignore.case=TRUE)
    
    #I think these are only in older outputs
    targetList[["covariances.correlations.resid_correlations"]] <- matrixExtract(sampstatSubsections[[g]], "Covariances/Correlations/Residual Correlations", filename)
    targetList[["slopes"]] <- matrixExtract(sampstatSubsections[[g]], "Slopes", filename)
    
    #latent class indicator part includes subsections for each latent class, such as class-varying thresholds
#    if (groupNames[g] == "LATENT.CLASS.INDICATOR.MODEL.PART") {
#      tauLines <- grep("TAU\\(U\\) FOR LATENT CLASS \\d+", sampstatSubsections[[g]], perl=TRUE, value=TRUE)
#      uniqueLC <- unique(gsub("^\\s*TAU\\(U\\) FOR LATENT CLASS (\\d+)\\s*$", "\\1", tauLines, perl=TRUE))
#      for (lc in uniqueLC) {
#        targetList[[paste0("tau.u.lc", lc)]] <- matrixExtract(sampstatSubsections[[g]], paste0("TAU\\(U\\) FOR LATENT CLASS ", lc), filename)
#      }
#    }
    
    if (length(sampstatSubsections) > 1) {
      class(targetList) <- c("mplus.sampstat", "list")
      sampstatList[[groupNames[g]]] <- targetList
    } else{
      sampstatList <- targetList
    }
    
  }
  
  ##Extract Univariate counts and proportions
  univariateCountsSection <- getSection("^UNIVARIATE PROPORTIONS AND COUNTS FOR CATEGORICAL VARIABLES$", outfiletext)
  
  #remove warning lines, which throw off the parser (e.g., ex6.15.out)
  univariateCountsSection <- univariateCountsSection[!grepl("\\s*WARNING:.*", univariateCountsSection, perl=TRUE)]
  
  if (!is.null(univariateCountsSection)) {
    countSubsections <- getMultilineSection("Group\\s+([\\w\\d\\.,_]+)*",
      univariateCountsSection, filename, allowMultiple=TRUE)
    
    matchlines <- attr(countSubsections, "matchlines")
    
    if (!is.list(countSubsections) && is.na(countSubsections[1])) {
      countSubsections <- list(univariateCountsSection) #no sublists by group
    } else if (length(countSubsections) > 1)
      groupNames <- make.names(gsub("^\\s*Group\\s+([\\w\\d\\s\\.,_]+)\\s*$", "\\1", univariateCountsSection[matchlines], perl=TRUE))
    else #just one section, no groups
      stop("not sure how we got here")
    
    for (g in 1:length(countSubsections)) {
      targetList <- list()
      
      df <- data.frame(do.call(rbind, strsplit(trimSpace(parseCatOutput(countSubsections[[g]])), "\\s+", perl=TRUE)), stringsAsFactors=FALSE)
      names(df) <- c("variable", "proportion", "count")
      df$proportion <- as.numeric(df$proportion)
      df$count <- as.numeric(df$count)
      
      #divide variable column into variable and category for clarity
      df$category <- as.numeric(sub(".*\\.Cat\\.(\\d+)", "\\1", df$variable, perl=TRUE))
      df$variable <- sub("^(.*)\\.Cat\\.\\d+$", "\\1", df$variable, perl=TRUE)
      df <- df[,c("variable", "category", "proportion", "count")] #reorder df
      
      #targetList[["proportions.counts"]] <- df
      targetList <- df #just a single element at the moment
      
      class(targetList) <- c("mplus.propcounts.data.frame", "data.frame")
      
      if (length(countSubsections) > 1) {
        #class(targetList) <- c("mplus.propcounts", "list")
        sampstatList[[groupNames[g]]][["proportions.counts"]] <- targetList
      } else {
        sampstatList[["proportions.counts"]] <- targetList
      }
        
    }
  }

  # Extract univariate sample statistics ------------------------------------

  univariate_sampstat <- getSection("^UNIVARIATE SAMPLE STATISTICS$", outfiletext)
  if (!is.null(univariate_sampstat)) {
    
    #group-wise headings don't follow Mplus indentation conventions. Hack these by prefixing with X, then use getMultilineSection to parse
    has_groups <- any(which_g <- grepl("UNIVARIATE HIGHER-ORDER MOMENT DESCRIPTIVE STATISTICS(?: FOR [\\w\\d\\s\\.,_]+)*", univariate_sampstat, perl=TRUE))
    if (has_groups) {
      univariate_sampstat[which_g] <- sub("^(.*)$", "X\\1", univariate_sampstat[which_g], perl=TRUE)
      univariateSubsections <- getMultilineSection("X.*UNIVARIATE HIGHER-ORDER MOMENT DESCRIPTIVE STATISTICS(?: FOR [\\w\\d\\s\\.,_]+)*",
                                              univariate_sampstat, filename, allowMultiple=TRUE)
    } else {
      univariateSubsections <- NA
    }

    matchlines <- attr(univariateSubsections, "matchlines")
    
    if(is.na(univariateSubsections[1])) {
      univariateSubsections <- list(univariate_sampstat)
      matchlines <- attr(univariateSubsections, "lines")
    }
    
    if (length(univariateSubsections) == 0L) {
      warning ("No univariate statistics sections found within SAMPSTAT output.")
    } else if (length(univariateSubsections) > 1) {
      groupNames <- make.names(gsub("^X\\s*UNIVARIATE HIGHER-ORDER MOMENT DESCRIPTIVE STATISTICS( FOR ([\\w\\d\\s\\.,_]+))*\\s*$", "\\2", univariate_sampstat[matchlines], perl=TRUE))
    } else { #just one section, no groups
      groupNames <- ""
    }
    
    for (g in 1:length(univariateSubsections)) {
      thisSub <- univariateSubsections[[g]]
      stats <- lapply(thisSub[grepl("\\d$", thisSub)], function(x) { strsplit(trimws(x), split = "\\s+")[[1]] })
      if(length(stats) %% 2 == 0) {
        out <- cbind(do.call(rbind, stats[seq(1, length(stats), by = 2)]),
                     do.call(rbind, stats[seq(2, length(stats), by = 2)]))
        
        #headers <- univariate_sampstat[grepl("\\/", univariate_sampstat)]
        #headers <- gsub("%", " %", headers)
        #headers <- lapply(trimws(headers), function(x){strsplit(x, "\\s{2,}")[[1]]})
        #headers[[1]] <- c(gsub("\\/", "", headers[[1]][grepl("\\/", headers[[1]])]), gsub("\\/.*$", "", headers[[2]][grepl("\\/", headers[[2]])]))
        #headers[[2]] <- gsub("^.+?\\/", "", headers[[2]])
        #colnames(out) <- gsub(" %", "%", c(headers[[1]], headers[[2]]))
        var_names <- out[, 1]
        out <- gsub("%", "", out)
        out <- out[,-1, drop=FALSE] #drop variable column
        dd <- dim(out)
        out <- matrix(apply(out, 2, as.numeric), nrow=dd[1], ncol=dd[2]) #need to preserve dimensions in the single-row case
        colnames(out) <- c("Mean", "Skewness", "Minimum", "%Min", "20%", "40%", "Median", "Sample Size", "Variance", "Kurtosis", "Maximum", "%Max", "60%", "80%")
        rownames(out) <- var_names
        
        out <- out[, c("Sample Size", "Mean", "Variance", "Skewness", "Kurtosis", "Minimum", "Maximum", "%Min", "%Max", "20%", "40%", "Median", "60%", "80%"), drop=FALSE]
        
        if (length(univariateSubsections) > 1) {
          #class(targetList) <- c("mplus.propcounts", "list")
          sampstatList[[groupNames[g]]][["univariate.sample.statistics"]] <- out
        } else {
          sampstatList[["univariate.sample.statistics"]] <- out
        }
      }
    }
    
  }
  class(sampstatList) <- c("mplus.sampstat", "list")
  if (length(sampstatSubsections) > 1) attr(sampstatList, "group.names") <- groupNames
  
  return(sampstatList)
  
}

extractCovarianceCoverage <- function(outfiletext, filename) {
  #TODO: Return type is sometimes list, sometimes matrix; a bit inconsistent
  covcoverageSection <- getSection("^COVARIANCE COVERAGE OF DATA$", outfiletext)
  if (is.null(covcoverageSection)) { return(list()) } #no COVARIANCE COVERAGE OF DATA output
  
  covcoverageList <- list()
  
  covcoverageSubsections <- getMultilineSection("PROPORTION OF DATA PRESENT( FOR [\\w\\d\\s\\.,_]+)*",
    covcoverageSection, filename, allowMultiple=TRUE)
  
  matchlines <- attr(covcoverageSubsections, "matchlines")
  
  if (length(covcoverageSubsections) == 0 || all(is.na(covcoverageSubsections))) { #See UG ex9.7.out
    message("No PROPORTION OF DATA PRESENT sections found within COVARIANCE COVERAGE OF DATA output.")
    return(covcoverageList)
  } else if (length(covcoverageSubsections) > 1) {
    groupNames <- make.names(gsub("^\\s*PROPORTION OF DATA PRESENT( FOR ([\\w\\d\\s\\.,_]+))*\\s*$", "\\2", covcoverageSection[matchlines], perl=TRUE))
  } else { #just one section, no groups
    groupNames <- ""
  }
  
  for (g in 1:length(covcoverageSubsections)) {
    #targetList <- list()
    
    #for now, there is just one matrix extracted, so no need to label it or treat it as a list. Leaving scaffolding commented out if useful later
    #targetList[["covcoverage"]] <- matrixExtract(covcoverageSubsections[[g]], "Covariance Coverage", filename)
    
    targetList <- matrixExtract(covcoverageSubsections[[g]], "Covariance Coverage", filename)
    
    if (length(covcoverageSubsections) > 1) {
      #class(targetList) <- c("mplus.covcoverage", "list")
      covcoverageList[[groupNames[g]]] <- targetList
    }
    else
      covcoverageList <- targetList
  }
  
  if (is.list(covcoverageList)) { class(covcoverageList) <- c("mplus.covcoverage", "list")
  } else { class(covcoverageList) <- c("mplus.covcoverage", "matrix") } #single numeric matrix
  if (length(covcoverageSubsections) > 1) { attr(covcoverageList, "group.names") <- groupNames }
  
  return(covcoverageList)
  
}


#' Extract free file output
#'
#' Function for reading "free" output where a sequence of values populates a matrix
#'
#' @param filename The name of the output file
#' @param outfile The output file
#' @param make_symmetric A logical indicating whether or not to make the matrix symmetric, defaults to \code{TRUE}
#' @return a matrix
#' @keywords internal
#' @examples
#' # make me!!!
extractFreeFile <- function(filename, outfile, make_symmetric=TRUE) {
  #Adapted from code graciously provided by Joe Glass.
  
  if (isEmpty(filename)) return(NULL)
  
  #TODO: make this filename building into a function (duped from read raw)
  outfileDirectory <- splitFilePath(outfile)$directory
  savedataSplit <- splitFilePath(filename)
  
  #if outfile target directory is non-empty, but savedataFile is without directory, then append
  #outfile directory to savedataFile. This ensures that R need not be in the working directory
  #to read the savedataFile. But if savedataFile has an absolute directory, don't append
  
  #if savedata directory is present and absolute, or if no directory in outfile, just use filename as is
  if (!is.na(savedataSplit$directory) && savedataSplit$absolute)
    savedataFile <- filename #just use savedata filename if has absolute path
  else if (is.na(outfileDirectory))
    savedataFile <- filename #just use savedata filename if outfile is missing path (working dir)
  else
    savedataFile <- file.path(outfileDirectory, filename) #savedata path relative or absent and outfile dir is present
  
  if (!file.exists(savedataFile)) {
    warning("Cannot read file: ", filename)
    return(NULL)
  }
  
  values <- scan(savedataFile, what="character", strip.white=FALSE, blank.lines.skip=FALSE, quiet=TRUE)
  matrix.size <- function(x) {
    # per algebra of quadratic equations: p is the # of rows & columns in a symmetric
    # matrix given x unique covariance elements (the lower triangle plus diagonal).
    # This was constructed from the equation x = p(p+1)/2.
    p <- (-1/2) + sqrt(2*x + (1/4))
    # if p is not an integer, having x elements does not result in a symmetric matrix
    p.isinteger <- !length(grep("[^[:digit:]]", as.character(p)))
    if (p.isinteger) {
      return (p)
    } else {
      cat("The length of the supplied vector is not appropriate to generate the matrix. Please check the data file.")
      return(NULL)
    }
  }
  
  matSize <- matrix.size(length(values))
  mat <- matrix(NA_real_, nrow=matSize, ncol=matSize,
    dimnames=list(1:matSize, 1:matSize)) # create empty symmetric matrix
  mat[upper.tri(mat, diag=TRUE)] <- as.numeric(values) # import savedata information into the upper triangle (plus diagonal) of the matrix
  
  if (make_symmetric) {
    mat[lower.tri(mat)] <- t(mat)[lower.tri(mat)] #populate lower triangle
  } else {
    mat <- t(mat) # transpose the matrix to create a lower triangular matrix (plus diagonal)
  }
  
  return(mat)
}

#' Extract Technical 3 matrix from Mplus
#'
#' Function that extracts the Tech3 matrix
#'
#' @param outfiletext the text of the output file
#' @param savedata_info Information on saved data
#' @param filename The name of the file
#' @return A list of class \dQuote{mplus.tech3}
#' @keywords internal
#' @seealso \code{\link{matrixExtract}}
#' @examples
#' # make me!!!
extractTech3 <- function(outfiletext, savedata_info, filename) {
  tech3Section <- getSection("^TECHNICAL 3 OUTPUT$", outfiletext)
  if (is.null(tech3Section)) return(list()) #no tech3 output
  
  tech3List <- list()
  tech3List[["paramCov"]] <- matrixExtract(tech3Section, "ESTIMATED COVARIANCE MATRIX FOR PARAMETER ESTIMATES", filename)
  tech3List[["paramCor"]] <- matrixExtract(tech3Section, "ESTIMATED CORRELATION MATRIX FOR PARAMETER ESTIMATES", filename)
  
  if (!is.null(savedata_info) && !is.na(savedata_info$tech3File)) {
    tech3List[["paramCov.savedata"]] <- extractFreeFile(savedata_info$tech3File, filename, make_symmetric=TRUE)
  } else {
    tech3List[["paramCov.savedata"]] <- NULL
  }
  
  class(tech3List) <- c("mplus.tech3", "list")
  
  return(tech3List)
}

#' Extract Technical 4 matrix from Mplus
#'
#' Function that extracts the Tech4 matrix
#'
#' @param outfiletext the text of the output file
#' @param filename The name of the file
#' @return A list of class \dQuote{mplus.tech4}
#' @keywords internal
#' @seealso \code{\link{matrixExtract}}
#' @examples
#' # make me!!!
extractTech4 <- function(outfiletext, filename) {
  #TODO: have empty list use mplus.tech4 class
  tech4Section <- getSection("^TECHNICAL 4 OUTPUT$", outfiletext)
  if (is.null(tech4Section)) return(list()) #no tech4 output
  
  tech4List <- list()
  
  tech4Subsections <- getMultilineSection("ESTIMATES DERIVED FROM THE MODEL( FOR [\\w\\d\\s\\.,_]+)*",
    tech4Section, filename, allowMultiple=TRUE)
  
  matchlines <- attr(tech4Subsections, "matchlines")
  
  if (length(tech4Subsections) == 0) {
    warning("No sections found within TECH4 output.")
    return(list())
  }
  else if (length(tech4Subsections) > 1) {
    groupNames <- make.names(gsub("^\\s*ESTIMATES DERIVED FROM THE MODEL( FOR ([\\w\\d\\s\\.,_]+))*\\s*$", "\\2", tech4Section[matchlines], perl=TRUE))
  }
  
  for (g in 1:length(tech4Subsections)) {
    targetList <- list()
    
    targetList[["latMeansEst"]] <- matrixExtract(tech4Subsections[[g]], "ESTIMATED MEANS FOR THE LATENT VARIABLES", filename)
    targetList[["latCovEst"]] <- matrixExtract(tech4Subsections[[g]], "ESTIMATED COVARIANCE MATRIX FOR THE LATENT VARIABLES", filename)
    targetList[["latCorEst"]] <- matrixExtract(tech4Subsections[[g]], "ESTIMATED CORRELATION MATRIX FOR THE LATENT VARIABLES", filename)
    
    if (length(tech4Subsections) > 1) {
      class(targetList) <- c("mplus.tech4", "list")
      tech4List[[groupNames[g]]] <- targetList
    }
    else
      tech4List <- targetList
    
  }
  
  class(tech4List) <- c("mplus.tech4", "list")
  
  return(tech4List)
}

#' Extract Technical 7 from Mplus
#'
#' The TECH7 option is used in conjunction with TYPE=MIXTURE to request sample statistics
#' for each class using raw data weighted by the estimated posterior probabilities for each class.
#'
#' @param outfiletext the text of the output file
#' @param filename The name of the file
#' @return A list of class \dQuote{mplus.tech7}
#' @keywords internal
#' @seealso \code{\link{matrixExtract}}
#' @examples
#' # make me!!!
extractTech7 <- function(outfiletext, filename) {
  #TODO: have empty list use mplus.tech7 class
  #not sure whether there are sometimes multiple groups within this section.
  tech7Section <- getSection("^TECHNICAL 7 OUTPUT$", outfiletext)
  if (is.null(tech7Section)) return(list()) #no tech7 output
  
  tech7List <- list()
  
  tech7Subsections <- getMultilineSection("SAMPLE STATISTICS WEIGHTED BY ESTIMATED CLASS PROBABILITIES FOR CLASS \\d+",
    tech7Section, filename, allowMultiple=TRUE)
  
  matchlines <- attr(tech7Subsections, "matchlines")
  
  if (length(tech7Subsections) == 0) {
    warning("No sections found within tech7 output.")
    return(list())
  }
  else if (length(tech7Subsections) > 1) {
    groupNames <- make.names(gsub("^\\s*SAMPLE STATISTICS WEIGHTED BY ESTIMATED CLASS PROBABILITIES FOR (CLASS \\d+)\\s*$", "\\1", tech7Section[matchlines], perl=TRUE))
  }
  
  for (g in 1:length(tech7Subsections)) {
    targetList <- list()
    
    targetList[["classSampMeans"]] <- matrixExtract(tech7Subsections[[g]], "Means", filename)
    targetList[["classSampCovs"]] <- matrixExtract(tech7Subsections[[g]], "Covariances", filename)
    
    if (length(tech7Subsections) > 1) {
      class(targetList) <- c("mplus.tech7", "list")
      tech7List[[groupNames[g]]] <- targetList
    }
    else
      tech7List <- targetList
  }
  
  class(tech7List) <- c("mplus.tech7", "list")
  
  return(tech7List)
}


#' Extract Technical 8 from Mplus
#'
#' The TECH8 option is used to print the optimization history of a model.
#' It also prints the potential scale reduction in Bayesian models.
#'
#' @param outfiletext the text of the output file
#' @param filename The name of the file
#' @return A list of class \dQuote{mplus.tech8}
#' @keywords internal
#' @seealso \code{\link{matrixExtract}}
#' @examples
#' # make me!!!
extractTech8 <- function(outfiletext, filename) {
  #not sure whether there are sometimes multiple groups within this section.
  #for now, this function only extract PSR in Bayes models
  tech8Section <- getSection("^TECHNICAL 8 OUTPUT$", outfiletext)
  tech8List <- list()
  class(tech8List) <- c("mplus.tech8", "list")
  psr <- data.frame(); class(psr) <- c("mplus.psr.data.frame", "data.frame"); tech8List[["psr"]] <- psr
  
  if (is.null(tech8Section)) return(tech8List) #no tech8 output

  #psr extraction subfunction
  extractPSR <- function(text) {
    startline <- grep("ITERATION\\s+SCALE REDUCTION\\s+HIGHEST PSR", text, perl=TRUE)
    if (length(startline) > 0L) {
      firstBlank <- which(text == "")
      firstBlank <- firstBlank[firstBlank > startline][1L] #first blank after starting line
      toparse <- text[(startline+1):firstBlank]
      psr <- data.frame(matrix(as.numeric(unlist(strsplit(trimSpace(toparse), "\\s+", perl=TRUE))), ncol=3, byrow=TRUE, dimnames=list(NULL, c("iteration", "psr", "param.highest.psr"))))
      class(psr) <- c("mplus.psr.data.frame", "data.frame")
      return(psr)
    } else {
      return(NULL)
    }
  }    
  
  bayesPSR <- getMultilineSection("TECHNICAL 8 OUTPUT FOR BAYES ESTIMATION", tech8Section, filename, allowMultiple=FALSE)
  
  if (!is.na(bayesPSR[1L])) {
    #new outputs have "Iterations for model estimation" and "Iterations for computing PPPP"
    if (any(grepl("Iterations for computing PPPP", bayesPSR))) {
      pppp_text <- getSection("Iterations for computing PPPP", bayesPSR, headers = c("Iterations for computing PPPP", "Iterations for model estimation"))
      model_text <- getSection("Iterations for model estimation", bayesPSR, headers = c("Iterations for computing PPPP", "Iterations for model estimation"))
      tech8List[["psr"]] <- extractPSR(model_text)
      tech8List[["psr_pppp"]] <- extractPSR(pppp_text)
    } else {
      tech8List[["psr"]] <- extractPSR(bayesPSR)
    }
  }
  
  return(tech8List)
}


#' Extract Technical 9 matrix from Mplus
#'
#' Function that extracts the Tech9 matrix
#'
#' @param outfiletext the text of the output file
#' @param filename The name of the file
#' @return A list of class \dQuote{mplus.tech9}
#' @keywords internal
#' @seealso \code{\link{matrixExtract}}
#' @examples
#' # make me!!!
extractTech9 <- function(outfiletext, filename) {
  tech9List <- list()
  class(tech9List) <- c("mplus.tech9", "list")
  
  tech9Section <- getSection("^TECHNICAL 9 OUTPUT$", outfiletext)
  if (is.null(tech9Section)) return(tech9List) #no tech9 output
  
  tech9Reps <- grep("^\\s*REPLICATION \\d+:\\s*$", tech9Section, perl=TRUE)
  
  repNums <- as.numeric(gsub("^\\s*REPLICATION (\\d+):\\s*$", "\\1", tech9Section[tech9Reps], perl=TRUE))
  
  if (length(tech9Reps) > 0L) {
    for (l in 1:length(tech9Reps)) {
      if (l < length(tech9Reps)) {
        msg <- paste(tech9Section[ (tech9Reps[l]+1):(tech9Reps[l+1]-1) ], collapse=" ")
      } else {
        msg <- paste(tech9Section[ (tech9Reps[l]+1):length(tech9Section) ], collapse=" ")
      }
      msg <- trimSpace(gsub("\\s+", " ", msg, perl=TRUE))
      tech9List[[ paste0("rep", repNums[l]) ]] <- list(rep=repNums[l], error=msg)
    }
  }
  
  return(tech9List)
}

#' Extract Technical 10 matrix from Mplus
#'
#' Function that extracts the Tech10 matrix
#'
#' @param outfiletext the text of the output file
#' @param filename The name of the file
#' @return An empty list
#' @keywords internal
#' @seealso \code{\link{matrixExtract}}
#' @examples
#' # make me!!!
extractTech10 <- function(outfiletext, filename) {
  tech10Section <- getSection("^TECHNICAL 10 OUTPUT$", outfiletext)
  if (is.null(tech10Section)) return(list()) #no tech10 output
  
  tech10List <- list()
  
  bivarFit <- getSection("^\\s*BIVARIATE MODEL FIT INFORMATION\\s*$", outfiletext)
  
  if (is.null(bivarFit)) return(tech10List) 
  
  # Skip header lines
  bivarFit <- bivarFit[6:length(bivarFit)]
  
  # Find the lines numbers with the variable name pairs
  #  (and add location for the end of text, for the ldply below)
  var.lines <- c(grep("^\\s{5}\\S{1,8}\\s+\\S{1,8}", bivarFit), length(bivarFit))

  # Iterate over each set of variable pairs to extract values
  res <- ldply(1:(length(var.lines)-1), function(i) {
    # Get start and end line numbers for this pair
    v.line <- var.lines[i]
    end <- var.lines[i+1] - 1
    
    # Split the var names into a vector
    vars <- unlist(strsplit(trimSpace(bivarFit[v.line]), "\\s+", perl = TRUE))
    
    lines.idxs <- (v.line+1):end
    
    # Parse remaining lines for this variable pair
    ldply(lines.idxs, function(line) {
      if (grepl("^\\s+Category \\d+\\s+Category \\d+", bivarFit[line], perl = TRUE)) {
        vals <- unlist(strsplit(trimSpace(bivarFit[line]), "\\s{2,}", perl = TRUE))
        
        c(vars, vals)
      }
      else if (grepl("^\\s+Bivariate (Pearson|Log-Likelihood) Chi-Square", bivarFit[line], perl = TRUE)) {
        m <- unlist(regmatches(bivarFit[line], regexec("Bivariate (Pearson|Log-Likelihood) Chi-Square\\s+(\\S+)", bivarFit[line], perl = TRUE)))
        
        c(vars, "Summary", m[2:3], NA, NA)
      }
      else if (grepl("^\\s+Number of Significant Standardized Residuals", bivarFit[line], perl = TRUE)) {
        m <- unlist(regmatches(bivarFit[line], regexec("Number of Significant Standardized Residuals\\s+(\\S+)", bivarFit[line], perl = TRUE)))
        
        c(vars, "Summary", "Significant", m[2], NA, NA)
      }      
    })
  })
  
  # Split out the "summary" lines from the bivariate data
  bivarFitData <- res[res$V3 != "Summary",]
  bivarFitStats <- res[res$V3 == "Summary", c("V1", "V2", "V4", "V5")]
  
  names(bivarFitData) <- c("var1", "var2", "cat1", "cat2", "h1", "h0", "z")
  rownames(bivarFitData) <- NULL
  
  names(bivarFitStats) <- c("var1", "var2", "stat", "value")
  bivarFitStats <- reshape(bivarFitStats, idvar = c("var1", "var2"), timevar = "stat", direction = "wide"
                           , varying = c("Pearson", "Log-Liklihood", "Significant"))
  rownames(bivarFitStats) <- NULL
  attr(bivarFitStats, "reshapeWide") <- NULL
  
  tech10List$bivar_model_fit_info <- bivarFitData
  tech10List$bivar_chi_square <- bivarFitStats
  
  return(tech10List)
  
}

#' Extract Technical 12 from Mplus
#'
#' The TECH12 option is used in conjunction with TYPE=MIXTURE to request residuals for observed
#' versus model estimated means, variances, covariances, univariate skewness, and univariate
#' kurtosis. The observed values come from the total sample. The estimated values are computed as
#' a mixture across the latent classes.
#'
#' @param outfiletext the text of the output file
#' @param filename The name of the file
#' @return A list of class \dQuote{mplus.tech12}
#' @keywords internal
#' @seealso \code{\link{matrixExtract}}
#' @examples
#' # make me!!!
extractTech12 <- function(outfiletext, filename) {
  #not sure whether there are sometimes multiple groups within this section.
  tech12Section <- getSection("^TECHNICAL 12 OUTPUT$", outfiletext)
  tech12List <- list()
  class(tech12List) <- c("mplus.tech12", "list")
  
  if (is.null(tech12Section)) return(tech12List) #no tech12 output
  
  tech12Subsections <- getMultilineSection("ESTIMATED MIXED MODEL AND RESIDUALS \\(OBSERVED - EXPECTED\\)",
    tech12Section, filename, allowMultiple=TRUE)
  
  matchlines <- attr(tech12Subsections, "matchlines")
  
  if (length(tech12Subsections) == 0) {
    warning("No sections found within tech12 output.")
    return(list())
  }
  else if (length(tech12Subsections) > 1) {
    warning("extractTech12 does not yet know how to handle multiple sections (if such exist)")
  }
  
  for (g in 1:length(tech12Subsections)) {
    targetList <- list()
    
    targetList[["obsMeans"]] <- matrixExtract(tech12Subsections[[g]], "Observed Means", filename)
    targetList[["mixedMeans"]] <- matrixExtract(tech12Subsections[[g]], "Estimated Mixed Means", filename)
    targetList[["mixedMeansResid"]] <- matrixExtract(tech12Subsections[[g]], "Residuals for Mixed Means", filename)
    targetList[["obsCovs"]] <- matrixExtract(tech12Subsections[[g]], "Observed Covariances", filename)
    targetList[["mixedCovs"]] <- matrixExtract(tech12Subsections[[g]], "Estimated Mixed Covariances", filename)
    targetList[["mixedCovsResid"]] <- matrixExtract(tech12Subsections[[g]], "Residuals for Mixed Covariances", filename)
    targetList[["obsSkewness"]] <- matrixExtract(tech12Subsections[[g]], "Observed Skewness", filename)
    targetList[["mixedSkewness"]] <- matrixExtract(tech12Subsections[[g]], "Estimated Mixed Skewness", filename)
    targetList[["mixedSkewnessResid"]] <- matrixExtract(tech12Subsections[[g]], "Residuals for Mixed Skewness", filename)
    targetList[["obsKurtosis"]] <- matrixExtract(tech12Subsections[[g]], "Observed Kurtosis", filename)
    targetList[["mixedKurtosis"]] <- matrixExtract(tech12Subsections[[g]], "Estimated Mixed Kurtosis", filename)
    targetList[["mixedKurtosisResid"]] <- matrixExtract(tech12Subsections[[g]], "Residuals for Mixed Kurtosis", filename)
    
    if (length(tech12Subsections) > 1) {
      class(targetList) <- c("mplus.tech12", "list")
      tech12List[[g]] <- targetList #no known case where there are many output sections
    }
    else
      tech12List <- targetList
  }
  
  class(tech12List) <- c("mplus.tech12", "list")
  
  return(tech12List)
}



#' Extract Technical 15 from Mplus
#'
#' The TECH15 option is used in conjunction with TYPE=MIXTURE to request conditional probabilities
#' for the latent class variables.
#'
#' @param outfiletext the text of the output file
#' @param filename The name of the file
#' @return A list of class \dQuote{mplus.tech15}
#' @keywords internal
#' @seealso \code{\link{matrixExtract}}
#' @examples
#' # make me!!!
extractTech15 <- function(outfiletext, filename) {
  tech15Section <- getSection("^TECHNICAL 15 OUTPUT$", outfiletext)
  tech15List <- list(conditional.probabilities = trimws(tech15Section[grepl("^\\s+?P\\(", tech15Section)]))
  class(tech15List) <- c("mplus.tech15", "list")
  
  if (is.null(tech15Section)) return(tech15List) #no tech15 output
  
  return(tech15List)
}

#' Extract Factor Score Statistics
#'
#' Function for extracting matrices for factor scores
#'
#' @param outfiletext The text of the output file
#' @param filename The name of the output file
#' @return A list
#' @keywords internal
#' @seealso \code{\link{matrixExtract}}
#' @examples
#' # make me!!!
extractFacScoreStats <- function(outfiletext, filename) {
  #for now, skip getSection call and use nested header to getMultilineSection to avoid issue of SAMPLE STATISTICS appearing both
  #as top-level header and sub-header within factor scores
  
  fssSection <- getMultilineSection("SAMPLE STATISTICS FOR ESTIMATED FACTOR SCORES::SAMPLE STATISTICS",
    outfiletext, filename, allowMultiple=FALSE)
  fssList <- list()
  class(fssList) <- c("mplus.facscorestats", "list")
  
  if (is.na(fssSection[1L])) return(fssList) #no factor scores output
  
  fssList[["Means"]] <- matrixExtract(fssSection, "Means", filename)
  fssList[["Covariances"]] <- matrixExtract(fssSection, "Covariances", filename)
  fssList[["Correlations"]] <- matrixExtract(fssSection, "Correlations", filename)
  
  return(fssList)
  
}


#' Extract Latent Class Counts
#'
#' Function for extracting counts of latent classes
#'
#' @param outfiletext The text of the output file
#' @param filename The name of the output file
#' @return a list
#' @keywords internal
#' @examples
#' # make me!!!
extractClassCounts <- function(outfiletext, filename, summaries) {
  
  #helper function for three-column class output
  getClassCols <- function(sectiontext) {
    #identify lines of the form class number, class count, class proportion: e.g., 1		136.38		.2728
    numberLines <- grep("^\\s*\\d+\\s+[0-9\\.-]+\\s+[0-9\\.-]+\\s*$", sectiontext, perl=TRUE)
    
    if (length(numberLines) > 0) {
      #row bind each line, convert to numeric, and store as data.frame
      counts <- data.frame(do.call(rbind, lapply(strsplit(trimSpace(sectiontext[numberLines]), "\\s+", perl=TRUE), as.numeric)))
      if (!ncol(counts) == 3) {
        warning("Number of columns for model class counts is not three.")
        return(NULL)
      }
      
      names(counts) <- c("class", "count", "proportion")
      
      #store counts as integer
      counts <- transform(counts, class=as.integer(class))
      return(counts)
    } else {
      return(NULL)
    }
    
  }
  
  countlist <- list()
  
  if (is.null(summaries) || missing(summaries) || isTRUE(summaries$NCategoricalLatentVars[1L]==1) || all(is.na(summaries$NCategoricalLatentVars))) {
    #Starting in Mplus v7.3 and above, formatting of the class counts appears to have changed...
    #Capture the alternatives here
    if (is.null(summaries) || missing(summaries) || is.null(summaries$Mplus.version) || as.numeric(summaries$Mplus.version[1L]) < 7.3) {
      modelCounts <- getSection("^FINAL CLASS COUNTS AND PROPORTIONS FOR THE LATENT CLASSES$", outfiletext)
      ppCounts <- getSection("^FINAL CLASS COUNTS AND PROPORTIONS FOR THE LATENT CLASS PATTERNS$", outfiletext)
      mostLikelyCounts <- getSection("^CLASSIFICATION OF INDIVIDUALS BASED ON THEIR MOST LIKELY LATENT CLASS MEMBERSHIP$", outfiletext)
    } else {
      modelCounts <- getSection("^FINAL CLASS COUNTS AND PROPORTIONS FOR THE LATENT CLASSES$::^BASED ON THE ESTIMATED MODEL$", outfiletext)
      ppCounts <- getSection("^FINAL CLASS COUNTS AND PROPORTIONS FOR THE LATENT CLASSES$::^BASED ON ESTIMATED POSTERIOR PROBABILITIES$", outfiletext)
      mostLikelyCounts <- getSection("^FINAL CLASS COUNTS AND PROPORTIONS FOR THE LATENT CLASSES$::^BASED ON THEIR MOST LIKELY LATENT CLASS MEMBERSHIP$", outfiletext)
    }
    
    countlist[["modelEstimated"]] <- getClassCols(modelCounts)
    countlist[["posteriorProb"]] <- getClassCols(ppCounts)
    countlist[["mostLikely"]] <- getClassCols(mostLikelyCounts)
    
    #most likely by posterior probability section
    mostLikelyProbs <- getSection("^Average Latent Class Probabilities for Most Likely Latent Class Membership \\((Row|Column)\\)$", outfiletext)
    if (length(mostLikelyProbs) > 1L) { mostLikelyProbs <- mostLikelyProbs[-1L] } #remove line 1: "by Latent Class (Column)"
    
    #Example:
    #Average Latent Class Probabilities for Most Likely Latent Class Membership (Row)
    #by Latent Class (Column)
    #
    #  			1        2
    #
    #  1   0.986    0.014
    #  2   0.030    0.970
    #
    #A bit of a wonky section. Some notes:
    # 1) Rows represent those hard classified into that class.
    # 2) Rows sum to 1.0 and represent the summed average posterior probabilities of all the class assignment possibilities.
    # 3) Columns represent average posterior probabilitity of being in class 1 for those hard classified as 1 or 2.
    # 4) High diagonal indicates that hard classification matches posterior probability patterns.
    
    countlist[["avgProbs.mostLikely"]] <- unlabeledMatrixExtract(mostLikelyProbs, filename)
    
    #same, but for classification probabilities
    #also, starting ~Mplus 7.3, the columns and rows appear to have switched in this and the logit section (hence the Column|Row syntax)
    classificationProbs <- getSection("^Classification Probabilities for the Most Likely Latent Class Membership \\((Column|Row)\\)$", outfiletext)
    if (length(classificationProbs) > 1L) { classificationProbs <- classificationProbs[-1L] } #remove line 1: "by Latent Class (Column)"
    
    countlist[["classificationProbs.mostLikely"]] <- unlabeledMatrixExtract(classificationProbs, filename)
    
    #same, but for classification probability logits
    classificationLogitProbs <- getSection("^Logits for the Classification Probabilities for the Most Likely Latent Class Membership \\((Column|Row)\\)$", outfiletext)
    if (length(classificationLogitProbs) > 1L) { classificationLogitProbs <- classificationLogitProbs[-1L] } #remove line 1: "by Latent Class (Column)"
    
    countlist[["logitProbs.mostLikely"]] <- unlabeledMatrixExtract(classificationLogitProbs, filename)
  } else {
    # Extract class_counts for multiple categorical latent variables.
    
    getClassCols_lta <- function(sectiontext) {
      numberLines <- grep("^\\s*([a-zA-Z0-9]+)?(\\s+[0-9\\.-]{1,}){1,}$", sectiontext, perl=TRUE)
      if (length(numberLines) > 0) {
        parsedlines <- strsplit(trimSpace(sectiontext[numberLines]), "\\s+", perl=TRUE)
        num_values <- sapply(parsedlines, length)
        if(length(unique(num_values)) == 1){
          counts <- data.frame(t(sapply(parsedlines, as.numeric)), stringsAsFactors = FALSE)
        } else {
          # Pad shorter lines with NA on the left side
          parsedlines[which(num_values != max(num_values))] <- lapply(parsedlines[which(num_values != max(num_values))], function(x){
              c(rep(NA, (max(num_values) - length(x))), x)
            })
          counts <- do.call(rbind, parsedlines)
          # Repeat existing values on subsequent rows in columns containing NAs
          counts[,1] <- inverse.rle(list(lengths = diff(c(which(!is.na(counts[,1])), (nrow(counts)+1))), values = counts[,1][complete.cases(counts[,1])]))
          counts <- data.frame(counts, stringsAsFactors = FALSE)
          counts[, 2:4] <- lapply(counts[, 2:4], as.numeric)
        }
        return(counts)
      } else {
        return(NULL)
      }
    }
    
    if (missing(summaries) || is.null(summaries$Mplus.version) || as.numeric(summaries$Mplus.version) < 7.3) {
      posteriorProb.patterns <- getSection("^FINAL CLASS COUNTS AND PROPORTIONS FOR THE LATENT CLASSES$::^BASED ON ESTIMATED POSTERIOR PROBABILITIES$", outfiletext)
      mostLikely.patterns <- getSection("^CLASSIFICATION OF INDIVIDUALS BASED ON THEIR MOST LIKELY LATENT CLASS PATTERN$", outfiletext)
      mostLikelyCounts <- getSection("CLASSIFICATION OF INDIVIDUALS BASED ON THEIR MOST LIKELY LATENT CLASS PATTERN", outfiletext)
    } else {
      posteriorProb.patterns <- getSection("^FINAL CLASS COUNTS AND PROPORTIONS FOR THE LATENT CLASS PATTERNS$::^BASED ON ESTIMATED POSTERIOR PROBABILITIES$", outfiletext)
      mostLikely.patterns <- getSection("^FINAL CLASS COUNTS AND PROPORTIONS FOR THE LATENT CLASS PATTERNS$::^BASED ON THEIR MOST LIKELY LATENT CLASS PATTERN$", outfiletext)
      mostLikelyCounts <- getSection("^FINAL CLASS COUNTS AND PROPORTIONS FOR EACH LATENT CLASS VARIABLE$::^BASED ON THEIR MOST LIKELY LATENT CLASS PATTERN$", outfiletext)
    }
    
    # Class counts
    countlist[["modelEstimated"]] <- getClassCols_lta(
      getSection(
        "^FINAL CLASS COUNTS AND PROPORTIONS FOR EACH LATENT CLASS VARIABLE$::^BASED ON THE ESTIMATED MODEL$",
        outfiletext
      )
    )
    countlist[["posteriorProb"]] <- getClassCols_lta(
      getSection(
        "^FINAL CLASS COUNTS AND PROPORTIONS FOR EACH LATENT CLASS VARIABLE$::^BASED ON ESTIMATED POSTERIOR PROBABILITIES$",
        outfiletext
      )
    )
    countlist[["mostLikely"]] <- getClassCols_lta(mostLikelyCounts)
    
    countlist[which(names(countlist) %in% c("modelEstimated", "posteriorProb", "mostLikely"))] <-
      lapply(countlist[which(names(countlist) %in% c("modelEstimated", "posteriorProb", "mostLikely"))],
        setNames,
        c("variable", "class", "count", "proportion"))
    
    # Patterns
    countlist[["modelEstimated.patterns"]] <-
      getClassCols_lta(
        getSection(
          "^FINAL CLASS COUNTS AND PROPORTIONS FOR THE LATENT CLASS PATTERNS$::^BASED ON THE ESTIMATED MODEL$",
          outfiletext
        )
      )
    
    countlist[["posteriorProb.patterns"]] <- getClassCols_lta(posteriorProb.patterns)
    countlist[["mostLikely.patterns"]] <- getClassCols_lta(mostLikely.patterns)
    
    # Determine order of column names for categorical latent variables
    morder <- getSection("MODEL RESULTS USE THE LATENT CLASS VARIABLE ORDER", outfiletext)
    if (!is.null(morder)) {
      first_present <- which(morder != "")[1]
      catvar_names <- strsplit(trimSpace(morder[first_present]), "\\s+")[[1]]
    } else {
      catvar_names <- unique(countlist[["mostLikely"]]$variable) #fall back approximation
      #stop("Unable to determine names of categorical latent variables.")
    }
    
    rename_sections <- which(names(countlist) %in% c("modelEstimated.patterns", "posteriorProb.patterns", "mostLikely.patterns"))
    if (length(rename_sections) > 0L) {
      for (s in rename_sections) {
        countlist[[s]] <- setNames(countlist[[s]], c(catvar_names, "count", "proportion"))
      }
    }
    
    #Average latent class probabilities (not always present in outputs)
    avgProbs <- getSection("^Average Latent Class Probabilities for Most Likely Latent Class Pattern \\((Row|Column)\\)$::^by Latent Class Pattern \\((Row|Column)\\)$", outfiletext)
    
    if (!is.null(avgProbs)) {
      column_headers <- strsplit(trimws(grep("\\s*Latent Class\\s{2,}", avgProbs, value = TRUE)), "\\s+", perl=TRUE)[[1]][-1]
      variable_pattern_rows <- grep(paste(c("^(\\s{2,}\\d+){", length(column_headers), "}$"), collapse = ""), avgProbs, perl=TRUE)
      variable_pattern_rows <- variable_pattern_rows[!c(FALSE, diff(variable_pattern_rows) != 1)]
      variable_patterns <- avgProbs[variable_pattern_rows]
      variable_patterns <- data.frame(t(sapply(strsplit(trimws(variable_patterns), "\\s+", perl=TRUE), as.numeric)))
      names(variable_patterns) <- c("Latent Class Pattern No.", column_headers[-1])
      
      probs <- grep(paste(c("^\\s+\\d{1,}(\\s{2,}[0-9\\.-]+)+$"), collapse = ""), avgProbs[(variable_pattern_rows[length(variable_pattern_rows)]+1):length(avgProbs)], perl=TRUE, value = TRUE)
      # If the table is truncated, concatenate its parts
      if(length(probs) %% nrow(variable_patterns) > 1){
        for(i in 2:(length(probs) %% nrow(variable_patterns))){
          probs[1:(nrow(variable_patterns)+1)] <- 
            paste(probs[1:(nrow(variable_patterns)+1)],
                  substring(probs[((i-1)*(nrow(variable_patterns)+1)+1):(i*(nrow(variable_patterns)+1))], first = 8)
            )
        }
        probs <- probs[1:nrow(variable_patterns)]
      }
      probs <- t(sapply(strsplit(trimws(probs[-1]), "\\s+", perl=TRUE), as.numeric))[,-1]
      
      countlist[["avgProbs.mostLikely"]] <- probs
      countlist[["avgProbs.mostLikely.patterns"]] <- variable_patterns
    }
    
    # AFAIK this section is not reported for multiple categorical variables
    countlist[["classificationProbs.mostLikely"]] <- NULL
    # AFAIK this section is not reported for multiple categorical variables
    countlist[["logitProbs.mostLikely"]] <- NULL
    
    transitionProbs <- getSection("^LATENT TRANSITION PROBABILITIES BASED ON THE ESTIMATED MODEL$", outfiletext)
    if(!is.null(transitionProbs)){
      section_starts <- grep("\\(Columns\\)$", transitionProbs)
      transitionProbs <- mapply(FUN = function(begin, end){
          probs <- grep("^\\s+\\d{1,}(\\s{2,}[0-9\\.-]{2,}){1,}$", transitionProbs[begin:end], perl=TRUE, value = TRUE)
          probs <- do.call(rbind, strsplit(trimws(probs), "\\s+", perl=TRUE))[,-1]
          cbind(paste(gsub("\\s+(\\w+) Classes.*$", "\\1", transitionProbs[begin]) , ".", rep(c(1:nrow(probs)), ncol(probs)), sep = ""),
            paste(gsub(".+?by (\\w+) Classes.*$", "\\1", transitionProbs[begin]) , ".", as.vector(sapply(1:ncol(probs), rep, nrow(probs))), sep = ""),
            as.vector(probs))
        },
        begin = section_starts, end = c(section_starts[-1], length(transitionProbs)), SIMPLIFY = FALSE)
      if(length(transitionProbs) > 1){
        transitionProbs <- do.call(rbind, transitionProbs)
      } else {
        transitionProbs <- transitionProbs[[1]]
      }
      transitionProbs <- data.frame(transitionProbs, stringsAsFactors = FALSE)
      names(transitionProbs) <- c("from", "to", "probability")
      transitionProbs$probability <- as.numeric(transitionProbs$probability)
    }
    countlist[["transitionProbs"]] <- transitionProbs
  }
  return(countlist)
}

#' Reconstruct matrix from unlabeled multi-line text output
#'
#' worker function for extracting Mplus matrix output from an unlabeled section
#' where matrices are spread across blocks to keep within width constraints
#' example: class counts output from latent class models.
#'
#' @param outfiletext The text of the output file
#' @param filename The name of the output file
#' @return a matrix
#' @keywords internal
#' @examples
#' # make me!!!
unlabeledMatrixExtract <- function(outfiletext, filename) {
  #This function extends the matrixExtract function by allowing for the matrix to be recreated
  #to have no header labels and where section headers have a blank line on either side. Only example is in the class counts section, where when there
  #are many classes, the most likely x posterior probability matrix is too wide and is output like this:
  
  #       1        2        3        4        5        6        7        8        9
  #
  #1   0.885    0.000    0.000    0.017    0.024    0.000    0.000    0.019    0.055
  #2   0.000    0.775    0.006    0.000    0.000    0.064    0.097    0.013    0.000
  #3   0.000    0.004    0.826    0.035    0.000    0.082    0.000    0.000    0.052
  #4   0.014    0.002    0.070    0.804    0.018    0.035    0.000    0.008    0.046
  #5   0.042    0.000    0.001    0.076    0.842    0.000    0.000    0.001    0.038
  #6   0.000    0.096    0.063    0.014    0.001    0.732    0.021    0.026    0.008
  #7   0.002    0.091    0.010    0.005    0.001    0.034    0.808    0.005    0.005
  #8   0.118    0.014    0.006    0.004    0.000    0.030    0.015    0.514    0.139
  #9   0.030    0.001    0.056    0.059    0.014    0.024    0.000    0.109    0.691
  #10  0.030    0.062    0.007    0.007    0.002    0.052    0.130    0.108    0.063
  #
  #      10
  #
  #1   0.000
  #2   0.046
  #3   0.001
  #4   0.004
  #5   0.000
  #6   0.038
  #7   0.039
  #8   0.159
  #9   0.016
  #10  0.539
  
  #Only one matrix can be extracted from outfiletext since sections are unlabeled
  
  if (length(outfiletext) > 0L && length(outfiletext) > 1L) {
    
    #pattern match: 1) blank line; 2) integers line; 3) blank line
    #find these cases, then add "DUMMY" to each of the header blank lines
    
    blankLines <- which(outfiletext == "")
    
    if (length(blankLines) > 0L) {
      headerLines <- c()
      
      for (b in 1:length(blankLines)) {
        if (b < length(blankLines) && blankLines[b+1] == blankLines[b] + 2) {
          # a blank line followed by a non-blank line followed by a blank line...
          # check that it represents an integer sequence (this may need to be removed in more general cases)
          intLine <- strsplit(trimSpace(outfiletext[blankLines[b]+1]), "\\s+", perl=TRUE)[[1L]]
          firstCol <- as.numeric(intLine[1L]) #number of the class in the first column
          if (all(intLine == firstCol:(firstCol + length(intLine) - 1) )) {
            headerLines <- c(headerLines, blankLines[b])
          }
        }
      }
      
      #add the header to blank lines preceding class labels row
      outfiletext[headerLines] <- "DUMMY"
      
      #now use matrix extract to reconstruct matrix
      unlabeledMat <- matrixExtract(outfiletext, "DUMMY", filename)
      
      return(unlabeledMat)
      
    } else {
      return(NULL)
    }
  } else {
    return(NULL)
  }
}


#' Reconstruct matrix from multi-line text output
#'
#' main worker function for extracting Mplus matrix output
#' where matrices are spread across blocks to keep within width constraints
#' example: tech1 matrix output.
#'
#' @param outfiletext The text of the output file
#' @param headerLine The header line
#' @param filename The name of the output file
#' @param ignore.case Whether to ignore case of header line
#' @param expect_sig Whether to track value significance TRUE/FALSE (* in Mplus) as an attribute
#' @return a matrix
#' @keywords internal
#' @examples
#' # make me!!!
matrixExtract <- function(outfiletext, headerLine, filename, ignore.case=FALSE, expect_sig=FALSE) {
  
  matLines <- getMultilineSection(headerLine, outfiletext, filename, allowMultiple=TRUE, ignore.case=ignore.case)
  
  if (!is.na(matLines[1])) {
    numBlocks <- length(matLines)
    blockList <- list()
    for (m in 1:numBlocks) {
      colHeaders <- strsplit(trimSpace(matLines[[m]][1]), "\\s+", perl=TRUE)[[1]]
      
      #m+3 because m+1 is col header, m+2 is line of underscores
      block <- matLines[[m]][c(-1,-2)]
      block <- block[block != ""] #drop blank lines
      
      #10Jul2012: Occasionally, Mplus includes a blank line block just for fun... like this:
      #Residuals for Covariances/Correlations/Residual Correlations
      #STRES4
      #________
      #in this case, skip the block
      if (length(block) == 0) next
      
      splitData <- strsplit(trimSpace(block), "\\s+", perl=TRUE)
      
      #alternative to remove blank lines after strsplit (above easier to read)
      #remove blank lines by comparing against character(0)
      #splitData2 <- splitData[sapply(splitData, function(x) !identical(x, character(0)))]
      
      #May 2017: in Mplus v7*, there is a header on the beginning of each row, including for vectors such as NU,TAU, etc.
      #example:
      # NU
      #       Y             X1            X2            W
      #       ________      ________      ________      ________
      # 1           0             0             0             0
      
      #in Mplus v8, the "1" header on parameter vectors has been removed.
      # NU
      #    Y12T3         Y13T3         Y14T3
      #    ________      ________      ________
      #          0             0             0
      
      #To overcome this problem, check the number of columns in splitData compared to the number of column headers.
      #If the number of columns is equal to the number of column headers, add a "1" at the beginning to make parsing code
      #  consistent with v7 and expectation is matrix assembly in the aggMat section below.
      #Only add this tweak if the first element of v is not identical to any column header.
      #Otherwise this will add a "1" to some rows that are part of a matrix, not param vector.
      splitData <- lapply(splitData, function(v) {
          if (length(v) == length(colHeaders) && (! v[1L] %in% colHeaders)) { v <- c("1", v) }
          return(v)
        })
      
      #pull out row names from each element
      rowHeaders <- sapply(splitData, "[", 1)
      
      mat <- matrix(NA_real_, nrow=length(rowHeaders), ncol=length(colHeaders),
        dimnames=list(rowHeaders, colHeaders))
      
      if (isTRUE(expect_sig)) { attr(mat, "sig") <- array(NA, dim=dim(mat)) }
      
      for (r in 1:length(splitData)) {
        #use mplus_as.numeric to handle D+XX scientific notation in output and sig values
        line <- mplus_as.numeric(splitData[[r]][-1], expect_sig=expect_sig)
        if (length(line) == 0L) { next } #occasionally have blank rows, skip assignment into mat
        
        mat[r,1:length(line)] <- line
        if (isTRUE(expect_sig)) { attr(mat, "sig")[r,1:length(line)] <- attr(line, "sig") }
      }
      
      blockList[[m]] <- mat
    }
    
    #aggregate sections
    aggMatCols <- do.call("c", lapply(blockList, colnames))
    aggMatRows <- rownames(blockList[[1]]) #row names are shared across blocks in Mplus output
    aggMat <- matrix(NA, nrow=length(aggMatRows), ncol=length(aggMatCols), dimnames=list(aggMatRows, aggMatCols))
    attr(aggMat, "sig") <- array(NA, dim=dim(aggMat), dimnames = dimnames(aggMat))
    
    #Unfortunately, due to Mplus 8-character printing limits for matrix sections, row/col names are not guaranteed to be unique.
    #This causes problems for using name-based matching to fill the matrix.
    #We know that blocks are printed from left-to-right by column (i.e., the block 1 has the first X columns, block 2 has the next Y columns).
    #Thus, we should be able to use a counter and fill columns numerically. This does not get around a problem of non-unique row names since we
    #can't easily discern the rows represented in a block based on row numbering alone. Thus, this is an incomplete solution for now (Aug2015 MH)
    colCounter <- 1
    for (l in blockList) {
      aggMat[rownames(l), colCounter:(colCounter + ncol(l) - 1)] <- l #fill in just the block of the aggregate matrix represented in l
      if (isTRUE(expect_sig)) { attr(aggMat, "sig")[rownames(l), colCounter:(colCounter + ncol(l) - 1)] <- attr(l, "sig") } #add sig values
      colCounter <- colCounter + ncol(l)
    }
  } else {
    #warning("No lines identified for matrix extraction using header: \n  ", headerLine)
    aggMat <- NULL
  }
  
  #if sig is not used, ensure that we dump it as an attribute
  if (isFALSE(expect_sig)) { attr(aggMat, "sig") <- NULL }
  
  return(aggMat)
  
}

#EXTRACT DATA SUMMARY SECTION

#' Function to extract the SUMMARY OF DATA section from Mplus outputs
#'
#' @param outfiletext The text of the output file 
#' @param filename the name of the file containing textToScan. Used to make more intelligible warning messages.
#' @keywords internal
extractDataSummary <- function(outfiletext, filename) {
  dataSummarySection <- getSection("^\\s*SUMMARY OF DATA( FOR THE FIRST DATA SET)*\\s*$", outfiletext)
  if (is.null(dataSummarySection)) {
    empty <- list()
    class(empty) <- c("mplus.data_summary", "list")
    return(empty)
  }
  
  #detect three-level outputs, which have 2 cluster size sections, ICCs, etc.
  clus_check <- grepl("Number of [^\\s]+ clusters", dataSummarySection, perl=TRUE)
  is_three_level <- any(clus_check)
  if (isTRUE(is_three_level)) { 
    level_names <- sub(".*Number of ([^\\s]+) clusters.*", "\\1", dataSummarySection[clus_check]) 
  }
  
  # cross-classified outputs have subsections
  cross_check <- grepl("Cluster information for [^\\s]+", dataSummarySection, perl = TRUE)
  is_cross_classified <- any(cross_check)
  if (isTRUE(is_cross_classified)) {
    level_names <- sub(".*Cluster information for ([^\\s]+).*", "\\1", dataSummarySection[cross_check]) 
  }
  
  #detect groups
  #support Mplus v8 syntax Group G1 (0) with parentheses of numeric value
  multipleGroupMatches <- grep("^\\s*Group \\w+(?:\\s+\\(\\d+\\))*\\s*$", dataSummarySection, ignore.case=TRUE, perl=TRUE) 

  if (length(multipleGroupMatches) > 0L) {
    groupNames <- sub("^\\s*Group (\\w+)(?:\\s+\\(\\d+\\))*\\s*$", "\\1", dataSummarySection[multipleGroupMatches], perl=TRUE)
    toparse <- list() #divide into a list by group
    for (i in 1:length(multipleGroupMatches)) {
      if (i < length(multipleGroupMatches)) {
        end <- multipleGroupMatches[i+1] - 1
      } else { end <- length(dataSummarySection) }
      section <- dataSummarySection[(multipleGroupMatches[i]+1):end]
      attr(section, "group.name") <- groupNames[i]
      toparse[[groupNames[i]]] <- section
    }    
  } else {
    attr(dataSummarySection, "group.name") <- "all"
    toparse <- list(all=dataSummarySection)
  }
  
  summaries <- c()
  iccs <- list()
  
  for (section in toparse) {
    if (isTRUE(is_three_level)) {
      summaries <- rbind(summaries, data.frame(
        NClusters_1 = extractValue(pattern=paste("^\\s*Number of", level_names[1], "clusters\\s*"), section, filename, type="int"),
        NClusters_2 = extractValue(pattern=paste("^\\s*Number of", level_names[2], "clusters\\s*"), section, filename, type="int"),
        NMissPatterns = extractValue(pattern="^\\s*Number of missing data patterns\\s*", section, filename, type="int"),
        AvgClusterSize_1 = extractValue(pattern=paste("^\\s*Average cluster size for", level_names[1], "level\\s*"), section, filename, type="dec"),
        AvgClusterSize_2 = extractValue(pattern=paste("^\\s*Average cluster size for", level_names[2], "level\\s*"), section, filename, type="dec"),
        Group=attr(section, "group.name")
      ))
      
      names(summaries)[names(summaries)=="NClusters_1"] <- paste0("NClusters_", level_names[1])
      names(summaries)[names(summaries)=="NClusters_2"] <- paste0("NClusters_", level_names[2])
      names(summaries)[names(summaries)=="AvgClusterSize_1"] <- paste0("AvgClusterSize_", level_names[1])
      names(summaries)[names(summaries)=="AvgClusterSize_2"] <- paste0("AvgClusterSize_", level_names[2])
    } else if (isTRUE(is_cross_classified)) {
      # for now, not 
      sum_list <- list()
      crossclass_sections <- getMultilineSection("Cluster information for .*", dataSummarySection, filename, allowMultiple=TRUE)
      stopifnot(length(crossclass_sections) == length(level_names))
      for (ll in seq_along(level_names)) {
        sum_list[[paste0("NClusters_", level_names[ll])]] <- extractValue(pattern=paste("^\\s*Number of clusters\\s*"), crossclass_sections[[ll]], filename, type="int")
        clus_size_section <- getMultilineSection("^\\s*Size \\(s\\)\\s+Cluster ID with Size s\\s*$", crossclass_sections[[ll]], allowMultiple = TRUE)
        for (ii in seq_along(clus_size_section)) {
          clus_nums <- unlist(strsplit(trimSpace(clus_size_section), "\\s+"))
          clus_nums <- clus_nums[clus_nums != ""] # make sure any blanks are skipped (trimSpace should generally get them)
          sum_list[[paste0("ClusterSize_", level_names[ll], "_", ii)]] <- as.integer(clus_nums[1L])
          
          # pull out IDs for each cluster size
          sum_list[[paste0("ClusterSize_", level_names[ll], "_", ii, "_IDs")]] <- paste(clus_nums[2L:length(clus_nums)], collapse=",")
        }
      }
      summaries <- rbind(summaries, as.data.frame(sum_list))
    } else {
      summaries <- rbind(summaries, data.frame(
        NClusters = extractValue(pattern="^\\s*Number of clusters\\s*", section, filename, type="int"),
        NMissPatterns = extractValue(pattern="^\\s*Number of missing data patterns\\s*", section, filename, type="int"),
        AvgClusterSize = extractValue(pattern="^\\s*Average cluster size\\s*", section, filename, type="dec"),
        Group=attr(section, "group.name")
      ))
    }
    
    #parse icc
    icc_start <- grep("^\\s*Estimated Intraclass Correlations for the Y Variables( for [\\w\\._]+ level)*\\s*$", section, perl=TRUE)
    if (length(icc_start) > 0L) {
      if (is_three_level && length(icc_start) == 2L) {
        level_names <- sub(".*for the Y Variables for ([^\\s]+) level.*", "\\1", section[icc_start], perl=TRUE)
        sections_to_parse <- list(
          trimSpace(section[(icc_start[1]+1):(icc_start[2]-3)]), #-3 to chop off preceding Average cluster size section
          trimSpace(section[(icc_start[2]+1):length(section)])
        )
        names(sections_to_parse) <- level_names
      } else {
        sections_to_parse <- list(l2=trimSpace(section[(icc_start[1]+1):length(section)])) #this assumes nothing comes afterwards in the section
      }
      
      for (ss in seq_along(sections_to_parse)) {
        iccout <- c()
        section_name <- names(sections_to_parse)[ss]
        this_section <- sections_to_parse[[ss]]
        #problem: there is an unknown number of columns in this output. Example:
        #
        #           Intraclass              Intraclass              Intraclass
        #Variable  Correlation   Variable  Correlation   Variable  Correlation
        #Q22          0.173      Q38          0.320      Q39          0.127
        #Q40          0.270
        
        #solution: variables are always odd positions, correlations are always even
        
        repeat_line <- grep("(\\s*Variable\\s+Correlation\\s*)+", this_section)
        if (length(repeat_line) == 1L) {
          #x <- this_section[repeat_line] #not needed with odd/even solution
          #nrepeats <- length(regmatches(x, gregexpr("g", x)))
          
          icc_values <- strsplit(this_section[(repeat_line+1):length(this_section)], "\\s+")
          for (ss in icc_values) {
            if (length(ss) > 0L) {
              positions <- 1:length(ss)
              vars <- ss[positions[positions %% 2 != 0]]
              vals <- ss[positions[positions %% 2 == 0]]
              iccout <- rbind(iccout, data.frame(variable=vars, ICC=as.numeric(vals), stringsAsFactors=FALSE))
            }          
          }
          
          
          iccout$Group <- attr(section, "group.name")
          iccs[[section_name]] <- rbind(iccs[[section_name]], iccout)
        }
      }
    }
  }
  
  #trim out "all" in single group case
  if (length(multipleGroupMatches) == 0L) {
    summaries$Group <- NULL
    iccs <- lapply(iccs, function(ii) {
      ii$Group <- NULL
      return(ii)
    })
  }
  if (length(iccs) == 1L) iccs <- iccs[[1]] #for two-level models don't nest ICCs
  
  retlist <- list(overall=summaries, ICCs=iccs) 
  class(retlist) <- c("mplus.data_summary", "list")
  return(retlist)   
}

#Caspar van Lissa code for extract invariance testing section
extractInvarianceTesting <- function(outfiletext, filename) {
  inv_test_firstline <- grep("^Invariance Testing$", outfiletext)
  if (length(inv_test_firstline) == 0L) { return(list()) } #invariance section not found
  
  inv_test_endline <- grep("^MODEL FIT INFORMATION", outfiletext)
  
  retlist <- list()
  inv_test_endline <- inv_test_endline[inv_test_endline > inv_test_firstline][1]
  
  inv_test <- outfiletext[(inv_test_firstline+2):(inv_test_endline-3)]

  #match words followed by four groups of numbers (Number of parameters, Chi-Square, Degrees of Freedom, P-Value)
  model_rows <- grep("^\\s+?\\w+(\\s{2,}[0-9.]+){4}$", inv_test, value = TRUE)
  model_rows <- t(sapply(model_rows, function(x){strsplit(trimws(x), "\\s+")[[1]]}, USE.NAMES = FALSE))
  model_rownames <- model_rows[, 1]
  model_rows <- apply(model_rows[, -1, drop=FALSE], c(1,2), as.numeric)
  row.names(model_rows) <- model_rownames
  colnames(model_rows) <- c("Parameters", "Chi-Square", "DF", "Pvalue")
  retlist$models <- model_rows
  
  
  test_rows <- grep("^\\s+?(\\w+\\s){3}(\\s{2,}[0-9.]+){3}$", inv_test, value = TRUE)
  test_rows <- t(sapply(test_rows, function(x){strsplit(trimws(x), "\\s{2,}")[[1]]}, USE.NAMES = FALSE))
  test_rownames <- test_rows[, 1]
  test_rows <- apply(test_rows[, -1, drop=FALSE], c(1,2), as.numeric)
  row.names(test_rows) <- test_rownames
  colnames(test_rows) <- c("Chi-Square", "DF", "Pvalue")
  retlist$compared <- test_rows
  
  return(retlist)
}
Any scripts or data that you put into this service are public.
MplusAutomation documentation built on May 29, 2024, 5:29 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
MplusAutomation
An R Package for Facilitating Large-Scale Latent Variable Analyses in Mplus

R/parseOutput.R
In MplusAutomation: An R Package for Facilitating Large-Scale Latent Variable Analyses in Mplus

Try the MplusAutomation package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

MplusAutomation An R Package for Facilitating Large-Scale Latent Variable Analyses in Mplus

R/parseOutput.R In MplusAutomation: An R Package for Facilitating Large-Scale Latent Variable Analyses in Mplus

Try the MplusAutomation package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

MplusAutomation
An R Package for Facilitating Large-Scale Latent Variable Analyses in Mplus

R/parseOutput.R
In MplusAutomation: An R Package for Facilitating Large-Scale Latent Variable Analyses in Mplus