#' Extract Indirect Effects output
#'
#' This function parses both unstandardized and standardized indirect effects
#' It returns a list composed of $unstandardized and $standardized.
#' The base structure of each is a list containing $overall and $specific effects (as data.frames)
#'
#' @param outfiletext a character vector containing the indirect effects output section returned by getSection
#' @param curfile the name of the current output file being parsed
#' @return An mplus.indirect object (inherits list) containing $overall and $specific
#' @keywords internal
extractIndirect <- function(outfiletext, curfile) {
indirect_results <- list()
indirectSection <- getSection("^TOTAL, TOTAL INDIRECT, SPECIFIC INDIRECT, AND DIRECT EFFECTS( FOR LATENT RESPONSE VARIABLES)*$", outfiletext)
if (!is.null(indirectSection)) { indirect_results[["unstandardized"]] <- extractIndirect_section(indirectSection, curfile, sectionType="model_results") }
ciSection <- getSection("^CONFIDENCE INTERVALS OF TOTAL, TOTAL INDIRECT, SPECIFIC INDIRECT, AND DIRECT EFFECTS*$", outfiletext)
if (!is.null(ciSection)) { indirect_results[["ci.unstandardized"]] <- extractIndirect_section(ciSection, curfile, sectionType="confidence_intervals") }
stdindirectSection <- getSection("^STANDARDIZED TOTAL, TOTAL INDIRECT, SPECIFIC INDIRECT, AND DIRECT EFFECTS$", outfiletext)
if (!is.null(stdindirectSection)) {
#check for each subsection
stdheaders <- grep("STDY?X? Standardization", stdindirectSection, perl=TRUE)
if (length(stdheaders) > 0L) {
for (ix in 1:length(stdheaders)) {
endsection <- ifelse(ix == length(stdheaders), length(stdindirectSection), stdheaders[ix+1])
thissection <- stdindirectSection[(stdheaders[ix]+1):(endsection-1)]
secname <- tolower(sub("(STDY?X?)\\s+Standardization", "\\1", stdindirectSection[stdheaders[ix]], perl=TRUE))
indirect_results[[paste0(secname, ".standardized")]] <- extractIndirect_section(thissection, curfile, sectionType="model_results")
}
}
}
cistdindirectSection <- getSection("^CONFIDENCE INTERVALS OF STANDARDIZED TOTAL, TOTAL INDIRECT, SPECIFIC INDIRECT,( AND DIRECT EFFECTS)*$", outfiletext) #last part is omitted before Mplus v8
if (!is.null(cistdindirectSection)) {
#check for each subsection
stdheaders <- grep("STDY?X? Standardization", cistdindirectSection, perl=TRUE)
if (length(stdheaders) > 0L) {
for (ix in 1:length(stdheaders)) {
endsection <- ifelse(ix == length(stdheaders), length(cistdindirectSection), stdheaders[ix+1])
thissection <- cistdindirectSection[(stdheaders[ix]+1):(endsection-1)]
secname <- tolower(sub("(STDY?X?)\\s+Standardization", "\\1", cistdindirectSection[stdheaders[ix]], perl=TRUE))
indirect_results[[paste0("ci.", secname, ".standardized")]] <- extractIndirect_section(thissection, curfile, sectionType="confidence_intervals")
}
}
}
#mirror parameters
listOrder <- c("unstandardized", "ci.unstandardized",
"stdyx.standardized", "ci.stdyx.standardized",
"stdy.standardized", "ci.stdy.standardized",
"std.standardized", "ci.std.standardized")
listOrder <- listOrder[listOrder %in% names(indirect_results)]
#only re-order if out of order
if(!identical(names(indirect_results), listOrder)) indirect_results <- indirect_results[listOrder]
return(indirect_results)
}
#' Extract Indirect Effects output
#'
#' This function parses a given indirect section
#' It returns a list composed of $overall and $specific effects
#'
#' @param indirectSection a character vector containing the indirect effects for a specific section (e.g., stdyx)
#' @param curfile the name of the current output file being parsed
#' @return An mplus.indirect object (inherits list) containing $overall and $specific
#' @keywords internal
extractIndirect_section <- function(indirectSection, curfile, sectionType) {
columnNames <- detectColumnNames(curfile, trimSpace(indirectSection[1:50]), sectionType) #assume that column headers are somewhere in the first 50 lines
columnNames[1] <- "outcome" #rename param -> outcome for clarity
multipleGroupMatches <- grep("^\\s*Group \\w+(?:\\s+\\(\\d+\\))*\\s*$", indirectSection, ignore.case=TRUE, perl=TRUE) #support Mplus v8 syntax Group G1 (0) with parentheses of numeric value
splitAt <- function(x, pos) unname(split(x, cumsum(seq_along(x) %in% pos))) #courtesy of https://stackoverflow.com/questions/16357962/r-split-numeric-vector-at-position
#this will return the entire section as the first element of a list if no groups are found
isplit <- splitAt(indirectSection, multipleGroupMatches)
indirectOutput <- vector("list", length(isplit))
for (idx in 1:length(isplit)) {
i_element <- isplit[[idx]]
i_out <- list()
effectHeaders <- grep("^Effects from [A-z_0-9]+ to [A-z_0-9]+$", i_element, ignore.case=TRUE, perl=TRUE)
if (length(effectHeaders) == 0L) { next } #nothing to parse -- skip to next iteration
#determine group name, if relevant
if (grepl("^\\s*Group .*$", i_element[1L])) {
groupName <- sub("^\\s*Group (\\w+)(?:\\s+\\(\\d+\\))*\\s*$", "\\1", i_element[1L], perl=TRUE)
} else {
groupName <- NULL
}
for (e in 1:length(effectHeaders)) {
elist <- list()
elist$pred <- sub("^Effects from ([A-z_0-9]+) to [A-z_0-9]+$", "\\1", i_element[effectHeaders[e]], ignore.case=TRUE, perl=TRUE)
elist$outcome <- sub("^Effects from [A-z_0-9]+ to ([A-z_0-9]+)$", "\\1", i_element[effectHeaders[e]], ignore.case=TRUE, perl=TRUE)
end <- ifelse (e < length(effectHeaders), effectHeaders[e+1]-1, length(i_element))
esection <- i_element[(effectHeaders[e]+1):end]
#parse total, total indirect, specific indirect, and direct
totalLine <- trimSpace(grep("Total\\s+[\\-0-9\\.]+.*$", esection, ignore.case=TRUE, perl=TRUE, value=TRUE))
if (length(totalLine) > 0L) {
totalLine <- as.list(strsplit(totalLine, "\\s+", perl=TRUE)[[1]])
names(totalLine) <- columnNames; totalLine$summary <- "Total"; totalLine$outcome <- NULL
}
totalIndirectLine <- trimSpace(grep("(Indirect|(Total|Sum of) indirect)\\s+[\\-0-9\\.]+.*$", esection, ignore.case=TRUE, perl=TRUE, value=TRUE))
if (length(totalIndirectLine) > 0L) {
totalIndirectLine <- as.list(strsplit(totalIndirectLine, "\\s+", perl=TRUE)[[1]])
if (paste(unlist(totalIndirectLine[1:3]), collapse=" ") == "Sum of indirect") { #mplus v6 output? Not sure what generates sum versus total
totalIndirectLine <- totalIndirectLine[-1:-3]
hname <- "Sum of indirect"
} else if (paste(unlist(totalIndirectLine[1:2]), collapse=" ") == "Total indirect") {
totalIndirectLine <- totalIndirectLine[-1:-2]
hname <- "Total indirect"
} else if (unlist(totalIndirectLine[1]) == "Indirect") {
totalIndirectLine <- totalIndirectLine[-1]
hname <- "Indirect"
} else { stop("Unable to parse header from total indirect line: ", totalIndirectLine)}
names(totalIndirectLine) <- columnNames[-1]; #don't include "outcome" from columnNames since this is added en masse to summaries below
totalIndirectLine$summary <- hname #relabel according to Mplus output
}
#mplus v8 appears to output a line 'Direct effect' with parameters, whereas prior versions placed this under specific indirect
directLine <- trimSpace(grep("Direct effect\\s+[\\-0-9\\.]+.*$", esection, ignore.case=TRUE, perl=TRUE, value=TRUE))
if (length(directLine) > 0L) {
direct <- as.list(strsplit(directLine, "\\s+", perl=TRUE)[[1]])
direct <- direct[-2] #drop 'line' element for matching column names
names(direct) <- columnNames; direct$summary <- "Direct"; direct$outcome <- NULL
} else {
directSection <- strsplit(trimSpace(getMultilineSection("Direct", esection, curfile)), "\\s+")
useful <- which(sapply(directSection, length) > 1L)
if (length(useful) == 1L) {
direct <- as.list(directSection[[useful]])
names(direct) <- columnNames
direct$summary <- "Direct"; direct$outcome <- NULL
} else {
direct <- list()
}
}
elist$summaries <- data.frame(pred=elist$pred, outcome=elist$outcome,
rbind(unlist(totalLine), unlist(totalIndirectLine), unlist(direct)), row.names=NULL, stringsAsFactors=FALSE)
#reorder columns to put pred, outcome, summary first. Use columnNames vector without "outcome" to place remainder in order
elist$summaries <- elist$summaries[,c("pred", "outcome", "summary", columnNames[-1])]
#use white space to demarcate ending of specific indirect subsection
#new versions of Mplus (8.4+, I think) add numbers for each indirect effect to the output, which breaks the logic below
#
# Old format:
# Specific indirect
#
# BPD_I
# A11XEMOT
# IMPLW1 -0.002 0.001 -1.744 0.081
#
# New format:
# Specific indirect 1
# KOMP5TO
# SNITT02
# FODTMND -0.010 0.005 -2.132 0.033
#
# Specific indirect 2
# KOMP5TO
# FORSTEON
# FODTMND -0.003 0.002 -1.113 0.266
if (any(grepl("Specific indirect\\s+\\d+", esection, perl=TRUE))) { #numbered subsections
specSection <- trimSpace(getMultilineSection("Specific indirect(\\s+\\d+)*", esection, curfile, allowMultiple=TRUE))
#specSection is now a list of matches, where each element is a match-lines vector with the headers dumped (which is useful)
#flatten this into a vector so that parsing below proceeds as usual
if (is.list(specSection)) { specSection <- unlist(specSection) }
#N.B. The parser below depends on the first line of the section being blank to demarcate the first effect.
# The new format, however, starts with Specific indirect 1, then goes straight to the effect.
# Thus, add a blank line to the section before proceeding
if (specSection[1L] != "") specSection <- c("", specSection)
} else { #single section
specSection <- trimSpace(getMultilineSection("Specific indirect", esection, curfile))
}
blanks <- which(specSection=="")
if (length(blanks) > 0L) {
thisEffect <- NULL
for (i in 1:length(blanks)) {
if (i < length(blanks)) {
startLine <- blanks[i]+1
endLine <- blanks[i+1]-1
if (startLine >= endLine) { next } #occurs with consecutive blanks -> skip out
toparse <- specSection[startLine:endLine]
} else {
if (blanks[i]+1 < length(specSection)) {
toparse <- specSection[(blanks[i]+1):length(specSection)]
} else { next } #nothing to parse, just a trailing blank
}
#if (length(toparse) < 2L) { next } #double blank line problem
outcome <- toparse[1] #first variable is the "outcome" (i.e., the variable furthest downstream) (X IND Y)
source <- strsplit(toparse[length(toparse)], "\\s+")[[1]] #this should always be the earliest variable in the chain; it should have the statistics on it
names(source) <- columnNames
names(source)[1] <- "pred" #for specific effects, the first column of source is actually the predictor, not outcome (bit of a kludge here)
source <- as.data.frame(as.list(source), stringAsFactors=FALSE)
intervening <- toparse[2:(length(toparse)-1)]
thisEffect <- rbind(thisEffect, data.frame(source[,"pred", drop=F], intervening = paste(intervening, collapse="."), outcome=outcome, source[,-1*which(names(source)=="pred")]))
}
elist$specific <- thisEffect
}
if (!is.null(groupName)) { #add group name if multiple groups analysis
elist$summaries$group <- groupName
if (!is.null(elist$specific)) { elist$specific$group <- groupName } #some sections don't have specific indirects
}
i_out[[e]] <- elist #list of outputs for this section
}
indirectOutput[[idx]] <- i_out
}
#name list elements according to predictor and outcome
#names(indirectOutput) <- sapply(indirectOutput, function(el) { paste(el$pred, el$outcome, sep=".") })
#change format to return two data.frames, one with all summaries, the other with all specific
summarydf <- do.call(rbind, lapply(indirectOutput, function(i_out) {
do.call(rbind, lapply(i_out, function(el) { el$summaries }))
}))
specificdf <- do.call(rbind, lapply(indirectOutput, function(i_out) {
do.call(rbind, lapply(i_out, function(el) { el$specific }))
}))
row.names(summarydf) <- NULL; row.names(specificdf) <- NULL
toreturn <- list(overall=summarydf, specific=specificdf)
class(toreturn) <- "mplus.indirect"
return(toreturn)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.