# this code is written by Michael Hallquist
#First draft of parser to convert Mplus model syntax to psindex model syntax
#idea: build parTable and run model from mplus syntax
#then perhaps write export function: parTable2Mplus
#and/or parTable2psindex
trimSpace <- function(string) {
stringTrim <- sapply(string, function(x) {
x <- sub("^\\s*", "", x, perl=TRUE)
x <- sub("\\s*$","", x, perl=TRUE)
return(x)
}, USE.NAMES=FALSE)
return(stringTrim)
}
#small utility function to join strings in a regexp loop
joinRegexExpand <- function(cmd, argExpand, matches, iterator, matchLength="match.length") {
if (iterator == 1 && matches[iterator] > 1) {
pre <- substr(cmd, 1, matches[iterator] - 1)
} else pre <- ""
#if this is not the final match, then get sub-string between the end of this match and the beginning of the next
#otherwise, match to the end of the command
post.end <- ifelse(iterator < length(matches), matches[iterator+1] - 1, nchar(cmd))
post <- substr(cmd, matches[iterator] + attr(matches, matchLength)[iterator], post.end)
cmd.expand <- paste(pre, argExpand, post, sep="")
return(cmd.expand)
}
#expand Mplus hyphen syntax (will also expand constraints with hyphens)
expandCmd <- function(cmd, alphaStart=TRUE) {
#use negative lookahead and negative lookbehind to eliminate possibility of hyphen being used as a negative starting value (e.g., x*-1)
#also avoid match of anything that includes a decimal point, such as a floating-point starting value -10.5*x1
#if alphaStart==TRUE, then require that the matches before and after hyphens begin with alpha character
#this is used for variable names, whereas the more generic expansion works for numeric constraints and such
#need to do a better job of this so that u1-u20* is supported... I don't think the regexp below is general enough
#if (alphaStart) {
# hyphens <- gregexpr("[_A-Za-z]+\\w*\\s*-\\s*[_A-Za-z]+\\w*", cmd, perl=TRUE)[[1]]
#} else {
# hyphens <- gregexpr("(?!<(\\*|\\.))\\w+(?!(\\*|\\.))\\s*-\\s*(?!<(\\*|\\.))\\w+(?!(\\*|\\.))", cmd, perl=TRUE)[[1]]
#}
hyphens <- gregexpr("(?!<(\\*|\\.))\\w+(?!(\\*|\\.))\\s*-\\s*(?!<(\\*|\\.))\\w+(?!(\\*|\\.))", cmd, perl=TRUE)[[1]]
if (hyphens[1L] > 0) {
cmd.expand <- c()
ep <- 1
for (v in 1:length(hyphens)) {
#match one keyword before and after hyphen
argsplit <- strsplit(substr(cmd, hyphens[v], hyphens[v] + attr(hyphens, "match.length")[v] - 1), "\\s*-\\s*", perl=TRUE)[[1]]
v_pre <- argsplit[1]
v_post <- argsplit[2]
#the basic positive lookbehind blows up with pure numeric constraints (1 - 3) because no alpha char precedes digit
#can use an non-capturing alternation grouping to allow for digits only or the final digits after alphas (as in v_post.num)
v_pre.num <- as.integer(sub("\\w*(?<=[A-Za-z_])(\\d+)$", "\\1", v_pre, perl=TRUE)) #use positive lookbehind to avoid greedy \w+ match -- capture all digits
v_post.match <- regexpr("^(?:\\w*(?<=[A-Za-z_])(\\d+)|(\\d+))$", v_post, perl=TRUE)
stopifnot(v_post.match[1L] > 0)
#match mat be under capture[1] or capture[2] because of alternation above
whichCapture <- which(attr(v_post.match, "capture.start") > 0)
v_post.num <- as.integer(substr(v_post, attr(v_post.match, "capture.start")[whichCapture], attr(v_post.match, "capture.start")[whichCapture] + attr(v_post.match, "capture.length")[whichCapture] - 1))
v_post.prefix <- substr(v_post, 1, attr(v_post.match, "capture.start")[whichCapture] - 1) #just trusting that pre and post match
if (is.na(v_pre.num) || is.na(v_post.num)) stop("Cannot expand variables: ", v_pre, ", ", v_post)
v_expand <- paste(v_post.prefix, v_pre.num:v_post.num, sep="", collapse=" ")
#for first hyphen, there may be non-hyphenated syntax preceding the initial match
cmd.expand[ep] <- joinRegexExpand(cmd, v_expand, hyphens, v)
ep <- ep + 1
}
return(paste(cmd.expand, collapse=""))
} else {
return(cmd) #no hyphens to expand
}
}
#handle starting values and fixed parameters on rhs
parseFixStart <- function(cmd) {
cmd.parse <- c()
ep <- 1L
if ((fixed.starts <- gregexpr("[\\w\\.-]+\\s*([@*])\\s*[\\w\\.-]+", cmd, perl=TRUE)[[1]])[1L] > 0) { #shouldn't it be \\*, not * ?! Come back to this.
for (f in 1:length(fixed.starts)) {
#capture above obtains the fixed/start character (@ or *), whereas match obtains the full regex match
opchar <- substr(cmd, attr(fixed.starts, "capture.start")[f], attr(fixed.starts, "capture.start")[f] + attr(fixed.starts, "capture.length")[f] - 1)
#match arguments around asterisk/at symbol
argsplit <- strsplit(substr(cmd, fixed.starts[f], fixed.starts[f] + attr(fixed.starts, "match.length")[f] - 1), paste0("\\s*", ifelse(opchar=="*", "\\*", opchar), "\\s*"), perl=TRUE)[[1]]
v_pre <- argsplit[1]
v_post <- argsplit[2]
if (suppressWarnings(is.na(as.numeric(v_pre)))) { #fixed.starts value post-multiplier
var <- v_pre
val <- v_post
} else if (suppressWarnings(is.na(as.numeric(v_post)))) { #starting value pre-multiplier
var <- v_post
val <- v_pre
} else stop("Cannot parse Mplus fixed/starts values specification: ", v_pre, v_post)
if (opchar == "@") {
cmd.parse[ep] <- joinRegexExpand(cmd, paste0(val, "*", var, sep=""), fixed.starts, f)
ep <- ep + 1L
} else {
cmd.parse[ep] <- joinRegexExpand(cmd, paste0("start(", val, ")*", var, sep=""), fixed.starts, f)
ep <- ep + 1L
}
}
return(paste(cmd.parse, collapse=""))
} else {
return(cmd)
}
}
parseConstraints <- function(cmd) {
#Allow cmd to have newlines embedded. In this case, split on newlines, and loop over and parse each chunk
#Dump leading and trailing newlines, which contain no information about constraints, but may add dummy elements to vector after strsplit
#Maybe return LHS and RHS parsed command where constraints only appear on the RHS, whereas the LHS contains only parameters.
#Example: LHS is v1 v2 v3 and RHS is con1*v1 con2*v2 con3*v3
cmd.split <- strsplit(cmd, "\n")[[1]]
#drop empty lines (especially leading newline)
cmd.split <- if(length(emptyPos <- which(cmd.split == "")) > 0L) {cmd.split[-1*emptyPos]} else {cmd.split}
#Create a version of the command with no constraint specifications.
#This is useful for constraint specs that use the params on the LHS and RHS. Example: v1 ~~ conB*v1
cmd.noconstraints <- paste0(gsub("\\s*\\([^\\)]+\\)\\s*", "", cmd.split, perl=TRUE), collapse=" ")
cmd.tojoin <- c() #will store all chunks divided by newlines, which will be joined at the end.
#iterate over each newline segment
for (n in 1:length(cmd.split)) {
#in principle, now that we respect newlines, parens should only be of length 1, since Mplus syntax dictates newlines for each use of parentheses for constraints
if ((parens <- gregexpr("(?<!start)\\(([^\\)]+)\\)", cmd.split[n], perl=TRUE)[[1L]])[1L] > 0) { #match parentheses, but not start()
#the syntax chunk after all parentheses have been matched
cmd.expand <- c()
for (p in 1:length(parens)) {
#string within the constraint parentheses
constraints <- substr(cmd.split[n], attr(parens, "capture.start")[p], attr(parens, "capture.start")[p] + attr(parens, "capture.length")[p] - 1)
#Divide constraints on spaces to determine number of constraints to parse. Use trimSpace to avoid problem of user including leading/trailing spaces within parentheses.
con.split <- strsplit(trimSpace(constraints), "\\s+", perl=TRUE)[[1]]
#if Mplus uses a purely numeric constraint, then add ".con" prefix to be consistent with R naming.
con.split <- sapply(con.split, function(x) {
if (! suppressWarnings(is.na(as.numeric(x)))) {
make.names(paste0(".con", x))
} else { x }
})
#determine the parameters that precede the parentheses (either first character for p == 1 or character after preceding parentheses)
prestrStart <- ifelse(p > 1, attr(parens, "capture.start")[p-1] + attr(parens, "capture.length")[p-1] + 1, 1)
#obtain the parameters that precede the parentheses, divide into arguments on spaces
#use trimSpace here because first char after prestrStart for p > 1 will probably be a space
precmd.split <- strsplit(trimSpace(substr(cmd.split[n], prestrStart, parens[p] - 1)), "\\s+", perl=TRUE)[[1]]
#peel off any potential LHS arguments, such as F1 BY
precmdLHSOp <- which(tolower(precmd.split) %in% c("by", "with", "on"))
if (any(precmdLHSOp)) {
lhsop <- paste0(precmd.split[1:precmdLHSOp[1L]], " ", collapse=" ") #join lhs and op as a single string, add trailing space so that paste with expanded RHS is right.
rhs <- precmd.split[(precmdLHSOp+1):length(precmd.split)]
} else {
lhsop <- ""
rhs <- precmd.split
}
if (length(con.split) > 1L) {
#several constraints listed within parentheses. Example: F1 BY X1 X2 X3 X4 (C2 C3 C4)
#thus, backwards match the constraints to parameters
#restrict parameters to backwards match to be of the same length as number of constraints
rhs.backmatch <- rhs[(length(rhs)-length(con.split)+1):length(rhs)]
rhs.expand <- c()
#check that no mean or scale markers are part of the rhs param to expand
if ((preMark.match <- regexpr("^\\s*[\\[\\{]", rhs.backmatch[1L], perl=TRUE))[1L] > 0) {
preMark <- substr(rhs.backmatch[1L], preMark.match[1L], preMark.match[1L] + attr(preMark.match, "match.length")[1L] - 1)
rhs.backmatch[1L] <- substr(rhs.backmatch[1L], preMark.match[1L] + attr(preMark.match, "match.length")[1L], nchar(rhs.backmatch[1L]))
} else { preMark <- "" }
if ((postMark.match <- regexpr("[\\]\\}]\\s*$", rhs.backmatch[length(rhs.backmatch)], perl=TRUE))[1L] > 0) {
postMark <- substr(rhs.backmatch[length(rhs.backmatch)], postMark.match[1L], nchar(rhs.backmatch[length(rhs.backmatch)]))
rhs.backmatch[length(rhs.backmatch)] <- substr(rhs.backmatch[length(rhs.backmatch)], 1, postMark.match[1L] - 1)
} else { postMark <- "" }
#pre-multiply each parameter with each corresponding constraint
for (i in 1:length(rhs.backmatch)) {
rhs.expand[i] <- paste0(con.split[i], "*", rhs.backmatch[i])
}
#join rhs as string and add back in mean/scale operator, if present
rhs.expand <- paste0(preMark, paste(rhs.expand, collapse=" "), postMark)
#if there were params that preceded the backwards match, then add these back to the syntax
#append this syntax to the parsed command, cmd.expand
if (length(rhs) - length(con.split) > 0L) {
cmd.expand <- c(cmd.expand, paste(lhsop, paste(rhs[1:(length(rhs)-length(con.split))], collapse=" "), rhs.expand))
} else {
cmd.expand <- c(cmd.expand, paste0(lhsop, rhs.expand))
}
} else {
#should be able to reduce redundancy with above
#all parameters on the right hand side are to be equated
#thus, pre-multiply each parameter by the constraint
#check that no mean or scale markers are part of the rhs param to expand
#DUPE CODE FROM ABOVE. Make Function?!
if ((preMark.match <- regexpr("^\\s*[\\[\\{]", rhs[1L], perl=TRUE))[1L] > 0) {
preMark <- substr(rhs[1L], preMark.match[1L], preMark.match[1L] + attr(preMark.match, "match.length")[1L] - 1)
rhs[1L] <- substr(rhs[1L], preMark.match[1L] + attr(preMark.match, "match.length")[1L], nchar(rhs[1L]))
} else { preMark <- "" }
if ((postMark.match <- regexpr("[\\]\\}]\\s*$", rhs[length(rhs)], perl=TRUE))[1L] > 0) {
postMark <- substr(rhs[length(rhs)], postMark.match[1L], nchar(rhs[length(rhs)]))
rhs[length(rhs)] <- substr(rhs[length(rhs)], 1, postMark.match[1L] - 1)
} else { postMark <- "" }
rhs.expand <- c()
for (i in 1:length(rhs)) {
rhs.expand[i] <- paste0(con.split[1L], "*", rhs[i])
}
#join rhs as string
rhs.expand <- paste0(preMark, paste(rhs.expand, collapse=" "), postMark)
cmd.expand <- c(cmd.expand, paste0(lhsop, rhs.expand))
}
}
cmd.tojoin[n] <- paste(cmd.expand, collapse=" ")
} else { cmd.tojoin[n] <- cmd.split[n] } #no parens
}
#eliminate newlines in this function so that they don't mess up \\s+ splits downstream
toReturn <- paste(cmd.tojoin, collapse=" ")
attr(toReturn, "noConstraints") <- cmd.noconstraints
return(toReturn)
}
expandGrowthCmd <- function(cmd) {
#can assume that any spaces between tscore and variable were stripped by parseFixStart
#verify that this is not a random slope
if (any(tolower(strsplit(cmd, "\\s+", perl=TRUE)[[1]]) %in% c("on", "at"))) {
stop("psindex does not support random slopes or individually varying growth model time scores")
}
cmd.split <- strsplit(cmd, "\\s*\\|\\s*", perl=TRUE)[[1]]
if (!length(cmd.split) == 2) stop("Unknown growth syntax: ", cmd)
lhs <- cmd.split[1]
lhs.split <- strsplit(lhs, "\\s+", perl=TRUE)[[1]]
rhs <- cmd.split[2]
rhs.split <- strsplit(rhs, "(\\*|\\s+)", perl=TRUE)[[1]]
if (length(rhs.split) %% 2 != 0) stop("Number of variables and number of tscores does not match: ", rhs)
tscores <- as.numeric(rhs.split[1:length(rhs.split) %% 2 != 0]) #pre-multipliers
vars <- rhs.split[1:length(rhs.split) %% 2 == 0]
cmd.expand <- c()
for (p in 0:(length(lhs.split)-1)) {
if (p == 0) {
#intercept
cmd.expand <- c(cmd.expand, paste(lhs.split[(p+1)], "=~", paste("1*", vars, sep="", collapse=" + ")))
} else {
cmd.expand <- c(cmd.expand, paste(lhs.split[(p+1)], "=~", paste(tscores^p, "*", vars, sep="", collapse=" + ")))
}
}
return(cmd.expand)
}
#function to wrap long lines at a certain width, splitting on + symbols to be consistent with R syntax
wrapAfterPlus <- function(cmd, width=90, exdent=5) {
result <- lapply(cmd, function(line) {
if (nchar(line) > width) {
split <- c()
spos <- 1L
plusMatch <- gregexpr("+", line, fixed=TRUE)[[1]]
mpos <- 1L
if (plusMatch[1L] > 0L) {
#split after plus symbol
charsRemain <- nchar(line)
while(charsRemain > 0L) {
toProcess <- substr(line, nchar(line) - charsRemain + 1, nchar(line))
offset <- nchar(line) - charsRemain + 1
if (nchar(remainder <- substr(line, offset, nchar(line))) <= (width - exdent)) {
#remainder of line fits within width -- no need to continue wrapping
split[spos] <- remainder
charsRemain <- 0
} else {
wrapAt <- which(plusMatch < (width + offset - exdent))
wrapAt <- wrapAt[length(wrapAt)] #at the final +
split[spos] <- substr(line, offset, plusMatch[wrapAt])
charsRemain <- charsRemain - nchar(split[spos])
spos <- spos + 1
}
}
#remove leading and trailing chars
split <- trimSpace(split)
#handle exdent
split <- sapply(1:length(split), function(x) {
if (x > 1) paste0(paste(rep(" ", exdent), collapse=""), split[x])
else split[x]
})
return(split)
} else {
return(strwrap(line, width=width, exdent=exdent)) #convention strwrap when no + present
}
} else {
return(line)
}
})
#bind together multi-line expansions into single vector
return(unname(do.call(c, result)))
}
mplus2psindex.constraintSyntax <- function(syntax) {
#should probably pass in model syntax along with some tracking of which parameter labels are defined.
#convert MODEL CONSTRAINT section to psindex model syntax
syntax <- paste(lapply(trimSpace(strsplit(syntax, "\n")), function(x) { if (length(x) == 0L && is.character(x)) "" else x}), collapse="\n")
#replace ! with # for comment lines. Also strip newline and replace with semicolon
syntax <- gsub("(\\s*)!(.+)\n", "\\1#\\2;", syntax, perl=TRUE)
#split into vector of strings
#need to peel off leading or trailing newlines -- leads to parsing confusion downstream otherwise
syntax.split <- gsub("(^\n|\n$)", "", unlist( strsplit(syntax, ";") ), perl=TRUE)
constraint.out <- c()
#TODO: Handle PLOT and LOOP syntax for model constraints.
#TODO: Handle DO loop convention
#first parse new parameters defined in MODEL CONSTRAINT into a vector
new.parameters <- c() #parameters that are defined in constraint section
if (length(new.con.lines <- grep("^\\s*NEW\\s*\\([^\\)]+\\)", syntax.split, perl=TRUE, ignore.case=TRUE)) > 0L) {
for (cmd in syntax.split[new.con.lines]) {
#process new constraint definition
new.con <- regexpr("^\\s*NEW\\s*\\(([^\\)]+)\\)", cmd, perl=TRUE, ignore.case=TRUE)
if (new.con[1L] == -1) stop("Unable to parse names of new contraints")
new.con <- substr(cmd, attr(new.con, "capture.start"), attr(new.con, "capture.start") + attr(new.con, "capture.length") - 1L)
new.con <- expandCmd(new.con) #allow for hyphen expansion
new.parameters <- c(new.parameters, strsplit(trimSpace(new.con), "\\s+", perl=TRUE)[[1L]])
}
syntax.split <- syntax.split[-1L * new.con.lines] #drop out these lines
parameters.undefined <- new.parameters #to be used below to handle ambiguity of equation versus definition
}
for (cmd in syntax.split) {
if (grepl("^\\s*#", cmd, perl=TRUE)) { #comment line
constraint.out <- c(constraint.out , gsub("\n", "", cmd, fixed=TRUE)) #drop any newlines
} else if (grepl("^\\s+$", cmd, perl=TRUE)) {
#do nothing, just a space line
} else {
#constraint proper
cmd <- gsub("**", "^", cmd, fixed=TRUE) #handle exponent
#lower case the math operations supported by Mplus to be consistent with R
#match all math operators, then lower case each and rejoin string
maths <- gregexpr("(SQRT|LOG|LOG10|EXP|ABS|SIN|COS|TAN|ASIN|ACOS|ATAN)\\s*\\(", cmd, perl=TRUE)[[1L]]
if (maths[1L] > 0) {
maths.replace <- c()
ep <- 1
for (i in 1:length(maths)) {
operator <- tolower(substr(cmd, attr(maths, "capture.start")[i], attr(maths, "capture.start")[i] + attr(maths, "capture.length")[i] - 1))
maths.replace[ep] <- joinRegexExpand(cmd, operator, maths, i, matchLength="capture.length") #only match operator, not opening (
ep <- ep + 1
}
cmd <- paste(maths.replace, collapse="")
}
#equating some lhs and rhs: could reflect definition of new parameter
if ((equals <- regexpr("=", cmd, fixed=TRUE))[1L] > 0) {
lhs <- trimSpace(substr(cmd, 1, equals - 1))
rhs <- trimSpace(substr(cmd, equals + attr(equals, "match.length"), nchar(cmd)))
#possibility of lhs or rhs containing the single variable to be equated
if (regexpr("\\s+", lhs, perl=TRUE)[1L] > 0L) {
def <- rhs
body <- lhs
} else if (regexpr("\\s+", rhs, perl=TRUE)[1L] > 0L) {
def <- lhs
body <- rhs
} else {
#warning("Can't figure out which side of constraint defines a parameter")
#this would occur for simple rel5 = rel2 sort of syntax
def <- lhs
body <- rhs
}
#must decide whether this is a new parameter (:=) or equation of exising labels (==)
#alternatively, could be zero, as in 0 = x + y
#this is tricky, because mplus doesn't differentiate definition from equation
#consequently, could confuse the issue as in ex5.20
#NEW(rel2 rel5 stan3 stan6);
#rel2 = lam2**2*vf1/(lam2**2*vf1 + ve2);
#rel5 = lam5**2*vf2/(lam5**2*vf2 + ve5);
#rel5 = rel2;
#for now, only define a new constraint if it's not already defined
#otherwise equate
if (def %in% new.parameters && def %in% parameters.undefined) {
constraint.out <- c(constraint.out, paste(def, ":=", body))
parameters.undefined <- parameters.undefined[!parameters.undefined==def]
} else {
constraint.out <- c(constraint.out, paste(def, "==", body))
}
} else {
#inequality constraints -- paste as is
constraint.out <- c(constraint.out, cmd)
}
}
}
wrap <- paste(wrapAfterPlus(constraint.out, width=90, exdent=5), collapse="\n")
return(wrap)
}
mplus2psindex.modelSyntax <- function(syntax) {
#initial strip of leading/trailing whitespace, which can interfere with splitting on spaces
#strsplit generates character(0) for empty strings, which causes problems in paste because paste actually includes it as a literal
#example: paste(list(character(0), "asdf", character(0)), collapse=" ")
#thus, use lapply to convert these to empty strings first
syntax <- paste(lapply(trimSpace(strsplit(syntax, "\n")), function(x) { if (length(x) == 0L && is.character(x)) "" else x}), collapse="\n")
#replace ! with # for comment lines. Also strip newline and replace with semicolon
syntax <- gsub("(\\s*)!(.+)\n*", "\\1#\\2;", syntax, perl=TRUE)
#new direction: retain newlines in parsed syntax until after constraints have been parsed
#delete newlines
#syntax <- gsub("\n", "", syntax, fixed=TRUE)
# replace semicolons with newlines prior to split (divide into commands)
#syntax <- gsub(";", "\n", syntax, fixed=TRUE)
#split into vector of strings
#syntax.split <- unlist( strsplit(syntax, "\n") )
syntax.split <- trimSpace(unlist( strsplit(syntax, ";") ))
#format of parTable to mimic.
# 'data.frame': 34 obs. of 12 variables:
# $ id : int 1 2 3 4 5 6 7 8 9 10 ...
# $ lhs : chr "ind60" "ind60" "ind60" "dem60" ...
# $ op : chr "=~" "=~" "=~" "=~" ...
# $ rhs : chr "x1" "x2" "x3" "y1" ...
# $ user : int 1 1 1 1 1 1 1 1 1 1 ...
# $ group : int 1 1 1 1 1 1 1 1 1 1 ...
# $ free : int 0 1 2 0 3 4 5 0 6 7 ...
# $ ustart: num 1 NA NA 1 NA NA NA 1 NA NA ...
# $ exo : int 0 0 0 0 0 0 0 0 0 0 ...
# $ label : chr "" "" "" "" ...
# $ eq.id : int 0 0 0 0 0 0 0 0 0 0 ...
# $ unco : int 0 1 2 0 3 4 5 0 6 7 ...
#vector of psindex syntax
psindex.out <- c()
for (cmd in syntax.split) {
if (grepl("^\\s*#", cmd, perl=TRUE)) { #comment line
psindex.out <- c(psindex.out, gsub("\n", "", cmd, fixed=TRUE)) #drop any newlines (otherwise done by parseConstraints)
} else if (grepl("^\\s*$", cmd, perl=TRUE)) {
#do nothing, just a space or blank line
} else {
#hyphen expansion
cmd <- expandCmd(cmd)
#blow up on growth syntax for now
# if (grepl("|", cmd, fixed=TRUE)) stop("Growth modeling syntax using | not supported at the moment.")
#parse fixed parameters and starting values
cmd <- parseFixStart(cmd)
#parse any constraints here (avoid weird logic below)
cmd <- parseConstraints(cmd)
if ((op <- regexpr("\\s+(by|on|with|pwith)\\s+", cmd, ignore.case=TRUE, perl=TRUE))[1L] > 0) { #regressions, factors, covariances
lhs <- substr(cmd, 1, op - 1) #using op takes match.start which will omit spaces before operator
rhs <- substr(cmd, op + attr(op, "match.length"), nchar(cmd))
operator <- tolower(substr(cmd, attr(op, "capture.start"), attr(op, "capture.start") + attr(op, "capture.length") - 1))
if (operator == "by") { lav.operator <- "=~"
} else if (operator == "with" || operator == "pwith") { lav.operator <- "~~"
} else if (operator == "on") { lav.operator <- "~" }
#handle parameter combinations
lhs.split <- strsplit(lhs, "\\s+")[[1]] #trimSpace(
#handle pwith syntax
if (operator == "pwith") {
#TODO: Figure out if pwith can be paired with constraints?
rhs.split <- strsplit(rhs, "\\s+")[[1]] #trimSpace(
if (length(lhs.split) != length(rhs.split)) { browser(); stop("PWITH command does not have the same number of arguments on the left and right sides.")}
cmd <- sapply(1:length(lhs.split), function(i) paste(lhs.split[i], lav.operator, rhs.split[i]))
} else {
#insert plus signs on the rhs
rhs <- gsub("\\s+", " + ", rhs, perl=TRUE)
if (length(lhs.split) > 1L) {
#expand using possible combinations
cmd <- sapply(lhs.split, function(larg) {
pair <- paste(larg, lav.operator, rhs)
return(pair)
})
} else {
cmd <- paste(lhs, lav.operator, rhs)
}
}
} else if ((means.scales <- regexpr("^\\s*([\\[\\{])([^\\]\\}]+)[\\]\\}]\\s*$", cmd, ignore.case=TRUE, perl=TRUE))[1L] > 0) { #intercepts/means or scales
#first capture is the operator: [ or {
operator <- substr(cmd, attr(means.scales, "capture.start")[1L], attr(means.scales, "capture.start")[1L] + attr(means.scales, "capture.length")[1L] - 1)
params <- substr(cmd, attr(means.scales, "capture.start")[2L], attr(means.scales, "capture.start")[2L] + attr(means.scales, "capture.length")[2L] - 1)
#obtain parameters with no constraint specification for LHS
params.noconstraints <- sub("^\\s*[\\[\\{]([^\\]\\}]+)[\\]\\}]\\s*$", "\\1", attr(cmd, "noConstraints"), perl=TRUE)
means.scales.split <- strsplit(params, "\\s+")[[1]] #trimSpace(
means.scales.noConstraints.split <- strsplit(params.noconstraints, "\\s+")[[1]] #trimSpace(
if (operator == "[") {
#Tricky syntax shift (and corresponding kludge). For means, need to put constraint on RHS as pre-multiplier of 1 (e.g., x1 ~ 5*1).
#But parseConstraints returns constraints multiplied by parameters
cmd <- sapply(means.scales.split, function(v) {
#shift pre-multiplier
if ((premult <- regexpr("([^\\*]+)\\*([^\\*]+)", v, perl=TRUE))[1L] > 0) {
modifier <- substr(v, attr(premult, "capture.start")[1L], attr(premult, "capture.start")[1L] + attr(premult, "capture.length")[1L] - 1)
paramName <- substr(v, attr(premult, "capture.start")[2L], attr(premult, "capture.start")[2L] + attr(premult, "capture.length")[2L] - 1)
paste0(paramName, " ~ ", modifier, "*1")
} else {
paste(v, "~ 1")
}
})
} else if (operator == "{"){
#only include constraints on RHS
cmd <- sapply(1:length(means.scales.split), function(v) paste(means.scales.noConstraints.split[v], "~*~", means.scales.split[v]))
} else { stop("What's the operator?!") }
} else if (grepl("|", cmd, fixed=TRUE)) {
#expand growth modeling language
cmd <- expandGrowthCmd(cmd)
} else { #no operator, no means, must be variance.
#cat("assuming vars: ", cmd, "\n")
vars.lhs <- strsplit(attr(cmd, "noConstraints"), "\\s+")[[1]] #trimSpace(
vars.rhs <- strsplit(cmd, "\\s+")[[1]] #trimSpace(
cmd <- sapply(1:length(vars.lhs), function(v) paste(vars.lhs[v], "~~", vars.rhs[v]))
}
#handle threshold substitution: $ -> |
cmd <- gsub("$", "|", cmd, fixed=TRUE)
psindex.out <- c(psindex.out, cmd)
}
}
#for now, include a final trimSpace call since some arguments have leading/trailing space stripped.
wrap <- paste(wrapAfterPlus(psindex.out, width=90, exdent=5), collapse="\n") #trimSpace(
return(wrap)
}
mplus2psindex <- function(inpfile, run=TRUE) {
stopifnot(length(inpfile) == 1L)
stopifnot(grepl("\\.inp$", inpfile))
if (!file.exists(inpfile)) { stop("Could not find file: ", inpfile) }
#for future consideration. For now, require a .inp file
# if (length(inpfile) == 1L && grepl("\\.inp$", inpfile)) {
# if (!file.exists(inpfile)) { stop("Could not find file: ", inpfile) }
# inpfile.text <- scan(inpfile, what="character", sep="\n", strip.white=FALSE, blank.lines.skip=FALSE, quiet=TRUE)
# } else {
# #assume that inpfile itself is syntax (e.g., in a character vector)
# inpfile.text <- inpfile
# }
inpfile.text <- scan(inpfile, what="character", sep="\n", strip.white=FALSE, blank.lines.skip=FALSE, quiet=TRUE)
sections <- divideInputIntoSections(inpfile.text, inpfile)
mplus.inp <- list()
mplus.inp$title <- trimSpace(paste(sections$title, collapse=" "))
mplus.inp$data <- divideIntoFields(sections$data, required="file")
mplus.inp$variable <- divideIntoFields(sections$variable, required="names")
mplus.inp$analysis <- divideIntoFields(sections$analysis)
meanstructure <- "default" #psindex default
if(!is.null(mplus.inp$analysis$model)) {
if (tolower(mplus.inp$analysis$model) == "nomeanstructure") { meanstructure=FALSE } #explicitly disable mean structure
}
information <- "default" #psindex default
if(!is.null(mplus.inp$analysis$information)) {
information <- tolower(mplus.inp$analysis$information)
}
estimator <- "default"
if (!is.null(est <- mplus.inp$analysis$estimator)) {
#no memory of what this is up to....
if (toupper(est) == "MUML") warning("Mplus does not support MUML estimator. Using default instead.")
estimator <- est
#march 2013: handle case where categorical data are specified, but ML-based estimator requested.
#use WLSMV instead
if (!is.null(mplus.inp$variable$categorical) && toupper(substr(mplus.inp$analysis$estimator, 1, 2)) == "ML") {
warning("Lavaan does not yet support ML-based estimation for categorical data. Reverting to WLSMV")
estimator <- "WLSMV"
}
}
#expand hyphens in variable names and split into vector that will be the names for read.table
mplus.inp$variable$names <- strsplit(expandCmd(mplus.inp$variable$names), "\\s+", perl=TRUE)[[1]]
#expand hyphens in categorical declaration
if (!is.null(mplus.inp$variable$categorical)) mplus.inp$variable$categorical <- strsplit(expandCmd(mplus.inp$variable$categorical), "\\s+", perl=TRUE)[[1]]
#convert mplus syntax to psindex syntax
mplus.inp$model <- mplus2psindex.modelSyntax(sections$model)
#handle model constraint
if ("model.constraint" %in% names(sections)) {
mplus.inp$model.constraint <- mplus2psindex.constraintSyntax(sections$model.constraint)
mplus.inp$model <- paste(mplus.inp$model, mplus.inp$model.constraint, sep="\n")
}
#read mplus data (and handle missing spec)
mplus.inp$data <- readMplusInputData(mplus.inp, inpfile)
#handle bootstrapping specification
se="default"
bootstrap <- 1000L
test <- "default"
if (!is.null(mplus.inp$analysis$bootstrap)) {
boot.type <- "standard"
#check whether standard versus residual bootstrap is specified
if ((boot.match <- regexpr("\\((\\w+)\\)", mplus.inp$analysis$bootstrap, perl=TRUE)) > 0L) {
boot.type <- tolower(substr(mplus.inp$analysis$bootstrap, attr(boot.match, "capture.start"), attr(boot.match, "capture.start") + attr(boot.match, "capture.length") - 1L))
}
if (boot.type == "residual") test <- "Bollen.Stine"
se <- "bootstrap"
if ((nboot.match <- regexpr("^\\s*(\\d+)", mplus.inp$analysis$bootstrap, perl=TRUE)) > 0L) {
bootstrap <- as.numeric(substr(mplus.inp$analysis$bootstrap, attr(nboot.match, "capture.start"), attr(nboot.match, "capture.start") + attr(nboot.match, "capture.length") - 1L))
}
}
if (run) {
fit <- sem(mplus.inp$model, data=mplus.inp$data, meanstructure=meanstructure, mimic="Mplus", estimator=estimator, test=test, se=se, bootstrap=bootstrap, information=information)
fit@external <- list(mplus.inp=mplus.inp)
} else {
fit <- mplus.inp #just return the syntax outside of a psindex object
}
return(fit)
}
divideIntoFields <- function(section.text, required) {
if (is.null(section.text)) { return(NULL) }
#The parser breaks down when there is a line with a trailing comment because then splitting on semicolon will combine it with the following line
#Thus, trim off trailing comments before initial split
section.text <- gsub("\\s*!.*$", "", section.text, perl=TRUE)
section.split <- strsplit(paste(section.text, collapse=" "), ";", fixed=TRUE)[[1]] #split on semicolons
section.divide <- list()
for (cmd in section.split) {
if (grepl("^\\s*!.*", cmd, perl=TRUE)) next #skip comment lines
if (grepl("^\\s+$", cmd, perl=TRUE)) next #skip blank lines
#mplus is apparently tolerant of specifications that don't include IS/ARE/=
#example: usevariables x1-x10;
#thus, split on spaces and assume that first element is lhs, drop second element if IS/ARE/=, and assume remainder is rhs
#but if user uses equals sign, then spaces will not always be present (e.g., usevariables=x1-x10)
if ( (leadingEquals <- regexpr("^\\s*[A-Za-z]+[A-Za-z_-]*\\s*(=)", cmd[1L], perl=TRUE))[1L] > 0) {
cmdName <- trimSpace(substr(cmd[1L], 1, attr(leadingEquals, "capture.start") - 1))
cmdArgs <- trimSpace(substr(cmd[1L], attr(leadingEquals, "capture.start") + 1, nchar(cmd[1L])))
} else {
cmd.spacesplit <- strsplit(trimSpace(cmd[1L]), "\\s+", perl=TRUE)[[1L]]
if (length(cmd.spacesplit) < 2L) {
#for future: make room for this function to prase things like just TECH13 (no rhs)
} else {
cmdName <- trimSpace(cmd.spacesplit[1L])
if (length(cmd.spacesplit) > 2L && tolower(cmd.spacesplit[2L]) %in% c("is", "are")) {
cmdArgs <- paste(cmd.spacesplit[3L:length(cmd.spacesplit)], collapse=" ") #remainder, removing is/are
} else {
cmdArgs <- paste(cmd.spacesplit[2L:length(cmd.spacesplit)], collapse=" ") #is/are not used, so just join rhs
}
}
}
section.divide[[make.names(tolower(cmdName))]] <- cmdArgs
}
if (!missing(required)) { stopifnot(all(required %in% names(section.divide))) }
return(section.divide)
}
#helper function
splitFilePath <- function(abspath) {
#function to split path into path and filename
#code adapted from R.utils filePath command
if (!is.character(abspath)) stop("Path not a character string")
if (nchar(abspath) < 1 || is.na(abspath)) stop("Path is missing or of zero length")
components <- strsplit(abspath, split="[\\/]")[[1]]
lcom <- length(components)
stopifnot(lcom > 0)
#the file is the last element in the list. In the case of length == 1, this will extract the only element.
relFilename <- components[lcom]
absolute <- FALSE
if (lcom == 1) {
dirpart <- NA_character_
}
else if (lcom > 1) {
#drop the file from the list (the last element)
components <- components[-lcom]
dirpart <- do.call("file.path", as.list(components))
#if path begins with C:, /, //, or \\, then treat as absolute
if (grepl("^([A-Z]{1}:|/|//|\\\\)+.*$", dirpart, perl=TRUE)) absolute <- TRUE
}
return(list(directory=dirpart, filename=relFilename, absolute=absolute))
}
readMplusInputData <- function(mplus.inp, inpfile) {
#handle issue of mplus2psindex being called with an absolute path, whereas mplus has only a local data file
inpfile.split <- splitFilePath(inpfile)
datfile.split <- splitFilePath(mplus.inp$data$file)
#if inp file target directory is non-empty, but mplus data is without directory, then append
#inp file directory to mplus data. This ensures that R need not be in the working directory
#to read the dat file. But if mplus data has an absolute directory, don't append
#if mplus data directory is present and absolute, or if no directory in input file, just use filename as is
if (!is.na(datfile.split$directory) && datfile.split$absolute)
datFile <- mplus.inp$data$file #just use mplus data filename if it has absolute path
else if (is.na(inpfile.split$directory))
datFile <- mplus.inp$data$file #just use mplus data filename if inp file is missing path (working dir)
else
datFile <- file.path(inpfile.split$directory, mplus.inp$data$file) #dat file path is relative or absent, and inp file directory is present
if (!file.exists(datFile)) {
warning("Cannot find data file: ", datFile)
return(NULL)
}
#handle missing is/are:
missList <- NULL
if (!is.null(missSpec <- mplus.inp$variable$missing)) {
expandMissVec <- function(missStr) {
#sub-function to obtain a vector of all missing values within a set of parentheses
missSplit <- strsplit(missStr, "\\s+")[[1L]]
missVals <- c()
for (f in missSplit) {
if ((hyphenPos <- regexpr("\\d+(-)\\d+", f, perl=TRUE))[1L] > -1L) {
#expand hyphen
preHyphen <- substr(f, 1, attr(hyphenPos, "capture.start") - 1)
postHyphen <- substr(f, attr(hyphenPos, "capture.start") + 1, nchar(f))
missVals <- c(missVals, as.character(seq(preHyphen, postHyphen)))
} else {
#append to vector
missVals <- c(missVals, f)
}
}
return(as.numeric(missVals))
}
if (missSpec == "." || missSpec=="*") { #case 1: MISSING ARE|=|IS .;
na.strings <- missSpec
} else if ((allMatch <- regexpr("\\s*ALL\\s*\\(([^\\)]+)\\)", missSpec, perl=TRUE))[1L] > -1L) { #case 2: use of ALL with parens
missStr <- trimSpace(substr(missSpec, attr(allMatch, "capture.start"), attr(allMatch, "capture.start") + attr(allMatch, "capture.length") - 1L))
na.strings <- expandMissVec(missStr)
} else { #case 3: specific missing values per variable
#process each element
missBlocks <- gregexpr("(?:(\\w+)\\s+\\(([^\\)]+)\\))+", missSpec, perl=TRUE)[[1]]
missList <- list()
if (missBlocks[1L] > -1L) {
for (i in 1:length(missBlocks)) {
vname <- substr(missSpec, attr(missBlocks, "capture.start")[i,1L], attr(missBlocks, "capture.start")[i,1L] + attr(missBlocks, "capture.length")[i,1L] - 1L)
vmiss <- substr(missSpec, attr(missBlocks, "capture.start")[i,2L], attr(missBlocks, "capture.start")[i,2L] + attr(missBlocks, "capture.length")[i,2L] - 1L)
vnameHyphen <- regexpr("(\\w+)-(\\w+)", vname, perl=TRUE)[1L]
if (vnameHyphen > -1L) {
#lookup against variable names
vstart <- which(mplus.inp$variable$names == substr(vname, attr(vnameHyphen, "capture.start")[1L], attr(vnameHyphen, "capture.start")[1L] + attr(vnameHyphen, "capture.length")[1L] - 1L))
vend <- which(mplus.inp$variable$names == substr(vname, attr(vnameHyphen, "capture.start")[2L], attr(vnameHyphen, "capture.start")[2L] + attr(vnameHyphen, "capture.length")[2L] - 1L))
if (length(vstart) == 0L || length(vend) == 0L) { stop("Unable to lookup missing variable list: ", vname) }
#I suppose start or finish could be mixed up
if (vstart > vend) { vstart.orig <- vstart; vstart <- vend; vend <- vstart.orig }
vname <- mplus.inp$variable$names[vstart:vend]
}
missVals <- expandMissVec(vmiss)
for (j in 1:length(vname)) {
missList[[ vname[j] ]] <- missVals
}
}
} else { stop("I don't understand this missing specification: ", missSpec) }
}
} else { na.strings <- "NA" }
if (!is.null(missList)) {
dat <- read.table(datFile, header=FALSE, col.names=mplus.inp$variable$names, colClasses="numeric")
#loop over variables in missList and set missing values to NA
dat[,names(missList)] <- lapply(names(missList), function(vmiss) {
dat[which(dat[,vmiss] %in% missList[[vmiss]]), vmiss] <- NA
return(dat[,vmiss])
})
names(dat) <- mplus.inp$variable$names #loses these from the lapply
} else {
dat <- read.table(datFile, header=FALSE, col.names=mplus.inp$variable$names, na.strings=na.strings, colClasses="numeric")
}
#TODO: support covariance/mean+cov inputs
#store categorical variables as ordered factors
if (!is.null(mplus.inp$variable$categorical)) {
dat[,c(mplus.inp$variable$categorical)] <- lapply(dat[,c(mplus.inp$variable$categorical), drop=FALSE], ordered)
}
return(dat)
}
divideInputIntoSections <- function(inpfile.text, filename) {
inputHeaders <- grep("^\\s*(title:|data.*:|variable:|define:|analysis:|model.*:|output:|savedata:|plot:|montecarlo:)", inpfile.text, ignore.case=TRUE, perl=TRUE)
stopifnot(length(inputHeaders) > 0L)
mplus.sections <- list()
for (h in 1:length(inputHeaders)) {
sectionEnd <- ifelse(h < length(inputHeaders), inputHeaders[h+1] - 1, length(inpfile.text))
section <- inpfile.text[inputHeaders[h]:sectionEnd]
sectionName <- trimSpace(sub("^([^:]+):.*$", "\\1", section[1L], perl=TRUE)) #obtain text before the colon
#dump section name from input syntax
section[1L] <- sub("^[^:]+:(.*)$", "\\1", section[1L], perl=TRUE)
mplus.sections[[make.names(tolower(sectionName))]] <- section
}
return(mplus.sections)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.