# ----------------------- intro -------------------------------------------
# Trying to implement a cleaner, more structurated version of
# lavaan function lavParseModelString
# Written by Luc De Wilde in september/october 2023
# -------------------------------------------------------------------------
#------------------------- known differences ------------------------------
# Different behaviour of new code:
# Lines ending with a '+', "*" or "=~" are explicitly concatenated with the
# following line (current code achieves the same, but via other means)
# Lines beginning with these same string are concatenated to the previous line.
# Labels given via label(...), equal(...) or rv(...) can contain spaces.
# Adding modifiers to a lhs-op-rhs-block item can be done
# on a new line (cf. multimod.lmd)
# Adding multiple times the same modifier results in the last one being applied.
# if model is given in a character-vector with length > 1 and some comments
# that include a lavaan operator or "efa", the current procedure gives
# a fatal error while the new procedure doesn't (cf. first test
# 'non collapsed' in testing.R)
# if there are blocks defined and the first one occurs after other formula's
# have been processed, a warning is given
# Splitting of lavaan operators "=~" and "~~" is possible and regulated by
# parameter spaces.in.operator:
# ignore: silently remove spaces
# warn: remove spaces and gives warning
# error: spaces are not removed and this will lead to a syntax error
# -------------------------------------------------------------------------
# ----------------------- ldw_create_enum ---------------------------------
# function to create an Enumerable like structure in R
# usage example mycolors <- ldw_create_enum(c("black", "white", "orange", "green", "red", "blue"))
# xyz <- mycolors$red
# values are default 1L, ..., number of names, but can be user specified
# -------------------------------------------------------------------------
ldw_create_enum <- function(names, values = seq_along(names)) {
stopifnot(identical(unique(names), names), is.character(names))
stopifnot(length(names) == length(values))
res <- as.list(setNames(values, names))
res$enum.names <- names
res$enum.values <- values
res$enum.size <- length(values)
res <- as.environment(res)
lockEnvironment(res, bindings = TRUE)
res
}
# ------------------------ ldw_parse_sublist ------------------------------------
# function to create a list with only some indexes for all members
# -------------------------------------------------------------------------
ldw_parse_sublist <- function(inlist, indexes) {
for (j in seq_along(inlist)) {
inlist[[j]] <- inlist[[j]][indexes]
}
inlist
}
# ------------------------ ldw_txtloc --------------------------------
# function which translates a position in the model source string to a
# user friendly locator and shows the line with position
# --------------------------------------------------------------------------
ldw_txtloc <- function(modelsrc, position) {
txt <- ""
if (nchar(modelsrc) >= position && position > 0) {
newlines <- gregexpr("\n", paste0(modelsrc, "\n"), fixed = TRUE)[[1]]
lijn <- which(newlines >= position)[1]
if (lijn == 1L) {
pos <- position
lijnchar <- substr(modelsrc, 1L, newlines[1])
} else {
pos <- position - newlines[lijn - 1L]
lijnchar <- substr(modelsrc, newlines[lijn - 1L] + 1L, newlines[lijn])
}
if (nchar(lijnchar) == 1L) {
lijnchar <- ""
} else {
lijnchar <- substr(lijnchar, 1L, nchar(lijnchar) - 1)
}
# adapt line number when first line blank :
if (grepl("^[ \t]*\n", modelsrc)) lijn <- lijn - 1L
txt <- gettextf(" at line %1$s, pos %2$s", lijn, pos)
cat(lijnchar, "\n", strrep(" ", pos - 1L), "^\n", sep = "")
}
txt
}
# ------------------------ ldw_parse_step1 ------------------------------
# function to split the model source in tokens.
# Returns a list with tokens with their attributes
# elem.pos : position in source
# elem.type : type of token (cf. definition of types
# in ldw_parse_model_string)
# elem.text : the text of the token
# elem.formule.number : sequence number of the 'logical'
# formula where the token occurs
# the function returns the stored tokens in a list
# --------------------------------------------------------------------------
ldw_parse_step1 <- function(modelsrc, types, debug, warn, spaces.in.operator) {
nmax <- nchar(modelsrc)
elem.pos <- vector("integer", nmax)
elem.type <- elem.pos
elem.text <- vector("character", nmax)
elem.i <- 1L
modelsrcw <- paste0(modelsrc, "\n") # working model, must end
# with a newline for tests via regexpr
stringliterals <- gregexpr("\"[^\"]*?[\"\n]", modelsrcw)[[1L]]
if (stringliterals[1L] > -1L) {
stringliteral.lengths <- attr(stringliterals, "match.length")
for (i in seq_along(stringliterals)) {
pfpos <- stringliterals[i]
pflen <- stringliteral.lengths[i]
substr(modelsrcw, pfpos + 1L, pfpos + pflen - 2L) <-
strrep(" ", pflen - 2L)
elem.pos[elem.i] <- pfpos
elem.text[elem.i] <- substr(modelsrc, pfpos + 1L, pfpos + pflen - 2L)
elem.type[elem.i] <- types$stringliteral
elem.i <- elem.i + 1L
}
}
comments <- gregexpr("[#!].*?\n", modelsrcw)[[1L]]
if (comments[1] > -1L) {
comment.lengths <- attr(comments, "match.length")
for (i in seq_along(comments)) {
substr(modelsrcw, comments[i], comments[i] + comment.lengths[i] - 1L) <-
strrep(" ", comment.lengths[i] - 1L)
# check for stringliterals in comment
str.in.comment <- (elem.pos > comments[i] &
elem.pos < comments[i] + comment.lengths[i])
if (any(str.in.comment)) {
elem.type[str.in.comment] <- 0
}
}
}
modelsrcw <- gsub("\t", " ", modelsrcw)
newlines <- gregexpr("[;\n]", modelsrcw)[[1L]]
if (newlines[1L] > -1L) {
for (i in seq_along(newlines)) {
pfpos <- newlines[i]
substr(modelsrcw, pfpos, pfpos) <- "\n"
elem.pos[elem.i] <- pfpos
elem.text[elem.i] <- "\n"
elem.type[elem.i] <- types$newline
elem.i <- elem.i + 1L
}
}
# --------------------- handling spaces.in.operator ------------------------
if (spaces.in.operator != "error") {
if (grepl("= +~", modelsrcw)) {
waar <- regexpr("= +~", modelsrcw)[1]
modelsrcw <- gsub("=( +)~", "=~\\1", modelsrcw)
if (spaces.in.operator == "warn" && warn == TRUE) {
lav_msg_warn(gettext("splitting of '=~' deprecated"),
ldw_txtloc(modelsrc, waar))
}
}
if (grepl("[^=~]~ +~", modelsrcw)) {
waar <- regexpr("[^=~]~ +~", modelsrcw)[1]
modelsrcw <- gsub("([^=~])~( +)~", "\\1~~\\2", modelsrcw)
if (spaces.in.operator == "warn" && warn == TRUE) {
lav_msg_warn(gettext("splitting of '~~' deprecated"),
ldw_txtloc(modelsrc, waar + 1L))
}
}
}
# --------------------------------------------------------------------------
lavops <- gregexpr("=~|<~|~\\*~|~~|~|==|<|>|:=|:|\\||%", modelsrcw)[[1]]
if (lavops[1L] > -1L) {
lavop.lengths <- attr(lavops, "match.length")
for (i in seq_along(lavops)) {
pfpos <- lavops[i]
pflen <- lavop.lengths[i]
elem.pos[elem.i] <- pfpos
elem.text[elem.i] <- substr(modelsrcw, pfpos, pfpos + pflen - 1L)
elem.type[elem.i] <- types$lavaanoperator
substr(modelsrcw, pfpos, pfpos + pflen - 1L) <- strrep(" ", pflen)
elem.i <- elem.i + 1L
}
}
symbols <- gregexpr("[,()/*?^']", modelsrcw)[[1L]] # f1=~x2 + 0.5 ? x3
symbols1 <- gregexpr("[-+][^.0-9]", modelsrcw)[[1L]] # f1=~x2+x3
symbols2 <- gregexpr("[._0-9a-df-zA-DF-Z)] *[-+][.0-9]", modelsrcw)[[1L]]
# f1=~x2+2*x3, len-2 !
symbols3 <- gregexpr("[^.0-9][eE] *[-+][.0-9]", modelsrcw)[[1L]]
# f1=~xe+2*x3, len-2 !
if (symbols1[1L] > -1L) {
if (symbols[1L] == -1L) {
symbols <- symbols1
} else {
symbols <- c(symbols, symbols1)
}
}
if (symbols2[1L] > -1L) {
symbols2.lengths <- attr(symbols2, "match.length")
symbols2 <- symbols2 + symbols2.lengths - 2L
if (symbols[1L] == -1L) {
symbols <- symbols2
} else {
symbols <- c(symbols, symbols2)
}
}
if (symbols3[1L] > -1L) {
symbols3.lengths <- attr(symbols3, "match.length")
symbols3 <- symbols3 + symbols3.lengths - 2L
if (symbols[1L] == -1L) {
symbols <- symbols3
} else {
symbols <- c(symbols, symbols3)
}
}
if (symbols[1L] > -1L) {
for (i in seq_along(symbols)) {
pfpos <- symbols[i]
substr(modelsrcw, pfpos, pfpos) <- " "
elem.pos[elem.i] <- pfpos
elem.text[elem.i] <- substr(modelsrc, pfpos, pfpos)
elem.type[elem.i] <- types$symbol
elem.i <- elem.i + 1L
}
}
numliterals <- gregexpr(
"([ \n][-+][.0-9]|[ \n]\\.[0-9]|[ \n][0-9])[-+\\.0-9eE]*",
modelsrcw)[[1]]
if (numliterals[1L] > -1L) {
numliteral.lengths <- attr(numliterals, "match.length") - 1L
numliterals <- numliterals + 1L
for (i in seq_along(numliterals)) {
pfpos <- numliterals[i]
pflen <- numliteral.lengths[i]
substr(modelsrcw, pfpos, pfpos + pflen - 1L) <- strrep(" ", pflen)
elem.pos[elem.i] <- pfpos
elem.text[elem.i] <- substr(modelsrc, pfpos, pfpos + pflen - 1L)
elem.type[elem.i] <- types$numliteral
elem.i <- elem.i + 1L
}
}
identifiers <- gregexpr(
"[ \n][_.a-zA-Z][._a-zA-Z0-9]*",
paste0(" ", modelsrcw))[[1]]
identifier.lengths <- attr(identifiers, "match.length") - 1L
for (i in seq_along(identifiers)) {
pfpos <- identifiers[i]
pflen <- identifier.lengths[i]
substr(modelsrcw, pfpos, pfpos + pflen - 1L) <- strrep(" ", pflen)
elem.pos[elem.i] <- pfpos
elem.text[elem.i] <- substr(modelsrc, pfpos, pfpos + pflen - 1L)
elem.type[elem.i] <- types$identifier
elem.i <- elem.i + 1L
}
# check for uninterpreted chars
wrong <- regexpr("[^\"\n ]", modelsrcw)
if (wrong != -1L) {
lav_msg_stop(gettext("unexpected character"),
ldw_txtloc(modelsrc, wrong))
}
# remove unused elements from vectors
elements <- which(elem.type > 0L)
elem.pos <- elem.pos[elements]
elem.type <- elem.type[elements]
elem.text <- elem.text[elements]
# order tokens
token.order <- order(elem.pos)
elem.pos <- elem.pos[token.order]
elem.type <- elem.type[token.order]
elem.text <- elem.text[token.order]
# concatenate identifiers with only spaces in between - LDW 22/4/2024
elem.i <- length(elem.pos)
concatenated <- FALSE
while (elem.i > 1L) {
if (elem.type[elem.i] == types$identifier &&
elem.type[elem.i - 1L] == types$identifier) {
spaces.between <- elem.pos[elem.i] - elem.pos[elem.i - 1L] -
length(elem.text[elem.i - 1L])
elem.text[elem.i - 1L] <- paste0(
elem.text[elem.i - 1L],
strrep(" ", spaces.between),
elem.text[elem.i]
)
elem.type[elem.i] <- 0L
concatenated <- TRUE
}
elem.i <- elem.i - 1L
}
if (concatenated) { # remove items with type 0
elements <- which(elem.type > 0L)
elem.pos <- elem.pos[elements]
elem.type <- elem.type[elements]
elem.text <- elem.text[elements]
}
# to set formula number
elem.formula.number <- rep(0L, length(elem.type))
frm.number <- 1L
frm.hasefa <- FALSE
frm.lastplus <- FALSE
frm.incremented <- FALSE
for (i in seq_along(elem.type)) {
elem.formula.number[i] <- frm.number
if (elem.type[i] == types$identifier && elem.text[i] == "efa") frm.hasefa <- TRUE
if (any(elem.text[i] == c("+", "*", "=~"))) {
if (frm.incremented) {
frm.number <- frm.number - 1L
elem.formula.number[i] <- frm.number
frm.incremented <- FALSE
}
frm.lastplus <- TRUE
} else {
if (any(elem.type[i] == c(
types$stringliteral, types$identifier, types$numliteral,
types$stringliteral, types$symbol
))) {
frm.lastplus <- FALSE
}
if (i > 1 && elem.type[i] != types$newline && elem.type[i - 1L] == types$lavaanoperator) {
frm.hasefa <- FALSE
}
}
if (elem.type[i] == types$newline) {
if (i > 1 && elem.type[i - 1L] != types$newline) { # ignore multiple new lines
if (!frm.hasefa && !frm.lastplus) {
frm.number <- frm.number + 1L
frm.incremented <- TRUE
} else {
frm.hasefa <- FALSE
}
}
} else {
frm.incremented <- FALSE
}
}
return(list(
elem.pos = elem.pos, elem.type = elem.type,
elem.text = elem.text, elem.formula.number = elem.formula.number
))
}
# ------------------------ ldw_parse_step2 ------------------------------
# function to group the modellist tokens in 'mono' formulas.
# mono means that the terms (for formulas other then blocks and constraints)
# are split in seperate formula's, e.g.
# a1 + a2 =~ b1 + b2 becomes
# / a1 =~ b1
# | a1 =~ b2
# | a2 =~ b1
# \ a2 =~ b2
# newlines are removed
# the function returns a list of formulas
# --------------------------------------------------------------------------
ldw_parse_step2 <- function(modellist, modelsrc, types, debug, warn) {
real.operators <- c("=~", "<~", "~*~", "~~", "~", "|", "%")
welke <- modellist$elem.type != types$newline
formula.numbers <- unique(modellist$elem.formula.number[welke])
formulas <- lapply(formula.numbers, function(s) {
welkenu <- modellist$elem.formula.number == s & welke
list(
elem.pos = modellist$elem.pos[welkenu],
elem.type = modellist$elem.type[welkenu],
elem.text = modellist$elem.text[welkenu]
)
})
maxnum <- length(formula.numbers) + sum(modellist$elem.text == "+")
outval <- vector(mode = "list", length = maxnum)
realnum <- 0L
for (i in seq_along(formulas)) {
formul1 <- formulas[[i]]
opi <- which(formul1$elem.type == types$lavaanoperator)
nelem <- length(formul1$elem.type)
if (length(opi) == 0L) {
lav_msg_stop(gettext("formula without valid operator"),
ldw_txtloc(modelsrc, formul1$elem.pos[1]))
}
if (length(opi) > 1L) opi <- opi[1] # only first operator taken
if (any(formul1$elem.text[opi] == real.operators) && sum(formul1$elem.text == "+") > 0) {
# check + symbols outside parentheses in left and right hand side
lhplusjes <- integer(0)
openparentheses <- 0L
for (jj in seq.int(1L, opi - 1L)) {
if (formul1$elem.text[jj] == "(") {
openparentheses <- openparentheses + 1L
next
}
if (formul1$elem.text[jj] == ")") {
openparentheses <- openparentheses - 1L
next
}
if (formul1$elem.text[jj] == "+" && openparentheses == 0L) {
lhplusjes <- c(lhplusjes, jj)
}
}
lhplusjes <- c(lhplusjes, opi)
plusjes <- integer(0)
openparentheses <- 0L
for (jj in seq.int(opi + 1L, nelem)) {
if (formul1$elem.text[jj] == "(") {
openparentheses <- openparentheses + 1L
next
}
if (formul1$elem.text[jj] == ")") {
openparentheses <- openparentheses - 1L
next
}
if (formul1$elem.text[jj] == "+" && openparentheses == 0L) {
plusjes <- c(plusjes, jj)
}
}
plusjes <- c(plusjes, nelem + 1)
# splitting lhs and rhs on '+' signs
for (j in seq_along(lhplusjes)) {
j0 <- 1L
if (j > 1L) j0 <- lhplusjes[j - 1L] + 1L
j1 <- lhplusjes[j] - 1L
if (j1 < j0) next # skip empty parts
for (k in seq_along(plusjes)) {
k0 <- opi + 1L
k1 <- plusjes[k] - 1L
if (k > 1L) k0 <- plusjes[k - 1L] + 1L
if (k1 < k0) next # skip empty parts
welke <- c(seq.int(j0, j1), opi, seq.int(k0, k1))
realnum <- realnum + 1L
outval[[realnum]] <- ldw_parse_sublist(formul1, welke)
}
}
} else {
realnum <- realnum + 1L
outval[[realnum]] <- formul1
}
}
outval[seq_len(realnum)]
}
# ------------------------ ldw_parse_check_valid_name ------------------------
# checks if an element of the elem.text member in a list is a valid r-name
# ----------------------------------------------------------------------------
ldw_parse_check_valid_name <- function(formul1, ind, modelsrc) {
# allow spaces, LDW 22/4/2024
testitem <- gsub(" ", "_", formul1$elem.text[ind], fixed = TRUE)
if (make.names(testitem) != testitem) {
lav_msg_stop(
gettext("identifier is either a reserved word (in R) or
contains an illegal character"),
ldw_txtloc(modelsrc, formul1$elem.pos[ind])
)
}
return(invisible(NULL))
}
# ------------------------ ldw_num_modifier ----------------------------------
# function for transforming string with numeric values separated by semicolons
# in a numeric vector (used in ldw_parse_get_modifier)
# -----------------------------------------------------------------------------
ldw_num_modifier <- function(txt) {
# help function
vapply(strsplit(txt, ";")[[1]], function(x)
if (x == "NA") NA_real_ else as.numeric(x), 1.0, USE.NAMES = FALSE)
}
# ------------------------ ldw_unpaste ---------------------------------------
# function for transforming string with string values separated by semicolons
# in a vector (used in ldw_parse_get_modifier)
# -----------------------------------------------------------------------------
ldw_unpaste <- function(text) {
out <- strsplit(text, ";(NA;)*")[[1]]
if (grepl(";$", text)) out <- c(out, "")
out
}
# ------------------------ ldw_evaluate_r_expression --------------------------
# help function to evaluate the value of an r expression formed by the elements
# with index 'from' to 'to' of a formula 'formul1'
# returns "_error_" if evaluation failed
# used only in ldw_parse_get_modifier
# -----------------------------------------------------------------------------
ldw_evaluate_r_expression <- function(formul1, from, to, types) {
strings <- vapply(seq.int(from, to), function(x) {
if (formul1$elem.type[x] == types$stringliteral) {
paste0('"', formul1$elem.text[x], '"')
} else {
formul1$elem.text[x]
}
}, "")
txt <- paste(strings, collapse = "")
result <- try(eval(parse(text = txt), envir = NULL,
enclos = baseenv()), silent = TRUE)
if (inherits(result, "try-error")) {
return("_error_")
}
return(result)
}
# ------------------------ ldw_adapt_vector_type -----------------------------
# help function to dynamically adapt the type of a vector in a c(...) sequence
# used only in ldw_parse_get_modifier
# ----------------------------------------------------------------------------
ldw_adapt_vector_type <- function(typenu, typetoadd, texttoadd, types) {
if (texttoadd != "NA") {
if (typenu == 0) {
typenu <- typetoadd
} else {
if (typenu != typetoadd) typenu <- types$stringliteral
}
} else if (typenu == 0) {
typenu <- types$numliteral
}
return(typenu)
}
# ------------------------ ldw_parse_get_modifier ------------------------------
# The function takes a list with tokens belonging to a single 'mono' lavaan
# formula as input. The other arguments are:
# lhs : check for lhs or rhs modifier
# opi : index of the lavaan operator in the list-items
# modelsrc : the model source string (for error messages and warnings)
# types : the types of tokens
# debug : should debug information be displayed?
# warn : give warnings when appropiate?
# The function return the modifier detected as element of a list
# with name the modifier type (efa, fixed, start, label, lower, upper, prior or rv)
# and value an array of values (length > 1 if vector via c(...)) for the modifier value.
# An error message is produced when no modifier can be determined.
# --------------------------------------------------------------------------
ldw_parse_get_modifier <- function(formul1, lhs, opi, modelsrc, types, debug, warn) {
nelem <- length(formul1$elem.type)
# remove unnecessary parentheses (one element between parentheses, previous no identifier)
check.more <- TRUE
while (check.more && nelem > 4L) {
check.more <- FALSE
for (par.i in seq.int(3L, nelem - 1L)) {
if (formul1$elem.text[par.i - 1L] == "(" &&
formul1$elem.text[par.i + 1L] == ")" &&
formul1$elem.type[par.i - 2L] != types$identifier) {
formul1$elem.type[par.i - 1L] <- 0L
formul1$elem.type[par.i + 1L] <- 0L
check.more <- TRUE
}
}
if (check.more) {
formul1 <- ldw_parse_sublist(formul1, which(formul1$elem.type > 0))
nelem <- length(formul1$elem.type)
}
}
if (lhs) {
# modifier on left hand side
# only 1 possibility : efa ( expression-resulting-in-char ) * identifier operator ... (rhs) ...
if (formul1$elem.text[1L] == "efa" &&
formul1$elem.text[2L] == "(" &&
formul1$elem.text[opi - 3L] == ")" &&
formul1$elem.text[opi - 2L] == "*") {
temp <- ldw_evaluate_r_expression(formul1, 3L, opi - 4L, types)
if (is.character(temp) && temp[1] != "_error_") {
return(list(efa = temp))
}
}
lav_msg_stop(gettext("invalid left hand side modifier"),
ldw_txtloc(modelsrc, formul1$elem.pos[1L]))
} else {
# modifier on right hand side
# check for vectors c(...), start(...), fixed(...), ...
for (j in (opi + 1L):(nelem - 2L)) {
if (formul1$elem.text[j + 1L] == "(") {
if (formul1$elem.text[j] == "c") {
vector.type <- 0
labnu <- j + 2L
lab <- formul1$elem.text[labnu]
vector.type <- ldw_adapt_vector_type(
vector.type, formul1$elem.type[labnu],
formul1$elem.text[labnu], types
)
while (formul1$elem.text[labnu + 1L] == ",") {
labnu <- labnu + 2L
lab <- c(lab, formul1$elem.text[labnu])
vector.type <- ldw_adapt_vector_type(
vector.type, formul1$elem.type[labnu],
formul1$elem.text[labnu], types
)
}
if (vector.type == 0) vector.type <- types$stringliteral
if (formul1$elem.text[labnu + 1L] == ")") {
formul1$elem.type[seq.int(j, labnu)] <- 0
formul1$elem.type[labnu + 1L] <- vector.type
formul1$elem.text[labnu + 1L] <- paste(lab, collapse = ";")
formul1 <- ldw_parse_sublist(formul1, which(formul1$elem.type > 0))
nelem <- length(formul1$elem.type)
break
} else {
lav_msg_stop(gettext("invalid vector specification"),
ldw_txtloc(modelsrc, formul1$elem.pos[j]))
}
}
if (j + 3L < nelem && formul1$elem.text[j + 3L] == "," &&
any(formul1$elem.text[j] == c("start", "fixed", "label",
"upp", "lower", "rv", "prior"))) {
vector.type <- 0
labnu <- j + 2L
lab <- formul1$elem.text[labnu]
vector.type <- ldw_adapt_vector_type(
vector.type, formul1$elem.type[labnu],
formul1$elem.text[labnu], types
)
while (formul1$elem.text[labnu + 1L] == ",") {
labnu <- labnu + 2L
lab <- c(lab, formul1$elem.text[labnu])
vector.type <- ldw_adapt_vector_type(
vector.type, formul1$elem.type[labnu],
formul1$elem.text[labnu], types
)
}
if (vector.type == 0) vector.type <- types$stringliteral
if (formul1$elem.text[labnu + 1L] == ")") {
formul1$elem.type[seq.int(j + 3L, labnu)] <- 0
formul1$elem.type[j + 2L] <- vector.type
formul1$elem.text[j + 2L] <- paste(lab, collapse = ";")
formul1 <- ldw_parse_sublist(formul1, which(formul1$elem.type > 0))
nelem <- length(formul1$elem.type)
break
} else {
lav_msg_stop(gettext("invalid vector specification"),
ldw_txtloc(modelsrc, formul1$elem.pos[j]))
}
}
}
}
# possibilities
# stringliteral|identifier * identifier|numliteral
# numliteral * identifier|numliteral
# numliteral ? identifier|numliteral
# fixed|start|upper|lower|rv|prior(numliteral) * identifier|numliteral
# label|equal (stringliteral|identifier) * identifier|numliteral
# ==> literals before * or ? can be replaced by R-expression
# resulting in correct type
# check on last element being a numliteral or identifier
# already done in calling function
if (all(formul1$elem.text[nelem - 1L] != c("*", "?"))) {
lav_msg_stop(gettext("invalid modifier symbol (should be '*' or '?')"),
ldw_txtloc(modelsrc, formul1$elem.pos[nelem - 1L]))
}
if (formul1$elem.text[nelem - 1L] == "?") {
temp <- ldw_evaluate_r_expression(formul1, opi + 1L, nelem - 2L, types)
if (is.numeric(temp)) {
return(list(start = temp))
}
lav_msg_stop(gettext("invalid start value expression (should be numeric)"),
ldw_txtloc(modelsrc, formul1$elem.pos[opi + 1L]))
}
if (nelem == opi + 3) {
if (formul1$elem.text[opi + 1L] == "NA")
formul1$elem.type[opi + 1L] <- types$numliteral
if (any(formul1$elem.type[opi + 1L] ==
c(types$identifier, types$stringliteral))) {
return(list(label = ldw_unpaste(formul1$elem.text[opi + 1L])))
} else {
if (formul1$elem.type[opi + 1L] == types$numliteral) {
return(list(fixed = ldw_num_modifier(formul1$elem.text[opi + 1L])))
} else {
lav_msg_stop(
gettext("invalid value (should be numeric, identifier or string)"),
ldw_txtloc(modelsrc, formul1$elem.pos[opi + 1L]))
}
}
}
if (formul1$elem.text[opi + 2L] == "(" &&
formul1$elem.text[nelem - 2L] == ")") {
if (any(formul1$elem.text[opi + 1L] ==
c("fixed", "start", "upper", "lower", "prior"))) {
if (nelem == opi + 6L) {
if (formul1$elem.text[opi + 3L] == "NA")
formul1$elem.type[opi + 3L] <- types$numliteral
if (formul1$elem.type[opi + 3L] == types$numliteral) {
outje <- list()
outje[[formul1$elem.text[opi + 1L]]] <-
ldw_num_modifier(formul1$elem.text[opi + 3L])
return(outje)
}
lav_msg_stop(gettext("invalid value (should be numeric)"),
ldw_txtloc(modelsrc, formul1$elem.pos[opi + 3L]))
}
temp <- ldw_evaluate_r_expression(formul1, opi + 3L, nelem - 3L, types)
if (is.numeric(temp)) {
outje <- list()
outje[[formul1$elem.text[opi + 1L]]] <- temp
return(outje)
}
lav_msg_stop(
gettext("invalid value R-expression (should be numeric)"),
ldw_txtloc(modelsrc, formul1$elem.pos[opi + 3L]))
}
if (any(formul1$elem.text[opi + 1L] == c("equal", "rv", "label"))) {
modname <- formul1$elem.text[opi + 1L]
if (modname == "equal") modname <- "label"
if (nelem == opi + 6L) {
if (formul1$elem.type[opi + 3L] == types$stringliteral) {
outje <- list()
outje[[modname]] <-
ldw_unpaste(formul1$elem.text[opi + 3L])
return(outje)
}
lav_msg_stop(gettext("invalid value (should be string)"),
ldw_txtloc(modelsrc, formul1$elem.pos[opi + 3L]))
}
temp <- ldw_evaluate_r_expression(formul1, opi + 3L, nelem - 3L, types)
if (is.character(temp) && temp[1] != "_error_") {
outje <- list()
outje[[modname]] <- temp
return(outje)
}
lav_msg_stop(
gettext("invalid value R-expression (should be a string)"),
ldw_txtloc(modelsrc, formul1$elem.pos[opi + 3L]))
}
}
temp <- ldw_evaluate_r_expression(formul1, opi + 1L, nelem - 2L, types)
if (is.numeric(temp)) {
return(list(fixed = temp))
}
if (is.character(temp) && temp[1] != "_error_") {
return(list(label = temp))
}
lav_msg_stop(gettext("invalid modifier specification"),
ldw_txtloc(modelsrc, formul1$elem.pos[opi + 1L]))
}
}
ldw_parse_model_string <- function(model.syntax = "", as.data.frame. = FALSE,
warn = TRUE, debug = FALSE, spaces.in.operator = "warn") {
stopifnot(length(model.syntax) > 0L)
stopifnot(any(spaces.in.operator == c("ignore", "warn", "error")))
# replace 'strange' tildes (in some locales) (new in 0.6-6)
modelsrc <- gsub(
pattern = "\u02dc",
replacement = "~",
paste(unlist(model.syntax), "", collapse = "\n")
)
types <- ldw_create_enum(c(
"identifier", "numliteral", "stringliteral",
"symbol", "lavaanoperator", "newline"
))
modellist <- ldw_parse_step1(modelsrc, types, debug = debug, warn = warn, spaces.in.operator = spaces.in.operator)
if (debug) {
print(data.frame(
pos = modellist$elem.pos,
type = types$enum.names[modellist$elem.type],
text = modellist$elem.text,
formula = modellist$elem.formula.number
))
}
formulalist <- ldw_parse_step2(modellist, modelsrc, types, debug = debug, warn = warn)
#---- analyse syntax formulas and put in flat.-----
max.mono.formulas <- length(formulalist)
flat.lhs <- character(max.mono.formulas)
flat.op <- character(max.mono.formulas)
flat.rhs <- character(max.mono.formulas)
flat.rhs.mod.idx <- integer(max.mono.formulas)
flat.block <- integer(max.mono.formulas) # keep track of groups using ":" operator
flat.fixed <- character(max.mono.formulas) # only for display purposes!
flat.start <- character(max.mono.formulas) # only for display purposes!
flat.lower <- character(max.mono.formulas) # only for display purposes!
flat.upper <- character(max.mono.formulas) # only for display purposes!
flat.label <- character(max.mono.formulas) # only for display purposes!
flat.prior <- character(max.mono.formulas)
flat.efa <- character(max.mono.formulas)
flat.rv <- character(max.mono.formulas)
flat.idx <- 0L
mod.idx <- 0L
constraints <- list()
mod <- list()
block <- 1L
block.op <- FALSE
if (debug) {
cat("formula to analyse:\n")
}
# operators <- c("=~", "<~", "~*~", "~~", "~", "==", "<", ">", ":=",
# ":", "\\|", "%")
constraint_operators <- c("==", "<", ">", ":=")
for (s in seq_along(formulalist)) {
formul1 <- formulalist[[s]]
if (debug) {
cat(vapply(seq_along(formul1$elem.type), function(j) {
if (formul1$elem.type[j] == types$stringliteral) {
return(dQuote(formul1$elem.text[j], FALSE))
}
return(formul1$elem.text[j])
}, ""), "\n")
}
nelem <- length(formul1$elem.type)
# where is the operator
opi <- match(types$lavaanoperator, formul1$elem.type)
# opi <- which(formul1$elem.type == types$lavaanoperator)
# if (length(opi) > 1L) opi <- opi[1L]
op <- formul1$elem.text[opi]
if (any(op == constraint_operators)) { # --------------- constraints ------------------
lhs <- paste(formul1$elem.text[seq.int(1L, opi - 1L)], collapse = "")
rhs <- paste(formul1$elem.text[seq.int(opi + 1L, nelem)], collapse = "")
constraints <- c(
constraints,
list(list(
op = op,
lhs = lhs,
rhs = rhs,
user = 1L
))
)
next
}
if (op == ":") { # ------------------------- block start ------------------
if (opi == 1L) {
lav_msg_stop(
gettext("Missing block identifier. The correct syntax is: \"LHS: RHS\",
where LHS is a block identifier (eg group or level), and RHS is
the group/level/block number or label."),
ldw_txtloc(modelsrc, formul1$elem.pos[1]))
}
if (opi > 2L || all(tolower(formul1$elem.text[1]) !=
c("group", "level", "block", "class"))) {
lav_msg_stop(
gettext("Invalid block identifier. The correct syntax is: \"LHS: RHS\",
where LHS is a block identifier (eg group or level), and RHS is
the group/level/block number or label."),
ldw_txtloc(modelsrc, formul1$elem.pos[1]))
}
if (nelem != 3 || all(formul1$elem.type[3] !=
c(types$stringliteral, types$identifier, types$numliteral))) {
lav_msg_stop(
gettext("syntax contains block identifier \"group\" with missing or
invalid number/label.The correct syntax is: \"LHS: RHS\", where
LHS is a block identifier (eg group or level), and RHS is the
group/level/block number or label."),
ldw_txtloc(modelsrc, formul1$elem.pos[1]))
}
flat.idx <- flat.idx + 1L
flat.lhs[flat.idx] <- formul1$elem.text[1]
flat.op[flat.idx] <- op
flat.rhs[flat.idx] <- formul1$elem.text[3]
flat.rhs.mod.idx[flat.idx] <- 0L
if (block.op) {
block <- block + 1L
} else {
if (flat.idx != 1 && warn == TRUE) {
lav_msg_warn(
gettext("First block defined after other formula's"),
ldw_txtloc(modelsrc, formul1$elem.pos[1]))
}
}
flat.block[flat.idx] <- block
block.op <- TRUE
next
}
# ------------------ relational operators -----------------------------
# checks for valid names in lhs and rhs
ldw_parse_check_valid_name(formul1, opi - 1L, modelsrc) # valid name lhs
for (j in seq.int(opi + 1L, nelem)) { # valid names rhs
if (formul1$elem.type[j] == types$identifier &&
formul1$elem.text[j] != "NA") {
ldw_parse_check_valid_name(formul1, j, modelsrc)
}
}
if (formul1$elem.type[nelem] != types$identifier &&
(formul1$elem.type[nelem] != types$numliteral || all(op != c("~", "=~"))))
{
lav_msg_stop(
gettext("Last element of rhs part expected to be an identifier or,
for operator ~ or =~, a numeric literal!"),
ldw_txtloc(modelsrc, formul1$elem.pos[nelem]))
}
# intercept fixed on 0
# replace 'lhs ~ 0' => 'lhs ~ 0 * 1' - intercept fixed on zero
if (formul1$elem.text[nelem] == "0" && op == "~" && opi == nelem - 1L) {
formul1$elem.type <- c(formul1$elem.type, types$symbol, types$numliteral)
formul1$elem.text <- c(formul1$elem.text, "*", "1")
formul1$elem.pos <- c(formul1$elem.pos, rep(formul1$elem.pos[nelem], 2))
nelem <- length(formul1$elem.type)
}
# phantom latent variable
# replace 'lhs =~ 0' => 'lhs =~ fixed(0)*lhs', 0 can be other numliteral
# also, lhs is last element before '=~'
if (formul1$elem.type[nelem] == types$numliteral && op == "=~") {
formul1$elem.type <- c(
formul1$elem.type[seq.int(1L, nelem - 1L)], types$identifier,
types$symbol, types$numliteral, types$symbol, types$symbol,
types$identifier
)
formul1$elem.text <- c(
formul1$elem.text[seq.int(1L, nelem - 1L)], "fixed", "(",
formul1$elem.text[nelem], ")", "*", formul1$elem.text[opi - 1L]
)
formul1$elem.pos <- c(formul1$elem.pos[seq.int(1L, nelem - 1L)],
rep(formul1$elem.pos[nelem], 6))
nelem <- length(formul1$elem.type)
}
# handling interaction variable types
colons <- which(formul1$elem.text[seq.int(1L, nelem - 1L)] == ":" &
formul1$elem.type[seq.int(2L, nelem)] == types$identifier)
# check at most 1 colon
if (length(colons) > 1) {
lav_msg_stop(
gettext("Three-way or higher-order interaction terms (using multiple
colons) are not supported in the lavaan syntax; please manually
construct the product terms yourself in the data.frame, give
them an appropriate name, and then you can use these interaction
variables as any other (observed) variable in the model syntax."
), ldw_txtloc(modelsrc, formul1$elem.pos[colons[2]]))
}
if (length(colons) == 1) { # collapse items around colon "a" ":" "b" => "a:b"
formul1$elem.text[colons - 1L] <-
paste(formul1$elem.text[seq.int(colons - 1L, colons + 1L)],
collapse = "")
formul1 <- ldw_parse_sublist(formul1, seq.int(1L, colons - 1L))
nelem <- length(formul1$elem.type)
}
lhs <- formul1$elem.text[opi - 1L]
rhs <- formul1$elem.text[nelem]
already <- which(flat.lhs == lhs & flat.op == op & flat.block == block &
(flat.rhs == rhs | (flat.rhs == "" & op == "~" &
formul1$elem.type[nelem] == types$numliteral)))
if (length(already) == 1L) {
idx <- already
} else {
flat.idx <- flat.idx + 1L
idx <- flat.idx
flat.lhs[idx] <- lhs
flat.op[idx] <- op
flat.rhs[idx] <- rhs
flat.block[idx] <- block
if (formul1$elem.type[nelem] == types$numliteral) {
if (op == "~") flat.rhs[idx] <- ""
}
}
lhsmod <- list()
if (opi > 2) lhsmod <- ldw_parse_get_modifier(formul1,
TRUE, opi, modelsrc, types, debug, warn)
rhsmod <- list()
if (nelem - opi > 1) rhsmod <- ldw_parse_get_modifier(formul1,
FALSE, opi, modelsrc, types, debug, warn)
flat.fixed[idx] <- if (is.null(rhsmod$fixed)) flat.fixed[idx] else
paste(rhsmod$fixed, collapse = ";")
flat.start[idx] <- if (is.null(rhsmod$start)) flat.start[idx] else
paste(rhsmod$start, collapse = ";")
flat.label[idx] <- if (is.null(rhsmod$label)) flat.label[idx] else
paste(rhsmod$label, collapse = ";")
flat.lower[idx] <- if (is.null(rhsmod$lower)) flat.lower[idx] else
paste(rhsmod$lower, collapse = ";")
flat.upper[idx] <- if (is.null(rhsmod$upper)) flat.upper[idx] else
paste(rhsmod$upper, collapse = ";")
flat.prior[idx] <- if (is.null(rhsmod$prior)) flat.prior[idx] else
paste(rhsmod$prior, collapse = ";")
flat.efa[idx] <- if (is.null(lhsmod$efa)) flat.efa[idx] else
paste(lhsmod$efa, collapse = ";")
flat.rv[idx] <- if (is.null(rhsmod$rv)) flat.rv[idx] else
paste(rhsmod$rv, collapse = ";")
modnu <- c(lhsmod, rhsmod)
if (length(modnu) > 0L) { # there is a modifier here
if (length(already) == 0) { # unknown element
mod.idx <- mod.idx + 1L
cur.mod.idx <- mod.idx
mod[[cur.mod.idx]] <- modnu
flat.rhs.mod.idx[idx] <- cur.mod.idx
} else { # known element
if (flat.rhs.mod.idx[idx] == 0) { # not yet modifier
mod.idx <- mod.idx + 1L
cur.mod.idx <- mod.idx
mod[[cur.mod.idx]] <- modnu
flat.rhs.mod.idx[idx] <- cur.mod.idx
} else { # use existing modifier index
cur.mod.idx <- flat.rhs.mod.idx[idx]
mod[[cur.mod.idx]] <- c(mod[[cur.mod.idx]], modnu)
}
}
}
}
# create flat (omit items without operator)
filled.ones <- which(flat.op != "")
flat <- list(
lhs = flat.lhs[filled.ones],
op = flat.op[filled.ones],
rhs = flat.rhs[filled.ones],
mod.idx = flat.rhs.mod.idx[filled.ones],
block = flat.block[filled.ones],
fixed = flat.fixed[filled.ones],
start = flat.start[filled.ones],
lower = flat.lower[filled.ones],
upper = flat.upper[filled.ones],
label = flat.label[filled.ones],
prior = flat.prior[filled.ones],
efa = flat.efa[filled.ones],
rv = flat.rv[filled.ones]
)
# change op for intercepts (for convenience only)
int.idx <- which(flat.op == "~" & flat.rhs == "")
if (length(int.idx) > 0L) {
flat$op[int.idx] <- "~1"
}
# new in 0.6, reorder covariances here!
flat <- lav_partable_covariance_reorder(flat)
if (as.data.frame.) {
flat <- as.data.frame(flat, stringsAsFactors = FALSE)
}
# new in 0.6-4: check for 'group' within 'level'
if (any(flat.op == ":")) {
op.idx <- which(flat.op == ":")
if (length(op.idx) < 2L) {
# only 1 block identifier? this is weird -> give warning
if (warn == TRUE)
lav_msg_warn(gettext("syntax contains only a single block identifier!"))
} else {
first.block <- flat.lhs[op.idx[1L]]
second.block <- flat.lhs[op.idx[2L]]
if (first.block == "level" && second.block == "group") {
lav_msg_stop(gettext("groups can not be nested within levels!"))
}
}
}
attr(flat, "modifiers") <- mod
attr(flat, "constraints") <- constraints
flat
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.