# parse psindex syntax
# YR 14 Jan 2014: move to lav_syntax.R
lavParseModelString <- function(model.syntax = '', as.data.frame. = FALSE,
warn = TRUE, debug = FALSE) {
# check for empty syntax
if(length(model.syntax) == 0) {
stop("psindex ERROR: empty model syntax")
}
# remove comments prior to split.
# Match from comment character to newline, but don't eliminate newline
model.syntax <- gsub("[#!].*(?=\n)","", model.syntax, perl=TRUE)
# replace semicolons with newlines prior to split
model.syntax <- gsub(";", "\n", model.syntax, fixed=TRUE)
#remove whitespace prior to split
model.syntax <- gsub("[ \t]+", "", model.syntax, perl=TRUE)
# remove any occurrence of >= 2 consecutive newlines to eliminate \
# blank statements; this retains a blank newline at the beginning,
# if such exists, but parser will not choke because of start.idx
model.syntax <- gsub("\n{2,}", "\n", model.syntax, perl=TRUE)
# break up in lines
model <- unlist( strsplit(model.syntax, "\n") )
# check for multi-line formulas: they contain no "~" or "=" character
# but before we do that, we remove all modifiers
# to avoid confusion with for example equal("f1=~x1") statements
model.simple <- gsub("\\(.*\\)\\*", "MODIFIER*", model)
start.idx <- grep("[~=<>:|%]", model.simple)
# check for empty start.idx: no operator found (new in 0.6-1)
if(length(start.idx) == 0L) {
stop("psindex ERROR: model does not contain psindex syntax (no operators found)")
}
# check for non-empty string, without an operator in the first lines
# (new in 0.6-1)
if(start.idx[1] > 1L) {
# two possibilities:
# - we have an empty line (ok)
# - the element contains no operator (warn!)
for(el in 1:(start.idx[1] - 1L)) {
# not empty?
if(nchar(model.simple[el]) > 0L) {
warning("psindex WARNING: no operator found in this syntax line: ", model.simple[el], "\n", " This syntax line will be ignored!")
}
}
}
end.idx <- c( start.idx[-1]-1, length(model) )
model.orig <- model
model <- character( length(start.idx) )
for(i in 1:length(start.idx)) {
model[i] <- paste(model.orig[start.idx[i]:end.idx[i]], collapse="")
}
# ok, in all remaining lines, we should have a '~' operator
# OR one of '=', '<', '>', '|' outside the ""
model.simple <- gsub("\\\".[^\\\"]*\\\"", "LABEL", model)
idx.wrong <- which(!grepl("[~=<>:|%]", model.simple))
if(length(idx.wrong) > 0) {
cat("psindex: missing operator in formula(s):\n")
print(model[idx.wrong])
stop("psindex ERROR: syntax error in psindex model syntax")
}
# but perhaps we have a '+' as the first character?
idx.wrong <- which(grepl("^\\+", model))
if(length(idx.wrong) > 0) {
cat("psindex: some formula(s) start with a plus (+) sign:\n")
print(model[idx.wrong])
stop("psindex ERROR: syntax error in psindex model syntax")
}
# main operation: flatten formulas into single bivariate pieces
# with a left-hand-side (lhs), an operator (eg "=~"), and a
# right-hand-side (rhs)
# both lhs and rhs can have a modifier
# (but we ignore the lhs modifier for now)
FLAT.lhs <- character(0)
#FLAT.lhs.mod <- character(0)
FLAT.op <- character(0)
FLAT.rhs <- character(0)
FLAT.rhs.mod.idx <- integer(0)
FLAT.block <- integer(0) # keep track of groups using ":" operator
FLAT.fixed <- character(0) # only for display purposes!
FLAT.start <- character(0) # only for display purposes!
FLAT.label <- character(0) # only for display purposes!
FLAT.prior <- character(0)
FLAT.idx <- 0L
MOD.idx <- 0L
CON.idx <- 0L
MOD <- vector("list", length=0L)
CON <- vector("list", length=0L)
BLOCK <- 1L
BLOCK_OP <- FALSE
for(i in 1:length(model)) {
x <- model[i]
if(debug) {
cat("formula to parse:\n"); print(x); cat("\n")
}
# 1. which operator is used?
line.simple <- gsub("\\\".[^\\\"]*\\\"", "LABEL", x)
# "=~" operator?
if(grepl("=~", line.simple, fixed=TRUE)) {
op <- "=~"
# "<~" operator?
} else if(grepl("<~", line.simple, fixed=TRUE)) {
op <- "<~"
} else if(grepl("~*~", line.simple, fixed=TRUE)) {
op <- "~*~"
# "~~" operator?
} else if(grepl("~~", line.simple, fixed=TRUE)) {
op <- "~~"
# "~" operator?
} else if(grepl("~", line.simple, fixed=TRUE)) {
op <- "~"
# "==" operator?
} else if(grepl("==", line.simple, fixed=TRUE)) {
op <- "=="
# "<" operator?
} else if(grepl("<", line.simple, fixed=TRUE)) {
op <- "<"
# ">" operator?
} else if(grepl(">", line.simple, fixed=TRUE)) {
op <- ">"
# ":=" operator?
} else if(grepl(":=", line.simple, fixed=TRUE)) {
op <- ":="
# ":" operator?
} else if(grepl(":", line.simple, fixed=TRUE)) {
op <- ":"
# "|" operator?
} else if(grepl("|", line.simple, fixed=TRUE)) {
op <- "|"
# "%" operator?
} else if(grepl("%", line.simple, fixed=TRUE)) {
op <- "%"
} else {
stop("unknown operator in ", model[i])
}
# 2. split by operator (only the *first* occurence!)
# check first if equal/label modifier has been used on the LEFT!
if(substr(x,1,6) == "label(")
stop("label modifier can not be used on the left-hand side of the operator")
if(op == "|") {
op.idx <- regexpr("\\|", x)
} else if(op == "~*~") {
op.idx <- regexpr("~\\*~", x)
} else {
op.idx <- regexpr(op, x)
}
lhs <- substr(x, 1L, op.idx-1L)
# fix for 'NA' names in lhs; not likely to happen to ov.names
# since 'NA' is not a valid name for list elements/data.frame columns
if(lhs == "NA") lhs <- "NA."
rhs <- substr(x, op.idx+attr(op.idx, "match.length"), nchar(x))
# check if first character is '+'; if so, remove silently
if(substr(rhs, 1, 1) == "+") {
rhs <- substr(rhs, 2, nchar(rhs))
}
# 2b. if operator is "==" or "<" or ">" or ":=", put it in CON
if(op == "==" || op == "<" || op == ">" || op == ":=") {
# remove quotes, if any
lhs <- gsub("\\\"", "", lhs)
rhs <- gsub("\\\"", "", rhs)
CON.idx <- CON.idx + 1L
CON[[CON.idx]] <- list(op=op, lhs=lhs, rhs=rhs, user=1L)
next
}
# 2c if operator is ":", put it in BLOCK
if(op == ":") {
FLAT.idx <- FLAT.idx + 1L
FLAT.lhs[FLAT.idx] <- lhs
FLAT.op[ FLAT.idx] <- op
FLAT.rhs[FLAT.idx] <- rhs
FLAT.fixed[FLAT.idx] <- ""
FLAT.start[FLAT.idx] <- ""
FLAT.label[FLAT.idx] <- ""
FLAT.prior[FLAT.idx] <- ""
FLAT.rhs.mod.idx[FLAT.idx] <- 0L
if(BLOCK_OP) {
BLOCK <- BLOCK + 1L
}
FLAT.block[FLAT.idx] <- BLOCK
BLOCK_OP <- TRUE
next
}
# 3. parse left hand
# lhs modifiers will be ignored for now
lhs.formula <- as.formula(paste("~",lhs))
out <- lav_syntax_parse_rhs(rhs=lhs.formula[[2L]])
lhs.names <- names(out)
# check if we have modifiers
if(sum(sapply(out, length)) > 0L) {
warning("psindex WARNING: left-hand side of formula below contains modifier:\n", x,"\n")
}
# 4. lav_syntax_parse_rhs (as rhs of a single-sided formula)
# new 0.5-12: before we do this, replace '0.2?' by 'start(0.2)*'
# requested by the simsem folks
rhs <- gsub('\\(?([-]?[0-9]*\\.?[0-9]*)\\)?\\?',"start(\\1)\\*", rhs)
rhs.formula <- as.formula(paste("~",rhs))
out <- lav_syntax_parse_rhs(rhs=rhs.formula[[2L]],op=op)
if(debug) print(out)
# for each lhs element
for(l in 1:length(lhs.names)) {
# for each rhs element
for(j in 1:length(out)) {
# catch intercepts
if(names(out)[j] == "intercept") {
if(op == "~") {
rhs.name <- ""
} else {
stop("psindex ERROR: right-hand side of formula contains an intercept, but operator is \"", op, "\" in: ", x)
}
} else if(names(out)[j] == "..zero.." && op == "~") {
rhs.name <- ""
} else if(names(out)[j] == "..constant.." && op == "~") {
rhs.name <- ""
} else {
rhs.name <- names(out)[j]
}
# move this 'check' to post-parse
#if(op == "|") {
# th.name <- paste("t", j, sep="")
# if(names(out)[j] != th.name) {
# stop("psindex ERROR: threshold ", j, " of variable ",
# sQuote(lhs.names[1]), " should be named ",
# sQuote(th.name), "; found ",
# sQuote(names(out)[j]), "\n")
# }
#}
# catch lhs = rhs and op = "=~"
if(op == "=~" && lhs.names[l] == names(out)[j]) {
stop("psindex ERROR: latent variable `", lhs.names[l], "' can not be measured by itself")
}
# check if we not already have this combination (in this group)
# 1. asymmetric (=~, ~, ~1)
if(op != "~~") {
idx <- which(FLAT.lhs == lhs.names[l] &
FLAT.op == op &
FLAT.block == BLOCK &
FLAT.rhs == rhs.name)
if(length(idx) > 0L) {
stop("psindex ERROR: duplicate model element in: ", model[i])
}
} else {
# 2. symmetric (~~)
idx <- which(FLAT.lhs == rhs.name &
FLAT.op == "~~" &
FLAT.block == BLOCK &
FLAT.rhs == lhs.names[l])
if(length(idx) > 0L) {
stop("psindex ERROR: duplicate model element in: ", model[i])
}
}
FLAT.idx <- FLAT.idx + 1L
FLAT.lhs[FLAT.idx] <- lhs.names[l]
FLAT.op[ FLAT.idx] <- op
FLAT.rhs[FLAT.idx] <- rhs.name
FLAT.block[FLAT.idx] <- BLOCK
FLAT.fixed[FLAT.idx] <- ""
FLAT.start[FLAT.idx] <- ""
FLAT.label[FLAT.idx] <- ""
FLAT.prior[FLAT.idx] <- ""
mod <- list()
rhs.mod <- 0L
if(length(out[[j]]$fixed) > 0L) {
mod$fixed <- out[[j]]$fixed
FLAT.fixed[FLAT.idx] <- paste(mod$fixed, collapse=";")
rhs.mod <- 1L
}
if(length(out[[j]]$start) > 0L) {
mod$start <- out[[j]]$start
FLAT.start[FLAT.idx] <- paste(mod$start, collapse=";")
rhs.mod <- 1L
}
if(length(out[[j]]$label) > 0L) {
mod$label <- out[[j]]$label
FLAT.label[FLAT.idx] <- paste(mod$label, collapse=";")
rhs.mod <- 1L
}
if(length(out[[j]]$prior) > 0L) {
mod$prior <- out[[j]]$prior
FLAT.prior[FLAT.idx] <- paste(mod$prior, collapse=";")
rhs.mod <- 1L
}
#if(op == "~1" && rhs == "0") {
# mod$fixed <- 0
# FLAT.fixed[FLAT.idx] <- paste(mod$fixed, collapse=";")
# rhs.mod <- 1L
#}
if(op == "=~" && rhs == "0") {
mod$fixed <- 0
FLAT.rhs[FLAT.idx] <- FLAT.lhs[FLAT.idx]
FLAT.fixed[FLAT.idx] <- paste(mod$fixed, collapse=";")
rhs.mod <- 1L
}
FLAT.rhs.mod.idx[FLAT.idx] <- rhs.mod
if(rhs.mod > 0L) {
MOD.idx <- MOD.idx + 1L
MOD[[MOD.idx]] <- mod
}
} # rhs elements
} # lhs elements
} # model elements
# enumerate modifier indices
mod.idx <- which(FLAT.rhs.mod.idx > 0L)
FLAT.rhs.mod.idx[ mod.idx ] <- 1:length(mod.idx)
FLAT <- list(lhs=FLAT.lhs, op=FLAT.op, rhs=FLAT.rhs,
mod.idx=FLAT.rhs.mod.idx, block=FLAT.block,
fixed=FLAT.fixed, start=FLAT.start,
label=FLAT.label, prior=FLAT.prior)
# change op for intercepts (for convenience only)
int.idx <- which(FLAT$op == "~" & FLAT$rhs == "")
if(length(int.idx) > 0L) {
FLAT$op[int.idx] <- "~1"
}
# new in 0.6, reorder covariances here!
FLAT <- lav_partable_covariance_reorder(FLAT)
if(as.data.frame.) {
FLAT <- as.data.frame(FLAT, stringsAsFactors=FALSE)
}
attr(FLAT, "modifiers") <- MOD
attr(FLAT, "constraints") <- CON
FLAT
}
lav_syntax_parse_rhs <- function(rhs, op="") {
# new version YR 15 dec 2011!
# - no 'equal' field anymore (only labels!)
# - every modifier is evaluated
# - unquoted labels are allowed (eg. x1 + x2 + c(v1,v2,v3)*x3)
# fill in rhs list
out <- list()
repeat {
if(length(rhs) == 1L) { # last one and only a single element
out <- c(vector("list", 1L), out)
NAME <- all.vars(rhs)
if(length(NAME) > 0L) {
names(out)[1L] <- NAME
} else { # intercept or zero?
if(as.character(rhs) == "1") {
names(out)[1L] <- "intercept"
} else if(as.character(rhs) == "0") {
names(out)[1L] <- "..zero.."
out[[1L]]$fixed <- 0
} else {
names(out)[1L] <- "..constant.."
out[[1L]]$fixed <- 0
}
}
break
} else if(rhs[[1L]] == "*") { # last one, but with modifier
out <- c(vector("list", 1L), out)
NAME <- all.vars(rhs[[3L]])
if(length(NAME) > 0L) { # not an intercept
# catch interaction term
rhs3.names <- all.names(rhs[[3L]])
if(rhs3.names[1L] == ":") {
NAME <- paste(NAME[1L], ":", NAME[2L], sep = "")
}
names(out)[1L] <- NAME
} else { # intercept
names(out)[1L] <- "intercept"
}
i.var <- all.vars(rhs[[2L]], unique=FALSE)
if(length(i.var) > 0L) {
# modifier are unquoted labels
out[[1L]]$label <- i.var
} else {
# modifer is something else
out[[1L]] <- lav_syntax_get_modifier(rhs[[2L]])
}
break
} else if(rhs[[1L]] == ":") { # last one, but interaction term
out <- c(vector("list", 1L), out)
NAME <- all.vars(rhs)
NAME <- paste(NAME[1L], ":", NAME[2L], sep = "")
names(out)[1L] <- NAME
break
} else if(rhs[[1L]] == "+") { # not last one!
i.var <- all.vars(rhs[[3L]], unique=FALSE)
n.var <- length(i.var)
# catch interaction term
rhs3.names <- all.names(rhs[[3L]])
if(length(i.var) > 1L && ":" %in% rhs3.names) {
colon.idx <- which(rhs3.names == ":")
i.var <- i.var[seq_len(n.var - 1L)]
n.var <- n.var - 1L
i.var[n.var] <- paste(rhs3.names[colon.idx + 1L], ":",
rhs3.names[colon.idx + 2L], sep = "")
}
out <- c(vector("list", 1L), out)
if(length(i.var) > 0L) {
names(out)[1L] <- i.var[n.var]
} else {
names(out)[1L] <- "intercept"
}
if(n.var > 1L) {
# modifier are unquoted labels
out[[1L]]$label <- i.var[-n.var]
} else if(length(rhs[[3L]]) == 3L && rhs3.names[1L] == "*") {
# modifiers!!
out[[1L]] <- lav_syntax_get_modifier(rhs[[3L]][[2L]])
}
# next element
rhs <- rhs[[2L]]
} else {
stop("psindex ERROR: I'm confused parsing this line: ", rhs, "\n")
}
}
# if multiple elements, check for duplicated elements and merge if found
if(length(out) > 1L) {
rhs.names <- names(out)
while( !is.na(idx <- which(duplicated(rhs.names))[1L]) ) {
dup.name <- rhs.names[ idx ]
orig.idx <- match(dup.name, rhs.names)
merged <- c( out[[orig.idx]], out[[idx]] )
if(!is.null(merged)) # be careful, NULL will delete element
out[[orig.idx]] <- merged
out <- out[-idx]
rhs.names <- names(out)
}
}
# if thresholds, check order and reorder if necessary
#if(op == "|") {
# t.names <- names(out)
# idx <- match(sort(t.names), t.names)
# out <- out[idx]
#}
out
}
lav_syntax_get_modifier <- function(mod) {
if(length(mod) == 1L) {
# three possibilites: 1) numeric, 2) NA, or 3) quoted character
if( is.numeric(mod) )
return( list(fixed=mod) )
if( is.na(mod) )
return( list(fixed=as.numeric(NA)) )
if( is.character(mod) )
return( list(label=mod) )
} else if(mod[[1L]] == "start") {
cof <- unlist(lapply(as.list(mod)[-1],
eval, envir=NULL, enclos=NULL))
return( list(start=cof) )
} else if(mod[[1L]] == "equal") {
label <- unlist(lapply(as.list(mod)[-1],
eval, envir=NULL, enclos=NULL))
return( list(label=label) )
} else if(mod[[1L]] == "label") {
label <- unlist(lapply(as.list(mod)[-1],
eval, envir=NULL, enclos=NULL))
label[is.na(label)] <- "" # catch 'NA' elements in a label
return( list(label=label) )
} else if(mod[[1L]] == "prior") {
prior <- unlist(lapply(as.list(mod)[-1],
eval, envir=NULL, enclos=NULL))
return( list(prior=prior) )
} else if(mod[[1L]] == "c") {
# vector: we allow numeric and character only!
cof <- unlist(lapply(as.list(mod)[-1],
eval, envir=NULL, enclos=NULL))
if(all(is.na(cof))) {
return( list(fixed=rep(as.numeric(NA), length(cof))) )
} else if(is.numeric(cof))
return( list(fixed=cof) )
else if(is.character(cof)) {
cof[is.na(cof)] <- "" # catch 'NA' elements in a label
return( list(label=cof) )
} else {
stop("psindex ERROR: can not parse modifier:", mod, "\n")
}
} else {
# unknown expression
# as a final attempt, we will evaluate it and coerce it
# to either a numeric or character (vector)
cof <- try( eval(mod, envir=NULL, enclos=NULL), silent=TRUE)
if(is.numeric(cof))
return( list(fixed=cof) )
else if(is.character(cof))
return( list(label=cof) )
else {
stop("psindex ERROR: can not parse modifier:", mod, "\n")
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.