model_builder <- function(mf, pf, dv, endogs, exogs, constants, id, wave,
err.inv, const.inv, alpha.free, y.lag, y.free, x.free,
fixed.effects, partial.pre, weights) {
##### Get lag info ############################################################
d <- mf
vi <- pf$v_info
# Endogenous predictor lag structure
if (any(vi$endog)) {
vi_en <- vi[vi$endog == TRUE, ]
## Now going through and saving the number of lags to list for use later
endogs_lags <- vi_en$lag
names(endogs_lags) <- vi_en$new_name
endogs <- unique(vi_en$new_name)
names(endogs) <- endogs
endogs_lags <- as.list(endogs_lags)
if (any(duplicated(names(endogs_lags)))) {
dupes <- names(endogs_lags)[duplicated(names(endogs_lags))]
for (dupe in dupes) {
values <- endogs_lags[which(names(endogs_lags) == dupe)]
endogs_lags <- endogs_lags[names(endogs_lags) %nin% dupe]
endogs_lags[[dupe]] <- unname(unlist(values))
}
}
} else {
endogs <- NULL
endogs_lags <- NULL
}
# Exogenous predictor lag structure
if (!is.null(exogs)) {
vi_ex <- vi[vi$endog == FALSE, ]
## Now going through and saving the number of lags to list for use later
exogs_lags <- vi_ex$lag
names(exogs_lags) <- vi_ex$new_name
exogs <- unique(vi_ex$new_name)
names(exogs) <- exogs
exogs_lags <- as.list(exogs_lags)
if (any(duplicated(names(exogs_lags)))) {
dupes <- names(exogs_lags)[duplicated(names(exogs_lags))]
for (dupe in dupes) {
values <- exogs_lags[which(names(exogs_lags) == dupe)]
exogs_lags <- exogs_lags[names(exogs_lags) %nin% dupe]
exogs_lags[[dupe]] <- unname(unlist(values))
}
}
} else {
exogs <- NULL
exogs_lags <- NULL
}
# Saving a vector of all variables that are time varying
varying <- c(unlist(endogs), dv, unlist(exogs))
###### Widen data #############################################################
constants_2 <- constants
if (weights) constants_2 <- c(constants_2, ".weights")
# Now I need to have the data in wide format
wframe <- widen_panel_in(d, varying = varying, constants = constants_2)
# Save info about complete observations
complete_obs <- attr(wframe, "complete_obs")
###### Model build prep #######################################################
# Save list of waves
waves <- sort(unique(d[[wave]]))
min_wave <- min(d[[wave]])
max_wave <- max(d[[wave]])
# Creating list of time-varying variables in a list for constructing lavaan
# model string
vbywave <- rep(list(NA), times = length(waves))
names(vbywave) <- ch(waves)
for (w in waves) {
varnames <- sapply(varying, paste, "_", w, sep = "")
vbywave[[ch(w)]] <- varnames
names(vbywave[[ch(w)]]) <- varying # This allows indexing by the bare name
}
# start variable is used to determine which wave to begin with
start <- max(c(pf$v_info$max_lag, y.lag)) + 1 # Depends on how many lags
# But can't have lagged DV at wave 1 either
if (start < (min_wave + max(y.lag))) {start <- min_wave + max(y.lag)}
end <- max_wave
vbywave <- lapply(vbywave, function(x) {
x <- x[!duplicated(x)]
})
####### Alpha latent var definition ###########################################
# Let alpha vary across time if user requests
if (alpha.free == TRUE) {
a_f <- ""
} else { # Otherwise fixed at one
a_f <- "1 * "
}
the_dvs <- sapply(vbywave[ch(start:end)], function(x) {x[dv]})
alpha_eq <- paste("alpha =~ ", paste(paste0(a_f, the_dvs), collapse = " + "),
sep = "")
####### Main dv equations #####################################################
main_eqs <- NULL
# Create data frame of coefficient combinations and their names
var_coefs <- data.frame(var = NA, coef = NA, lag = NA)
# iterating over each wave for which we will predict the value of DV
for (w in start:end) {
reg_vars_en <- NULL
reg_vars_ex <- NULL
reg_vars_cons <- NULL
## Loop through endogenous variables, putting all in a vector
if (!is.null(endogs)) {
# Pass to helper function to reduce code duplication
reg_vars_en <- main_regressions_eq(
vars = endogs, vars_lags = endogs_lags, prefix = "en",
vbywave = vbywave, w = w, x.free = x.free, reg_vars = reg_vars_en
)
# Do the same for the var_coefs data frame
var_coefs <- main_regressions_df(
vars = endogs, vars_lags = endogs_lags, prefix = "en",
vbywave = vbywave, w = w, x.free = x.free, var_coefs = var_coefs
)
}
## If there are exogenous time varying vars, loop through those
if (!is.null(exogs)) {
# Pass to helper function to reduce code duplication
reg_vars_ex <- main_regressions_eq(
vars = exogs, vars_lags = exogs_lags, prefix = "ex",
vbywave = vbywave, w = w, x.free = x.free, reg_vars = reg_vars_ex
)
# Do the same for the var_coefs data frame
var_coefs <- main_regressions_df(
vars = exogs, vars_lags = exogs_lags, prefix = "ex",
vbywave = vbywave, w = w, x.free = x.free, var_coefs = var_coefs
)
}
## If there are constants, loop through them
if (!is.null(constants)) {
for (var in constants) {
index <- which(constants == var)
# Check if it is in x.free
postfix <- if (var %in% x.free) paste0(index, "_", w) else index
reg_vars_cons <- c(
reg_vars_cons, paste0("c", postfix, " * ", constants[index],
sep = "")
)
var_coefs[nrow(var_coefs) + 1,] <- list(var, paste0("c", postfix), 0)
}
}
reg_vars_en <- reg_vars_en[!duplicated(reg_vars_en)]
reg_vars_ex <- reg_vars_ex[!duplicated(reg_vars_ex)]
reg_vars_cons <- reg_vars_cons[!duplicated(reg_vars_cons)]
reg <- paste(vbywave[[ch(w)]][dv], "~",
paste(c(reg_vars_en, reg_vars_ex, reg_vars_cons), collapse = " + ")
)
## Lastly, add prior wave of DV
for (lag.y in y.lag) {
if (lag.y == 0) {next}
lag_term <- if ((all(is.logical(y.free)) & y.free) | lag.y %in% y.free) {
NULL
} else {
paste0("p", lag.y, " * ")
}
reg <- paste(reg, " + ", lag_term, vbywave[[ch(w - lag.y)]][dv], sep = "")
if (all(y.free == FALSE)) {
var_coefs[nrow(var_coefs) + 1,] <- list(dv, paste0("p", lag.y), lag.y)
} else {
var_coefs[nrow(var_coefs) + 1,] <- list(dv, "", lag.y)
}
}
## Save finished equation to list (okay, technically a vector)
main_eqs <- c(main_eqs, reg)
}
reg <- NULL
var_coefs <- var_coefs[complete.cases(var_coefs),]
var_coefs <- unique(var_coefs)
####### Alpha covariances #####################################################
alpha_reg <- "alpha ~~" # Beginning of equation
varying_vars <- unique(unlist(list(endogs, exogs), recursive = FALSE))
varying_lags <- unlist(list(endogs_lags, exogs_lags), recursive = FALSE)
if (any(duplicated(names(varying_lags)))) {
dupes <- names(varying_lags)[duplicated(names(varying_lags))]
for (dupe in dupes) {
values <- varying_lags[which(names(varying_lags) == dupe)]
varying_lags <- varying_lags[names(varying_lags) %nin% dupe]
varying_lags[[dupe]] <- unname(unlist(values))
}
}
alpha_vars <- NULL
for (var in varying_vars) {
w_begin <- start - max(varying_lags[[var]])
w_end <- end - min(varying_lags[[var]])
ws <- w_begin:w_end
alpha_vars <- c(alpha_vars,
sapply(vbywave[ch(ws)], function(x) {x[var]})
)
}
for (w in 1:(which(names(vbywave) == start) - 1)) {
# Add waves of DV prior to first time it is used as regression DV
if (0 %nin% y.lag) { # Don't want this for non-dynamic specification
alpha_vars <- c(alpha_vars, vbywave[[w]][dv])
}
}
if (fixed.effects == TRUE) {re <- NULL} else {re <- "0 *"}
alpha_reg <- paste(alpha_reg, paste(paste(re, alpha_vars), collapse = " + "))
######## Endogenous IV covariances ############################################
# Creating empty object to which we will append the equations
endogs_covs <- c()
# The part of the equation with time-varying exogenous predictors is the same
# no matter what, so making it once and will add each time later
exogsreg <- NULL
if (!is.null(exogs)) {
for (var in exogs) {
exogsreg <- c(exogsreg, vbywave[[ch(start - max(exogs_lags[[var]]))]][var])
for (w in (start - max(exogs_lags[[var]])):(end - min(exogs_lags[[var]]))) {
if (w > end) {next}
exogsreg <- c(exogsreg, vbywave[[ch(w)]][var])
}
}
}
# Creating the constants part of the equations, which obviously doesn't change
creg <- NULL
if (!is.null(constants)) {
for (var in constants) {
index <- which(constants == var)
creg <- c(creg, constants[index])
}
}
creg <- creg[!duplicated(creg)]
exogsreg <- exogsreg[!duplicated(exogsreg)]
# Creating endogenous IV covariances
endogs_covs <- NULL # in case there are no endogenous
mod_frame <- data.frame(dv = NA, iv = NA)
if (!is.null(endogs)) { # Checking if there are any endogenous variables
# Iterating through endogenous variables
endogs_covs <- tv_cov_eqs(var = var, start = start, end = end,
endogs_lags = endogs_lags,
vbywave = vbywave, endogs = endogs,
exogsreg = exogsreg, creg = creg, dv = dv,
y.lag = y.lag,
mod_frame = mod_frame, min_wave = min_wave,
max_wave = max_wave)
mod_frame <- attr(endogs_covs, "mod_frame")
}
###### Exogenous tv covariances ###############################################
exogs_covs <- NULL
if (length(exogs) > 0) {
# Iterating through endogenous variables
exogs_covs <- tv_cov_eqs(var = var, start = start, end = end,
endogs_lags = exogs_lags,
vbywave = vbywave, endogs = exogs,
exogsreg = NULL, creg = creg, dv = dv,
y.lag = y.lag,
mod_frame = mod_frame, min_wave = min_wave,
max_wave = max_wave)
}
######## Constants covariances ################################################
constants_covs <- c()
if (!is.null(constants)) {
for (c in constants) {
if (0 %nin% y.lag) {
## Covary with all values of DV prior to first regression
reg <- paste(c, "~~", vbywave[[ch(min_wave)]][dv])
if (start > min_wave + 1) { # This is needed if there is > 1 lag
for (wp in (min_wave + 1):(start - 1)) {
reg <- paste(reg, "+", vbywave[[ch(wp)]][dv])
}
}
} else if (which(constants == c) < length(constants)) {
reg <- paste(c, "~~")
} else {
reg <- NULL
}
if (which(constants == c) < length(constants)) {
ocs <- constants[(which(constants == c) + 1):length(constants)]
for (oc in ocs) {
reg <- paste(reg, "+", oc)
}
}
constants_covs <- c(constants_covs, reg)
}
}
reg <- NULL
###### Correlated future errors equations #####################################
endogs_errs <- c()
reg <- NULL
if (!is.null(endogs)) {
# To allow partial correlation, you just need to flip a 1 to a 0...
lag_offset <- ifelse(partial.pre, yes = 0, no = 1)
# Using this to control whether final wave of endogenous pred is used
for (w in start:(end - 1)) {
if (w == min_wave && start != min_wave) {next}
reg <- paste(vbywave[[ch(w)]][dv], " ~~", sep = "")
reg_vars <- NULL
for (var in endogs) {
w2 <- w + lag_offset
while (w2 <= end - min(endogs_lags[[var]])) {
if (w2 > end) {next}
reg_vars <- c(reg_vars, vbywave[[ch(w2)]][var])
w2 <- w2 + 1
}
}
if (is.null(reg_vars)) {
# Do nothing because it's an empty equation
} else {
reg <- paste(reg, paste(reg_vars, collapse = " + "))
endogs_errs <- c(reg, endogs_errs)
}
reg <- NULL
}
}
####### DV error variance equations (optional) #################################
# If user wants DV error variances for each wave to be held constant, do it
if (err.inv == TRUE) {
err_var_eqs_ann <- "## Holding DV error variance constant for each wave"
var_co <- "v*"
} else {
err_var_eqs_ann <- "## DV error variance free to vary across waves"
var_co <- NULL
}
err_var_eqs <- NULL
for (w in start:end) {
eq <- paste(vbywave[[ch(w)]][dv], " ~~ ", var_co, vbywave[[ch(w)]][dv], sep = "")
err_var_eqs <- c(err_var_eqs, eq)
}
###### DV variance constraints #######################################
dv_vars <- c()
dv_vars_ann <- NULL
if (const.inv == TRUE) {
dv_vars_ann <- "## Hold DV variance constant each wave (optional)"
var_co <- "var*"
} else {
dv_vars_ann <- "## Let DV variance vary across waves"
var_co <- NULL
}
for (w in start:end) {
reg <- paste(vbywave[[ch(w)]][dv], " ~ ", var_co, "1", sep = "")
# Add finished equation to list
dv_vars <- c(dv_vars, reg)
reg <- NULL
}
reg <- NULL
###### DV multiple lags covariances ###########################################
lag_covs <- NULL
if (any(y.lag > 1)) {
lag <- max(y.lag)
lag <- paste0(dv, "_", min_wave + lag)
ylags <- sapply(vbywave[ch(max_wave:min_wave)], function(x) {x[dv]})
ylags <- ylags[(which(ylags == lag) + 1):length(ylags)]
lag_reg <- paste(
ylags[1], "~~", paste(ylags[-1], collapse = " + ")
)
lag_covs <- c(lag_covs, lag_reg)
ylags <- ylags[-1]
while (length(ylags) > 1) {
lag_reg <- paste(
ylags[1], "~~", paste(ylags[-1], collapse = " + ")
)
lag_covs <- c(lag_covs, lag_reg)
ylags <- ylags[-1]
}
}
###### Defining annotations to be used in final model string ##################
alpha_eq_ann <- "## Alpha latent variable (random intercept)"
alpha_reg_ann <- if (fixed.effects == TRUE) {
"## Alpha free to covary with observed variables (fixed effects)"
} else {
"## Alpha cannot covary with observed variables (random effects)"
}
main_eqs_ann <- "## Main regressions"
endogs_errs_ann <- if (!is.null(endogs)) {
"## Correlating DV errors with future values of predetermined predictors"
} else {NULL}
endogs_covs_ann <- if (!is.null(endogs)) {
"## Predetermined predictors covariances"
} else {NULL}
exogs_covs_ann <- "## Exogenous (time varying and invariant) predictors covariances"
# Now save all parts of the lavaan model to one object
all_eqs <- list(main_eqs_ann, main_eqs, alpha_eq_ann, alpha_eq,
alpha_reg_ann, alpha_reg, endogs_errs_ann, endogs_errs,
endogs_covs_ann, endogs_covs,
exogs_covs_ann, exogs_covs, constants_covs, lag_covs,
err_var_eqs_ann, err_var_eqs, dv_vars_ann, dv_vars)
# Adding line breaks between the equations
out <- lapply(all_eqs, concat) # concat is an internal function
# Adding extra line breaks between each set of equations
out <- concat2(out) # concat2 is an internal function
# Going to deal with non-syntactic names
if (any(make.names(names(wframe)) %nin% names(wframe))) {
ns_names <- names(wframe)[make.names(names(wframe)) %nin% names(wframe)]
for (n in ns_names) {
# reg_pattern <- paste0(escape_regex(n), "_[0-9]")
# rep_pattern <- paste0("`\\\\1`")
# out <- stringr::str_replace_all(out, reg_pattern, )
out <- gsub(n, paste0("`", n, "`"), out, fixed = TRUE)
}
}
ret_obj <- list(model = out, data = wframe, complete_obs = complete_obs,
start = start, end = end, var_coefs = var_coefs,
any_weights = !is.null(weights))
if (!is.null(endogs)) {
ret_obj$endogs_lags <- endogs_lags
ret_obj$endogs <- endogs
}
if (!is.null(exogs)) {
ret_obj$exogs_lags <- exogs_lags
ret_obj$exogs <- exogs
}
return(ret_obj)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.