Nothing
#' Short Form Tabu Search
#'
#' Given an initial (full) lavaan model string, the original data, a criterion
#' function to minimize, and some additional specifications,
#' performs a Tabu model specification search. Currently only supports
#' neighbors that are 1 move away from the current model.
#'
#'
#' @param initialModel The initial model (typically the full form) as a character vector with lavaan model.syntax.
#' @param originalData The original data frame with variable names.
#' @param numItems A numeric vector indicating the number of items to retain for each factor.
#' @param criterion A function calculating the objective criterion to minimize. Default is to use the built-in `rmsea` value from `lavaan::fitmeasures()`.
#' @param niter A numeric value indicating the number of iterations (model specification selections)
#' to perform. Default is 50.
#' @param tabu.size A numeric value indicating the size of Tabu list. Default is 5.
#' @param lavaan.model.specs A list which contains the specifications for the
#' lavaan model. The default values are the defaults for lavaan to perform a
#' CFA. See \link[lavaan]{lavaan} for more details.
#' @param bifactor Logical. Indicates if the latent model is a bifactor model. If `TRUE`, assumes that the last latent variable in the provided model syntax is the bifactor (i.e., all of the retained items will be set to load on the last latent variable).
#' @param verbose Logical. If `TRUE`, prints out the initial short form and the selected short form at the end of each iteration.
#' @param parallel An option for using parallel processing. If \code{TRUE}, the
#' function will utilize all available cores. Default is \code{TRUE}.
#'
#' @return A named list with the best value of the objective function (`best.obj`) and the best lavaan model object (`best.mod`).
#' @export
#'
#' @examples
#' shortAntModel <- "
#' Ability =~ Item1 + Item2 + Item3 + Item4 + Item5 + Item6 + Item7 + Item8
#' Ability ~ Outcome
#' "
#'
#' data(simulated_test_data)
#' tabuResult <- tabuShortForm(
#' initialModel = shortAntModel,
#' originalData = simulated_test_data, numItems = 7,
#' niter = 1, tabu.size = 3, parallel = FALSE
#' )
#' summary(tabuResult) # shows the resulting model
#' \dontrun{
#' # create simulation data from the `psych` package
#' # four factors, 12 items each, 48 total items
#' # factor loading matrix - not quite simple structure
#' fxMatrix <-
#' matrix(
#' data = c(
#' rep(x = c(.9, .7, .5, .3), times = 3),
#' rep(0.2, times = 3 * 4 * 3), # first factor loadings
#'
#' rep(0.2, times = 3 * 4),
#' rep(x = c(.9, .7, .5, .3), times = 3),
#' rep(0.2, times = 3 * 4 * 2), # second factor loadings
#'
#' rep(0.2, times = 3 * 4 * 2),
#' rep(x = c(.9, .7, .5, .3), times = 3),
#' rep(0.2, times = 3 * 4), # third factor loadings
#'
#' rep(0.2, times = 3 * 4 * 3),
#' rep(x = c(.9, .7, .5, .3), times = 3) # fourth factor loadings
#' ),
#' ncol = 4
#' )
#' # factor correlation matrix - all factors uncorrelated
#' PhiMatrix <-
#' matrix(data = c(
#' 1, 0, 0, 0,
#' 0, 1, 0, 0,
#' 0, 0, 1, 0,
#' 0, 0, 0, 1
#' ), ncol = 4)
#' tabuData <-
#' psych::sim(
#' fx = fxMatrix,
#' Phi = PhiMatrix,
#' n = 1000,
#' raw = TRUE
#' )$observed # observed is the simulated observed data
#'
#' # NOTE: you must specify the model such that each factor is on a single line!
#' # otherwise, the algorithm will not work correctly!
#' tabuModel <- "
#' Trait1 =~ Item1 + Item2 + Item3 + Item4 + Item5 + Item6 +
#' Item7 + Item8 + Item9 + Item10 + Item11 + Item12
#' Trait2 =~ Item13 + Item14 + Item15 + Item16 + Item17 +
#' Item18 + Item19 + Item20 + Item21 + Item22 + Item23 + Item24
#' Trait3 =~ Item25 + Item26 + Item27 + Item28 + Item29 + Item30 +
#' Item31 + Item32 + Item33 + Item34 + Item35 + Item36
#' Trait4 =~ Item37 + Item38 + Item39 + Item40 + Item41 +
#' Item42 + Item43 + Item44 + Item45 + Item46 + Item47 + Item48
#' "
#'
#' colnames(tabuData) <- paste0("Item", 1:48)
#' # specify the criterion function that the Tabu Search minimizes
#' # wrap this in a tryCatch in case a model does not converge!
#' # specify an appropriate error value: if minimizing, error value must be large
#' tabuCriterion <- function(x) {
#' tryCatch(lavaan::fitmeasures(object = x, fit.measures = "chisq"),
#' error = function(e) Inf
#' )
#' }
#'
#' # use the tabuShortForm function
#' # reduce form to the best 12 items
#' tabuShort <- tabuShortForm(
#' initialModel = tabuModel, originalData = tabuData,
#' numItems = c(3, 3, 3, 3),
#' criterion = tabuCriterion,
#' niter = 20, tabu.size = 10
#' )
#' }
#'
tabuShortForm <- function(originalData,
initialModel,
numItems,
criterion = function(x) {
tryCatch(-lavaan::fitmeasures(object = x, fit.measures = "cfi"),
error = function(e) Inf
)
},
niter = 20,
tabu.size = 5,
lavaan.model.specs = list(
int.ov.free = TRUE,
int.lv.free = FALSE,
std.lv = TRUE,
auto.fix.first = FALSE,
auto.fix.single = TRUE,
auto.var = TRUE,
auto.cov.lv.x = TRUE,
auto.th = TRUE,
auto.delta = TRUE,
auto.cov.y = TRUE,
ordered = NULL,
model.type = "cfa",
estimator = "default"
),
bifactor = FALSE,
verbose = FALSE,
parallel = T) {
start.time = Sys.time()
mapply(
assign,
names(lavaan.model.specs),
lavaan.model.specs,
MoreArgs = list(envir = environment())
)
allItems <-
colnames(originalData)
# extract the latent factor syntax
mapply(
assign,
c("factors", "itemsPerFactor"),
syntaxExtraction(initialModelSyntaxFile = initialModel, items = allItems),
MoreArgs = list(envir = parent.frame())
)
# save the external relationships
vectorModel <- unlist(strsplit(x = initialModel, split = "\\n"))
externalRelation <- vectorModel[grep("[ ]{0,}(?<!=)~ ", vectorModel, perl = T)]
factorRelation <- vectorModel[grep("[ ]{0,}~~ ", vectorModel)]
init.model <-
randomInitialModel(init.model = initialModel,
maxItems = numItems,
allItems = allItems,
initialData = originalData,
bifactorModel = bifactor,
lavaan.model.specs = lavaan.model.specs)
best.obj <- all.obj <- current.obj <- criterion(init.model@model.output)
best.mod <- current.model <- init.model@model.output
best.syntax <- current.syntax <- init.model@model.syntax
names(itemsPerFactor) <- factors
convergence <-
function(x) {
converge <-
x@Fit@converged
se <-
!any(is.na(x@Fit@se))
ifelse (converge==TRUE & se == TRUE, criterion(x), NA)
}
if (verbose == TRUE) {
print("Initial short form selected:")
cat(current.syntax)
}
tabu.list <- vector("list")
all.syntax <-
vector(length = niter + 1)
all.syntax[1] <-
current.syntax
chk <- Sys.getenv("_R_CHECK_LIMIT_CORES_", "")
if (parallel) {
if (nzchar(chk) && chk == "TRUE") {
# use 2 cores in CRAN/Travis/AppVeyor
num_workers <- 2L
} else {
# use all cores in devtools::test()
num_workers <- parallel::detectCores()
}
} else {
num_workers = 1
}
cl <- parallel::makeCluster(num_workers,type="PSOCK", outfile = "")
doSNOW::registerDoSNOW(cl)
`%dopar%` <- foreach::`%dopar%`
# Do iterations
for (it in 1:niter) {
cat(paste0("\rRunning iteration ", it, " of ", niter, ". "))
# Loop through all neighbors
tmp.obj <- vector("numeric")
tmp.mod <- vector("list", length(factors))
tmp.syntax <- vector("list", length(factors))
for (j in 1:length(factors)) {
currentModelSyntax <-
strsplit(current.syntax, split = "\n")[[1]]
currentFactor <-
factors[j]
otherFactors <-
factors[-j]
currentItems <-
itemsPerFactor[[ factors[j] ]][
itemsPerFactor[[ factors[j] ]] %in%
unlist(
strsplit(
grep(paste0(factors[j], ".*=~"), currentModelSyntax, value = T),
split = " "
)
)
]
removedItems <-
itemsPerFactor[[ factors[j] ]][
!itemsPerFactor[[ factors[j] ]] %in%
unlist(
strsplit(
grep(paste0(factors[j], ".*=~"), currentModelSyntax, value = T),
split = " "
)
)
]
itemChange <-
expand.grid(currentItems, removedItems)
tmp.mod[[j]] <- vector("list", length = nrow(itemChange))
tmp.syntax[[j]] <- vector("list", length = nrow(itemChange))
for (k in 1:nrow(itemChange)) {
newItems <-
gsub(pattern = paste0(itemChange[k, 1], "\\b"), replacement = itemChange[k, 2], currentItems)
newCurrentFactor <-
paste(
factors[j], "=~",
paste(newItems, collapse = " + ")
)
currentModelSyntax[grepl(paste0(factors[j], ".*=~"), currentModelSyntax)] <-
newCurrentFactor
newModelSyntax <-
paste(stringr::str_trim(currentModelSyntax),
collapse = " \n"
)
tmp.syntax[[j]][[k]] <- newModelSyntax
}
}
tmp.syntax <-
unlist(tmp.syntax)
model = 0L
tmp.mod <-
foreach::foreach(model = 1:length(tmp.syntax), .inorder = T) %dopar% {
fitmodel <-
modelWarningCheck(
lavaan::lavaan(
model = tmp.syntax[model],
data = originalData,
model.type = model.type,
int.ov.free = int.ov.free,
int.lv.free = int.lv.free,
auto.fix.first = auto.fix.first,
std.lv = std.lv,
auto.fix.single = auto.fix.single,
auto.var = auto.var,
auto.cov.lv.x = auto.cov.lv.x,
auto.th = auto.th,
auto.delta = auto.delta,
auto.cov.y = auto.cov.y,
ordered = ordered,
estimator = estimator,
warn = FALSE
),
newModelSyntax
)@model.output
}
tmp.obj <-
sapply(tmp.mod, convergence)
tmp.mod <-
unlist(tmp.mod)
tmp.tables <-
lapply(tmp.mod,
lavaan::partable)
# Check which indices result in a valid objective function
valid <- which(!is.na(tmp.obj))
# Get just models not on Tabu list
if (length(tabu.list) > 0) {
notTabu <- rep(TRUE, length = length(tmp.tables))
for (i in valid) {
for (j in 1:length(tabu.list)) {
notTabu[i] <-
ifelse(notTabu[i] == FALSE,
FALSE,
!identical(tmp.tables[[i]],
lavaan::partable(tabu.list[[j]])
)
)
}
}
newValid <- notTabu
} else {
newValid <- valid
}
# Out of valid models, pick model with best objective function value
indx <- which.min(tmp.obj[newValid])
# Move current state to next model
current.obj <- (tmp.obj[newValid])[indx]
all.obj <- c(all.obj, current.obj)
current.mod <- (tmp.mod[newValid])[[indx]]
current.syntax <- (tmp.syntax[newValid])[[indx]]
all.syntax[it + 1] <- current.syntax
if (verbose == TRUE) {
print(current.mod)
print(current.syntax)
}
# Update Tabu list
tabu.list <- c(current.mod, tabu.list)
if (length(tabu.list) > tabu.size) {
tabu.list <- tabu.list[1:tabu.size]
}
# Update if the current model is better than the best model
if (current.obj < best.obj) {
best.obj <- current.obj
best.mod <- current.mod
best.syntax <- current.syntax
tabu.list <- vector("list") # Clear Tabu list
}
}
foreach::registerDoSEQ()
parallel::stopCluster(cl)
if (class(best.obj)[1] == "lavaan.vector") {
temp = as.numeric(best.obj)
names(temp) = names(best.obj)
best.obj = temp
}
ret <-
new("TS",
function_call = match.call(),
all_fit = all.obj,
best_fit = best.obj,
best_model = best.mod,
best_syntax = best.syntax,
runtime = Sys.time() - start.time,
final_tabu_list = tabu.list
)
ret
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.