knitr::opts_chunk$set( collapse = TRUE, comment = "## ", fig.path = "README-" )
Automatic Short Form Creation for scales. Currently, the Ant Colony Optimization (ACO) Algorithm and the Tabu search are implemented. The original R implementation for the ACO algorithm is from Leite, Huang, & Marcoulides (2008), while the Tabu search function was taken from Marcoulides & Falk (2018). There does not yet seem to be an application of Simulated Annealing (SA) within psychometrics, but Drezner & Marcoulides, 1999 (in Multiple Linear Regression Viewpoints, Volume 25(2); not available online) used SA for multiple regression model selection; this package appears to be the first to implement SA for psychometric models.
This document was created on r Sys.Date()
.
install.packages("ShortForm") # the CRAN-approved version require("devtools") devtools::install_github("AnthonyRaborn/ShortForm", branch = "devel") # the developmental version
Here are some (slightly modified) examples from the help documentation using
lavaan. Be warned, the algorithms may take some time to converge, particularly
with large forms, multiple dimensions, and different settings. The time for these
examples to converge on a laptop with an Intel Core i7 8th Gen processor is printed at the bottom. See the sessionInfo()
below.
sessionInfo()
start.time.ACO <- Sys.time() library(ShortForm, quietly = T) # using simulated test data and the default values for lavaan.model.specs set.seed(1) # create simulation data from the `psych` package # four factors, 12 items each, 48 total items # factor loading matrix - not quite simple structure fxMatrix <- matrix(data = c(rep(x = c(.8, .8, .4, .3), times = 3), rep(0.2, times = 3*4*3), # first factor loadings rep(0.2, times = 3*4), rep(x = c(.8, .8, .4, .3), times = 3), rep(0.2, times = 3*4*2), # second factor loadings rep(0.2, times = 3*4*2), rep(x = c(.8, .8, .4, .3), times = 3), rep(0.2, times = 3*4), # third factor loadings rep(0.2, times = 3*4*3), rep(x = c(.8, .8, .4, .3), times = 3) # fourth factor loadings ), ncol = 4) # factor correlation matrix - all factors uncorrelated PhiMatrix <- matrix(data = c(1,0,0,0, 0,1,0,0, 0,0,1,0, 0,0,0,1), ncol = 4) antData <- psych::sim( fx = fxMatrix, Phi = PhiMatrix, n = 600, mu = c(-2, -1, 1, 2), raw = TRUE )$observed # observed is the simulated observed data colnames(antData) = paste0("Item", 1:48) antModel <- ' Trait1 =~ Item1 + Item2 + Item3 + Item4 + Item5 + Item6 + Item7 + Item8 + Item9 + Item10 + Item11 + Item12 Trait2 =~ Item13 + Item14 + Item15 + Item16 + Item17 + Item18 + Item19 + Item20 + Item21 + Item22 + Item23 + Item24 Trait3 =~ Item25 + Item26 + Item27 + Item28 + Item29 + Item30 + Item31 + Item32 + Item33 + Item34 + Item35 + Item36 Trait4 =~ Item37 + Item38 + Item39 + Item40 + Item41 + Item42 + Item43 + Item44 + Item45 + Item46 + Item47 + Item48 ' # then, create the list of the items by the factors list.items <- list( paste0("Item", 1:12), paste0("Item", 13:24), paste0("Item", 25:36), paste0("Item", 37:48) ) # finally, call the function with some minor changes to the default values. abilityShortForm = antcolony.lavaan(data = antData, ants = 10, evaporation = 0.9, antModel = antModel, list.items = list.items, full = 48, i.per.f = c(6,6,6,6), lavaan.model.specs = list(model.type = "cfa", auto.var = T, estimator = "default", ordered = NULL, int.ov.free = TRUE, int.lv.free = FALSE, auto.fix.first = TRUE, auto.fix.single = TRUE, std.lv = FALSE, auto.cov.lv.x = TRUE, auto.th = TRUE, auto.delta = TRUE, auto.cov.y = TRUE), factors = c("Trait1", "Trait2", "Trait3", "Trait4"), steps = 100, max.run = 100, parallel = T) abilityShortForm # print the results of the final short form plot(abilityShortForm, type = 'pheromone') # the pheromone plot for class "antcolony"
A similar example can be found in the antcolony.mplus
function, but requires you to have a valid Mplus installation on the computer. It took a total of r round(as.difftime(Sys.time() - start.time.ACO, units = "mins"),2)
r attr(round(as.difftime(Sys.time() - start.time.ACO, units = "mins"),2), 'units')
to run this example.
This example demonstrates how to use the Tabu search for model specification searches when the original model may be misspecified in some way.
start.time.Tabu <- Sys.time() library(ShortForm, quietly = T) set.seed(2) # create simulation data from the `psych` package # two factors, 12 items total # factor loading matrix - not quite simple structure fxMatrix <- matrix(data = c( # first factor loadings rep(x = c(.8, .8, .6, .6), times = 3), # second factor loadings rep(x = c(.2), times = 12) ), ncol = 2) # factor correlation matrix - all factors uncorrelated PhiMatrix <- matrix(data = c(1,0, 0,1 ), ncol = 2) tabuData <- psych::sim( fx = fxMatrix, Phi = PhiMatrix, n = 600, raw = TRUE )$observed # observed is the simulated observed data colnames(tabuData) = paste0("Item", 1:12) tabuModel <- ' Trait1 =~ Item1 + Item2 + Item3 + Item4 + Item5 + Item6 + 0*Item7 + 0*Item8 + 0*Item9 + 0*Item10 + 0*Item11 + 0*Item12 Trait2 =~ 0*Item1 + 0*Item2 + 0*Item3 + 0*Item4 + 0*Item5 + 0*Item6 + Item7 + Item8 + Item9 + Item10 + Item11 + Item12 ' # fit the initial misspecified model for Tabu init.model <- lavaan::lavaan(model = tabuModel, data = tabuData, auto.var=TRUE, auto.fix.first=FALSE, std.lv=TRUE, auto.cov.lv.x=FALSE) # use search.prep to prepare for the Tabu search ptab <- search.prep(fitted.model = init.model, loadings=TRUE, fcov=FALSE, errors=FALSE) Tabu_example <- suppressWarnings( tabu.sem(init.model = init.model, ptab = ptab, obj = AIC, niter = 20, tabu.size = 10) ) # the suppressWarning wrapping hides the lavaan WARNING output from improper models # check the final model summary(Tabu_example) # plot the change in the objective/criterion function over each run plot(Tabu_example)
It took a total of r round(as.difftime(Sys.time() - start.time.Tabu, units = "mins"),2)
r attr(round(as.difftime(Sys.time() - start.time.Tabu, units = "mins"),2), 'units')
to run this example.
The next Tabu example demonstrates how to use it to find a short form of a prespecified length with different data.
start.time.Tabu <- Sys.time() library(ShortForm, quietly = T) # set the seed to reproduce this example set.seed(3) # create simulation data from the `psych` package # four factors, 12 items each, 48 total items # factor loading matrix - not quite simple structure fxMatrix <- matrix(data = c(rep(x = c(.8, .8, .4, .3), times = 3), rep(0.2, times = 3*4*3), # first factor loadings rep(0.2, times = 3*4), rep(x = c(.8, .8, .4, .3), times = 3), rep(0.2, times = 3*4*2), # second factor loadings rep(0.2, times = 3*4*2), rep(x = c(.8, .8, .4, .3), times = 3), rep(0.2, times = 3*4), # third factor loadings rep(0.2, times = 3*4*3), rep(x = c(.8, .8, .4, .3), times = 3) # fourth factor loadings ), ncol = 4) # factor correlation matrix - all factors uncorrelated PhiMatrix <- matrix(data = c(1,0,0,0, 0,1,0,0, 0,0,1,0, 0,0,0,1), ncol = 4) tabuData <- psych::sim( fx = fxMatrix, Phi = PhiMatrix, n = 600, mu = c(-2, -1, 1, 2), raw = TRUE )$observed # observed is the simulated observed data colnames(tabuData) = paste0("Item", 1:48) tabuModel <- ' Trait1 =~ Item1 + Item2 + Item3 + Item4 + Item5 + Item6 + Item7 + Item8 + Item9 + Item10 + Item11 + Item12 Trait2 =~ Item13 + Item14 + Item15 + Item16 + Item17 + Item18 + Item19 + Item20 + Item21 + Item22 + Item23 + Item24 Trait3 =~ Item25 + Item26 + Item27 + Item28 + Item29 + Item30 + Item31 + Item32 + Item33 + Item34 + Item35 + Item36 Trait4 =~ Item37 + Item38 + Item39 + Item40 + Item41 + Item42 + Item43 + Item44 + Item45 + Item46 + Item47 + Item48 ' # specify the criterion function that the Tabu Search minimizes # wrap this in a tryCatch in case a model does not converge! # specify an appropriate error value: since we're minimizing, error value must be large tabuCriterion = function(x) { tryCatch(lavaan::fitmeasures(object = x, fit.measures = 'chisq'), error = function(e) Inf) } # use the tabuShortForm function # reduce form to the best 12 items, 3 per factor tabuShort <- tabuShortForm(initialModel = tabuModel, originalData = tabuData, numItems = c(5,5,5,5), criterion = tabuCriterion, niter = 20, tabu.size = 10, verbose = FALSE ) # check the chosen model summary(tabuShort) # plot the changes in the objective function over each iteration plot(tabuShort)
It took a total of r round(as.difftime(Sys.time() - start.time.Tabu, units = "mins"),2)
r attr(round(as.difftime(Sys.time() - start.time.Tabu, units = "mins"),2), 'units')
to run this example.
This example demonstrates the use of simulated annealing for creating short forms.
start.time.SA <- Sys.time() library(ShortForm, quietly = T) # create simulation data from the `psych` package # four factors, 12 items each, 48 total items # factor loading matrix - not quite simple structure set.seed(4) fxMatrix <- matrix(data = c(rep(x = c(.8, .8, .4, .3), times = 3), rep(0.2, times = 3*4*3), # first factor loadings rep(0.2, times = 3*4), rep(x = c(.8, .8, .4, .3), times = 3), rep(0.2, times = 3*4*2), # second factor loadings rep(0.2, times = 3*4*2), rep(x = c(.8, .8, .4, .3), times = 3), rep(0.2, times = 3*4), # third factor loadings rep(0.2, times = 3*4*3), rep(x = c(.8, .8, .4, .3), times = 3) # fourth factor loadings ), ncol = 4) # factor correlation matrix - all factors uncorrelated PhiMatrix <- matrix(data = c(1,0,0,0, 0,1,0,0, 0,0,1,0, 0,0,0,1), ncol = 4) annealData <- psych::sim( fx = fxMatrix, Phi = PhiMatrix, n = 600, mu = c(-2, -1, 1, 2), raw = TRUE )$observed # observed is the simulated observed data colnames(annealData) = paste0("Item", 1:48) annealModel <- ' Trait1 =~ Item1 + Item2 + Item3 + Item4 + Item5 + Item6 + Item7 + Item8 + Item9 + Item10 + Item11 + Item12 Trait2 =~ Item13 + Item14 + Item15 + Item16 + Item17 + Item18 + Item19 + Item20 + Item21 + Item22 + Item23 + Item24 Trait3 =~ Item25 + Item26 + Item27 + Item28 + Item29 + Item30 + Item31 + Item32 + Item33 + Item34 + Item35 + Item36 Trait4 =~ Item37 + Item38 + Item39 + Item40 + Item41 + Item42 + Item43 + Item44 + Item45 + Item46 + Item47 + Item48 ' lavaan.model.specs <- list(model.type = "cfa", auto.var = TRUE, estimator = "default", ordered = NULL, int.ov.free = TRUE, int.lv.free = FALSE, std.lv = TRUE, auto.fix.first = FALSE, auto.fix.single = TRUE, auto.cov.lv.x = TRUE, auto.th = TRUE, auto.delta = TRUE, auto.cov.y = TRUE) # perform the SA algorithm set.seed(1) SA_example <- simulatedAnnealing(initialModel = annealModel, originalData = annealData, maxSteps = 200, fitStatistic = 'cfi', maximize = TRUE, temperature = "logistic", items = paste0("Item", 1:48), lavaan.model.specs = lavaan.model.specs, maxChanges = 3, maxItems = c(6,6,6,6), setChains = 4) summary(SA_example) plot(SA_example) # plot showing how the fit value changes at each step
It took a total of r round(as.difftime(Sys.time() - start.time.SA, units = "mins"),2)
r attr(round(as.difftime(Sys.time() - start.time.SA, units = "mins"),2), 'units')
to run the SA example, and a total of r round(as.difftime(Sys.time() - start.time.ACO, units = "mins"),2)
r attr(round(as.difftime(Sys.time() - start.time.ACO, units = "mins"),2), 'units')
to run all four together.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.