knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "## ",
  fig.path = "README-"
)

ShortForm

CRAN_Status_Badge Travis-CI Build Status CRAN Downloads Per Month CRAN Downloads Total

Automatic Short Form Creation for scales. Currently, the Ant Colony Optimization (ACO) Algorithm and the Tabu search are implemented. The original R implementation for the ACO algorithm is from Leite, Huang, & Marcoulides (2008), while the Tabu search function was taken from Marcoulides & Falk (2018). There does not yet seem to be an application of Simulated Annealing (SA) within psychometrics, but Drezner & Marcoulides, 1999 (in Multiple Linear Regression Viewpoints, Volume 25(2); not available online) used SA for multiple regression model selection; this package appears to be the first to implement SA for psychometric models.

This document was created on r Sys.Date().

Installation

install.packages("ShortForm") # the CRAN-approved version
require("devtools")
devtools::install_github("AnthonyRaborn/ShortForm", branch = "devel") # the developmental version

Usage

Here are some (slightly modified) examples from the help documentation using lavaan. Be warned, the algorithms may take some time to converge, particularly with large forms, multiple dimensions, and different settings. The time for these examples to converge on a laptop with an Intel Core i7 8th Gen processor is printed at the bottom. See the sessionInfo() below.

sessionInfo()

ACO Algorithm

start.time.ACO <- Sys.time()
library(ShortForm, quietly = T)
# using simulated test data and the default values for lavaan.model.specs
set.seed(1)
# create simulation data from the `psych` package
# four factors, 12 items each, 48 total items
# factor loading matrix - not quite simple structure
fxMatrix <- 
 matrix(data = c(rep(x = c(.8, .8, .4, .3), times = 3),
                 rep(0.2, times = 3*4*3), # first factor loadings

                 rep(0.2, times = 3*4),
                 rep(x = c(.8, .8, .4, .3), times = 3),
                 rep(0.2, times = 3*4*2), # second factor loadings

                 rep(0.2, times = 3*4*2),
                 rep(x = c(.8, .8, .4, .3), times = 3),
                 rep(0.2, times = 3*4), # third factor loadings

                 rep(0.2, times = 3*4*3),
                 rep(x = c(.8, .8, .4, .3), times = 3) # fourth factor loadings
 ),
 ncol = 4)
# factor correlation matrix - all factors uncorrelated
PhiMatrix <-
 matrix(data = c(1,0,0,0, 
                 0,1,0,0, 
                 0,0,1,0, 
                 0,0,0,1), ncol = 4) 
antData <- 
 psych::sim(
   fx = fxMatrix,
   Phi = PhiMatrix,
   n = 600,
   mu = c(-2, -1, 1, 2),
   raw = TRUE
 )$observed # observed is the simulated observed data

colnames(antData) = paste0("Item", 1:48)

antModel <- '
Trait1 =~ Item1 + Item2 + Item3 + Item4 + Item5 + Item6 + Item7 + Item8 + Item9 + Item10 + Item11 + Item12
Trait2 =~ Item13 + Item14 + Item15 + Item16 + Item17 + Item18 + Item19 + Item20 + Item21 + Item22 + Item23 + Item24
Trait3 =~ Item25 + Item26 + Item27 + Item28 + Item29 + Item30 + Item31 + Item32 + Item33 + Item34 + Item35 + Item36
Trait4 =~ Item37 + Item38 + Item39 + Item40 + Item41 + Item42 + Item43 + Item44 + Item45 + Item46 + Item47 + Item48
'

# then, create the list of the items by the factors
list.items <- 
  list(
    paste0("Item",  1:12),
    paste0("Item", 13:24),
    paste0("Item", 25:36),
    paste0("Item", 37:48)
       )
# finally, call the function with some minor changes to the default values.
abilityShortForm = 
  antcolony.lavaan(data = antData,
                   ants = 10, evaporation = 0.9, antModel = antModel,
                   list.items = list.items, full = 48, i.per.f = c(6,6,6,6),
                   lavaan.model.specs = 
                     list(model.type = "cfa", auto.var = T, estimator = "default", 
                          ordered = NULL, int.ov.free = TRUE,
                          int.lv.free = FALSE, auto.fix.first = TRUE, 
                          auto.fix.single = TRUE, std.lv = FALSE, auto.cov.lv.x = TRUE, 
                          auto.th = TRUE, auto.delta = TRUE, 
                          auto.cov.y = TRUE),
                   factors = c("Trait1", "Trait2", "Trait3", "Trait4"), steps = 100, 
                   max.run = 100, 
                   parallel = T)
abilityShortForm # print the results of the final short form
plot(abilityShortForm, type = 'pheromone') # the pheromone plot for class "antcolony"

A similar example can be found in the antcolony.mplus function, but requires you to have a valid Mplus installation on the computer. It took a total of r round(as.difftime(Sys.time() - start.time.ACO, units = "mins"),2) r attr(round(as.difftime(Sys.time() - start.time.ACO, units = "mins"),2), 'units') to run this example.

Tabu Search Algorithm

This example demonstrates how to use the Tabu search for model specification searches when the original model may be misspecified in some way.

start.time.Tabu <- Sys.time()
library(ShortForm, quietly = T)
set.seed(2)
# create simulation data from the `psych` package
# two factors, 12 items total
# factor loading matrix - not quite simple structure
fxMatrix <- 
  matrix(data = c(
    # first factor loadings
    rep(x = c(.8, .8, .6, .6), times = 3),
    # second factor loadings
    rep(x = c(.2), times = 12)
    ),
  ncol = 2)
# factor correlation matrix - all factors uncorrelated
PhiMatrix <-
  matrix(data = c(1,0, 
                  0,1
                  ), ncol = 2) 
tabuData <- 
  psych::sim(
    fx = fxMatrix,
    Phi = PhiMatrix,
    n = 600,
    raw = TRUE
  )$observed # observed is the simulated observed data

colnames(tabuData) = paste0("Item", 1:12)

tabuModel <- '
Trait1 =~ Item1 + Item2 + Item3 + Item4 + Item5 + Item6 + 0*Item7 + 0*Item8 + 0*Item9 + 0*Item10 + 0*Item11 + 0*Item12
Trait2 =~ 0*Item1 + 0*Item2 + 0*Item3 + 0*Item4 + 0*Item5 + 0*Item6 + Item7 + Item8 + Item9 + Item10 + Item11 + Item12
'
# fit the initial misspecified model for Tabu
init.model <- lavaan::lavaan(model = tabuModel, data = tabuData, 
                             auto.var=TRUE, auto.fix.first=FALSE, std.lv=TRUE,
                             auto.cov.lv.x=FALSE)

# use search.prep to prepare for the Tabu search
ptab <- 
  search.prep(fitted.model = init.model, 
              loadings=TRUE, 
              fcov=FALSE, 
              errors=FALSE)

Tabu_example <- 
  suppressWarnings(
    tabu.sem(init.model = init.model, 
             ptab = ptab, 
             obj = AIC, 
             niter = 20, 
             tabu.size = 10)
    ) # the suppressWarning wrapping hides the lavaan WARNING output from improper models

# check the final model
summary(Tabu_example)

# plot the change in the objective/criterion function over each run
plot(Tabu_example)

It took a total of r round(as.difftime(Sys.time() - start.time.Tabu, units = "mins"),2) r attr(round(as.difftime(Sys.time() - start.time.Tabu, units = "mins"),2), 'units') to run this example.

The next Tabu example demonstrates how to use it to find a short form of a prespecified length with different data.

start.time.Tabu <- Sys.time()
library(ShortForm, quietly = T)
# set the seed to reproduce this example
set.seed(3)
# create simulation data from the `psych` package
# four factors, 12 items each, 48 total items
# factor loading matrix - not quite simple structure
fxMatrix <- 
 matrix(data = c(rep(x = c(.8, .8, .4, .3), times = 3),
                 rep(0.2, times = 3*4*3), # first factor loadings

                 rep(0.2, times = 3*4),
                 rep(x = c(.8, .8, .4, .3), times = 3),
                 rep(0.2, times = 3*4*2), # second factor loadings

                 rep(0.2, times = 3*4*2),
                 rep(x = c(.8, .8, .4, .3), times = 3),
                 rep(0.2, times = 3*4), # third factor loadings

                 rep(0.2, times = 3*4*3),
                 rep(x = c(.8, .8, .4, .3), times = 3) # fourth factor loadings
 ),
 ncol = 4)
# factor correlation matrix - all factors uncorrelated
PhiMatrix <-
 matrix(data = c(1,0,0,0, 
                 0,1,0,0, 
                 0,0,1,0, 
                 0,0,0,1), ncol = 4) 
tabuData <- 
 psych::sim(
   fx = fxMatrix,
   Phi = PhiMatrix,
   n = 600,
   mu = c(-2, -1, 1, 2),
   raw = TRUE
 )$observed # observed is the simulated observed data

colnames(tabuData) = paste0("Item", 1:48)

tabuModel <- '
Trait1 =~ Item1 + Item2 + Item3 + Item4 + Item5 + Item6 + Item7 + Item8 + Item9 + Item10 + Item11 + Item12
Trait2 =~ Item13 + Item14 + Item15 + Item16 + Item17 + Item18 + Item19 + Item20 + Item21 + Item22 + Item23 + Item24
Trait3 =~ Item25 + Item26 + Item27 + Item28 + Item29 + Item30 + Item31 + Item32 + Item33 + Item34 + Item35 + Item36
Trait4 =~ Item37 + Item38 + Item39 + Item40 + Item41 + Item42 + Item43 + Item44 + Item45 + Item46 + Item47 + Item48
'

# specify the criterion function that the Tabu Search minimizes
# wrap this in a tryCatch in case a model does not converge!
# specify an appropriate error value: since we're minimizing, error value must be large
tabuCriterion = function(x) {
 tryCatch(lavaan::fitmeasures(object = x, fit.measures = 'chisq'),
          error = function(e) Inf)
}

# use the tabuShortForm function
# reduce form to the best 12 items, 3 per factor
tabuShort <- 
  tabuShortForm(initialModel = tabuModel, originalData = tabuData,
                numItems = c(5,5,5,5), criterion = tabuCriterion,
                niter = 20, tabu.size = 10, verbose = FALSE
                )

# check the chosen model
summary(tabuShort)
# plot the changes in the objective function over each iteration
plot(tabuShort)

It took a total of r round(as.difftime(Sys.time() - start.time.Tabu, units = "mins"),2) r attr(round(as.difftime(Sys.time() - start.time.Tabu, units = "mins"),2), 'units') to run this example.

Simulated Annealing

This example demonstrates the use of simulated annealing for creating short forms.

start.time.SA <- Sys.time()
library(ShortForm, quietly = T)
# create simulation data from the `psych` package
# four factors, 12 items each, 48 total items
# factor loading matrix - not quite simple structure
set.seed(4)
fxMatrix <- 
 matrix(data = c(rep(x = c(.8, .8, .4, .3), times = 3),
                 rep(0.2, times = 3*4*3), # first factor loadings

                 rep(0.2, times = 3*4),
                 rep(x = c(.8, .8, .4, .3), times = 3),
                 rep(0.2, times = 3*4*2), # second factor loadings

                 rep(0.2, times = 3*4*2),
                 rep(x = c(.8, .8, .4, .3), times = 3),
                 rep(0.2, times = 3*4), # third factor loadings

                 rep(0.2, times = 3*4*3),
                 rep(x = c(.8, .8, .4, .3), times = 3) # fourth factor loadings
 ),
 ncol = 4)
# factor correlation matrix - all factors uncorrelated
PhiMatrix <-
 matrix(data = c(1,0,0,0, 
                 0,1,0,0, 
                 0,0,1,0, 
                 0,0,0,1), ncol = 4) 
annealData <- 
 psych::sim(
   fx = fxMatrix,
   Phi = PhiMatrix,
   n = 600,
   mu = c(-2, -1, 1, 2),
   raw = TRUE
 )$observed # observed is the simulated observed data

colnames(annealData) = paste0("Item", 1:48)

annealModel <- '
Trait1 =~ Item1 + Item2 + Item3 + Item4 + Item5 + Item6 + Item7 + Item8 + Item9 + Item10 + Item11 + Item12
Trait2 =~ Item13 + Item14 + Item15 + Item16 + Item17 + Item18 + Item19 + Item20 + Item21 + Item22 + Item23 + Item24
Trait3 =~ Item25 + Item26 + Item27 + Item28 + Item29 + Item30 + Item31 + Item32 + Item33 + Item34 + Item35 + Item36
Trait4 =~ Item37 + Item38 + Item39 + Item40 + Item41 + Item42 + Item43 + Item44 + Item45 + Item46 + Item47 + Item48
'

lavaan.model.specs <-
  list(model.type = "cfa",
       auto.var = TRUE, estimator = "default", ordered = NULL,
       int.ov.free = TRUE, int.lv.free = FALSE, std.lv = TRUE, auto.fix.first = FALSE, 
       auto.fix.single = TRUE, auto.cov.lv.x = TRUE, auto.th = TRUE, 
       auto.delta = TRUE, auto.cov.y = TRUE)

# perform the SA algorithm
set.seed(1)
SA_example <- 
  simulatedAnnealing(initialModel = annealModel, originalData = annealData, maxSteps = 200, 
                     fitStatistic = 'cfi', maximize = TRUE, 
                     temperature = "logistic", items = paste0("Item", 1:48), 
                     lavaan.model.specs = lavaan.model.specs, 
                     maxChanges = 3, maxItems = c(6,6,6,6), setChains = 4)
summary(SA_example)
plot(SA_example) # plot showing how the fit value changes at each step

It took a total of r round(as.difftime(Sys.time() - start.time.SA, units = "mins"),2) r attr(round(as.difftime(Sys.time() - start.time.SA, units = "mins"),2), 'units') to run the SA example, and a total of r round(as.difftime(Sys.time() - start.time.ACO, units = "mins"),2) r attr(round(as.difftime(Sys.time() - start.time.ACO, units = "mins"),2), 'units') to run all four together.



AnthonyRaborn/ShortForm documentation built on Feb. 6, 2024, 1:25 a.m.