# R/bruceR-stats_2_scale.R In bruceR: Broadly Useful Convenient and Efficient R Functions

#### Documented in: Alpha, CFA, CONSEC, COUNT, EFA, .mean, MEAN, MODE, PCA, STD, .sum, SUM ####

#### Multivariate Computation ####

#' Multivariate computation.
#'
#' @description
#' Easily compute multivariate sum, mean, and other scores.
#' Reverse scoring can also be easily implemented without saving extra variables.
#' \code{\link{Alpha}} function uses a similar method to deal with reverse scoring.
#'
#' Three ways to specify variables:
#' \enumerate{
#'   \item \strong{\code{var + items}}: common and unique parts of variable names (suggested).
#'   \item \strong{\code{vars}}: a character vector of variable names (suggested).
#'   \item \strong{\code{varrange}}: starting and stopping positions of variables (NOT suggested).
#' }
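#'
#' A minimal sketch (assuming a data frame \code{d} whose columns are
#' \code{x1} to \code{x5}, in that order, so that the three calls are equivalent):
#' \preformatted{
#' MEAN(d, "x", 1:5)                              # var + items
#' MEAN(d, vars=c("x1", "x2", "x3", "x4", "x5"))  # vars
#' MEAN(d, varrange="x1:x5")                      # varrange
#' }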
#'
#' @param data Data frame.
#' @param var \strong{[Option 1]}
#' Common part across variables: e.g., \code{"RSES"}, \code{"XX.{i}.pre"}
#' (if \code{var} string has any placeholder in braces \code{{...}},
#' then \code{items} will be pasted into the braces, see examples)
#' @param items \strong{[Option 1]}
#' Unique part across variables: e.g., \code{1:10}, \code{c("a", "b", "c")}
#' @param vars \strong{[Option 2]}
#' Character vector specifying variables: e.g., \code{c("X1", "X2", "X3", "X4", "X5")}
#' @param varrange \strong{[Option 3]}
#' Character string specifying positions ("start:stop") of variables: e.g., \code{"A1:E5"}
#' @param value [Only for \code{COUNT}] The value to be counted.
#' @param rev [Optional] Variables that need to be reversed. It can be
#' (1) a character vector specifying the reverse-scoring variables (recommended), or
#' (2) a numeric vector specifying the item number of reverse-scoring variables (not recommended).
#' @param range,likert [Optional] Range of likert scale: e.g., \code{1:5}, \code{c(1, 5)}.
#' If not provided, it will be automatically estimated from the given data (BUT you should use this carefully).
#' @param na.rm Ignore missing values. Defaults to \code{TRUE}.
#' @param values [Only for \code{CONSEC}] Values to be counted as consecutive identical values. Defaults to all numbers (\code{0:9}).
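#'
#' @details
#' Reverse scoring recodes an item as \code{min + max - x}.
#' For example, on a 1~5 likert scale, a raw score of 2 is recoded
#' as 1 + 5 - 2 = 4.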
#'
#' @return A vector of computed values.
#'
#' @examples
#' d = data.table(
#'   x1 = 1:5,
#'   x4 = c(2,2,5,4,5),
#'   x3 = c(3,2,NA,NA,5),
#'   x2 = c(4,4,NA,2,5),
#'   x5 = c(5,4,1,4,5)
#' )
#' d
#' ## I deliberately set this order to show you
#' ## the difference between "vars" and "varrange".
#'
#' ## ====== Usage 1: data.table := ====== ##
#' d[, `:=`(
#'   na = COUNT(d, "x", 1:5, value=NA),
#'   n.2 = COUNT(d, "x", 1:5, value=2),
#'   sum = SUM(d, "x", 1:5),
#'   m1 = MEAN(d, "x", 1:5),
#'   m2 = MEAN(d, vars=c("x1", "x4")),
#'   m3 = MEAN(d, varrange="x1:x2", rev="x2", range=1:5),
#'   cons1 = CONSEC(d, "x", 1:5),
#'   cons2 = CONSEC(d, varrange="x1:x5")
#' )]
#' d
#'
#' ## ====== Usage 2: add() & added() ====== ##
#' data = as.data.table(psych::bfi)
#' added(data, {
#'   gender = as.factor(gender)
#'   education = as.factor(education)
#'   E = .mean("E", 1:5, rev=c(1,2), range=1:6)
#'   A = .mean("A", 1:5, rev=1, range=1:6)
#'   C = .mean("C", 1:5, rev=c(4,5), range=1:6)
#'   N = .mean("N", 1:5, range=1:6)
#'   O = .mean("O", 1:5, rev=c(2,5), range=1:6)
#' }, drop=TRUE)
#' data
#'
#' ## ====== New Feature for var & items ====== ##
#' d = data.table(
#'   XX.1.pre = 1:5,
#'   XX.2.pre = 6:10,
#'   XX.3.pre = 11:15
#' )
#' add(d, { XX.mean = .mean("XX.{i}.pre", 1:3) })
#' add(d, { XX.mean = .mean("XX.{items}.pre", 1:3) })  # the same
#' add(d, { XX.mean = .mean("XX.{#$%^&}.pre", 1:3) }) # the same #' #' @name %%COMPUTE%% NULL paste_var_items = function(var, items) { if(grepl("\\{.*\\}", var)) { var.split = strsplit(var, "\\{.*\\}")[[1]] vars = paste0(var.split[1], items, var.split[2]) } else { vars = paste0(var, items) } return(vars) } convert2vars = function(data, var=NULL, items=NULL, vars=NULL, varrange=NULL, rev=NULL) { if(!is.null(varrange)) { dn = names(data) varrange = gsub(" ", "", strsplit(varrange, ":")[[1]]) vars = dn[which(dn==varrange[1]):which(dn==varrange[2])] } if(is.null(vars)) vars = paste_var_items(var, items) if(is.numeric(rev)) { if(is.null(var)) stop("Argument rev must be character rather than numeric if you specify vars or varrange.", call.=FALSE) else rev = paste0(var, rev) } if(is.character(rev)) rev = which(vars %in% rev) vars.raw = vars vars = paste0(deparse(substitute(data)), "$", vars, "")
return(list(vars.raw=vars.raw, vars=vars, rev=rev))
}
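
## A quick sketch of what paste_var_items() returns (assumed inputs):
## paste_var_items("RSES", 1:3)        #> "RSES1" "RSES2" "RSES3"
## paste_var_items("XX.{i}.pre", 1:3)  #> "XX.1.pre" "XX.2.pre" "XX.3.pre"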

#' @describeIn %%COMPUTE%%
#' \strong{Count} a certain value across variables.
#' @export
COUNT = function(data, var=NULL, items=NULL, vars=NULL, varrange=NULL,
                 value=NA) {
  Count = function(...) sum(c(...), na.rm=TRUE)
  v.r = convert2vars(data, var, items, vars, varrange)
  vars = v.r$vars
  if(is.na(value))
    varlist = paste0("is.na(", vars, ")")
  else
    varlist = paste0(vars, "==", value)
  expr = paste0("mapply(Count, ", paste(varlist, collapse=", "), ")")
  eval(parse(text=expr))
}


#' @describeIn %%COMPUTE%%
#' Compute \strong{mode} across variables.
#' @export
MODE = function(data, var=NULL, items=NULL, vars=NULL, varrange=NULL) {
  getmode = function(v) {
    uniqv = unique(v)
    uniqv[which.max(tabulate(match(v, uniqv)))]
  }
  Mode = function(...) getmode(c(...))
  varlist = convert2vars(data, var, items, vars, varrange)$vars
  expr = paste0("mapply(Mode, ", paste(varlist, collapse=", "), ")")
  eval(parse(text=expr))
}
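
## Sketch: MODE() returns the most frequent value across items, per row.
## For example (dd is an assumed toy data frame):
## dd = data.frame(a1=1, a2=2, a3=2)
## MODE(dd, "a", 1:3)  #> 2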

#' @describeIn %%COMPUTE%%
#' Compute \strong{sum} across variables.
#' @export
SUM = function(data,
               var=NULL, items=NULL, vars=NULL, varrange=NULL,
               rev=NULL, range=likert, likert=NULL,
               na.rm=TRUE) {
  Sum = function(...) sum(..., na.rm=na.rm)
  v.r = convert2vars(data, var, items, vars, varrange, rev)
  vars = v.r$vars
  rev = v.r$rev
  if(!is.null(rev) & is.null(range)) {
    ranges = apply(as.data.frame(data)[,v.r$vars.raw], 2,
                   function(...) range(..., na.rm=TRUE))
    range = c(min(ranges[1,], na.rm=TRUE), max(ranges[2,], na.rm=TRUE))
    warning("The range of likert scale was automatically estimated from the given data. If you are not sure about this, please specify the range argument. See ?SUM")
  }
  # reversed items are rewritten as "(min+max) - x" in the expression
  pre = rep("", length(vars))
  pre[rev] = ifelse(is.null(range), "", paste0(sum(range(range)), "-"))
  varlist = paste0(pre, vars)
  expr = paste0("mapply(Sum, ", paste(varlist, collapse=", "), ")")
  eval(parse(text=expr))
}


#' @describeIn %%COMPUTE%%
#' Tidy version of \code{SUM},
#' which can only be used in \code{\link[bruceR:add]{add()/added()}}.
#' @export
.sum = function(var=NULL, items=NULL, vars=NULL, varrange=NULL,
                rev=NULL, range=likert, likert=NULL,
                na.rm=TRUE) {
  Sum = glue("function(...) sum(..., na.rm={na.rm})")
  if(!is.null(varrange))
    stop("Please specify either vars or var + items. To specify varrange, please use SUM().", call.=FALSE)
  if(is.null(vars)) vars = paste_var_items(var, items)
  if(!is.null(rev) & is.null(range))
    stop("Please also specify the range argument!", call.=FALSE)
  if(is.numeric(rev)) {
    if(is.null(var))
      stop("Argument rev must be character rather than numeric if you specify vars or varrange.", call.=FALSE)
    else
      rev = paste0(var, rev)
  }
  if(is.character(rev)) rev = which(vars %in% rev)
  pre = rep("", length(vars))
  pre[rev] = ifelse(is.null(range), "", paste0(sum(range(range)), "-"))
  varlist = paste0(pre, vars)
  expr = paste0("mapply(", Sum, ", ", paste(varlist, collapse=", "), ")")
  return(glue(expr))
}


#' @describeIn %%COMPUTE%%
#' Compute \strong{mean} across variables.
#' @export
MEAN = function(data,
                var=NULL, items=NULL, vars=NULL, varrange=NULL,
                rev=NULL, range=likert, likert=NULL,
                na.rm=TRUE) {
  Mean = function(...) mean(c(...), na.rm=na.rm)
  v.r = convert2vars(data, var, items, vars, varrange, rev)
  vars = v.r$vars
  rev = v.r$rev
  if(!is.null(rev) & is.null(range)) {
    ranges = apply(as.data.frame(data)[,v.r$vars.raw], 2,
                   function(...) range(..., na.rm=TRUE))
    range = c(min(ranges[1,], na.rm=TRUE), max(ranges[2,], na.rm=TRUE))
    warning("The range of likert scale was automatically estimated from the given data. If you are not sure about this, please specify the range argument. See ?MEAN")
  }
  pre = rep("", length(vars))
  pre[rev] = ifelse(is.null(range), "", paste0(sum(range(range)), "-"))
  varlist = paste0(pre, vars)
  expr = paste0("mapply(Mean, ", paste(varlist, collapse=", "), ")")
  eval(parse(text=expr))
}

#' @describeIn %%COMPUTE%%
#' Tidy version of \code{MEAN},
#' which can only be used in \code{\link[bruceR:add]{add()/added()}}.
#' @export
.mean = function(var=NULL, items=NULL, vars=NULL, varrange=NULL,
                 rev=NULL, range=likert, likert=NULL,
                 na.rm=TRUE) {
  Mean = glue("function(...) mean(c(...), na.rm={na.rm})")
  if(!is.null(varrange))
    stop("Please specify either vars or var + items. To specify varrange, please use MEAN().", call.=FALSE)
  if(is.null(vars)) vars = paste_var_items(var, items)
  if(!is.null(rev) & is.null(range))
    stop("Please also specify the range argument!", call.=FALSE)
  if(is.numeric(rev)) {
    if(is.null(var))
      stop("Argument rev must be character rather than numeric if you specify vars or varrange.", call.=FALSE)
    else
      rev = paste0(var, rev)
  }
  if(is.character(rev)) rev = which(vars %in% rev)
  pre = rep("", length(vars))
  pre[rev] = ifelse(is.null(range), "", paste0(sum(range(range)), "-"))
  varlist = paste0(pre, vars)
  expr = paste0("mapply(", Mean, ", ", paste(varlist, collapse=", "), ")")
  return(glue(expr))
}
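
## Sketch of the expression string that .mean() generates (assumed call);
## add()/added() later evaluate it within the data:
## .mean("E", 1:2, rev=1, range=1:6)
## #> mapply(function(...) mean(c(...), na.rm=TRUE), 7-E1, E2)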

#' @describeIn %%COMPUTE%%
#' Compute \strong{standard deviation} across variables.
#' @export
STD = function(data,
               var=NULL, items=NULL, vars=NULL, varrange=NULL,
               rev=NULL, range=likert, likert=NULL,
               na.rm=TRUE) {
  Std = function(...) sd(c(...), na.rm=na.rm)
  v.r = convert2vars(data, var, items, vars, varrange, rev)
  vars = v.r$vars
  rev = v.r$rev
  if(!is.null(rev) & is.null(range)) {
    ranges = apply(as.data.frame(data)[,v.r$vars.raw], 2,
                   function(...) range(..., na.rm=TRUE))
    range = c(min(ranges[1,], na.rm=TRUE), max(ranges[2,], na.rm=TRUE))
    warning("The range of likert scale was automatically estimated from the given data. If you are not sure about this, please specify the range argument. See ?STD")
  }
  pre = rep("", length(vars))
  pre[rev] = ifelse(is.null(range), "", paste0(sum(range(range)), "-"))
  varlist = paste0(pre, vars)
  expr = paste0("mapply(Std, ", paste(varlist, collapse=", "), ")")
  eval(parse(text=expr))
}


#' @describeIn %%COMPUTE%%
#' Compute \strong{consecutive identical digits} across variables
#' (especially useful in detecting careless responding).
#' @export
CONSEC = function(data, var=NULL, items=NULL, vars=NULL, varrange=NULL,
                  values=0:9) {
  # Consecutive Identical Digits
  Conseq = function(string, number=values) {
    pattern = paste(paste0(number, "{2,}"), collapse="|")
    ifelse(grepl(pattern, string),
           max(nchar(str_extract_all(string=string, pattern=pattern, simplify=TRUE))),
           0)
  }
  v.r = convert2vars(data, var, items, vars, varrange)
  vars = v.r$vars
  varlist = vars
  expr = paste0("mapply(Conseq, paste0(", paste(varlist, collapse=", "), "))")
  eval(parse(text=expr))
}
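
## Sketch: CONSEC() counts the longest run of consecutive identical digits,
## a common screen for careless responding (assumed toy data):
## dd = data.frame(x1=c(1,2), x2=c(1,2), x3=c(1,3), x4=c(1,3), x5=c(1,3))
## CONSEC(dd, "x", 1:5)  #> 5 3  (row 1 = "11111", row 2 = "22333")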

#### Reliability, EFA, and CFA ####

#' Reliability analysis (Cronbach's \eqn{\alpha} and McDonald's \eqn{\omega}).
#'
#' @description
#' An extension of \code{\link[psych:alpha]{psych::alpha()}} and \code{\link[psych:omega]{psych::omega()}},
#' reporting (1) scale statistics
#' (Cronbach's \eqn{\alpha} and McDonald's \eqn{\omega}) and
#' (2) item statistics
#' (item-rest correlation [i.e., corrected item-total correlation]
#' and Cronbach's \eqn{\alpha} if item deleted).
#'
#' Three options to specify variables:
#' \enumerate{
#'   \item \strong{\code{var + items}}: common and unique parts of variable names (suggested).
#'   \item \strong{\code{vars}}: a character vector of variable names (suggested).
#'   \item \strong{\code{varrange}}: starting and stopping positions of variables (NOT suggested).
#' }
#'
#' @inheritParams %%COMPUTE%%
#' @param digits Number of decimal places of output. Defaults to \code{3}.
#'
#' @return
#' A list of results obtained from
#' \code{\link[psych:alpha]{psych::alpha()}} and \code{\link[psych:omega]{psych::omega()}}.
#'
#' @examples
#' # ?psych::bfi
#' \donttest{data = psych::bfi
#' Alpha(data, "E", 1:5)   # "E1" & "E2" should be reversed
#' Alpha(data, "E", 1:5, rev=1:2)            # correct
#' Alpha(data, "E", 1:5, rev=cc("E1, E2"))   # also correct
#' Alpha(data, vars=cc("E1, E2, E3, E4, E5"), rev=cc("E1, E2"))
#' Alpha(data, varrange="E1:E5", rev=cc("E1, E2"))
#'
#' # using dplyr::select()
#' data %>% select(E1, E2, E3, E4, E5) %>%
#'   Alpha(vars=names(.), rev=cc("E1, E2"))
#' }
#' @seealso \code{\link{MEAN}}, \code{\link{EFA}}, \code{\link{CFA}}
#'
#' @export
Alpha = function(
    data, var, items, vars=NULL, varrange=NULL, rev=NULL,
    digits=3
) {
  if(!is.null(varrange)) {
    dn = names(data)
    varrange = gsub(" ", "", strsplit(varrange, ":")[[1]])
    vars = dn[which(dn==varrange[1]):which(dn==varrange[2])]
  }
  if(is.null(vars)) vars = paste_var_items(var, items)
  if(is.numeric(rev)) rev = paste0(var, rev)
  n.total = nrow(data)
  data = na.omit(as.data.frame(data)[vars])
  n.valid = nrow(data)
  for(v in vars) {
    data[[v]] = as.numeric(data[[v]])
    if(v %in% rev) {
      # reverse-score: recode x as min + max - x
      data[[v]] = min(data[[v]]) + max(data[[v]]) - data[[v]]
      vr = paste(v, "(rev)")
      Run("data = dplyr::rename(data, `{vr}`={v})")
    }
  }
  nitems = length(vars)

  suppressMessages({
    suppressWarnings({
      alpha = psych::alpha(data, delete=FALSE, warnings=FALSE)
      omega = psych::omega(data, nfactors=1, flip=FALSE)
      loadings = psych::principal(data, nfactors=1, scores=FALSE)$loadings
      items = cbind(
        alpha$item.stats[c("mean", "sd", "r.drop")],
        alpha$alpha.drop[c("raw_alpha")])
      names(items) = c("Mean", "S.D.", "Item-Rest Cor.", "Cronbach\u2019s \u03b1")
      items.need.rev = vars[loadings<0]
    })
  })

  Print("
  \n
  <<cyan Reliability Analysis>>

  Summary:
  Total Items: {nitems}
  Scale Range: {min(data)} ~ {max(data)}
  Total Cases: {n.total}
  Valid Cases: {n.valid} ({100*n.valid/n.total:.1}%)

  Scale Statistics:
  <<italic Mean>> = {alpha$total$mean:.{digits}}
  <<italic S.D.>> = {alpha$total$sd:.{digits}}
  Cronbach\u2019s \u03b1 = {alpha$total$raw_alpha:.{digits}}
  McDonald\u2019s \u03c9 = {omega$omega.tot:.{digits}}
  ")
  # Cronbach's \u03b1: {alpha$total$raw_alpha:.{digits}} (based on raw scores)
  # Cronbach's \u03b1: {alpha$total$std.alpha:.{digits}} (based on standardized items)

  if(alpha$total$raw_alpha<0.5 | length(items.need.rev)>0) {
    cat("\n")
    if(alpha$total$raw_alpha<0.5)
      Print("<<yellow Warning: Scale reliability is low. You may check item codings.>>")
    if(length(items.need.rev)==1)
      Print("<<yellow Item {items.need.rev} correlates negatively with the scale and may be reversed.>>")
    if(length(items.need.rev)>1)
      Print("<<yellow Items {paste(items.need.rev, collapse=', ')} correlate negatively with the scale and may be reversed.>>")
    if(length(items.need.rev)>0)
      Print("<<yellow You can specify this argument: rev=c(\"{paste(items.need.rev, collapse='\", \"')}\")>>")
  }

  cat("\n")
  print_table(items, digits=digits,
              title="Item Statistics (Cronbach\u2019s \u03b1 If Item Deleted):",
              note="Item-Rest Cor. = Corrected Item-Total Correlation")
  cat("\n")

  # rel = jmv::reliability(data, vars=eval(vars), revItems=eval(rev),
  #                        meanScale=TRUE, sdScale=TRUE,
  #                        alphaScale=TRUE, omegaScale=TRUE,
  #                        itemRestCor=TRUE, alphaItems=TRUE, omegaItems=TRUE)
  # rel$items$setTitle("Item Reliability Statistics (if item is dropped)")

  invisible(list(alpha=alpha, omega=omega))
}

#' Principal Component Analysis (PCA) and Exploratory Factor analysis (EFA).
#'
#' @description
#' An extension of \code{\link[psych:principal]{psych::principal()}} and \code{\link[psych:fa]{psych::fa()}},
#' performing either Principal Component Analysis (PCA) or Exploratory Factor Analysis (EFA).
#'
#' Three options to specify variables:
#' \enumerate{
#'   \item \strong{\code{var + items}}: use the common and unique parts of variable names.
#'   \item \strong{\code{vars}}: directly define a character vector of variables.
#'   \item \strong{\code{varrange}}: use the starting and stopping positions of variables.
#' }
#'
#' @inheritParams %%COMPUTE%%
#' @param method Extraction method.
#' \itemize{
#'   \item \code{"pca"} - Principal Component Analysis (default)
#'   \item \code{"pa"} - Principal Axis Factor Analysis
#'   \item \code{"ml"} - Maximum Likelihood Factor Analysis
#'   \item \code{"minres"} - Minimum Residual Factor Analysis
#'   \item \code{"uls"} - Unweighted Least Squares Factor Analysis
#'   \item \code{"ols"} - Ordinary Least Squares Factor Analysis
#'   \item \code{"wls"} - Weighted Least Squares Factor Analysis
#'   \item \code{"gls"} - Generalized Least Squares Factor Analysis
#'   \item \code{"alpha"} - Alpha Factor Analysis (Kaiser & Coffey, 1965)
#' }
#' @param rotation Rotation method.
#' \itemize{
#'   \item \code{"none"} - None (not suggested)
#'   \item \code{"varimax"} - Varimax (default)
#'   \item \code{"oblimin"} - Direct Oblimin
#'   \item \code{"promax"} - Promax
#'   \item \code{"quartimax"} - Quartimax
#'   \item \code{"equamax"} - Equamax
#' }
#' @param nfactors How to determine the number of factors/components?
#' \itemize{
#'   \item \code{"eigen"} - based on eigenvalue (> minimum eigenvalue) (default)
#'   \item \code{"parallel"} - based on parallel analysis
#'   \item (any number >= 1) - user-defined fixed number
#' }
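#' A minimal sketch of the \code{"parallel"} rule (assuming the internal
#' helper \code{parallel_analysis()} defined later in this file): retain
#' the components whose observed eigenvalues exceed the 95th percentile of
#' eigenvalues from simulated random-normal data of the same dimension.
#' \preformatted{
#' obs = eigen(cor(data), only.values=TRUE)$values
#' sim = parallel_analysis(nrow(data), ncol(data), niter=20)
#' nfactors = max(which(obs <= sim)[1] - 1, 1)
#' }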
#' @param hide.loadings A number (0~1) for hiding absolute factor/component loadings below this value.
#' Defaults to \code{0} (does not hide any loading).
#' @param sort.loadings Sort the factor/component loadings by size? Defaults to \code{TRUE}.
#' @param plot.scree Display the scree plot? Defaults to \code{TRUE}.
#' @param kaiser Do the Kaiser normalization (as in SPSS)? Defaults to \code{TRUE}.
#' @param max.iter Maximum number of iterations for convergence. Defaults to \code{25} (the same as in SPSS).
#' @param min.eigen Minimum eigenvalue (used if \code{nfactors="eigen"}). Defaults to \code{1}.
#' @param digits Number of decimal places of output. Defaults to \code{3}.
#' @param file File name of MS Word (\code{.doc}).
#' @param ... Arguments passed from \code{PCA()} to \code{EFA()}.
#'
#' @note
#' Results based on the \code{varimax} rotation method are identical to SPSS.
#' The other rotation methods may produce results slightly different from SPSS.
#'
#' @return
#' A list of results:
#' \describe{
#'   \item{\code{result}}{The R object returned from \code{\link[psych:principal]{psych::principal()}} or \code{\link[psych:fa]{psych::fa()}}}
#'   \item{\code{result.kaiser}}{The R object returned from \code{\link[psych:kaiser]{psych::kaiser()}} (if any)}
#'   \item{\code{extraction.method}}{Extraction method}
#'   \item{\code{rotation.method}}{Rotation method}
#'   \item{\code{eigenvalues}}{A \code{data.frame} of eigenvalues and sum of squared (SS) loadings}
#'   \item{\code{scree.plot}}{A \code{ggplot2} object of the scree plot}
#' }
#'
#' @describeIn EFA Exploratory Factor Analysis
#'
#' @seealso \code{\link{Alpha}}, \code{\link{CFA}}
#'
#' @examples
#' \donttest{data = psych::bfi
#' EFA(data, "E", 1:5)              # var + items
#' EFA(data, "E", 1:5, nfactors=2)  # var + items
#'
#' EFA(data, varrange="A1:O5",
#'     nfactors="parallel",
#'
#' # the same as above:
#' # using dplyr::select() and dplyr::matches()
#' # to select variables whose names end with numbers
#' # (regexp: \\d matches all numbers, $matches the end of a string) #' data %>% select(matches("\\d$")) %>%
#'   EFA(vars=names(.),       # all selected variables
#'       method="pca",        # default
#'       rotation="varimax",  # default
#'       nfactors="parallel", # parallel analysis
#' }
#' @export
EFA = function(
    data, var, items, vars=NULL, varrange=NULL, rev=NULL,
    method=c("pca", "pa", "ml", "minres", "uls", "ols", "wls", "gls", "alpha"),
    rotation=c("none", "varimax", "oblimin", "promax", "quartimax", "equamax"),
    nfactors=c("eigen", "parallel", "(any number >= 1)"),
    sort.loadings=TRUE,
    hide.loadings=0,
    plot.scree=TRUE,
    # plot.factor=TRUE,
    kaiser=TRUE,
    max.iter=25,
    min.eigen=1,
    digits=3,
    file=NULL
) {
  if(!is.null(varrange)) {
    dn = names(data)
    varrange = gsub(" ", "", strsplit(varrange, ":")[[1]])
    vars = dn[which(dn==varrange[1]):which(dn==varrange[2])]
  }
  if(is.null(vars)) vars = paste_var_items(var, items)
  if(is.numeric(rev)) rev = paste0(var, rev)
  n.total = nrow(data)
  data = na.omit(as.data.frame(data)[vars])
  n.valid = nrow(data)
  for(v in vars) {
    data[[v]] = as.numeric(data[[v]])
    if(v %in% rev) {
      # reverse-score: recode x as min + max - x
      data[[v]] = min(data[[v]])+max(data[[v]])-data[[v]]
      vr = paste(v, "(rev)")
      Run("data = dplyr::rename(data, `{vr}`={v})")
    }
  }
  nitems = length(vars)

  # determine the number of factors
  eigen.value = eigen(cor(data), only.values=TRUE)$values
  eigen.parallel = NULL
  error.nfactors = "nfactors should be \"eigen\", \"parallel\", or an integer (>= 1)."
  if(length(nfactors)>1) nfactors = "eigen"
  if(is.numeric(nfactors)) {
    if(nfactors<1) stop(error.nfactors, call.=FALSE)
    nfactors = nfactors
  } else if(nfactors=="eigen") {
    nfactors = sum(eigen.value>min.eigen)
  } else if(nfactors=="parallel") {
    eigen.parallel = parallel_analysis(nrow(data), ncol(data), niter=20)
    nfactors = max(which(eigen.value<=eigen.parallel)[1]-1, 1)
    if(is.na(nfactors)) nfactors = length(eigen.value)
  } else {
    stop(error.nfactors, call.=FALSE)
  }

  # extraction method
  valid.methods = c("pca", "pa", "ml", "minres", "uls", "ols", "wls", "gls", "alpha")
  if(length(method)>1) method = "pca"
  if(method %notin% valid.methods)
    stop(Glue("
    EFA() has changed significantly since bruceR v0.8.0.
    method should be one of \"{paste(valid.methods, collapse='\", \"')}\".
    See: help(EFA)"), call.=FALSE)
  Method = switch(
    method,
    "pca"="Principal Component Analysis",
    "pa"="Principal Axis Factor Analysis",
    "ml"="Maximum Likelihood Factor Analysis",
    "minres"="Minimum Residual Factor Analysis",
    "uls"="Unweighted Least Squares Factor Analysis",
    "ols"="Ordinary Least Squares Factor Analysis",
    "wls"="Weighted Least Squares Factor Analysis",
    "gls"="Generalized Least Squares Factor Analysis",
    "alpha"="Alpha Factor Analysis (Kaiser & Coffey, 1965)")

  # rotation method
  valid.rotations = c("none", "varimax", "oblimin", "promax", "quartimax", "equamax")
  if(length(rotation)>1) rotation = "varimax"
  if(rotation %notin% valid.rotations)
    stop(Glue("
    EFA() has changed significantly since bruceR v0.8.0.
    rotation should be one of \"{paste(valid.rotations, collapse='\", \"')}\".
    See: help(EFA)"), call.=FALSE)
  Method.Rotation = switch(
    rotation,
    "none"="None",
    "varimax"="Varimax",
    "oblimin"="Oblimin",
    "promax"="Promax",
    "quartimax"="Quartimax",
    "equamax"="Equamax")
  if(rotation %in% c("none", "equamax")) kaiser = FALSE
  if(kaiser) Method.Rotation = paste(Method.Rotation, "(with Kaiser Normalization)")
  if(nfactors==1) Method.Rotation = "(Only one component was extracted. The solution was not rotated.)"

  # analyze
  suppressMessages({
    suppressWarnings({
      kmo = psych::KMO(data)$MSA
      btl = psych::cortest.bartlett(data, n=nrow(data))
      if(method=="pca") {
        efa = psych::principal(
          data, nfactors=nfactors, rotate=rotation)
      } else {
        efa = psych::fa(
          data, nfactors=nfactors, rotate=rotation,
          fm=method, max.iter=max.iter)
      }
      if(kaiser & nfactors>1) {
        Rotation = rotation
        if(rotation=="varimax") Rotation = "Varimax"  # GPArotation::Varimax
        if(rotation=="promax") Rotation = "Promax"  # psych::Promax
        efak = psych::kaiser(efa, rotate=Rotation)
        loadings = efak$loadings
      } else {
        efak = NULL
        loadings = efa$loadings
      }
    })
  })

  # print
  analysis = ifelse(method=="pca",
                    "Principal Component Analysis",
                    "Exploratory Factor Analysis")
  tag = ifelse(method=="pca", "Component", "Factor")
  Print("
  \n
  <<cyan {analysis}>>

  Summary:
  Total Items: {nitems}
  Scale Range: {min(data)} ~ {max(data)}
  Total Cases: {n.total}
  Valid Cases: {n.valid} ({100*n.valid/n.total:.1}%)

  Extraction Method:
  - {Method}
  Rotation Method:
  - {Method.Rotation}

  KMO and Bartlett's Test:
  - Kaiser-Meyer-Olkin (KMO) Measure of Sampling Adequacy: MSA = {kmo:.{digits}}
  - Bartlett's Test of Sphericity: Approx. {p(chi2=btl$chisq, df=btl$df)}
  ")

  # total variance explained
  SS.loadings = colSums(loadings^2)
  eigen = data.frame(
    Eigenvalue = eigen.value,
    PropVar0 = 100*(eigen.value/nitems),
    CumuVar0 = cumsum(100*(eigen.value/nitems)),
    SS.Loading = c(SS.loadings, rep(NA, nitems-nfactors)),
    PropVar1 = c(100*SS.loadings/nitems, rep(NA, nitems-nfactors)),
    CumuVar1 = c(cumsum(100*SS.loadings/nitems), rep(NA, nitems-nfactors)))
  row.names(eigen) = paste(tag, 1:nitems)
  names(eigen) = c("Eigenvalue", "Variance %", "Cumulative %",
                   "SS Loading", "Variance %", "Cumulative %")
  cat("\n")
  print_table(eigen, digits=digits,
              title="Total Variance Explained:")

  # factor/component loadings
  loadings = as.data.frame(unclass(loadings))
  names(loadings) = paste(tag, 1:nfactors)
  if(sort.loadings) {
    # sort items by the size of their largest absolute loading
    loadings = loadings[order(-apply(abs(loadings), 1, max)),, drop=FALSE]
  }
  if(hide.loadings>0)
    loadings[as.matrix(abs(loadings))<hide.loadings] = NA
  loadings$Communality = efa$communality[row.names(loadings)]
  # loadings$Uniqueness = efa$uniquenesses
  info1 = ifelse(rotation!="none" & nfactors>1, " (Rotated)", "")
  info2 = ifelse(sort.loadings, " (Sorted by Size)", "")
  cat("\n")
  print_table(loadings, digits=digits,
              title=Glue("{tag} Loadings{info1}{info2}:"))
  Print("
  Communality = Sum of Squared (SS) Factor Loadings
  (Uniqueness = 1 - Communality)
  \n
  ")
  if(!is.null(file))
    print_table(loadings, digits=digits, file=file,
                title=Glue("{tag} Loadings{info1}{info2}"),
                note=Glue("Extraction Method: {Method}.</p><p>Rotation Method: {Method.Rotation}."))

# scree plot
  dp = data.frame(
    Type = "Data",
    Component = 1:length(eigen.value),
    Eigenvalue = eigen.value)
  if(!is.null(eigen.parallel)) {
    dp = rbind(dp, data.frame(
      Type = "Parallel (Simulation)",
      Component = 1:length(eigen.parallel),
      Eigenvalue = eigen.parallel))
  }
  Type = Component = Eigenvalue = NULL
  p = ggplot(dp, aes(x=Component, y=Eigenvalue, color=Type, fill=Type)) +
    geom_hline(yintercept=min.eigen, linetype=2) +
    geom_path(size=1) +
    geom_point(size=2.5, shape=21) +
    scale_y_continuous(limits=c(0, ceiling(max(eigen.value)))) +
    scale_color_manual(values=c("black", "grey50")) +
    scale_fill_manual(values=c("grey50", "grey90")) +
    labs(x=tag, title="Scree Plot") +
    theme_bruce() +
    theme(legend.position=c(0.85, 0.75))
  if(plot.scree) {
    try({
      plot.error = TRUE
      print(p)
      plot.error = FALSE
    }, silent=TRUE)
    if(plot.error) {
      warning = Glue("
      Plot is NOT successfully displayed in the RStudio Plots Pane.
      Please check if the Plots Pane of your RStudio is too small.
      You should enlarge the Plots Pane (and/or clear all plots).")
      warning(warning)
      cat("\n")
    }
  }

  # jmv::efa(data, vars=eval(expand_vars(vartext)),
  #          nFactorMethod=method,  # "eigen", "parallel", "fixed"
  #          extraction=extraction,  # "pa", "ml", "minres"
  #          rotation=rotation,  # "none", "varimax", "quartimax", "promax", "oblimin", "simplimax"
  #          minEigen=1,
  #          nFactors=nFactors,
  #          screePlot=TRUE, eigen=TRUE,
  #          factorCor=TRUE, factorSummary=TRUE, modelFit=TRUE,
  #          kmo=TRUE, bartlett=TRUE)

  invisible(list(
    result=efa,
    result.kaiser=efak,
    extraction.method=Method,
    rotation.method=Method.Rotation,
    eigenvalues=eigen,
    scree.plot=p))
}

#' @describeIn EFA Principal Component Analysis - a wrapper of \code{EFA(..., method="pca")}
#' @export
PCA = function(..., method="pca") { EFA(..., method=method) }
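
## Usage sketch (assumed data): the following two calls are equivalent:
## PCA(psych::bfi, "E", 1:5)
## EFA(psych::bfi, "E", 1:5, method="pca")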

parallel_analysis = function(nrow, ncol, niter=20) {
  sim.eigen = lapply(1:niter, function(x) {
    sim.data = matrix(rnorm(nrow*ncol), nrow=nrow, ncol=ncol)
    eigen(cor(sim.data), only.values=TRUE)$values
  })
  sim.eigen = t(matrix(unlist(sim.eigen), ncol=niter))
  sim.eigen.CI = apply(sim.eigen, 2, function(x) quantile(x, 0.95))
  return(sim.eigen.CI)
}


## Expand multiple variables with complex string formats
## Input: "X[1:5] + Y[c(1,3)] + Z"
## Output: c("X1", "X2", "X3", "X4", "X5", "Y1", "Y3", "Z")
expand_vars = function(vartext) {
  vartexts = gsub(" ", "", strsplit(vartext, "\\+")[[1]])
  vars = c()
  for(vartext.i in vartexts) {
    if(grepl("\\[|\\]", vartext.i)==TRUE) {
      vars.i = eval(parse(text=paste0("paste0('", gsub("\\]", ")", gsub("\\[", "',", vartext.i)))))
    } else {
      vars.i = vartext.i
    }
    vars = c(vars, vars.i)
  }
  return(vars)
}


## CFA model formula transformation
modelCFA.trans = function(style=c("jmv", "lavaan"), model, highorder="") {
  # model: free-style input
  model = gsub("^\\s+|\\s+$", "", model)
  model = strsplit(gsub(" ", "", strsplit(model, "(;|\\n)+")[[1]]), "=~")

  # jmv style
  model.jmv = list()
  for(i in 1:length(model)) {
    var = model[[i]][[2]]
    vars = expand_vars(var)
    model.jmv[[i]] = list(label=model[[i]][[1]],
                          vars=vars)
  }

  # lavaan style
  model.lav = c()
  for(i in 1:length(model.jmv)) {
    model.i = paste(model.jmv[[i]]$label,
                    paste(model.jmv[[i]]$vars, collapse=" + "),
                    sep=" =~ ")
    model.lav = c(model.lav, model.i)
  }
  model.lav = paste(model.lav, collapse="\n")

  # high-order CFA (only for lavaan)
  factors = sapply(model.jmv, function(x) x$label)
  if(highorder!="")
    model.lav = paste(model.lav,
                      paste(highorder, "=~",
                            paste(factors, collapse=" + ")),
                      paste(highorder, "~~", highorder),
                      sep="\n")

  # output
  if(style=="jmv") return(model.jmv)
  if(style=="lavaan") return(model.lav)
}
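
## Sketch of the transformation above (assumed input):
## modelCFA.trans("lavaan", "Visual =~ x[1:3]; Speed =~ x7 + x8")
## #> "Visual =~ x1 + x2 + x3\nSpeed =~ x7 + x8"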

#' Confirmatory Factor Analysis (CFA).
#'
#' An extension of \code{\link[lavaan:cfa]{lavaan::cfa()}}.
#'
#' @inheritParams %%COMPUTE%%
#' @param model Model formula. See examples.
#' @param estimator The estimator to be used
#' (for details, see \link[lavaan:lavOptions]{lavaan options}).
#' Defaults to \code{"ML"}.
#' Can be one of the following:
#' \describe{
#'   \item{\code{"ML"}}{Maximum Likelihood (can be extended to
#'   \code{"MLM"}, \code{"MLMV"}, \code{"MLMVS"}, \code{"MLF"}, or \code{"MLR"}
#'   for robust standard errors and robust test statistics)}
#'   \item{\code{"GLS"}}{Generalized Least Squares}
#'   \item{\code{"WLS"}}{Weighted Least Squares}
#'   \item{\code{"ULS"}}{Unweighted Least Squares}
#'   \item{\code{"DWLS"}}{Diagonally Weighted Least Squares}
#'   \item{\code{"DLS"}}{Distributionally-weighted Least Squares}
#' }
#' @param highorder High-order factor. Defaults to \code{""}.
#' @param orthogonal Defaults to \code{FALSE}. If \code{TRUE}, all covariances among latent variables are set to zero.
#' @param missing Defaults to \code{"listwise"}. Alternative is \code{"fiml"} ("Full Information Maximum Likelihood").
## @param CI \code{TRUE} or \code{FALSE} (default), provide confidence intervals for the model estimates.
## @param MI \code{TRUE} or \code{FALSE} (default), provide modification indices for the parameters not included in the model.
#' @param digits Number of decimal places of output. Defaults to \code{3}.
#' @param file File name of MS Word (\code{.doc}).
#'
#' @return
#' A list of results returned by \code{\link[lavaan:cfa]{lavaan::cfa()}}.
#'
#' @seealso \code{\link{Alpha}}, \code{\link{EFA}}, \code{\link{lavaan_summary}}
#'
#' @examples
#' \donttest{data.cfa=lavaan::HolzingerSwineford1939
#' CFA(data.cfa, "Visual =~ x[1:3]; Textual =~ x[c(4,5,6)]; Speed =~ x7 + x8 + x9")
#' CFA(data.cfa, model="
#'     Visual =~ x[1:3]
#'     Textual =~ x[c(4,5,6)]
#'     Speed =~ x7 + x8 + x9
#'     ", highorder="Ability")
#'
#' data.bfi = na.omit(psych::bfi)
#' CFA(data.bfi, "E =~ E[1:5]; A =~ A[1:5]; C =~ C[1:5]; N =~ N[1:5]; O =~ O[1:5]")
#' }
#' @export
CFA = function(
    data,
    model="A =~ a[1:5]; B =~ b[c(1,3,5)]; C =~ c1 + c2 + c3",
    estimator="ML",
    highorder="", orthogonal=FALSE, missing="listwise",
    # CI=FALSE, MI=FALSE,
    digits=3,
    file=NULL
) {
  # model.jmv = modelCFA.trans("jmv", model)
  model.lav = modelCFA.trans("lavaan", model, highorder)
  # if(orthogonal==TRUE | highorder!="") style = "lavaan"

  cat("\n")
  Print("<<cyan Model Syntax (lavaan):>>")
  cat(model.lav)
  cat("\n")

  # # jmv style
  # if("jmv" %in% style) {
  #   fit.jmv = jmv::cfa(data=data, factors=model.jmv,
  #                      resCov=NULL,
  #                      constrain="facVar", # or "facInd"
  #                      # 'facVar' fixes the factor variances to 1
  #                      # 'facInd' fixes each factor to the scale of its first indicator
  #                      ci=CI, mi=MI, # modification indices
  #                      stdEst=TRUE, resCovEst=TRUE,
  #                      # pathDiagram=plot,
  #                      fitMeasures=c("cfi", "tli", "rmsea", "srmr", "aic", "bic"),
  #                      miss=missing) # fiml (default), listwise
  #   cat("\n#### jamovi style output ####\n")
  #   print(fit.jmv)
  # }

  # lavaan style
  fit.lav = lavaan::cfa(model=model.lav,
                        data=data,
                        estimator=estimator,
                        std.lv=TRUE,
                        # TRUE: fixing the factor residual variances to 1
                        # FALSE: fixing the factor loading of the first indicator to 1
                        orthogonal=orthogonal,
                        missing=missing) # fiml, listwise (default)
  # cat("\n#### lavaan output ####\n\n")
  lavaan_summary(fit.lav, ci="raw", digits=digits, file=file)
  # lavaan::summary(fit.lav,
  #                 fit.measures=TRUE,
  #                 standardized=TRUE,
  #                 ci=CI,
  #                 modindices=MI)
  # if(MI) print(lavaan::modificationIndices(fit.lav))
  # if(plot) semPlot::semPaths(fit.lav, "std", curveAdjacent=TRUE,
  #                            style="lisrel", nDigits=2, edge.label.cex=1)

  Print("Estimator: {estimator}")
  cat("\n")

  invisible(fit.lav)
}
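
## Usage sketch (assumed data): robust ML estimation with FIML for missing data:
## CFA(lavaan::HolzingerSwineford1939,
##     "Visual =~ x[1:3]; Textual =~ x[4:6]; Speed =~ x[7:9]",
##     estimator="MLR", missing="fiml")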

