#' EFA.Comp.Data
#'
#'
#' @aliases EFA.Comp.Data
#' @param Data N (sample size) x k (number of variables) data matrix
#' @param F.Max largest number of factors to consider
#' @param N.Pop size of finite populations of comparison data (default = 10,000 cases)
#' @param N.Samples number of samples drawn from each population (default = 500)
#' @param Alpha alpha level when testing statistical significance of improvement with add'l factor (default = .30)
#' @param Graph whether to plot the fit of eigenvalues to those for comparison data (default = F)
#' @param Spearman whether to use Spearman rank-order correlations rather than Pearson correlations (default = F)
#' @references
#' Ruscio, John; Roche, B. (2012). 'Determining the number of factors to retain in an exploratory factor analysis using comparison data of known factorial structure'. Psychological Assessment 24: 282?\200?292.
# \doi{10.1037/a0025697}
#'
#'
#' @export EFA.Comp.Data
#' @examples
#' ?Sample.Commands
EFA.Comp.Data <- function(Data, F.Max, N.Pop = 10000, N.Samples = 500,
Alpha = 0.3, Graph = F, Spearman = F) {
N <- dim(Data)[1]
k <- dim(Data)[2]
if (Spearman)
Cor.Type <- "spearman" else Cor.Type <- "pearson"
cor.Data <- cor(Data, method = Cor.Type)
Eigs.Data <- eigen(cor.Data)$values
RMSR.Eigs <- matrix(0, nrow = N.Samples, ncol = F.Max)
Sig <- T
F.CD <- 1
while ((F.CD <= F.Max) & (Sig)) {
Pop <- GenData(Data, N.Factors = F.CD, N = N.Pop, Cor.Type = Cor.Type)
for (j in 1:N.Samples) {
Samp <- Pop[sample(1:N.Pop, size = N, replace = T), ]
cor.Samp <- cor(Samp, method = Cor.Type)
Eigs.Samp <- eigen(cor.Samp)$values
RMSR.Eigs[j, F.CD] <- sqrt(sum((Eigs.Samp - Eigs.Data) * (Eigs.Samp -
Eigs.Data))/k)
}
if (F.CD > 1)
Sig <- (wilcox.test(RMSR.Eigs[, F.CD], RMSR.Eigs[, (F.CD -
1)], "less")$p.value < Alpha)
if (Sig)
F.CD <- F.CD + 1
}
cat("Number of factors to retain: ", F.CD - 1, "\n")
if (Graph) {
if (Sig)
x.max <- F.CD - 1 else x.max <- F.CD
ys <- apply(RMSR.Eigs[, 1:x.max], 2, mean)
plot(x = 1:x.max, y = ys, ylim = c(0, max(ys)), xlab = "Factor",
ylab = "RMSR Eigenvalue", type = "b", main = "Fit to Comparison Data")
abline(v = F.CD - 1, lty = 3)
}
}
#' Sample.Commands
#'
#' This program shows how to use the main program with two illustrative samples of
#' data. The first sample contains N = 500 cases, k = 12 normally distributed
#' continuous variables, and 3 correlated factors. The sample commands show the
#' correlation matrix and the output of the EFA.Comp.Data program, which correctly
#' identifies 3 factors. The second sample contains N = 500 cases, k = 8 non-normally
#' distributed ordinal variables, and 2 uncorrelated factors. The sample commands show
#' the Pearson correlation matrix, the Spearman correlation matrix, the difference
#' between these matrices, the output of the EFA.Comp.Data program (first using Pearson
#' correlations and then using Spearman correlations). The first run overidentifies
#' the number of factors, but the second correctly identifies 2 factors.
#'
#' @aliases Sample.Commands
#' @references
#' Ruscio, John; Roche, B. (2012). 'Determining the number of factors to retain in an exploratory factor analysis using comparison data of known factorial structure'. Psychological Assessment 24: 282?\200?292.
# \doi{10.1037/a0025697}
#'
#'
#' @export Sample.Commands
#' @examples
#' \dontrun{
#' ?Sample.Commands
#' }
Sample.Commands <- function() {
set.seed(1)
s1 <- rnorm(500)
s2 <- rnorm(500)
s3 <- rnorm(500)
x1 <- s1 + s2 + rnorm(500)
x2 <- s1 + s2 + rnorm(500)
x3 <- s1 + s2 + rnorm(500)
x4 <- s1 + s2 + rnorm(500)
x5 <- s1 + s3 + rnorm(500)
x6 <- s1 + s3 + rnorm(500)
x7 <- s1 + s3 + rnorm(500)
x8 <- s1 + s3 + rnorm(500)
x9 <- s2 + s3 + rnorm(500)
x10 <- s2 + s3 + rnorm(500)
x11 <- s2 + s3 + rnorm(500)
x12 <- s2 + s3 + rnorm(500)
x <- cbind(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12)
print(round(cor(x), 2))
EFA.Comp.Data(Data = x, F.Max = 5, Graph = T)
s1 <- runif(500)
s2 <- runif(500)
x1 <- cut(((s1 + 0.5 * runif(500))/1.5)^0.5, breaks = 5)
x2 <- cut(((s1 + 0.5 * runif(500))/1.5)^0.5, breaks = 5)
x3 <- cut(((s1 + 0.5 * runif(500))/1.5)^2, breaks = 5)
x4 <- cut(((s1 + 0.5 * runif(500))/1.5)^2, breaks = 5)
x5 <- cut(((s2 + 0.5 * runif(500))/1.5)^0.5, breaks = 5)
x6 <- cut(((s2 + 0.5 * runif(500))/1.5)^0.5, breaks = 5)
x7 <- cut(((s2 + 0.5 * runif(500))/1.5)^2, breaks = 5)
x8 <- cut(((s2 + 0.5 * runif(500))/1.5)^2, breaks = 5)
x <- cbind(x1, x2, x3, x4, x5, x6, x7, x8)
print(round(cor(x), 2))
print(round(cor(x), 2), method = "spearman")
print(round(cor(x) - cor(x, method = "spearman"), 2))
win.graph()
EFA.Comp.Data(Data = x, F.Max = 5, Graph = T)
win.graph()
EFA.Comp.Data(Data = x, F.Max = 5, Graph = T, Spearman = T)
}
GenData <- function(Supplied.Data, N.Factors, N, Max.Trials = 5, Initial.Multiplier = 1,
Cor.Type) {
# Steps refer to description in the following article: Ruscio, J., &
# Kaczetow, W. (2008). Simulating multivariate nonnormal data using an
# iterative algorithm. Multivariate Behavioral Research, 43(3),
# 355-381.
# Initialize variables and (if applicable) set random number seed (step
# 1) -------------------------------------
set.seed(1)
k <- dim(Supplied.Data)[2]
Data <- matrix(0, nrow = N, ncol = k) # Matrix to store the simulated data
Distributions <- matrix(0, nrow = N, ncol = k) # Matrix to store each variable's score distribution
Iteration <- 0 # Iteration counter
Best.RMSR <- 1 # Lowest RMSR correlation
Trials.Without.Improvement <- 0 # Trial counter
# Generate distribution for each variable (step 2)
# -------------------------------------------------------------
for (i in 1:k) Distributions[, i] <- sort(sample(Supplied.Data[, i],
size = N, replace = T))
# Calculate and store a copy of the target correlation matrix (step 3)
# -----------------------------------------
Target.Corr <- cor(Supplied.Data, method = Cor.Type)
Intermediate.Corr <- Target.Corr
# Generate random normal data for shared and unique components,
# initialize factor loadings (steps 5, 6) --------
Shared.Comp <- matrix(rnorm(N * N.Factors, 0, 1), nrow = N, ncol = N.Factors)
Unique.Comp <- matrix(rnorm(N * k, 0, 1), nrow = N, ncol = k)
Shared.Load <- matrix(0, nrow = k, ncol = N.Factors)
Unique.Load <- matrix(0, nrow = k, ncol = 1)
# Begin loop that ends when specified number of iterations pass without
# improvement in RMSR correlation --------
while (Trials.Without.Improvement < Max.Trials) {
Iteration <- Iteration + 1
# Calculate factor loadings and apply to reproduce desired correlations
# (steps 7, 8) ---------------------------
Fact.Anal <- Factor.Analysis(Intermediate.Corr, Corr.Matrix = TRUE,
N.Factors = N.Factors, Cor.Type = Cor.Type)
if (N.Factors == 1)
Shared.Load[, 1] <- Fact.Anal$loadings else for (i in 1:N.Factors) Shared.Load[, i] <- Fact.Anal$loadings[,
i]
Shared.Load[Shared.Load > 1] <- 1
Shared.Load[Shared.Load < -1] <- -1
if (Shared.Load[1, 1] < 0)
Shared.Load <- Shared.Load * -1
for (i in 1:k) if (sum(Shared.Load[i, ] * Shared.Load[i, ]) < 1)
Unique.Load[i, 1] <- (1 - sum(Shared.Load[i, ] * Shared.Load[i,
])) else Unique.Load[i, 1] <- 0
Unique.Load <- sqrt(Unique.Load)
for (i in 1:k) Data[, i] <- (Shared.Comp %*% t(Shared.Load))[,
i] + Unique.Comp[, i] * Unique.Load[i, 1]
# Replace normal with nonnormal distributions (step 9)
# ---------------------------------------------------------
for (i in 1:k) {
Data <- Data[sort.list(Data[, i]), ]
Data[, i] <- Distributions[, i]
}
# Calculate RMSR correlation, compare to lowest value, take appropriate
# action (steps 10, 11, 12) --------------
Reproduced.Corr <- cor(Data, method = Cor.Type)
Residual.Corr <- Target.Corr - Reproduced.Corr
RMSR <- sqrt(sum(Residual.Corr[lower.tri(Residual.Corr)] * Residual.Corr[lower.tri(Residual.Corr)])/(0.5 *
(k * k - k)))
if (RMSR < Best.RMSR) {
Best.RMSR <- RMSR
Best.Corr <- Intermediate.Corr
Best.Res <- Residual.Corr
Intermediate.Corr <- Intermediate.Corr + Initial.Multiplier *
Residual.Corr
Trials.Without.Improvement <- 0
} else {
Trials.Without.Improvement <- Trials.Without.Improvement +
1
Current.Multiplier <- Initial.Multiplier * 0.5^Trials.Without.Improvement
Intermediate.Corr <- Best.Corr + Current.Multiplier * Best.Res
}
}
# Construct the data set with the lowest RMSR correlation (step 13)
# --------------------------------------------
Fact.Anal <- Factor.Analysis(Best.Corr, Corr.Matrix = TRUE, N.Factors = N.Factors,
Cor.Type = Cor.Type)
if (N.Factors == 1)
Shared.Load[, 1] <- Fact.Anal$loadings else for (i in 1:N.Factors) Shared.Load[, i] <- Fact.Anal$loadings[,
i]
Shared.Load[Shared.Load > 1] <- 1
Shared.Load[Shared.Load < -1] <- -1
if (Shared.Load[1, 1] < 0)
Shared.Load <- Shared.Load * -1
for (i in 1:k) if (sum(Shared.Load[i, ] * Shared.Load[i, ]) < 1)
Unique.Load[i, 1] <- (1 - sum(Shared.Load[i, ] * Shared.Load[i,
])) else Unique.Load[i, 1] <- 0
Unique.Load <- sqrt(Unique.Load)
for (i in 1:k) Data[, i] <- (Shared.Comp %*% t(Shared.Load))[, i] +
Unique.Comp[, i] * Unique.Load[i, 1]
Data <- apply(Data, 2, scale) # standardizes each variable in the matrix
for (i in 1:k) {
Data <- Data[sort.list(Data[, i]), ]
Data[, i] <- Distributions[, i]
}
# Return the simulated data set (step 14)
# ----------------------------------------------------------------------
return(Data)
}
################################################################################################################
Factor.Analysis <- function(Data, Corr.Matrix = FALSE, Max.Iter = 50, N.Factors = 0,
Cor.Type) {
Data <- as.matrix(Data)
k <- dim(Data)[2]
if (N.Factors == 0) {
N.Factors <- k
Determine <- T
} else Determine <- F
if (!Corr.Matrix)
Cor.Matrix <- cor(Data, method = Cor.Type) else Cor.Matrix <- Data
Criterion <- 0.001
Old.H2 <- rep(99, k)
H2 <- rep(0, k)
Change <- 1
Iter <- 0
Factor.Loadings <- matrix(nrow = k, ncol = N.Factors)
while ((Change >= Criterion) & (Iter < Max.Iter)) {
Iter <- Iter + 1
Eig <- eigen(Cor.Matrix)
L <- sqrt(Eig$values[1:N.Factors])
for (i in 1:N.Factors) Factor.Loadings[, i] <- Eig$vectors[, i] *
L[i]
for (i in 1:k) H2[i] <- sum(Factor.Loadings[i, ] * Factor.Loadings[i,
])
Change <- max(abs(Old.H2 - H2))
Old.H2 <- H2
diag(Cor.Matrix) <- H2
}
if (Determine)
N.Factors <- sum(Eig$values > 1)
return(list(loadings = Factor.Loadings[, 1:N.Factors], factors = N.Factors))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.