R/esem.R
In psych: Procedures for Psychological, Psychometric, and Personality Research

#created Sept 4, 2017 to try to do exploratory sem by factoring sets 1 and 2
#and then linking the two sets
#slightly improved December 15, 2018 to label the factors better
#added cancorDiagram which is functionally just a call to esemDiagram
#added the smooth parameter 3/19/25 
"esem" <- function(r,varsX,varsY,nfX=1,nfY=1,n.obs=NULL,fm="minres",rotate="oblimin",
rotateY="oblimin",plot=TRUE, cor="cor", use="pairwise",weight=NULL,smooth=TRUE,...) {


if(is.null(colnames(r))) colnames(r) <- rownames(r) <- paste0("V",1:ncol(r))

#vars <- order(c(vars1,vars2))
 cl <- match.call()
 if(is.numeric(varsX)) varsX <- colnames(r)[varsX]
 if(is.numeric(varsY)) varsY <- colnames(r)[varsY] 
vars <- c(varsX,varsY)
if(is.null(n.obs)) n.obs <- NA

if(!isCorrelation(r)) {#find the correlations
n.obs <- nrow(r) 
r <- r[,vars]  #we organize the data to be just the ones we want, that is, ignore variables not included in the model
   switch(cor, 
       cor = {r <- cor(r,use=use)},
       cov = {r <- cov(r,use=use) 
              covar <- TRUE},
       wtd = { r <- cor.wt(r,w=weight)$r},
       tet = {r <- tetrachoric(r)$rho},
       poly = {r <- polychoric(r)$rho},
       mixed = {r <- mixedCor(r,use=use)$rho},
       Yuleb = {r <- YuleCor(r,,bonett=TRUE)$rho},
       YuleQ = {r <- YuleCor(r,1)$rho},
       YuleY = {r <- YuleCor(r,.5)$rho } 
       )
       }
#varnames <- colnames(r)[vars]
varnames <- vars
rownames(r) <- colnames(r)
R <- r[varnames,varnames]  #This reorganizes R so that it is the order of the selected variables
 nX <- length(varsX)
 nY <-length(varsY)
 df1 <- nX *(nX-1)/2 - nfX * nX + nfX * (nfX-1)/2
 df2 <- nY *( nY-1)/2 - nfY * nY + nfY * (nfY-1)/2 

  f1 <- fa.extend(R,nfX,ov=varsX,ev=varsY,fm=fm,rotate=rotate,smooth=smooth,...)
 
  loads1 <- f1$loadings[varnames,,drop=FALSE]
  
  
  S1 <- f1$Structure[varnames,,drop=FALSE]
 if(!is.null(ncol(S1)))  colnames(loads1) <- colnames(S1) <- paste0("X",1:ncol(loads1))
  Phi1 <- f1$Phi
  f2 <- fa.extend(R,nfY,ov=varsY,ev=varsX,fm=fm,rotate=rotateY,...)

  loads2  <- f2$loadings[varnames,,drop=FALSE]
  S2 <- f2$Structure[varnames,,drop=FALSE]
   if(!is.null(ncol(S2))) {colnames(loads2) <- colnames(S2) <- paste0("Y",1:ncol(loads2))}
    Phi2 <- f2$Phi
  f12 <- cbind(loads1,loads2)
  S12 <- cbind(S1,S2)
  S12 <- as.matrix(S12)
 Phi <- t(S12) %*% solve(R) %*% S12
 loadsX <- f1$loadings[varsX,,drop=FALSE]
 loadsY <- f2$loadings[varsY,,drop=FALSE]
 colnames(loadsX) <- paste0("X",1:ncol(loadsX)) 
 colnames(loadsY) <- paste0("Y",1:ncol(loadsY)) 

# loadsX <- f1$loadings[colnames(R)[varsX],,drop=FALSE]
# loadsY <- f2$loadings[colnames(R)[varsY],,drop=FALSE]
 diag(Phi) <- 1
 #now, a kludge to make it better -- but not actually, so dropped 
if(FALSE){if(!is.null(Phi1)) Phi[1:nfX,1:nfX] <- Phi1
if(!is.null(Phi2))  Phi[(nfX+1):(nfX+nfY),(nfX+1):(nfX+nfY)] <- Phi2
}
 
  result <- esem.stats(R,f12,S12,Phi,n.obs=n.obs) 
  result$n.obs <- n.obs
  result$loadings <- f12
  result$Structure <- S12 
  result$loadsX <- loadsX
  result$loadsY <- loadsY
  result$PhiX <- Phi1
  result$PhiY <- Phi2
  result$esem.dof <- df1 + df2
  result$df1 <- df1
  result$df2 <- df2
  result$fm <- fm
  result$fx <- f1$fo
 result$fy <- f2$fo
 result$Phi <- Phi

 result$Call <- cl
 class(result) <- c("psych","esem")
 if(plot) esem.diagram(result)
 
 return(result)}
 
 
 "esem.stats" <- function(r,f,s,phi,n.obs=NA) {
 r <- as.matrix(r)
 n <- ncol(r)
 nfactors <- ncol(f)
 if(is.null(nfactors)) nfactors <- 1
 Sp <- s %*% solve(phi)  #this is not quite f, but is better
 model <- Sp %*% t(s)  #this works better than  model <- f %*%  t(s)

residual <- r - model
result <- list()
 result$communality <- diag(model)
 result$sumsq <- diag(t(s) %*% (Sp))

 r2 <- sum(r*r)
 rstar2 <- sum(residual*residual)
  #Alternatively, we can recognize that we are not estimating all of these
  #this is results$esem.dof
 result$dof <- dof <-  n * (n-1)/2 - n * nfactors + (nfactors *(nfactors-1)/2)
 
r2.off <- r2 - tr(r)
diag(residual) <- 0
rstar.off <- sum(residual^2)
 
 #   
#   m.inv.r <- try(solve(model,r),silent=TRUE) #modified Oct 30, 2009 to perhaps increase precision -- #modified 2015/1/2 to use try
#       
#      if(inherits(m.inv.r,"try-error") {warning("the model inverse times the r matrix is singular, replaced with Identity matrix which means fits are wrong")
#            sum}
#     if(is.na(n.obs)) {result$n.obs=NA 
#     			      result$PVAL=NA} else {result$n.obs=n.obs}
 m.inv.r <- diag(1,n,n)   #this is because the m.inv.r is not estimated 
  #  result$dof <-  n * (n-1)/2 - n * nfactors + (nfactors *(nfactors-1)/2)
   
    result$objective <- sum(diag((m.inv.r))) - log(det(m.inv.r)) -n   #this is what Tucker Lewis call F
result$objective <- rstar2   #because the normal way doesn't work
#     if(is.infinite(result$objective)) {result$objective <- rstar2
#                                        message("The determinant of the smoothed correlation was zero.\nThis means the objective function is not defined.\nChi square is based upon observed residuals.")}
    result$criteria <- c("objective"=result$objective,NA,NA)
   
     if (!is.na(n.obs)) {result$STATISTIC <-  chisq <- result$objective * ((n.obs-1) -(2 * n + 5)/6 -(2*nfactors)/3) #from Tucker  and from factanal
    # if (!is.na(n.obs)) {result$STATISTIC <-  chisq <- result$objective * ((n.obs-1)) #from Fox and sem
          if(!is.nan(result$STATISTIC)) if (result$STATISTIC <0) {result$STATISTIC <- 0}  
   			if (result$dof > 0) {result$PVAL <- pchisq(result$STATISTIC, result$dof, lower.tail = FALSE)} else {result$PVAL <- NA}
    		}
      
 result$ENull <- r2.off * n.obs  #the empirical null model
 result$null.dof <- n * (n-1)
  result$chi <- rstar.off * n.obs  #this is the empirical chi square
  result$rms <- sqrt(rstar.off/(n*(n-1)))  #this is the empirical rmsea                      
  result$nh <- n.obs
if (result$dof > 0) {result$EPVAL <- pchisq(result$chi, result$dof, lower.tail = FALSE)
result$crms <- sqrt(rstar.off/(2*result$dof) )
 result$EBIC <- result$chi - result$dof * log(n.obs) 
 result$ESABIC <- result$chi - result$dof * log((n.obs+2)/24)
  result$RMSEA <- RMSEA(result$chi,result$dof,n.obs) } else {result$EPVAL <- NA

 result$crms <- NA
result$EBIC <- NA
 result$ESABIC <- NA}
                                 
     result$fit <-1-rstar2/r2
    result$fit.off <- 1-rstar.off/r2.off
    result$sd <- sd(as.vector(residual)) #this is the non-sample size weighted root mean square residual
    result$factors <- nfactors
    
    result$complexity <- (apply(f,1,function(x)  sum(x^2)))^2/apply(f,1,function(x)sum(x^4))
  result$residual <- residual
    diag(model) <- diag(r) 
    return(result) 
 }
   
  
  "print_psych.esem" <-function(x,digits=2,short=TRUE,cut=NULL, suppress.warnings=TRUE,...)  {

 cat("Exploratory Structural Equation Modeling  Analysis using method = ",x$fm )
   cat("\nCall: ")
   print(x$Call)
   nitems <- nrow(x$loadings)
   nfactors <- ncol(x$loadings)
  
   
   cat("\nFor the 'X' set:\n")
  x$loadsX <- as.matrix(x$loadsX)
   print(round(x$loadsX,digits=digits))
 if(!short) { if(!is.null(ncol(x$PhiX))){   cat("\nWith factor intercorrelations of \n")
 print(round(x$PhiX,digits=digits)) }
   }
  
   
     cat("\nFor the 'Y' set:\n")
     x$loadsY <- as.matrix(x$loadsY)
   print(round(x$loadsY,digits=digits))
   if(!short) {
    if(!is.null(ncol(x$PhiY))) {  cat("\nWith factor intercorrelations of \n")
    print(round(x$PhiY,digits=digits))
    }
    }
   
  if(!short) {
     cat('\nStandardized  pattern coefficients on the X and Y sets using Factor Extension\n')
     L <- cbind(x$loadings,x$communality,1-x$communality)
     
     colnames(L)[(ncol(L)-1):ncol(L)] <- c("h2","u2")
   #  print(round(x$loadings,digits=digits),round(x$communality,digits=digits))
     print(round(L,digits=digits))
     
      varex <- rbind("SS loadings" =   x$sumsq)
        varex <- rbind(varex, "Proportion Var" =   x$sumsq/nitems)
           varex <- rbind(varex, "Cumulative Var"=  cumsum( x$sumsq/nitems))
                              varex <- rbind(varex, "Cum. factor Var"=  cumsum( x$sumsq/sum( x$sumsq)))
    cat("\n") 
    print(round(varex, digits))
     

   }
   
   
 #     if(!is.null(x$complexity)) cat("\nMean item complexity = ",round(mean(x$complexity),1))     
#        objective <- x$criteria[1]
#   
   cat("\nCorrelations between the X and Y sets.\n")
   print(round(x$Phi,digits=digits))


    if(!is.null(x$null.dof)) {cat("\nThe degrees of freedom for the null model are ",x$null.dof, " and the empirical chi square  function was ",round(x$ENull,digits),...)}
    
  
    cat("\nThe degrees of freedom for the model are",x$dof," and the empirical chi square function was ",round(x$chi,digits),"\n"," with prob < ", signif(x$EPVAL,digits),"\n" ,...) 
     
     
    if(!is.null(x$rms)) {cat("\nThe root mean square of the residuals (RMSR) is ", round(x$rms,digits),"\n") }
    if(!is.null(x$crms)) {cat("The   corrected root mean square of the residuals is ", round(x$crms,digits),"\n",...) }
    
  
     if((!is.null(x$chi)) && (!is.na(x$chi))) {cat(" with the empirical chi square ", round(x$chi,digits), " with prob < ", signif(x$EPVAL,digits),"\n" ,...)  }
   	
   	 if(!is.na(x$n.obs)) {cat("The total number of observations was ",x$n.obs, " with fitted Chi Square = ",round(x$STATISTIC,digits), " with prob < ", signif(x$PVAL,digits),"\n",...)}
  
     
   	if(!is.null(x$TLI)) cat("\nTucker Lewis Index of factoring reliability = ",round(x$TLI,digits+1))
   	if(!is.null(x$RMSEA)) {cat("\nRMSEA index = ",round(x$RMSEA[[2]],digits+1), " and the", (1- x$RMSEA[[4]])*100,"% confidence intervals are ",round(x$RMSEA[[1]],digits+1),round(x$RMSEA[[3]],digits+1),...)  }
if(!is.null(x$EBIC)) {cat("\nEmpirical BIC = ",round(x$EBIC,digits))}
if(!is.null(x$ESABIC)) {cat("\nESABIC = ",round(x$ESABIC,digits))}


if(!is.null(x$fit)) cat("\nFit based upon off diagonal values =", round(x$fit.off,digits))
 	
 	if(short)  cat("\nTo see the item loadings for the X and Y sets combined, and the associated fa output, print with  short=FALSE.\n")
 	 
# cat("\nTo	 report the factor analysis of the X and Y sets with their associated statistics, run  fa on the X and Y sets separately.")
  
  }
  
  
  "interbattery" <- function(r, varsX, varsY, nfX = 1, nfY = 1, n.obs = NULL,cor = "cor", use = "pairwise",weight=NULL) {
   cl <- match.call()
vars <- c(varsX,varsY)
if(is.null(n.obs)) n.obs <- NA

if(ncol(r)  < nrow(r)) {#find the correlations
n.obs <- nrow(r) 
   switch(cor, 
       cor = {r <- cor(r,use=use)},
       cov = {r <- cov(r,use=use) 
              covar <- TRUE}, 
       tet = {r <- tetrachoric(r)$rho},
       poly = {r <- polychoric(r)$rho},
       mixed = {r <- mixedCor(r,use=use)$rho},
       Yuleb = {r <- YuleCor(r,,bonett=TRUE)$rho},
       YuleQ = {r <- YuleCor(r,1)$rho},
       YuleY = {r <- YuleCor(r,.5)$rho } 
       )
       }
varnames <- colnames(r)[vars]
R <- r[varnames,varnames]  #This reorganizes R so that it is the order of the selected 
  r12 <- r[varsX,varsY]
  H1 <- r12 %*% t(r12)
  E1  <- eigen(H1)
  W1 <- E1$vectors[,1:nfX,drop=FALSE]
  gamma1 <- sqrt(E1$values[1:nfX,drop=FALSE])
  A1 <- W1 %*% diag(sqrt(gamma1),ncol=nfX)
  W2 <- t(r12) %*% W1 %*% diag(1/gamma1,ncol=nfX)
  A2 <- W2 %*% diag(sqrt(gamma1))
  As <- colSums(sign(A1))
  flip <- diag(sign(As),ncol=nfX)
  A1 <- A1 %*% flip
    As <- colSums(sign(A2))
  flip <- diag(sign(As),ncol=nfX)
  A2 <- A2 %*% flip
  
  colnames(A1) <- colnames(A2) <- paste0("IB",1:ncol(A1))
  rownames(A1) <- rownames(r12)
  return(list(A1=A1,A2 = A2,loadings=rbind(A1,A2),Call=cl))
  }