# R/glb.R
# In psych: Procedures for Psychological, Psychometric, and Personality Research

# an attempt at finding the worst and best splits, beta is worst split (from ICLUST)

# Estimate reliability bounds from several ways of splitting the items into
# two halves: ICLUST on the correlations (worst split, beta), ICLUST and
# kmeans on the dissimilarities (1 - r)/2, and one- and two-factor solutions.
#
# Arguments:
#   r    a square correlation or covariance matrix (matrix or data frame), or
#        a non-square data matrix / data frame, in which case pairwise
#        correlations are found first.
#   key  optional vector of item signs (presumably 1 / -1). It is applied as
#        diag(key) %*% m %*% diag(key) to the dissimilarity matrix m only,
#        and its sign is appended to the item names of m.
#
# Value: a list of class c("psych", "glb") with elements
#   glb.fa       the list returned by glb.fa(r)
#   nf           number of factors used by glb.fa
#   beta         split-half estimate from the ICLUST split of r
#   beta.factor  split-half estimate from the two-factor split
#   alpha.pc     1 - 1/(first eigenvalue of r)
#   glb.max      maximum of glb.IC, glb.Km and glb.Fa
#   glb.IC, glb.Km, glb.Fa  estimates from the ICLUST-on-m, kmeans and
#                alternating one-factor splits
#   r.smc        (sum(r) + sum(smc(r)) - sum(diag(r))) / sum(r)
#   tenberge     result of tenberge(r)
#   keys         nvar x 10 matrix holding the five two-way splits
#   Call         the matched call
"glb" <-
function(r,key=NULL) {
  cl <- match.call()
  nvar <- dim(r)[2]

  # make sure it is a correlation matrix, not a covariance or data matrix
  if (dim(r)[1] != dim(r)[2]) {
    r <- cor(r, use = "pairwise")
  } else {
    # cov2cor() only accepts a numeric matrix, so coerce square data frames
    # first (the same guard glb.fa uses)
    if (!is.matrix(r)) r <- as.matrix(r)
    r <- cov2cor(r)
  }
  if (is.null(colnames(r))) {
    rownames(r) <- colnames(r) <- paste0("V", seq_len(nvar))
  }

  # dissimilarity matrix used for the "best split" clusterings
  m <- (1 - r) / 2
  diag(m) <- 1
  m.names <- colnames(m)

  if (!is.null(key)) {
    key <- as.vector(key)               # allow a one-column matrix as key
    m <- diag(key) %*% m %*% diag(key)  # flip items if we choose to do so
  } else {
    key <- rep(1, nvar)
  }
  # tag flipped items with "-" in their names; unflipped items keep their name
  signkey <- strtrim(key, 1)
  signkey[signkey == "1"] <- ""
  m.names <- paste0(m.names, signkey)
  colnames(m) <- rownames(m) <- m.names   # %*% dropped the dimnames

  # worst split: two clusters of the correlations
  worst <- ICLUST(r, 2, plot = FALSE, reliability = FALSE)

  # best split: two clusters of the dissimilarities
  best <- ICLUST(m, 2, plot = FALSE, SMC = FALSE, reliability = FALSE)
  keys <- best$p.sorted$cluster

  # kmeans split of the dissimilarities, coded as a 0/1 indicator matrix
  best.kmeans <- kmeans(m, 2, nstart = 10)
  keys.kmean <- matrix(0, nrow = nvar, ncol = 2)
  keys.kmean[cbind(seq_len(nvar), best.kmeans$cluster)] <- 1

  # one factor solution: deal items alternately to the two halves in order
  # of their loading
  f1 <- fa(r)
  load <- f1$loadings
  ord.load <- order(load)
  key.fa <- matrix(0, nrow = nvar, ncol = 2)
  key.fa[ord.load, 1] <- seq_len(nvar) %% 2
  key.fa[, 2] <- 1 - key.fa[, 1]

  # two factor solution: each item goes with the factor it loads higher on
  f2 <- fa(r, 2, SMC = FALSE)
  load <- f2$loadings
  key.fa2 <- matrix(0, nrow = nvar, ncol = 2)
  key.fa2[, 1] <- (load[, 1] > load[, 2]) + 0
  key.fa2[, 2] <- 1 - key.fa2[, 1]

  e <- eigen(r)$values[1]
  alpha.pc <- 1 - 1 / e

  # based upon as many factors as possible, sort of omega total
  glb.fa.est <- glb.fa(r)
  glb.nf <- glb.fa.est$numf

  keys <- cbind(worst$p.sorted$cluster, keys, keys.kmean, key.fa, key.fa2)
  colnames(keys) <- c("IC1","IC2","ICr1","ICr2","K1","K2","F1","F2","f1","f2")

  # correlations between the half scores defined by each column of keys
  covar <- t(keys) %*% r %*% keys    #matrix algebra is our friend
  var <- diag(covar)
  sd.inv <- 1 / sqrt(var)
  ident.sd <- diag(sd.inv, ncol = length(sd.inv))
  cluster.correl <- ident.sd %*% covar %*% ident.sd   #convert covariances to correlations

  # step each half-half correlation up to full length: 2r/(1+r)
  beta <- cluster.correl[2,1] * 2 / (1 + cluster.correl[2,1])
  glbIC <- cluster.correl[3,4] * 2 / (1 + cluster.correl[3,4])
  glb2 <- cluster.correl[5,6] * 2 / (1 + cluster.correl[5,6])
  glb3 <- cluster.correl[7,8] * 2 / (1 + cluster.correl[7,8])
  beta.fa <- cluster.correl[9,10] * 2 / (1 + cluster.correl[9,10])
  glb.max <- max(glbIC, glb2, glb3)

  sum.smc <- sum(smc(r))
  sum.r <- sum(r)
  gamma <- (sum.r + sum.smc - sum(diag(r))) / sum.r
  tenberg <- tenberge(r)

  result <- list(glb.fa = glb.fa.est, nf = glb.nf, beta = beta,
                 beta.factor = beta.fa, alpha.pc = alpha.pc, glb.max = glb.max,
                 glb.IC = glbIC, glb.Km = glb2, glb.Fa = glb3, r.smc = gamma,
                 tenberge = tenberg,
                 keys = keys, Call = cl)
  class(result) <- c("psych","glb")
  return(result)
}
#moved to print.psych
# print.psych.glb <- function(x,digits=2,short=TRUE) {
# cat("Call: ") 
#     print(x$Call)
# cat("\nEstimates of the Greatest Lower Bound for reliability, based on factor and cluster models")
# 
#  cat("\nGLB estimated from factor based communalities = ", round(x$glb.fa$glb,digits) ,"with ",x$nf, " factors.")
#  cat("\nUse glb.fa to see more details ")
#  cat("\n Various estimates based upon splitting the scale into two (see keys for the various splits)")
#  cat("\nBeta = ", round(x$beta,digits) , "\nBeta fa",round(x$beta.fa,digits), " This is an estimate of the worst split half reliability")  
#  cat("\nKmeans clusters for best split ",round(x$glb.Km,digits=2))
# cat("\nCluster based estimates \nglb.IC = ",round(x$glb.IC,digits))
# cat("\nglb.max ", round(x$glb.max,digits),"Is the maximum of these estimates")
# cat("\n alpha-PC = ", round(x$alpha.pc,digits),"An estimate of alpha based on eignvalues")   #\nglb.IC) = " ,round(x$glb.IC,digits)) #"\nL5 = ", round(x$lambda.5,digits), "\nL6 (smc) = " ,round(x$lambda.6,digits), "\n")
#  cat("\nTenBerge bounds \nmu0 = ",round(x$tenberge$mu0,digits), "mu1 = ", round(x$tenberge$mu1,digits), "mu2 = " ,round(x$tenberge$mu2,digits), "mu3 = ",round(x$tenberge$mu3,digits) , "\n")
#  cat("\nestimated greatest lower bound based upon splitHalf = ", round(x$glb.Fa,digits),"\n")
# 
# if(!short) {cat("\n Various ways of keying the results\n")
#     x$keys} else {cat("\nUse short = FALSE to see the various ways of splitting the scale")}
#     
# }

#find the communalities based upon as many factors as possible
# Estimate the greatest lower bound of reliability by replacing the diagonal
# of the correlation matrix with communalities from a factor solution that
# uses as many factors as have positive eigenvalues.
#
# Arguments:
#   r    a square correlation or covariance matrix (matrix or data frame), or
#        a non-square data matrix / data frame, in which case pairwise
#        correlations are found first.
#   key  optional vector of item signs (presumably 1 / -1); r is replaced by
#        diag(key) %*% r %*% diag(key) before factoring.
#
# Value: a list with
#   glb          sum of r with communalities on the diagonal, over sum(r)
#   communality  the communalities from the nf-factor solution
#   numf         the number of factors used
#   Call         the matched call
"glb.fa" <- 
function(r,key= NULL){

  cl <- match.call()   #for eventual fancy printing with the call listed

  # make sure it is a correlation matrix, not a covariance or data matrix
  if (dim(r)[1] != dim(r)[2]) {
    r <- cor(r, use = "pairwise")
  } else {
    if (!is.matrix(r)) r <- as.matrix(r)
    r <- cov2cor(r)
  }
  nv <- dim(r)[1]         #how many variables
  if (is.null(colnames(r))) {
    rownames(r) <- colnames(r) <- paste0("V", seq_len(nv))
  }

  if (!is.null(key)) {
    key <- as.vector(key)
    item.names <- dimnames(r)
    r <- diag(key) %*% r %*% diag(key)   #flip items if we specify the key
    # %*% with the unnamed diag(key) drops the dimnames; put them back so
    # the variable labels survive into the factor solution
    dimnames(r) <- item.names
  }

  f1 <- fa(r)   #factor it: first find the eigen values of the factor model
  nf <- sum(f1$values > 0, na.rm = TRUE)   #how many are real
  df <- nv * (nv - 1) / 2 - nf * nv + nf * (nf - 1) / 2   #check for degrees of freedom
  # NOTE(review): only a single factor is dropped when df < 0, so df may
  # still be negative afterwards -- confirm this is intended
  if (df < 0) nf <- nf - 1

  fn <- fa(r, nf, rotate = "none")
  rr <- r
  diag(rr) <- fn$communality   #based upon nf factors
  glb <- sum(rr) / sum(r)
  return(list(glb = glb, communality = fn$communality, numf = nf, Call = cl))
}