R/scoreBy.r
In psych: Procedures for Psychological, Psychometric, and Personality Research

#Find scores from correlations for each subject
#this is a version scoreOverlap adjusted the case of many subjects
#impute missing values?
#The correlations for each subject are found by statsBy
#and are stored as a list of matrices in the r element
#The trick is how to handle missing correlations 
#Various fixes 1/15/22
"scoreBy" <-
function(keys,stats,correct=TRUE,SMC=TRUE,av.r=TRUE,item.smc=NULL,impute=TRUE,select=TRUE,min.n=3,smooth=FALSE) { #function to score clusters according to the key matrix, correcting for item overlap
cl <- match.call() 
  
if(!inherits(stats, "statsBy")) stop("Please run statsBy first.  Make sure that you specified keys before stats.")
if(is.null(stats$r)) stop("I am sorry, but you need to run statsBy with the cors=TRUE option first")
MIN.n <- min.n
Item.smc <- item.smc  #we redefine later, so we need to keep this original value
bad.matrix <-0
na.matrix <- 0
r.list <- stats$r  #the call uses the output of statsBy 
Select <- select
keys.list<- keys #we are saving these for each pass	
ngroups <-length(r.list)	
 tol=sqrt(.Machine$double.eps)    #machine accuracy

var.names <- colnames(stats$r[[1]])

keys <- keys.list #these are the original keys   
select <- Select  #the original values
 bad <- FALSE
 if(is.list(keys) & (!is.data.frame(keys))) { if (select) {
    select <- selectFromKeyslist(var.names,keys)
 # select <- sub("-","",unlist(keys))   #added April 7, 2017
      select <- select[!duplicated(select)]
      }  else {select <- 1:length(var.names) }
      } 
 keys <- make.keys(stats$r[[1]][select,select],keys)  #added 9/9/16    (and then modified March 4, 2017  #move outside of loop
 if(!is.matrix(keys)) keys <- as.matrix(keys)  #keys are sometimes a data frame - must be a matrix

#now, do repeated scoring, one pass for each group
result <- list(Call = cl)
cor.list <- list()
var.list <- list()
alpha.list <- list()
r.list <- list()
for(group in 1:ngroups) {

num.n <- stats$nWg[[group]] #this is the pairwise data 

#if(sum(!is.na(num.n)) <  10 || mean(num.n,na.rm=TRUE) < MIN.n){
if(mean(num.n,na.rm=TRUE) < MIN.n){
    cor.list[[group]] <- NA
    alpha.list[[group]] <- NA   #add an empty element
    r.list[[group]] <- NA
   next()
   }
r <- stats$r[[group]]
r[num.n < min.n] <- NA
  
 #if (!isCorrelation(r)) {r <- cor(r[,select],use="pairwise")} else 
r <- r[select,select]

  #only use correlations
 #if ((dim(r)[1] != dim(r)[2]) ) {r <- cor(r,use="pairwise")}
 #if(any(abs(r[!is.na(r)]) > 1)) warning("Something is seriously wrong with the correlation matrix, some correlations had absolute values > 1!  Please check your data.")
 if(any(is.na(r))) {
                SMC=FALSE
 #                warning("Missing values in the correlation matrix do not allow for SMC's to be found")
                 bad <- TRUE}

 if(SMC && is.null(Item.smc)) {item.smc <- smc(r)} else {
         diag(r) <- NA
          item.smc <- apply(r,1,function(x) max(abs(x),na.rm=TRUE))
          item.smc[is.infinite(item.smc) ] <- 1 
          diag(r) <- 1}
                                   
 if(all(item.smc ==1)) SMC <- FALSE

#item.smc <- rep(1,NCOL(r))  #why are we doing this?

 if(!bad) {covar <- t(keys) %*% r %*% keys} else  #matrix algebra is our friend 
     {
    covar<- apply(keys,2,function(x) colSums(apply(keys,2,function(x) colSums(r*x,na.rm=TRUE))*x,na.rm=TRUE))  #matrix multiplication without matrices!
   covar <- apply(keys,2,function(x)  colSums(apply(keys,2,function(x)  colMeans(r*x,na.rm=TRUE))*x,na.rm=TRUE)) *NROW(keys) 
 # we find the means of the non-zero elements by adjusting for the number of these elements
    covar <- score.na(keys,r,cor=FALSE)
  }
  
 
 
 n.keys <- ncol(keys)
 item.var <- item.smc
 raw.r  <- cov2cor(covar)
# key.var <- diag(t(keys) %*% keys)  #if correlations
 key.var <- diag(r) %*% abs(keys)   #if covariances or correlations
 key.n <-  diag(t(keys) %*% keys)
 var <- diag(covar)     #these are the scale variances unless we handled bad data
 key.smc <- as.vector( t(abs(keys)) %*% item.smc ) 
 #key.alpha <- ((var-key.var)/var)*(key.var/(key.var-1))
 key.alpha <- ((var-key.var)/var)*(key.n/(key.n-1))
 key.lambda6 <-  (var - key.var + key.smc)/var
 key.alpha[is.nan(key.alpha)] <- 1           #if only 1 variable to the cluster, then alpha is undefined
 key.alpha[!is.finite(key.alpha)] <- 1   
 key.av.r <- key.alpha/(key.var - key.alpha*(key.var-1))  #alpha 1 = average r
 colnames(raw.r) <- rownames(raw.r)  <- colnames(keys)
 names(key.lambda6) <- colnames(keys)
 key.lambda6 <- drop(key.lambda6)
 
 n.keys <- ncol(keys)
 sn <- key.av.r * key.var/(1-key.av.r)
 
if(!bad) { item.cov <- t(keys) %*% r    #the normal case is to have all correlations
         raw.cov <- item.cov %*% keys} else {  
        # item.cov <- apply(keys,2,function(x) colSums(r*x,na.rm=TRUE))  #some correlations are NA which makes this value too low
          item.cov <- apply(keys,2,function(x) colSums(r*x,na.rm=TRUE))    #we use the means to fix this problem 
         raw.cov <-  apply(keys,2,function(x) colSums(item.cov*x,na.rm=TRUE))
         #raw.cov <- covar
         item.cov <- t(item.cov)   #what is going on?
   }
 adj.cov <- raw.cov 


 #now adjust them
 
  med.r <- rep(NA, n.keys)
 for (i in 1:(n.keys)) {
    temp <- keys[,i][abs(keys[,i]) > 0]
   temp <- diag(temp,nrow=length(temp))
   small.r <- r[abs(keys[,i])>0,abs(keys[,i])>0]
   small.r <- temp %*% small.r %*% temp  #does not work with NA
    med.r[i]  <- median(small.r[lower.tri(small.r)],na.rm=TRUE)  
    for (j in 1:i) {
  
  #maybe don't adjust
   
if(av.r) { adj.cov[i,j] <- adj.cov[j,i]<- raw.cov[i,j] - sum(keys[,i] * keys[,j] ) + sum(keys[,i] * keys[,j] *  sqrt(key.av.r[i] * key.av.r[j]))
  } else {
     adj.cov[i,j] <- adj.cov[j,i] <- raw.cov[i,j] - sum(keys[,i] * keys[,j] )+ sum( keys[,i] * keys[,j] * sqrt(item.smc[i]* abs(keys[,i])*item.smc[j]*abs(keys[,j]) ))
 
 }
    } }

scale.var <- diag(raw.cov)

diag(adj.cov) <- diag(raw.cov)
 adj.r <- cov2cor(adj.cov)  #this is the overlap adjusted correlations

if(smooth) {
  if(any(is.na(adj.r))) {na.matrix <- na.matrix+1} else {
  ev <- eigen(adj.r)
  bad.matrix <- bad.matrix + any(ev$value<0)
  adj.r <- cor.smooth(adj.r)
  }
}

#adjust the item.cov for item overlap
#we do this by replacing the diagonal of the r matrix with the item.var (probably an smc, perhaps a maximum value)

diag(r) <- item.var
if(!bad) { item.cov <- t(keys) %*% r    #the normal case is to have all correlations
        } else {  
         #item.cov <- t(apply(keys,2,function(x) colSums(r*x,na.rm=TRUE)))  #some correlations are NA
              item.cov <- matrixMult.na(t(keys),r,scale=FALSE)
         }



 if(n.keys > 1) {
    item.cor <-   sqrt(diag(1/(key.lambda6*scale.var))) %*% (item.cov)  # %*% diag(1/sqrt(item.var))
    rownames(item.cor) <- colnames(keys)
    } else {
      item.cor <- r %*% keys /sqrt(key.lambda6*scale.var) }
   colnames(item.cor) <- colnames(r)
   item.cor <- t(item.cor)
   names(med.r) <- colnames(keys)


 

 adj.r.vect <- as.vector(adj.r[lower.tri(adj.r)])
 if (correct) {cluster.corrected <- correct.cor(adj.r,t(key.alpha))
 

r.list [[group]] <- adj.r.vect
 alpha.list[[group]] <-  list(alpha=key.alpha)
 cor.list[[group]] <- list(cor=adj.r)
 var.list [[group]] <- list(var=var/(key.n^2))
# names(result[group]) <- names(stats$r[group])
 }  #correct for attenuation
 else {

result[[group]] <- list(cor=adj.r,var=var, sd=sqrt(var),alpha=key.alpha,av.r = key.av.r,size=key.var,sn=sn,G6 =key.lambda6,item.cor=item.cor,med.r=med.r,Call=cl)}


}   #end of group loop



 names(alpha.list) <- names(cor.list) <- names(stats$r)
 #if(!exists("adj.r.vect")) browser()                        #debugging option
# r.list <- r.list[!is.na(r.list)]  #this gets rid of the missing cases
 cor.mat <- matrix(unlist(r.list),ncol=length(adj.r.vect),byrow=TRUE)
 var.mat <- matrix(unlist(var.list),ncol=NCOL(keys),byrow=TRUE)
# rownames(cor.mat) <- names(cor.list[!is.na(cor.list)])

rownames(var.mat) <- names(cor.list[!is.na(cor.list)])
colnames(var.mat)<- colnames(keys)
 rownames(cor.mat) <- names(cor.list[!is.na(cor.list)])

rownames(cor.mat) <- names(cor.list)


 cnR <- abbreviate(colnames(keys),minlength=5) 
      k <- 1
      nvar <- NCOL(keys)
      temp.name <- rep(NA,nvar*(nvar-1)/2)  #strange, but seems necessary
     for(i in 1:(nvar-1)) {for (j in (i+1):nvar) {
     temp.name[k]  <- paste(cnR[i],cnR[j],sep="-") 
     # colnames(cor.mat)[k] <- paste(cnR[i],cnR[j],sep="-")
      k<- k +1 }}
      colnames(cor.mat)<- temp.name
 
 result <- list(alpha=alpha.list,cor = cor.list,cor.mat=cor.mat,bad.matrix=bad.matrix, na.matrix=na.matrix,var = var.mat)
 class(result) <- c ("psych", "scoreBy")
 return(result)}
 #modified 01/11/15 to find r if not a square matrix
#modifed 03/05/15 to do pseudo matrix multiplication in case of missing data 

select.from.list <- function(x){
ngroups <- length(x)
result <-list()
for (i in 1:ngroups) {
 result[[i]] <- x[[i]]$cor[lower.tri(x[[i]]$cor)]}
 return(result)
}