R/score.items.R

"score.items"  <-
 function (keys,items,totals=FALSE,ilabels=NULL, missing=TRUE, impute="median",delete=TRUE,  min=NULL,max=NULL,digits=2) {
 message("score.items has been replaced by scoreItems, please change your call")
     scoreItems(keys=keys,items=items,totals=totals,ilabels=ilabels,missing=missing,impute=impute,delete=delete,min=min,max=max,digits=digits)
     }
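#The replacement takes the same arguments. A minimal sketch (my.keys and my.data
#are hypothetical stand-ins for the user's keys and raw data):
#   my.scores <- scoreItems(keys=my.keys, items=my.data)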

"scoreItems"  <-
 function (keys,items,totals=FALSE,ilabels=NULL, missing=TRUE, impute="median",delete=TRUE,  min=NULL,max=NULL,digits=2,n.obs=NULL) {
   cl <- match.call()
   raw.data <- TRUE
   if(is.list(keys)) keys <- make.keys(items,keys)   #added 9/9/16
   keys <- as.matrix(keys)   #just in case they were not matrices to start with
    n.keys <- dim(keys)[2]
    n.items <- dim(keys)[1]
    abskeys <- abs(keys)
    keynames <- colnames(keys)
    num.item <- diag(t(abskeys) %*% abskeys) #how many items in each scale
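     #abskeys is a 0/1 (or positive weight) indicator matrix, so the diagonal of
     #t(abskeys) %*% abskeys gives the (weighted) number of items keyed for each scale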
    num.ob.item <- num.item   #will be adjusted in case of impute = FALSE
    if (!missing) items <-  na.omit(items) 
    n.subjects <- dim(items)[1]
     if ((dim(items)[1] == dim(items)[2])  &&  !isCorrelation(items)) {warning("You have an equal number of rows and columns but do not seem to have a correlation matrix.  I will treat this as a data matrix.")} #handles the very unusual case of exactly as many items as cases (reported by Jeromy Anglim)
    if ((dim(items)[1] == dim(items)[2])  &&  isCorrelation(items)){ #this is the case of scoring correlation matrices instead of raw data  (checking for rare case as well)       
     raw.data <- FALSE
     totals <- FALSE #because we don't have the raw data, totals would be meaningless

     n.subjects <- 0
     C <- as.matrix(items)
     cov.scales <- t(keys) %*% C %*% keys  #fast, but does not handle the problem of NA correlations
     cov.scales2 <- diag(t(abskeys) %*% C^2 %*% abskeys) # this is sum(C^2)  for finding ase
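     #with a correlation/covariance matrix as input, scale variances and covariances
     #come directly from the quadratic form Cov(K'x) = t(K) %*% C %*% K, with no raw data needed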
     response.freq <- NULL
           }  else {
    #check to make sure all items are numeric -- if not, convert them to numeric where possible, flagging each converted item with a "*"
     if(!is.matrix(items)) {  #the [[i]] indexing below works for data frames, not matrices
       for(i in 1:n.items) {
         if(!is.numeric(items[[i]])) {
           if(is.factor(unlist(items[[i]])) | is.character(unlist(items[[i]]))) {
             items[[i]] <- as.numeric(items[[i]])
             colnames(items)[i] <- paste0(colnames(items)[i],"*")
           } else {items[[i]] <- NA}
         }
       }
     }
      

   items <- as.matrix(items)
    
    response.freq <- response.frequencies(items)
    item.var <- apply(items,2,sd,na.rm=TRUE)
       bad <- which((item.var==0)|is.na(item.var))
       if((length(bad) > 0) && delete) {
       for (baddy in 1:length(bad)) {warning( "Item= ",colnames(items)[bad][baddy]  , " had no variance and was deleted from the data and the keys.")}
       items <- items[,-bad]
        keys <- as.matrix(keys[-bad,])
       
        n.items <- n.items - length(bad) 
        abskeys <- abs(keys)
        colnames(keys) <- keynames
      
        }
    item.means <- colMeans(items,na.rm=TRUE)
    if (is.null(min)) {min <- min(items,na.rm=TRUE)}
    if (is.null(max)) {max <- max(items,na.rm=TRUE)}
    # miss.rep <- rowSums(is.na(items))

     miss.rep <- (is.na(items) +0) %*% abs(keys)
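     #(is.na(items) + 0) is a 0/1 matrix of missing responses, so this product counts,
     #for each subject, how many of the items keyed for each scale were missing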
    
   
    num.item <- diag(t(abskeys) %*% abskeys) #how many items in each scale
    num.ob.item <- num.item   #will be adjusted in case of impute = FALSE
    if(impute !="none") {
        miss <- which(is.na(items),arr.ind=TRUE)
        if(impute=="mean") {
          item.means <- colMeans(items,na.rm=TRUE)   #replace missing values with means
          items[miss] <- item.means[miss[,2]]} else {
          item.med <- apply(items,2,median,na.rm=TRUE) #replace missing values with medians
          items[miss] <- item.med[miss[,2]]}   #this indexing only works if items is a matrix
        scores <- items %*% keys  #this actually does all the work but doesn't handle missing values
        C <- cov(items,use="pairwise")
        cov.scales <- cov(scores,use="pairwise")    #and total scale variance
        cov.scales2 <- diag(t(abskeys) %*% C^2 %*% abskeys)   # sum(C^2)  for finding ase
        }  else { #handle the case of missing data without imputation
           scores <- matrix(NaN,ncol=n.keys,nrow=n.subjects)
          if(raw.data &&  totals == TRUE) warning("Specifying totals = TRUE without imputation can lead to serious problems.  Are you sure?")  #just in case it was not already false        #do we want to allow totals anyway?
           #we could try to parallelize this next loop
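           #negatively keyed items are reversed by subtracting from (max + min):
           #e.g., on a 1 to 6 scale a response of 2 becomes 1 + 6 - 2 = 5, which keeps
           #reversed items on the same metric as positively keyed ones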
           for (scale in 1:n.keys) {
             pos.item <- items[,which(keys[,scale] > 0)]
             neg.item <- items[,which(keys[,scale] < 0)]
             neg.item <- max + min - neg.item
             sub.item <- cbind(pos.item,neg.item)
             if(!totals) {scores[,scale] <- rowMeans(sub.item,na.rm=TRUE)} else {scores[,scale] <- rowSums(sub.item,na.rm=TRUE)}
             rs <- rowSums(!is.na(sub.item))
             num.ob.item[scale] <- mean(rs[rs>0])  #added Sept 15, 2011
            # num.ob.item[scale] <- mean(rowSums(!is.na(sub.item))) # dropped
             } # end of scale loop
       	
           # we now need to treat the data as if we had done correlations at input
            C <- cov(items,use="pairwise")
            cov.scales <- t(keys) %*% C %*% keys
            cov.scales2 <- diag(t(abskeys) %*% C^2 %*% abskeys)  # sum(C^2)  for finding ase
            raw.data <- FALSE
         }  #end of treating missing without imputation

       }
                   
    slabels <- colnames(keys)
    if (is.null(slabels)) {
    	if (totals) {slabels<- paste("S",1:n.keys,sep="")} else {
    	             slabels <- paste("A",1:n.keys,sep="")} }
   
    
   

    item.var <- diag(C)  #find the item variances
    
    var.scales <- diag(cov.scales)
    cor.scales <- cov2cor(cov.scales)    
    sum.item.var <- item.var %*% abskeys 
    sum.item.var2 <- item.var^2 %*% abskeys 
   
    
   #av.r <- (var.scales - sum.item.var)/(num.item*(num.item-1))  #actually, this is the average covariance
   alpha.scale <- (var.scales - sum.item.var)*num.item/((num.item-1)*var.scales)
   av.r <- alpha.scale/(num.item - alpha.scale*(num.item-1))  #alpha 1 = average r
   alpha.ob <- av.r * num.ob.item/(1+(num.ob.item-1)* av.r)
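   #alpha and the average inter-item correlation are linked by the generalized
   #Spearman-Brown formula alpha = n*av.r/(1 + (n-1)*av.r); alpha.ob applies the same
   #formula using the average number of items actually observed per subject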
  
    colnames(alpha.scale) <- slabels
  alpha.scale[is.nan(alpha.scale)] <- 1
   
   #Find standard errors of alpha following Duhachek and Iacobucci (2004)
   #Q = (2 * n^2/((n-1)^2*(sum(C)^3))) * (sum(C) * (tr(C^2) + (tr(C))^2) - 2*(tr(C) * sum(C^2)))
   #this works if we have the raw data
   Q = (2 * num.item^2/((num.item-1)^2*((var.scales)^3))) * (var.scales * (sum.item.var2 + sum.item.var^2) - 2* sum.item.var * cov.scales2)
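   #per-scale substitution into the formula above: var.scales plays the role of sum(C),
   #sum.item.var of tr(C), sum.item.var2 of tr(C^2) (here the elementwise square), and
   #cov.scales2 of sum(C^2); ase = sqrt(Q/n) is taken further below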

   ase <- NULL  #to have something if we don't have raw data
   #now find the Guttman lambda 6 (G6) reliability estimate as well as the corrected item-whole correlations
   
   if(raw.data) { item.cor <- cor(items,scores)} else {if (n.keys >1) {
         item.cor <- C %*% keys %*% diag(1/sqrt(var.scales))/sqrt(item.var)} else {item.cor <- C %*% keys /sqrt(var.scales * item.var)}}
         colnames(item.cor) <- slabels
    c.smc <- smc(C,TRUE)
   
    diag(C) <- c.smc
    sum.smc <- c.smc %*% abskeys
    G6 <- (var.scales - sum.item.var + sum.smc)/var.scales
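     #Guttman's lambda 6: each item's unique variance is estimated as its variance minus
     #its smc with the remaining items, so G6 = (Vx - sum(item.var) + sum(smc))/Vx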
    corrected.var <- diag(t(keys) %*%  C %*% keys)
    if(n.keys>1) {
    item.rc <- (C %*% keys) %*% sqrt(diag(1/corrected.var))/sqrt(item.var)} else {
      item.rc <- C %*% keys /sqrt(corrected.var*item.var) }
    colnames(item.rc) <- slabels
   
  if(n.subjects > 0) {ase <- sqrt(Q/ n.subjects )} else {if(!is.null(n.obs)) {ase <- sqrt(Q/ n.obs )} else {ase=NULL}}  #only meaningful if we have raw data
  if(is.null(ilabels)) {ilabels <- colnames(items) }
  if(is.null(ilabels)) {ilabels <-  paste("I",1:n.items,sep="")}
   rownames(item.rc) <- ilabels
  if(raw.data) {
     correction <- (colSums(abs(keys)-(keys))/2)*(max+min) #correct for flipping
     scores <- scores  + matrix(rep(correction,n.subjects),byrow=TRUE,nrow=n.subjects)
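     #the matrix product above contributed -x for each reversed item, while true reversal
     #is (max + min) - x; adding (max + min) once per reversed item (the correction term)
     #therefore restores the original response metric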
      
     if (!totals) {
        if(n.keys > 1) {scores <- scores %*% diag(1/num.item)   #find averages
           } else {scores <- scores/num.item} }
            colnames(scores) <- slabels          
         } else {if (impute !="none") scores <- NULL}
    scale.cor <- correct.cor(cor.scales,t(alpha.scale))
  rownames(alpha.scale) <- "alpha"
  rownames(av.r) <- "average.r"
  rownames(G6) <- "Lambda.6"
  sn <-  av.r * num.item/(1-av.r)
  rownames(sn) <- "Signal/Noise"
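  #the signal/noise ratio re-expresses reliability: s/n = n*av.r/(1 - av.r),
  #which is algebraically the same as alpha/(1 - alpha)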

   if (!raw.data) {
     if(impute =="none") {
       #rownames(alpha.ob) <- "alpha.observed"
       if(!is.null(scores)) colnames(scores) <- slabels #added Sept 23, 2013
       results <- list(scores=scores,missing = miss.rep,alpha=alpha.scale, av.r=av.r,sn=sn, n.items = num.item, item.cor = item.cor,cor = cor.scales, corrected = scale.cor,G6=G6,item.corrected = item.rc,response.freq=response.freq,raw=FALSE,alpha.ob = alpha.ob,num.ob.item =num.ob.item,ase=ase,Call=cl)
     } else {
       results <- list(alpha=alpha.scale, av.r=av.r,sn=sn, n.items = num.item, item.cor = item.cor,cor = cor.scales, corrected = scale.cor,G6=G6,item.corrected = item.rc,response.freq =response.freq,raw=FALSE, ase=ase,Call=cl)
     }
   } else {   #we have raw data, so scores were computed above
     if (sum(miss.rep) > 0) {
       results <- list(scores=scores,missing = miss.rep,alpha=alpha.scale, av.r=av.r, sn=sn,n.items = num.item, item.cor = item.cor,cor = cor.scales, corrected = scale.cor,G6=G6,item.corrected = item.rc,response.freq=response.freq,raw=TRUE,ase=ase,Call=cl)
     } else {
       results <- list(scores=scores,alpha=alpha.scale, av.r=av.r,sn=sn, n.items = num.item, item.cor = item.cor, cor =cor.scales,corrected = scale.cor,G6=G6,item.corrected = item.rc,response.freq=response.freq,raw=TRUE,ase=ase,Call=cl)
     }
   }
   class(results) <- c("psych", "score.items")
    return(results)
 }
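 #A minimal usage sketch (an illustration, not part of the package: it assumes the
 #psych package is installed, uses the bfi data set that ships with psych, and the
 #key list below follows the package documentation but is only illustrative).
 #Wrapped in if(FALSE) so that sourcing this file does not execute it.
 if(FALSE) {
   library(psych)
   keys.list <- list(agree = c("-A1","A2","A3","A4","A5"),
                     extraversion = c("-E1","-E2","E3","E4","E5"))
   scored <- scoreItems(keys.list, bfi, min=1, max=6)   #score two illustrative scales
   scored$alpha     #alpha for each scale
   scored$scores    #scale scores (averages, since totals=FALSE)
 }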
 #modified June 1 to add row names to items 
 #modified June 22 to add median imputation
 #modified August 8 to add colnames to scores
 #modified Sept 23, 2007 to allow for short output
 #modified December 10, 2007 to default to ilabels as colnames(items)
 #modified March, 2009 to better use the print.psych function
 #modified March 22, 2009 to add G6 and corrected for item overlap correlation
 #also allow for correlation/covariance matrix input
 #modified Sept 3, 2010 to include response frequencies
 #modified November 11, 2010 to allow for data with lots of missingness to be scored without imputing means or medians  
 #need to rethink the short option.  Why bother since summary and print don't show scores anyway
 #added missing score to count missing responses for each scale instead of just the overall.
 #Modified November 22, 2013 to add confidence intervals for alpha 
 #modified Sept 9, 2016 to add the keys.list option for the scoring keys