R/matReg.r
In psych: Procedures for Psychological, Psychometric, and Personality Research

Documented in matReg

#a helper function to find regressions from covariances
#May 6, 2016
#Fixed November 29, 2018 to handle se of partialed variables correctly
#modified September 25, 2019 to find intercepts and standard errors using momements
#modified even more November 25, 2019 to return the intercepts and R2
matReg <- function(x,y=NULL,C=NULL,m=NULL,z=NULL,n.obs=0,means=NULL,std=FALSE,raw=TRUE,part=FALSE) {
#first, allow for formula based calls
 cl <- match.call()
    #convert names to locations if given formula input
    prod <- ex <-  NULL   #in case we do not have formula input
   #first, see if they are in formula mode  
   if(inherits(x,"formula")) {
   ps <- fparse(x)
   y <- ps$y          #[c(y, x, z, ex)]
   x <- ps$x
   data <- C  #to make the following easier to adapt from setCor
   med <- ps$m #but, mediation is not done here, so we just add this to x
  # if(!is.null(med)) x <- c(x,med)   #not  necessary, because we automatically put this in
   prod <- ps$prod
   z <- ps$z   #do we have any variable to partial out
   ex <- ps$ex
   form <- TRUE
} else {form <- FALSE}
           #  data <- char2numeric(data)   #move to later (01/05/19)
        
    if(is.numeric(y )) y <- colnames(data)[y]
    if(is.numeric(x )) x <- colnames(data)[x]
    if(is.numeric(z )) z <- colnames(data)[z]
  

#check for bad input  
if(any( !(c(y,x,z,ex) %in% colnames(data)) )) {
  cat("\nOops! Variable names are incorrect. Offending items are ", c(y, x, z, ex)[which(!(c(y, x, z, ex) %in% colnames(data)))],"\n")
 stop("I am stopping because the variable names are incorrect.  See above.")}
 
 
 #we might have had formula input thus, make C the covariance matrix
# if(form) C <- cov(data[c(y,x,z,ex)],use="pairwise")
 if(is.null(n.obs)) n.obs <- 0
   numx <- length(x)   #this is the number of predictors (but we should adjust by the number of covariates)   
   numz <- length(z)
   numy <- length(y)
   
   
   #df <- n.obs -1 - numx - length(z) - length(m)    #but this does not take into account the mediating variables
   #note that the x variable includes the intercept and thus uses up one extra df
   df <- n.obs  - numx -numz #We have partialed out z, should we use the df from it?  This is changed 11/26/19 to reduce df for z  
   Cr <- cov2cor(C)  
        	if(!is.null(z)){numz <- length(z)      #partial out the z variables
     	                zm <- C[z,z,drop=FALSE]
     	                za <- C[x,z,drop=FALSE]
     	                zb <- C[y,z,drop=FALSE]
     	                zmi <- solve(zm)
     	                 x.matrix <- C[x,x,drop=FALSE] - za %*% zmi %*% t(za)
     	               if(!part)  y.matrix <- C[y,y,drop=FALSE] - zb %*% zmi %*% t(zb)  #part versus partial (default)
     	                xy.matrix <- C[x,y,drop=FALSE] - za  %*% zmi %*% t(zb)
     	                 C <- cbind(rbind(y.matrix,xy.matrix),rbind(t(xy.matrix),x.matrix))
     	                
     	                 }
   
    if(numx==1) { beta <- solve(C[x,x,drop=FALSE],(C[x,y,drop=FALSE])) 
                 colnames(beta) <- y
        } else {
        beta <- solve(C[x,x],(C[x,y])) }    #this is the same as setCor and is a x * x matrix
        
    if(!is.matrix(beta)) {beta <- matrix(beta,nrow=length(beta))}   #beta is a matrix of beta weights 
    if(is.character(x)) {rownames(beta) <- x}  else {rownames(beta) <- colnames(C)[x]}
    if(is.character(y)) { colnames(beta) <- y} else { colnames(beta) <- colnames(C)[y]}
      
      x.inv <- solve(C[x,x]) #solve x.matrix    #taken from setCor
      yhat <- t(C[x,y,drop=FALSE]) %*% x.inv %*% C[x,y,drop=FALSE]
      resid <- C[y,y]- yhat
      

     if(!std ) {
        df <- n.obs - numx - numz

         Residual.se <- sqrt(diag(resid /df))  #this is the df  n.obs - length(x))
         se <- MSE <- diag(resid )/(df) 
             
               if(length(y) > 1) {SST <- diag(C[y,y] - means[y]^2 * n.obs)} else {SST <- ( C [y,y] - means[y]^2 * n.obs)}
     	       R2 <- (SST - diag(resid)) /SST 
     	       se.beta <- list()
     	        for (i in 1:length(y)) {
     	        se.beta[[i]] <- sqrt(MSE[i] * diag(x.inv))
                 }
                se <- matrix(unlist(se.beta),ncol=numy)
      if(length(y) > 1) {SST <- diag(C [y,y] - means[y]^2 * n.obs)} else {SST <- ( C [y,y] - means[y]^2 * n.obs)}
	       R2 <- (SST - diag(resid)) /SST 
	       
	   
      } else {      R2 <- colSums(beta * C[x,y])/diag(C[y,y,drop=FALSE])   #the standardized case
        uniq <- 1-(1-1/diag(solve(Cr[x,x,drop=FALSE])))  #1- smc
     

        if(n.obs > 2) { # se <- (sqrt((1-R2)/(n.obs-1 - numx-numz)) %*% t(sqrt(1/uniq)))  #these are the standardized se
                        se <- (sqrt((1-R2)/(df)) %*% t(sqrt(1/uniq)))   #setCor uses df = n.obs - numx - 1
                        se <- t( se * sqrt(diag(C[y,y,drop=FALSE])) %*% t(sqrt(1/diag(C[x,x,drop=FALSE]))) )  #But does this work in the general case?

        
                    colnames(se) <- colnames(beta) } else {se <- NA}
                   if(raw) {   #used to compare models  -- we need to adjust this for dfs
                           Residual.se <-   sqrt((1-R2)* df/(df-1)) } else {  #this is a kludge and is necessary to treat the SSR correctly
                       Residual.se <- sqrt((1-R2)/df * (n.obs-1))}
                   }
                   
                
                    if(!any(is.na(se))) { tvalue <- beta/se
                                        # prob <- 2*(1- pt(abs(tvalue),df))
                                          prob <- -2 *  expm1(pt(abs(tvalue),df,log.p=TRUE))
                                         } else {tvalue <- prob <- df <- NA}
  result <- list(beta=beta,se=se, t=tvalue,df=df,prob=prob,R2=R2,SE.resid=Residual.se)
  return(result)       }  		 
#######