R/sim.structural.R

Defines functions sim.groups

"sim.structure" <- "sim.structural" <-
function (fx=NULL,Phi=NULL,fy=NULL,f=NULL,n=0,uniq=NULL,raw=TRUE, items = FALSE, low=-2,high=2,d=NULL,cat=5,mu=0) {
 cl <- match.call()
 if(is.null(f)) { if(is.null(fy)) {f <- fx} else {
    f <- superMatrix(fx,fy)} }
  f <- as.matrix(f)
  if(!is.null(Phi)) {if(length(Phi)==1) Phi <- matrix(c(1,Phi,Phi,1),2,2)}
   #these are parameters for simulating items
   nf <- ncol(f)
   nvar <- nrow(f)
#if(is.null(d)) {d <- seq(low,high,(high-low)/(nvar/(nf-1)))
         #   d <- rep(d,nvar)} else {if(length(d)==1) d <- rep(d,nvar)}
	#a <- rep(1,nvar)         
  
  if(is.vector(f)) {f <- as.matrix(f)  #this is the case if doing a congeneric model
                    Phi <- 1}
  if(!is.null(Phi)) {
  model <-  f %*% Phi %*%  t(f) #the model correlation matrix for oblique factors
} else { model <- f%*% t(f)}
if(is.null(uniq)) {diag(model) <- 1 } else { diag(model) <- uniq  + diag(model)}                      # put ones along the diagonal unless uniq is specified 
  nvar <- dim(f)[1]
  if(is.null(rownames(model))) {colnames(model) <- rownames(model) <- paste("V",1:nvar,sep="")} #else {colnames(model) <- rownames(model) <- rownames(fx)}
 
  if(n>0) {
    mu <- rep(mu,nvar)
  	#observed <- mvrnorm(n = n, mu, Sigma=model, tol = 1e-6, empirical = FALSE)
  	 eX <- eigen(model)
                                      theta <- matrix(rnorm(nvar * n),n)
                                      observed <- t( eX$vectors %*% diag(sqrt(pmax(eX$values, 0)), nvar) %*%  t(theta) + mu)   #this way theta and observed not identical
                                      theta <- observed
  	if(items) {#observedp <- matrix(t(pnorm(a*t(observed)- d)),n,nvar)    #this is not useful
  	         #observed[] <- rbinom(n*nvar, cat, observedp) #this puts in error again
  	         range.ob <- range(observed)
  	         observed <- (observed - range.ob[1])/(range.ob[2]- range.ob[1])
  	         observed<- round(cat * observed)}
  	  colnames(observed) <- colnames(model)
  r <- cor(observed) 
  } 

  	reliability <- diag(f %*% t(f))
  if(n<1) {results <- list(model=model,reliability=reliability) } else {
  if (!raw) {results <- list( model=model,reliability=reliability,r=r,N=n )} else {
             results <- list( model=model,reliability=reliability,r=r,observed= observed,theta=theta, N=n) } }
  results$Call <- cl
  class(results) <- c("psych", "sim")
 return(results)}
 
 # Replaced 1/8/21 with a version that reports theta 
# "sim" <-
# function (fx=NULL,Phi=NULL,fy=NULL,alpha=.8,lambda = 0,n=0,mu=NULL,raw=TRUE) {
#  cl <- match.call()
# ##set up some default values 
# if(is.null(fx)) {fx <- matrix(c(rep(c(.8,.7,.6,rep(0,12)),3),.8,.7,.6),ncol=4)
#    if(is.null(Phi)) {Phi <- diag(1,4,4)
#                      Phi <- alpha^abs(row(Phi) -col(Phi)) + lambda^2
#                      diag(Phi) <- max((alpha + lambda),1)
# 	                Phi <- cov2cor(Phi)}
#    if(is.null(mu)) {mu <- c(0,.5,1,2)}                   }
#     if(is.null(fy)) {f <- fx} else {
#     f <- superMatrix(fx,fy)} 
#   
#    if(is.null(mu)) {mu <- rep(0,ncol(fx))} 
#    
#      means <- fx %*% mu        
#   
#   if(is.vector(f)) {f <- as.matrix(f)  #this is the case if doing a congeneric model
#                     Phi <- 1}
#   if(!is.null(Phi)) {
#   model <-  f %*% Phi %*%  t(f) #the model correlation matrix for oblique factors
# } else { model <- f%*% t(f)}
# diag(model)<- 1                       # put ones along the diagonal
#   nvar <- dim(f)[1]
#   colnames(model) <- rownames(model) <- paste("V",1:nvar,sep="")
#   if(n>0) {
#     
#   #	observed <- mvrnorm(n = n, means, Sigma=model, tol = 1e-6, empirical = FALSE)
#   	 eX <- eigen(model)
#      observed <- matrix(rnorm(nvar * n),n)
#      observed <- t( eX$vectors %*% diag(sqrt(pmax(eX$values, 0)), nvar) %*%  t(observed) + rep(means,n))
#   r <- cor(observed) } 
#   	reliability <- diag(f %*% t(f))
#   if(n<1) {results <- list(model=model,reliability=reliability) } else {
#   if (!raw) {results <- list( model=model,reliability=reliability,r=r,N=n )} else {
#              results <- list( model=model,reliability=reliability,r=r,observed= observed,N=n) } }
#   results$Call <- cl
#   class(results) <- c("psych", "sim")
#  return(results)}
#  


#####
#Added 1/7/21  to report the generating theta
#corrected 1/29/21 to correctly report generating theta (with correlations)
####
 	
# "sim" <-
# function (fx=NULL,Phi=NULL,fy=NULL,alpha=.8,lambda = 0,n=0,mu=NULL,raw=TRUE,theta=TRUE) {
#  cl <- match.call()
#  
# ##set up some default values 
# if(is.null(fx)) {fx <- matrix(c(rep(c(.8,.7,.6,rep(0,12)),3),.8,.7,.6),ncol=4)}
#     if(is.vector(fx)) {fx <- as.matrix(fx) } #this is the case if doing a congeneric model
#                    
#   if(is.vector(fy)) {fy <- as.matrix(fy)}
#      
#     nvar <- NROW(fx)
#     nfact <- NCOL(fx)
#    if(is.null(Phi)& is.null(fy)) {Phi <- diag(nfact)
#                      Phi <- alpha^abs(row(Phi) -col(Phi)) + lambda^2
#                      diag(Phi) <- max((alpha + lambda),1)
# 	                Phi <- cov2cor(Phi)}    #put in a simplex structure as default
#                  
#     if(is.null(fy)) {f <- fx} else {
#     f <- superMatrix(fx,fy)}
#     
#      if(is.null(mu))  mu <- as.matrix(0:(NCOL(f)-1),drop=FALSE)
#      if(is.null(Phi)) Phi <- diag(NCOL(f)) 
#   
#    if(is.null(mu)) {mu <- as.matrix(rep(0,NCOL(f)),drop=FALSE)} 
#    
#      means <- f %*% mu        
#   
# 
#      model <-  f %*% Phi %*%  t(f) #the model correlation matrix for oblique factors
#    
#    
#      nvar <- NROW(f) #we do this here because we have made f a supermatrix of fx and fy   
#      colnames(model) <- rownames(model) <- paste("V",1:nvar,sep="")  
#      
#         
#  if((n > 0)) { 
#      nfactors <- NCOL(f)
#      theta <- matrix(rnorm(n * nfactors),n)   #make up some random data 
#      
#      eX <- eigen(model)  #the model is actually the covariance matrix
#       t.scores <-  t(eX$vectors[,1:nfactors] %*% diag(sqrt(pmax(eX$values[1:nfactors], 0)))    %*%  t(theta)  + rep(means,n)) #the true scores for each variable
#       U <- diag(sqrt(1-diag(model)))
#       error <- t(U %*% matrix(rnorm(n * nvar),nrow=nvar))
#       observed <- t.scores + error
#       colnames(observed) <- paste0("V",1:ncol(observed))
#        r <- cor(observed)
#       if(!is.null(Phi)) { exPhi <- eigen(Phi)
#       # f.scores <- f.scores %*% Phi
# 
#         #f.scores <-t((exPhi$vectors %*% diag(sqrt(exPhi$values)) %* t(f.scores)) + mu)}
#         theta <-    t(exPhi$vectors %*% diag(sqrt(exPhi$values)) %*% t(theta) + rep(mu,n) ) 
#      }
#      } #  else {
#      
# 
#   diag(model)<- 1                       # put ones along the diagonal
#   	reliability <- diag(f %*% t(f))
#   if(n < 1) {results <- list(model=model,reliability=reliability) } else {
#   if (!raw) {results <- list( model=model,reliability=reliability,r=r,N=n )} else {
#              results <- list( model=model,reliability=reliability,r=r, observed= observed,theta=theta,Phi=Phi, N=n) } }
#   results$Call <- cl
#   class(results) <- c("psych", "sim")
#  return(results)} 
#  
#  
 
 
 #complete rewrite to allow the true factor scores (theta) to be reported
#rewritten March, 2021
#added threshold option April 2023
 "sim" <-
function (fx=NULL,Phi=NULL,fy=NULL,alpha=.8,lambda = 0,n=0,mu=NULL,raw=TRUE,threshold=NULL) {
 cl <- match.call()
 
##set up some default values 
# 4 correlated factors growing over time
# The four factors have a simplex structure

  
    
if(is.null(fx)) {fx <- matrix(c(rep(c(.8,.7,.6,rep(0,12)),3),.8,.7,.6),ncol=4)
   	if(is.null(Phi)) {Phi <- diag(NCOL(fx))         #create a simplex of the factors
                     Phi <- alpha^abs(row(Phi) -col(Phi)) + lambda^2
                     diag(Phi) <- max((alpha + lambda),1)
	                Phi <- cov2cor(Phi)}
   if(is.null(mu)) {mu <- seq(0,NCOL(fx)-1)} else {if(length(mu)< NCOL(fx)) mu <- sample(mu,NCOL(fx),replace=TRUE)}                  
      }    #end of default
    if(is.null(fy)) {f <- fx} else {
    f <- superMatrix(fx,fy)} 
     nvar <- NROW(f)
     nfactors <- NCOL(f)

    if(is.vector(f)) {f <- as.matrix(f)  #this is the case if doing a congeneric model
                    Phi <- 1}
   if(is.null(mu)) {mu <- rep(0,NCOL(f))} 
   
     means <- f  %*% mu  
        
  
  if(is.null(Phi)) {Phi <-diag(NCOL(f))}
        model <-  f %*% Phi %*%  t(f) #the model correlation matrix for oblique factors
          U <- diag(sqrt(1-diag(model)))   #the uniquensses 
          diag(model)<- 1                  # put ones along the diagonal
   
     colnames(model) <- rownames(model) <- paste("V",1:nvar,sep="")  
     
 #Create factor scores and raw data       
 if((n>0) ) { 
        if(!is.null(threshold)) {if (length(threshold) < nvar) threshold <- sample(threshold, nvar, replace=TRUE)}
     theta <- matrix(rnorm(n * nfactors),ncol=nfactors)   #the orthogonal factors
     et <- eigen(Phi)
  
     theta <- t(et$vectors %*% diag(sqrt(et$values ))%*% t(theta))  #the correlated factors
     observed <- theta %*% t(f)
     colnames(theta) <- colnames(f) 
      error <- t(U %*% matrix(rnorm(n * nvar),nrow=nvar))
      observed <- t(t(observed + error) + rep(means,n))  #observed are factors * loadings + error + means
       if(!is.null(threshold)) {
        i <- 1:nvar
		  observed <- t(t(observed [,i]) > threshold[i]) + 0 } 
       r <- cor(observed)  #will approximate the model
     }  
  	reliability <- diag(f %*% t(f))   
  if(n<1) {results <- list(model=model,reliability=reliability) } else {
  if (!raw) {results <- list( model=model,reliability=reliability,r=r,N=n )} else {
             results <- list( model=model,reliability=reliability,r=r,observed= observed,theta=theta,N=n) } }
  results$Call <- cl
  class(results) <- c("psych", "sim")
 return(results)}

 
 ###########################	
"sim.simplex" <-
	function(nvar =12, alpha=.8,lambda=0,beta=1,mu=NULL, n=0,threshold=NULL) {
	 cl <- match.call()
	R <- matrix(0,nvar,nvar)
	R[] <- alpha^abs(col(R)-row(R))*beta + lambda^2
	diag(R) <- max((alpha * beta) + lambda,1)
	R <- cov2cor(R)
	colnames(R) <- rownames(R) <- paste("V",1:nvar,sep="")
	
	if(is.null(mu)) {mu <- rep(0,nvar)} 
	
	if(n>0) {
	#observed.scores <- mvrnorm(n = n, mu, Sigma=R, tol = 1e-6, empirical = FALSE)
	observed <- matrix(rnorm(nvar *n),n)
	 	 eX <- eigen(R)
                observed.scores <- matrix(rnorm(nvar * n),n)
                observed.scores <- t( eX$vectors %*% diag(sqrt(pmax(eX$values, 0)), nvar) %*%  t(observed)+mu)
    if(!is.null(threshold)) {if (length(threshold) < nvar) threshold <- sample(threshold, nvar, replace=TRUE)
             i <- 1:nvar
		  observed.scores <- t(t(observed.scores [,i]) > threshold[i]) + 0 } 
    colnames(observed.scores)<- paste0("V",1:nvar)           
	observed <- cor(observed.scores)
	results <- list(model=R,r=observed,observed=observed.scores) 
	results$Call <- cl
	class(results) <- c("psych", "sim")} else {results <- R}
	  
	results
	}
	

#simulate major and minor factors
#modified October 18, 2022 to allow specification of g and fbig for all variables
"sim.minor" <-
function(nvar=12,nfact=3,n=0,g=NULL,fbig=NULL,fsmall = c(-.2,.2),n.small=NULL, bipolar=TRUE, threshold=NULL) {
if(is.null(fbig)) {loads <- c(.8,.6) 
  loads <- sample(loads,nvar/nfact,replace=TRUE)


if(nfact == 1) {fx <- matrix(loads,ncol=1)} else {fx <- matrix(c(rep(c(loads,rep(0,nvar)),(nfact-1)),loads),ncol=nfact)}
if(bipolar) fx <- 2*((sample(2,nvar,replace=TRUE) %%2)-.5) * fx
} else {fx <- fbig
     nvar <- NROW(fx)
     nfact <- NCOL(fx)}
if(!is.null(g)) {if (length(g) < nvar) {g <- sample(g,nvar,replace=TRUE)}
        fx <- cbind(g,fx)
        }
 
if( !is.null(fsmall)) {
if(is.null(n.small)) n.small=nvar/4
fsmall  <- c(fsmall,rep(0,n.small))
fs <- matrix(sample(fsmall,nvar*floor(n.small),replace=TRUE),ncol=floor((n.small)))  
fload <- cbind(fx,fs)

if(is.null(g)) {
colnames(fload) <- c(paste("F",1:nfact,sep=""),paste("m",1:(n.small),sep=""))} else {
    colnames(fload) <- c("g",paste("F",1:nfact,sep=""),paste("m",1:(n.small),sep=""))}
    } else {
     fload <- fx
      colnames(fload) <- paste0("F",1:nfact)
      }
rownames(fload) <- paste0("V",1:nvar)
sanity.check <- h2  <- apply(fload,1,function(x) sum(x^2))
if(max(sanity.check) >  1.0) {print(cbind(fload, h2))
                       stop("Model is impossible.  Communalities (h2) exceed 1. ")}
Phi <- diag(ncol(fload))
results <- sim(fload,n=n, Phi=Phi,threshold=threshold)
         results$fload <- fload
 class(results) <- c("psych", "sim")
 return(results)
}


#simulate various structures and summarize them
"sim.omega" <-
function(nvar=12,nfact=3,n=500,g=NULL,sem=FALSE,fbig=NULL,fsmall = c(-.2,.2),bipolar=TRUE,om.fact=3,flip=TRUE,option="equal",ntrials=10,threshold=NULL) {
results <- matrix(NaN,nrow=ntrials,ncol=12)
colnames(results) <- c("n","om.model","omega","ev.N","e.f1","omega.f1","Beta","omegaCFA","omegaSem","rms","RMSEA","coeff.v")

#iterate the entire process n.trials times
for (i in 1:ntrials) {
#make up some data using the Tucker notion of big and small factors
x <- try(sim.minor(nvar=nvar,nfact=nfact,n=n,g=g,fbig=fbig,fsmall=fsmall,bipolar=bipolar,threshold=threshold))
if(is.null(g)) {omega.model <- 0} else {gsum <- colSums(x$fload)[1]
    omega.model <- gsum^2/sum(x$model)}
results[i,"om.model"] <- omega.model
observed.cor <- cor(x$observed)
ev <- eigen(observed.cor)$values
f1 <- fa(observed.cor)$loadings
om.fa <- sum(f1)^2/sum(observed.cor)
e.f1 <- sum(f1^2)/nvar
    sem.model <- omegaSem(x$fload[,1:nfact],sl=TRUE,nfactors=nfact)  #this is the model based upon the true values
    if(sem) {stop('The option to use the sem package  has been replaced with calls to lavaan')
    #if(!requireNamespace('sem')) {stop("You must have the sem package installed to use omegaSem")} else {sem.om <- try(sem(model=sem.model,S=observed.cor, N=n))} 
 
  #omega.cfa <- omegaFromSem(observed.cor,sem.om,flip=flip)
  #  if(omega.cfa$omega >1) omega.cfa$omega <- NA
  #results[i,"omegaCFA"] <- omega.cfa$omega
  } else {omega.cfa <- NULL}
results[i,"n"] <- n
if(n > 0) {
   if (sem) {om <- try(omegaSem(x$observed,om.fact,flip=flip,plot=FALSE,option=option))} else {
om <- try(omega(x$observed,om.fact,flip=flip,plot=FALSE,option=option))}
          ic <- suppressWarnings(ICLUST(x$observed,1,plot=FALSE))} else {
           if (sem) {om <- try(omegaSem(x$model,om.fact,flip=flip,plot=FALSE,option=option))} else {om <- try(omega(x$model,om.fact,flip=flip,plot=FALSE,option=option))
           if(inherits(om,"try-error")) {message("Error in sem. iteration = ",i)
            om <- NA
            next}}
          ic <- suppressWarnings(ICLUST(x$model,1,plot=FALSE))}
#results
if(sem) {results[i,"omega"] <- om$omegaSem$omega_h
                loads <- om$omegaSem$schmid$sl
     
} else {results[i,"omega"] <- om$omega_h
loads <- om$schmid$sl
}  
 p2 <- loads[,ncol(loads)]
         	mp2 <- mean(p2)
         	vp2 <- var(p2)       	
       
#results[i,"p2"] <-  mp2
#results[i,"p2.sd"] <-  sqrt(vp2)

results[i,"coeff.v"] <- sqrt(vp2)/mp2
results[i,"Beta"] <-  ic$beta
results[i,"ev.N"] <- ev[1]/nvar
results[i,"e.f1"] <- e.f1
results[i,"omega.f1"] <- om.fa


if(sem) {
        if(!is.null(om$omegaSem$schmid$RMSEA)) {results[i,"RMSEA"] <-om$omegaSem$schmid$RMSEA[1]} else {results[i,"RMSEA"] <- NA}
         if(!is.null(om$omegaSem$schmid$rms))results[i,"rms"] <- om$omegaSem$schmid$rms
        results[i,"omegaSem"] <- om$omega.efa$omega
        if(results[i,"omegaSem"] > 1) {warning("Bad result from sem   case = ",i) 
                             results[i,"omegaSem"] <- NA}
        } else { 
if(!is.null(om$schmid$RMSEA)) {results[i,"RMSEA"] <- om$schmid$RMSEA[1]} else {results[i,"RMSEA"] <- NA}
if(!is.null(om$schmid$rms)) results[i,"rms"] <- om$schmid$rms
     results[i,"omegaSem"] <- NA}

}

if(n==0) {results <- results[,-which(colnames(results)=="RMSEA")] #drop RMSEA if there are no cases
if(!sem) results <- results[,-which(colnames(results)=="omegaSem")] 
} else {if(!sem) results <- results[,-which(colnames(results)=="omegaSem")] }

return(results)
}

"sim.omega.2" <- function(nvar=12,nfact=3,n=c(100,200,400,800),g=c(0,.1,.2,.3,.4,.5),sem=TRUE,fbig=c(.7,.6),fsmall=c(-.2,.2),bipolar=FALSE,om.fact=3,ntrials=10) {
result <- list() 
k <- 1
progressBar(k,length(n)*length(g),"sim.omega.2")
for (ni in 1:length(n)) {
for (gi in 1:length(g)) {
result[[k]] <- sim.omega(nvar=nvar,nfact=nfact,n=n[ni],g =g[gi],fbig=fbig,fsmall=fsmall,bipolar=bipolar,ntrials=ntrials,om.fact=om.fact,sem=sem)
k <- k+1
}
}
cnames <- colnames(result[[1]])
#result <- unlist(result)
#if(sem) {result <- matrix(result,ncol=10,byrow=TRUE)} else {result <- matrix(result,ncol=9,byrow=TRUE) }
#colnames(result) <- cnames

return(result)
}


"sim.general" <- 
function(nvar=9,nfact=3, g=.3,r=.3,n=0) {

  r1 <- matrix(r,nvar/nfact,nvar/nfact)
  R <- matrix(g,nvar,nvar)
  rf <- superMatrix(r1,r1)
  if(nfact>2) {for (f in 1:(nfact-2)){
  rf <- superMatrix(r1,rf)}}
  R <- R + rf
  diag(R) <- 1
  colnames(R) <- rownames(R) <- paste((paste("V",1:(nvar/nfact),sep="")),rep(1:nfact,each=(nvar/nfact)),sep="gr")
  if(n > 0) {#x <-  mvrnorm(n = n, mu=rep(0,nvar), Sigma = R, tol = 1e-06,empirical = FALSE)
   eX <- eigen(R)
                                      x <- matrix(rnorm(nvar * n),n)
                                      x <- t( eX$vectors %*% diag(sqrt(pmax(eX$values, 0)), nvar) %*%  t(x))
   return(x)} else {
  return(R)} }
  
  
#not public
#simulate the difference between  two groups  
  sim.groups <- function(n=1000,r=.5,d=.5,nvar = 2,bg= -1) {
model <- matrix(r,nvar,nvar)
 diag(model) <- 1
 eX <- eigen(model)
 observed <- matrix(rnorm(nvar * n),n)
 observed <- t( eX$vectors %*% diag(sqrt(pmax(eX$values, 0)), nvar) %*%  t(observed))
 n1 <- n/2
if(bg < 1 ) {  mu1 <- c(0,d)} else {mu1 <- 0 }
 group1 <- t(t( observed[1:n1,]) + mu1  )
if(bg < 1) {mu2 <- c(d,0)} else {mu2 <- d}
 group2 <- t( t(observed[(n1+1):n,]) + mu2)
 data <- data.frame(grp=1,vars=group1)
 data2 <- data.frame(grp=2,vars=group2)
 data <- rbind(data,data2)
 return(data)
 }
 
 
 "sim.general.theta" <- 
function(nvar=9,nfact=3, g=.3,r=.3,n=0) {
#require(MASS)
  r1 <- matrix(r,nvar/nfact,nvar/nfact)
  R <- matrix(g,nvar,nvar)
  rf <- superMatrix(r1,r1)
  if(nfact>2) {for (f in 1:(nfact-2)){
  rf <- superMatrix(r1,rf)}}
  R <- R + rf
  diag(R) <- 1
  colnames(R) <- rownames(R) <- paste((paste("V",1:(nvar/nfact),sep="")),rep(1:nfact,each=(nvar/nfact)),sep="gr")
  if(n > 0) {#x <-  mvrnorm(n = n, mu=rep(0,nvar), Sigma = R, tol = 1e-06,empirical = FALSE)
   eX <- eigen(R)
                                      x <- matrix(rnorm(nvar * n),n)
                                      x <- t( eX$vectors %*% diag(sqrt(pmax(eX$values, 0)), nvar) %*%  t(x))
                                       
   return(x)} else {
  return(R)} }

Try the psych package in your browser

Any scripts or data that you put into this service are public.

psych documentation built on Sept. 26, 2023, 1:06 a.m.