R/parallelMRFA.R

Defines functions parallelMRFA

Documented in parallelMRFA

parallelMRFA<-function(X, Ndatsets = 500, percent = 95, corr = "Pearson", display = TRUE, graph = TRUE){

  #begin timer
  ptm <- proc.time()

  ######################################################################
  #  X : Raw sample scores
  ######################################################################

  if (missing(X)){
    stop("The argument X is not optional, please provide a valid raw sample scores")
  }

  ######################################################################
  #  Ndatsets: Number of random datasets used to compute the random distribution of eigenvalues
  ######################################################################

  if(Ndatsets<0){
    stop("Ndatsets argument has to be a positive value")
  }

  ######################################################################
  #  percent: Desired percentile of distribution of random eigenvalues (for example 95 for the 95th percentile) to be used as threshold
  ######################################################################

  if (percent>99.99 || percent<=0){
    stop("percent argument has to be between 1 and 99")
  }

  ######################################################################
  #  corr: Determine if Pearson or Polychoric matrix will be used (1: Computes Pearson correlation matrices 2: Computes Polychoric correlation matrices)
  ######################################################################

  if (corr!="Pearson" && corr!="Polychoric"){
      if (corr=="pearson"){
        corr = 1
      }
      else if (corr=="polychoric"){
        corr = 2
      }
    else {
      stop("corr argument has to be 'Pearson' for computing Pearson correlation or 'Polychoric' for computing Polychoric/Tetrachoric correlation)")
    }
  }
  else {
    if (corr=="Pearson"){
      corr = 1
    }
    else if (corr=="Polychoric"){
      corr = 2
    }
  }


  ######################################################################
  #  display argument: determines if the output will be displayed in the console

  if (display!=0 && display!=1){
    stop("display argument has to be logical (TRUE or FALSE, 0 or 1)")
  }

  ######################################################################
  #  graph argument: determines if the Scree Test plot will be printed
  ######################################################################

  if (graph!=0 && graph!=1){
      stop("graph argument has to be logical (TRUE or FALSE, 0 or 1)")
  }


  if (corr==1){
    corr_char = 'Pearson correlation matrices'
  }
  else if (corr==2){
    corr_char = 'Polychoric correlation matrices'
  }

  ################################# Everything  OK #################################
  ################################# Begin Analysis #################################

  siz<-size(X)
  N<-siz[1]
  m<-siz[2]

  if (corr==1){
    R<-cor(X)
  }
  else if (corr==2){
    #Polychoric matrix
    R<-(psych::polychoric(X))$rho
  }

  #check adequacy of the matrix (determinant, Bartlett & KMO)
  adeq<-adequacymatrix(R,N)

  check_adeq<-0
  if (adeq$kmo_index<0.7){
    check_adeq<-1
  }

  check_sr<-0
  D<-eigen(R)$values
  # Check if the matrix is positive-defined, if it is not, perform a smoothing
  if (min(D)<0){
    R<-sr(R,N)
    check_sr<-1
  }

  out<-mrfa(SIGMA=R,dimensionality=(m-1),random=10,display = 0)

  SIGMARED<-out$Matrix

  #function ed
    v<-eigen(SIGMARED)$values
    u<-eigen(SIGMARED)$vectors

    p<-length(v)
    v<-t(sort(v))
    i<-c(1:p)
    j<-c(p:1)
    v<-v[j]
    v<-diag(v)
    u<-u[,i[j]]

  realevals<-diag(v)
  si<-sum(realevals)
  realevals_p<-(realevals/si)*100

  evals<-array(0,dim=c(0,0))
  evals_p<-a<-array(0,dim=c(0,0))

  for (a in 1:Ndatsets){

    if (a==1){
      #calculate the time elapsed for computing the first one for estimating the total elapsed time
      ptm_one <- proc.time()
    }

    if (a==2){
      #waitbar
      cat('Computing Parallel Analysis: Please wait \n')
      pb <- txtProgressBar(min = 0, max = Ndatsets-1, style = 3)
    }

    Xi=optimbase::zeros(N,m)

    buff<-matrix(runif((N*m), min=0, max=1),N)
    buff2<-matrix(apply(buff,2,sort),N)
    J<-array(0,dim=c(0,0))
    buff_i<-array(0,dim=c(0,0))
    buff_n<-array(0,dim=c(0,0))

    for (ii in 1:m){
      remove(buff_i)
      remove(buff_n)
      remove(J)
      buff_i<-sample(buff2[,ii])
      J<-sort.list(buff_i,decreasing=TRUE)
      buff_n=as.numeric(unlist(X[J[],ii]))
      Xi[,ii]=buff_n
    }

    if (corr==1){
      Ri<-cor(Xi)
    }
    else if (corr==2){
      #polychoric matrix
      Ri<-(psych::polychoric(Xi,correct = 0))$rho
    }

    D<-eigen(Ri)$values

    SIGMARED<-mrfa(SIGMA = Ri, dimensionality = (m-1), random = 10,conv1 = 0.001, conv2 = 0.01, display = 0)$Matrix

    #function ed
      v<-eigen(SIGMARED)$values
      u<-eigen(SIGMARED)$vectors

      p<-length(v)
      v<-t(sort(v))
      i<-c(1:p)
      j<-c(p:1)
      v<-v[j]
      v<-diag(v)
      u<-u[,i[j]]

    L<-diag(v)

    Lp<-L/sum(L)*100

    evals<-c(evals,L)
    evals_p<-c(evals_p,Lp)

    if (a==1){
      time.taken.mrfa <- proc.time() - ptm_one

      time.taken.mrfa<-time.taken.mrfa[3]

      et.seconds<-time.taken.mrfa*Ndatsets


      if (corr==1){
        et.minutes<-(et.seconds/60)*1.8
      }
      else {
        et.minutes<-et.seconds/60
      }

      if (et.minutes<=1){
        cat('Estimated time for the analysis: less than a minute')
      }
      if (et.minutes>1 && et.minutes<=1.5) {
        cat(sprintf('Estimated time for the analysis: %3.0f minute',round(et.minutes)))
      }
      if (et.minutes>1.5) {
        cat(sprintf('Estimated time for the analysis: %3.0f minutes',round(et.minutes)))
      }

      cat('\n\n')
    }
    else{
      Sys.sleep(0.1)
      # update progress bar
      setTxtProgressBar(pb, a-1)
    }

  }
  close(pb)

  evals<-matrix(evals,m)
  evals<-apply(evals,2,sort,decreasing=T)
  evals_buff<-apply(evals,1,sort,decreasing=T)
  evals_decreasing<-apply(evals_buff,1,sort,decreasing=T)
  means<-apply(evals_decreasing,1,mean)

  evals_p<-matrix(evals_p,m)
  evals_p<-apply(evals_p,2,sort,decreasing=T)
  evals_buff_p<-apply(evals_p,1,sort,decreasing=T)
  evals_decreasing_p<-apply(evals_buff_p,1,sort,decreasing=T)
  means_p<-apply(evals_decreasing_p,1,mean)

  buff_percent=(Ndatsets+1)-(round((percent*Ndatsets)/100)) #revert because evals is decreasing

  percentiles<-evals_decreasing[,buff_percent]

  percentiles_p<-evals_decreasing_p[,buff_percent]

  #realevals_P contains the real-data % of variance
  #means_p contains the mean of random % of variance
  #percentiles_p contains the percentile of random % of variance

  campana<-0
  nf_per<-0
  nf_mean<-0
  for (i in 1:m){
    if (campana==0){
      if (realevals_p[i]>percentiles_p[i]){
        nf_per=nf_per+1
        nf_mean=nf_mean+1
      }
      else if(realevals_p[i]>means_p[i]){
        nf_mean=nf_mean+1
      }
      else{
        campana<-1
      }
    }
  }

  time.taken <- proc.time() - ptm

  seconds<-time.taken[3]
  hours<-floor(seconds/3600)
  minutes<-floor(seconds/60)
  seconds<-floor(seconds-(minutes*60))
  total_time<-sprintf('%02.0f:%02.0f:%02.0f',hours,minutes,seconds)

  OUT<-list('Real_Data'=realevals_p,'Real_Data_eigenv'=realevals, 'Mean_random'=means_p,'Percentile_random'=percentiles_p,'N_factors_mean'=nf_mean,'N_factors_percentiles'=nf_per)

  if (graph==1){

    buff_realevals<-realevals_p
    buff_means<-means_p
    buff_percentiles<-percentiles_p

    min_val=min(buff_realevals,buff_means,buff_percentiles)
    max_val=max(buff_realevals,buff_means,buff_percentiles)

    yrange=as.numeric(cbind(min_val,max_val))

    xrange=matrix(0,1,2)
    xrange[1]=size(buff_realevals)[2]
    xrange[2]=size(buff_realevals)[1]

    plot(xrange,yrange,type='n',xlab='Factors',ylab="% Explained Common Variance",xaxt='n',yaxt='n')

    if(max_val<10){
      axis_y<-1
    }
    else{
      axis_y<-round(max_val/10)
    }
    axis_y2<-seq(0,round(max_val+1),by=axis_y)
    axis(2,axis_y2)

    colors<-rainbow(3)

    if(xrange[1]<10){
      axis_x<-1
    }
    else{
      axis_x<-round((xrange[1]+1)/10)
    }
    axis_x2<-seq(0,round(xrange[1]+1),by=axis_x)
    axis(1,axis_x2)
    axis(1,nf_mean,col="#000000")
    grid(nx=NA,ny=NULL)
    abline(v=nf_mean,col="#000000",lty=3)

    linetype<-c(1:3)
    plotchar<-seq(18,18+3,1)

    data_to_plot=cbind(buff_realevals,buff_means,buff_percentiles)

    for (i in 1:3){
      lines(data_to_plot[,i],type='b',lwd=1.5,lty=linetype[i],col=colors[i],pch=plotchar[i])
    }
    title("Parallel Analysis")
    buff=character(length=3)
    buff[1]="Real-Data"
    buff[2]="Mean of random"
    buff[3]="Percentile of random"

    legend(xrange[1]-round(xrange[1]*0.4),yrange[2],buff,cex=0.8, col=colors,pch=plotchar,lty=linetype)
  }
  if (display==0){
    cat(sprintf('\nComputing time: %s\n\n',total_time))
    return(OUT)
  }
  else if (display==1){
    cat('\n')
    cat('Parallel Analysis (PA) based on Minimum Rank Factor Analysis\n\n')

    cat('Adequacy of the Dispersion Matrix:\n\n')
    cat(sprintf('Determinant of the matrix     = %17.15f\n',adeq$d))
    cat(sprintf('Bartlett\'s statistic          = %7.1f (df = %5.0f; P = %7.6f)\n',adeq$chisq,adeq$df,adeq$p_value))
    cat(sprintf('Kaiser-Meyer-Olkin (KMO) test = %7.5f ',adeq$kmo_index))
    if     (adeq$kmo_index >= 0.9){ cat(sprintf('(very good)'))}
    else if (adeq$kmo_index >= 0.8){ cat(sprintf('(good)'))}
    else if (adeq$kmo_index >= 0.7){ cat(sprintf('(fair)'))}
    else if (adeq$kmo_index >= 0.6){ cat(sprintf('(mediocre)'))}
    else if (adeq$kmo_index >= 0.5){ cat(sprintf('(bad)'))}
    else                     { cat(sprintf('(inaceptable)'))}
    cat('\n\n')

    cat('Implementation details:\n\n')
    cat(sprintf('  Correlation matrices analized:                %s\n',corr_char))
    cat(sprintf('  Number of random correlation matrices:        %.f\n',Ndatsets))
    cat('  Method to obtain random correlation matrices: Permutation of the raw data\n\n')

    cat(sprintf('Item      Real-data        Mean of random   %.f percentile of random\n',percent))
    cat('          % of variance    % of variance    % of variance\n\n')
    buff_realevals<-realevals_p
    buff_means<-means_p
    buff_percentiles<-percentiles_p

    campana<-0
    nf_per<-0
    nf_mean<-0
    for (i in 1:m){
      buff<-sprintf(' %3.0f      ',i)
      if (campana==0){
        if (buff_realevals[i]>buff_percentiles[i]){
          buff2=sprintf('%5.2f**          %5.2f            %5.2f\n',buff_realevals[i], buff_means[i], buff_percentiles[i])
          nf_per=nf_per+1
          nf_mean=nf_mean+1
        }
        else if(buff_realevals[i]>buff_means[i]){
          buff2=sprintf('%5.2f*           %5.2f            %5.2f\n',buff_realevals[i], buff_means[i], buff_percentiles[i])
          nf_mean=nf_mean+1
        }
        else{
          buff2=sprintf('%5.2f            %5.2f            %5.2f\n',buff_realevals[i], buff_means[i], buff_percentiles[i])
          campana<-1
        }
      }
      else{
        buff2=sprintf('%5.2f            %5.2f            %5.2f\n',realevals_p[i], means_p[i], percentiles_p[i])
      }
      cat(buff,buff2)
    }
    cat('\n')
    if (nf_mean>nf_per){
      cat(sprintf('**  Advised number of factors:   %.0f\n',nf_per))
      cat(sprintf('*   Advised number of factors:   %.0f\n\n',nf_mean))
    }
    else {
      cat(sprintf('**  Advised number of factors:   %.0f\n\n',nf_per))
    }

    #cat('References \n\n')
    #cat('Buja, A., & Eyuboglu, N. (1992). Remarks on parallel analysis. Multivariate Behavioral Research, 27(4), 509-540.\n\n')
    #cat('Timmerman, M. E., & Lorenzo-Seva, U. (2011). Dimensionality Assessment of Ordered Polytomous Items with Parallel Analysis. Psychological Methods, 16, 209-220.\n')

    if (check_sr==1){
      cat('WARNING: The matrix was not positive-defined, a smoothing procedure has been applied (Devlin, Gnanadesikan & Kettenring, 1981)\n\n')
    }
    if (check_adeq==1){
      cat('WARNING: The matrix is not suitable for performing factor analysis (KMO <0.7)\n\n')
    }

    cat(sprintf('Computing time: %s \n\n',total_time))

    invisible(OUT)
  }



}
navarro-gonzalez/DA.MRFA documentation built on May 23, 2019, 12:23 p.m.