R/exploratory.R

#######################################################################
#' @name alpha.cronbach
#' @title Cronbach's alpha
#' @description Cronbach's alpha measures how closely interrelated the items in a test are.
#' @param test a matrix or data.frame holding the test response data.
#' @details The coefficient is calculated as \deqn{\alpha = \frac{n}{n-1}\left(1 - \frac{\sum V_i}{V_t}\right)}
#' where \eqn{V_t} is the variance of the total test scores and \eqn{V_i} is the variance of the scores on item i.
#' It is desirable that the items be closely interrelated (coefficient near 1).
#' This function was extracted from the multilevel (v2.5) package.
#' @return Cronbach's alpha for the test and the number of individuals in the test.
#' @examples
#' data <- simulateTest(model="2PL",items=10,individuals=1000)
#' alpha.cronbach(data$test)
#' @references Cronbach L. J. (1951) Coefficient Alpha and the internal structure of tests. Psychometrika, 16,297-334
#' @export

alpha.cronbach <- function (test) 
{
  data <- na.exclude(test)
  N <- ncol(data)
  TOTVAR <- var(apply(data, 1, sum)) # variance of the row (person) sums
  SUMVAR <- sum(apply(data, 2, var)) # sum of the item (column) variances
  ALPHA <- (N/(N - 1)) * (1 - (SUMVAR/TOTVAR))
  OUT <- list(Alpha = ALPHA, N = nrow(data))
  return(OUT)
}
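
# Quick hand check (not run): recomputing alpha from the variance decomposition
# should match alpha.cronbach(). `resp` is a hypothetical 0/1 response matrix
# used only for illustration.
# resp    <- matrix(rbinom(500, 1, 0.6), nrow = 100, ncol = 5)
# n       <- ncol(resp)
# by.hand <- (n/(n - 1)) * (1 - sum(apply(resp, 2, var))/var(rowSums(resp)))
# all.equal(by.hand, alpha.cronbach(resp)$Alpha)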

#######################################################################
#' @name alpha.c
#' @title Cronbach-Mesbah Curve
#' @description Assesses the unidimensionality of a set of items from the alpha coefficient.
#' @param test a data.frame holding the test response data.
#' @details The curve is constructed step by step as follows:
#' 1. The first step uses all items to compute alpha. 
#' 2. One item is removed from the scale; the removed item is the one that leaves the scale
#' with its maximum alpha value. Removing a bad item increases the alpha coefficient,
#' whereas removing a good item decreases it. 
#' 3. This procedure is repeated until only two items remain.
#' This function was extracted from the CMC (v1.0) package.
#' @return The number of items used to calculate the coefficient. 
#' @return The maximum value of the alpha coefficient calculated at each step. 
#' @return The item removed at each step. 
#' @return The Cronbach-Mesbah curve plot.
#' @examples
#' data <- simulateTest(model="2PL",items=10,individuals=1000)
#' curve <- alpha.c(data$test)
#' @references Cameletti, M. & Caviezel, V. (2010). Checking the unidimensionality in R
#' using the Cronbach-Mesbah curve.
#' @references Mesbah, M. (2010). Statistical quality of life. In "Method and Applications of Statistics in the 
#' Life and Health Sciences", N. BalakrishnanEd., Wiley, pp. 839-864. 
#' @export

alpha.c <- function (test) {
  data <- test
  n <- nrow(data)
  k <- ncol(data)
  max.vec <- c()
  which.max.vec <- c()
  complete.alpha <- alpha.cronbach(data)$Alpha
  # Step 1: drop each single item in turn and record the best alpha.
  j <- 1
  elements.to.remove.1 <- seq(1:k)
  alpha.1 <- c()
  for (i in 1:length(elements.to.remove.1)) {
    data.reduced <- data[, -elements.to.remove.1[i]]
    alpha.1[i] <- alpha.cronbach(data.reduced)$Alpha
  }
  max.vec[j] <- max(alpha.1)
  which.max.vec[j] <- which.max(alpha.1)
  # Steps 2..(k-2): keep removing the item that maximizes the remaining alpha.
  for (j in 2:(k - 2)) {
    elements.to.remove <- matrix(0, nrow = (k - j + 1), ncol = (j - 1))
    for (r in 1:length(which.max.vec)) {
      elements.to.remove[, r] <- matrix(rep(which.max.vec[r], k - j + 1), ncol = 1)
    }
    elements.to.remove <- cbind(elements.to.remove,
                                seq(1:k)[-which.max.vec[1:(j - 1)]])
    alpha <- c()
    for (i in 1:nrow(elements.to.remove)) {
      data.reduced <- data[, -elements.to.remove[i, ]]
      alpha[i] <- alpha.cronbach(data.reduced)$Alpha
    }
    max.vec[j] <- max(alpha)
    which.max.vec[j] <- elements.to.remove[which.max(alpha), j]
  }
  output <- data.frame(N.Item = seq(2, k),
                       `Alpha Max` = c(rev(max.vec), complete.alpha),
                       `Removed Item` = c(colnames(data)[rev(which.max.vec)], "--"))
  plot(output[, 1], output[, 2], type = "b", xlab = "N.Item",
       ylab = "Alpha Maximum", main = "Cronbach-Mesbah Curve")
  text(seq(2, k), output[, 2],
       c(colnames(data)[rev(which.max.vec)], ""), pos = 3, cex = 0.6)
  return(output)
}
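
# Reading the output (not run): the step with the largest alpha suggests how
# many items form a unidimensional scale; a maximum reached before the full
# item set hints that the later items break unidimensionality.
# data  <- simulateTest(model = "2PL", items = 10, individuals = 1000)
# curve <- alpha.c(data$test)
# curve[which.max(curve$Alpha.Max), ] # step with the maximum alpha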

#######################################################################
#' @name biserial.cor
#' @title Biserial Correlation
#' @description The point-biserial correlation coefficient is a correlation coefficient
#' used when one variable is continuous and the other variable is dichotomous.
#' @param x a numeric vector representing the continuous variable. 
#' @param y a numeric vector representing the dichotomous variable.
#' @param use an option specifying how missing values are handled. 
#' @param level which level of y to use.
#' @details It is obtained by applying the Pearson correlation coefficient to the case
#' where one of the variables is dichotomous. 
#' It is calculated as \deqn{r_{xy} = \frac{\bar{x}_p - \bar{x}_q}{S_x}\sqrt{pq}}
#' where p is the proportion of subjects with one of the two possible values of the 
#' variable Y, q is the proportion of subjects with the other possible value, 
#' \eqn{\bar{x}_p} and \eqn{\bar{x}_q} are the means of X over the subjects in each of the two groups,
#' and \eqn{S_x} is the standard deviation of X over all subjects.
#' This function was adapted from the ltm (v1.0) package.
#' @examples 
#' data <- simulateTest(model="2PL",items=10,individuals=1000)
#' biserial.cor(rowSums(data$test), data$test[,1])
#' @return The value of the point-biserial correlation.
#' @references U.Olsson, F.Drasgow, and N.Dorans (1982). The polyserial correlation coefficient. Psychometrika, 47:337-347.
#' @references Cox. N.R. (1974). Estimation of the Correlation between a Continuous and a Discrete Variable. Biometrics, 30:171-178.
#' @export

biserial.cor <- function (x, y, use = c("all.obs", "complete.obs"), level = 1) 
{
  if (!is.numeric(x)) 
    stop("'x' must be a numeric variable.\n")
  y <- as.factor(y)
  if (length(levs <- levels(y)) > 2) 
    stop("'y' must be a dichotomous variable.\n")
  if (length(x) != length(y)) 
    stop("'x' and 'y' do not have the same length")
  use <- match.arg(use)
  if (use == "complete.obs") {
    cc.ind <- complete.cases(x, y)
    x <- x[cc.ind]
    y <- y[cc.ind]
  }
  ind <- y == levs[level]
  diff.mu <- mean(x[ind]) - mean(x[!ind])
  prob <- mean(ind)
  diff.mu * sqrt(prob * (1 - prob))/sd(x)
}
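
# Cross-check (not run): with a 0/1 indicator for the chosen level, the result
# matches the Pearson correlation up to the factor sqrt((n - 1)/n), because
# sqrt(pq) above is the population (not sample) SD of the indicator.
# x   <- rnorm(200); y <- rbinom(200, 1, 0.4); n <- length(x)
# ind <- as.numeric(y == levels(as.factor(y))[1])
# all.equal(biserial.cor(x, y, level = 1), cor(x, ind) * sqrt((n - 1)/n))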


#######################################################################
#' @name information 
#' @title Test or Item information
#' @description Computes the amount of test or item information. 
#' @param object a matrix of the estimated item parameters.
#' @param range an interval for which the test information should be computed.
#' @param items the items for which the information should be computed.
#' @details The amount of information is computed as the area under the Item or Test
#' Information Curve. 
#' The function was adapted from the ltm (v1.0) package.
#' @return InfoTotal the total amount of information.
#' @return InfoRange the amount of information in the specified interval.
#' @return PropRange the proportion of the total information that lies in the specified interval.
#' @return range the value of the range argument.
#' @return items the value of the items argument.
#' @examples
#' #data <- simulateTest(model = "2PL", items = 20, individuals = 800) 
#' #fit <- irtpp(dataset = data$test,model = "2PL")
#' #fit <- parameter.matrix(fit$z)
#' #information(fit, c(-2, 0))
#' #information(fit, c(0, 2), items = c(3, 5))
#' @references Reckase, M. (2009). Multidimensional item response theory. New York: Springer.
#' @references Baker, F. B., & Kim, S. H. (Eds.). (2004). Item response theory: Parameter estimation techniques. CRC Press.
#' @export

information <- function (object, range, items = NULL) {
  p <- nrow(object) # number of items
  
  itms <- if (!is.null(items)) {
    if (!is.numeric(items) || length(items) > p) 
      stop("'items' should be a numeric vector of maximum length ", p,
           ".\n")
    if (any(!items %in% 1:p)) 
      stop("'items' should contain numbers in: ", paste(1:p, collapse = ", "), " indicating the items.\n")
    items
  }
  else 1:p
  
  # Test information at ability z, summed over the selected items (3PL model).
  f <- function(z){
    thetas <- object 
    Z <- cbind(z, 1)
    betas <- thetas[, 1:2]        # discrimination and intercept
    cs <- plogis(thetas[, 3])     # guessing parameters on the probability scale
    pi. <- plogis(Z %*% t(betas)) # 2PL part of the response probability
    cs <- matrix(cs, nrow(Z), p, TRUE) 
    pi <- cs + (1 - cs) * pi.     # 3PL response probability
    pqr <- pi * (1 - pi) * (pi./pi)^2
    mat <- t(t(pqr) * (betas[, 1]^2))
    rowSums(mat[, itms, drop = FALSE])
  }
  
  
  I0 <- integrate(f, -10, 10)$value
  I1 <- integrate(f, range[1], range[2])$value
  
  out <- list(InfoRange = I1, InfoTotal = I0, PropRange = I1/I0, 
              range = range, items = items)
  out
}
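
# Minimal sketch (not run), assuming the parameter matrix has one row per item
# and columns (discrimination, intercept, logit of the guessing parameter),
# the layout f() above expects. The values below are illustrative only.
# pars <- cbind(a = c(1.0, 1.5, 0.8),
#               d = c(0.0, -0.5, 0.3),
#               c = qlogis(c(0.20, 0.10, 0.25)))
# information(pars, range = c(-2, 2))            # whole test
# information(pars, range = c(-2, 2), items = 2) # a single item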

#######################################################################
#' @name gutt
#' @title Guttman's Lambda
#' @description Six lower bounds for the reliability coefficient are presented. 
#' @param test a matrix or data.frame holding the test response data.
#' @details Let \eqn{s_j^2} be the variances over persons of the n items in the test, and
#' \eqn{s_t^2} the variance over persons of the sum of the items.
#' The first estimate \eqn{\lambda_1} can be computed as \deqn{L_1 = 1 - \frac{\sum s_j^2}{s_t^2}}
#' Let \eqn{C_2} be the sum of squares of the covariances between distinct items, i.e. a sum
#' over the \eqn{n(n-1)} ordered pairs. The bound \eqn{\lambda_2} is computed as
#' \deqn{L_2 = L_1 + \frac{\sqrt{\frac{n}{n-1}C_2}}{s_t^2}}
#' The third lower bound \eqn{\lambda_3} is a modification of \eqn{\lambda_1}; it is computed
#' as \deqn{L_3 = \frac{n}{n-1}L_1}
#' The fourth lower bound \eqn{\lambda_4} has been interpreted as the greatest split-half reliability,
#' and requires that the test be scored as two halves. It is calculated as
#' \deqn{L_4 = 2\left(1 - \frac{s_a^2 + s_b^2}{s_t^2}\right)} where \eqn{s_a^2} and \eqn{s_b^2} are the respective variances
#' of the two parts for the single trial. 
#' For the fifth lower bound \eqn{\lambda_5}, let \eqn{C_{2j}} be the sum of the squares of the
#' covariances of item j with the remaining \eqn{n-1} items, and let \eqn{\bar{C}_2} be the largest of
#' the \eqn{C_{2j}}. Then the coefficient can be computed as \deqn{L_5 = L_1 + \frac{2\sqrt{\bar{C}_2}}{s_t^2}}
#' The final bound is based on multiple correlation. Let \eqn{e_j^2} be the variance of the errors
#' of estimate of item j from its linear multiple regression on the remaining n-1 items. Then
#' \eqn{\lambda_6} can be computed as \deqn{L_6 = 1 - \frac{\sum e_j^2}{s_t^2}}
#' @return The six Guttman coefficients for the test.
#' @examples 
#' #data <- simulateTest(model="2PL",items=10,individuals=1000)
#' #gutt(data$test) 
#' @references Guttman, L. (1945). A basis for analyzing test-retest reliability. Psychometrika, 10(4), 255-282.
#' @export

gutt <- function(test){
  
  r <- cov(test)
  n <- ncol(r)
  st <- sum(r) # variance of the total score (sum of all covariance entries)
  
  L1 <- 1 - (sum(apply(test, 2, var))/st)
  C2 <- sum(r^2) - sum(diag(r)^2) # squared covariances between distinct items
  L2 <- L1 + (sqrt(C2*(n/(n-1)))/st)
  L3 <- n/(n-1)*L1
  
  ### L4: greedy split into two halves, pairing the most strongly covarying
  ### items first.
  
  xy <- matrix(ncol = 2, nrow = n/2)
  r.split <- r
  r.split[upper.tri(r.split, diag = TRUE)] <- -999999
  
  for (i in 1:(n/2)) {
    x.m <- which(r.split == max(r.split), arr.ind = TRUE)[1,]
    xy[i, 1] <- x.m[1]
    xy[i, 2] <- x.m[2]
    r.split[(x.m[1]), ] <- -999999
    r.split[, (x.m[1])] <- -999999
    r.split[, (x.m[2])] <- -999999
    r.split[(x.m[2]), ] <- -999999
  }
  A <- xy[, 1]
  B <- xy[, 2]
  lf <- which(1:n %in% c(A, B) == FALSE)
  if (length(c(A, B)) != n) {
    B <- c(B, lf) # an odd leftover item goes to the second half
  }
  
  t1t <- matrix(rep(NA, n), nrow = 1)
  t1t[A] <- 1 # indicator of the first half
  t1t[B] <- 0
  t2 <- (t(t1t) - 1) * -1 # indicator of the second half
  
  # L4 = 4 Cov(A, B)/s_t^2, equivalent to 2(1 - (s_a^2 + s_b^2)/s_t^2)
  L4 <- as.numeric(4 * (t1t %*% r %*% t2)/st)
  
  ## L5
  
  c2j <- (r^2)            
  c2 <- NULL
  for(j in 1:ncol(test)){
    c2[j] <- sum(c2j[j,-j]) # squared covariances of item j with the rest
  }
  L5 <- L1 + ((2*sqrt(max(c2)))/st)
  
  ## L6: 1/diag(solve(r)) gives the residual variances e_j^2 of each item
  ## regressed on the remaining items.
  L6 <- 1 - sum(1/diag(solve(r)))/st
  
  result <- list(lambda1 = L1, lambda2 = L2, lambda3 = L3, lambda4 = L4,
                 lambda5 = L5, lambda6 = L6)
  return(result)
  
}
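
# Cross-check (not run): lambda3 is algebraically Cronbach's alpha, so gutt()
# and alpha.cronbach() should agree on complete data.
# resp <- matrix(rbinom(1000, 1, 0.5), ncol = 10)
# all.equal(gutt(resp)$lambda3, alpha.cronbach(resp)$Alpha)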

#######################################################################
#' @name Yule
#' @title Yule coefficient of correlation 
#' @description The Yule coefficient is a correlation coefficient applied 
#' to dichotomous data. It takes a 2 x 2 table of counts
#' | a | b | R1 |
#' | c | d | R2 |
#' |---|---|----|
#' |C1 | C2| n  |
#' or a vector c(a,b,c,d) of frequencies.
#' @param x a vector of length 4 or a 2 x 2 matrix of frequencies.
#' @param Y logical; if TRUE, Yule's Y coefficient of colligation is returned instead.
#' @details The Yule coefficient is calculated as \deqn{Q = (ad - bc)/(ad + bc)}
#' This is the number of pairs in agreement (ad) minus the number in disagreement (bc), 
#' over the total number of paired observations. 
#' @return the value of the Yule Q coefficient (or Yule's Y when \code{Y = TRUE}).
#' @examples 
#' #x <- c(12,8,16,9)
#' #Yule(x)
#' @references Yule, G.U. (1912). On the methods of measuring the association between two attributes. Journal of the Royal Statistical Society, 75, 579-652.
#' @references Warrens, Matthijs (2008), On Association Coefficients for 2x2 Tables and Properties That Do Not Depend on the Marginal Distributions. Psychometrika, 73, 777-789.
#' @export

Yule <- function (x, Y = FALSE) 
{
  stopifnot(prod(dim(x)) == 4 || length(x) == 4)
  if (is.vector(x)) {
    x <- matrix(x, 2)
  }
  a <- x[1, 1]
  b <- x[1, 2]
  c <- x[2, 1]
  d <- x[2, 2]
  if (Y) {
    Yule <- (sqrt(a * d) - sqrt(b * c))/(sqrt(a * d) + sqrt(b * c))
  }
  else {
    Yule <- (a * d - b * c)/(a * d + b * c)
  }
  return(Yule)
}
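
# Worked relation (not run): Q is a monotone transform of the odds ratio
# OR = ad/bc, namely Q = (OR - 1)/(OR + 1); Yule's Y uses sqrt(OR) instead.
# x  <- c(12, 8, 16, 9) # filled column-major: a = 12, c = 8, b = 16, d = 9
# OR <- (12 * 9)/(16 * 8)
# all.equal(Yule(x), (OR - 1)/(OR + 1))
# all.equal(Yule(x, Y = TRUE), (sqrt(OR) - 1)/(sqrt(OR) + 1))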

#######################################################################
#' @name phi
#' @title Phi coefficient of correlation 
#' @description The phi coefficient is a correlation coefficient applied 
#' to dichotomous data. It takes a 2 x 2 table of counts
#' | a | b | R1 |
#' | c | d | R2 |
#' |---|---|----|
#' |C1 | C2| n  |
#' or a vector c(a,b,c,d) of frequencies.
#' @param x a vector of length 4 or a 2 x 2 matrix of frequencies.
#' @details The phi coefficient is calculated as \deqn{\phi = (ad - bc)/\sqrt{p_1 p_2 q_1 q_2}}
#' where \eqn{p_i} and \eqn{q_i} are the marginal proportions of the two dichotomous variables. 
#' @return the value of the phi correlation coefficient.
#' @examples
#' #x <- c(12,8,16,9)
#' #phi(matrix(x, ncol = 2))
#' @references Warrens, Matthijs (2008), On Association Coefficients for 2x2 Tables and Properties That Do Not Depend on the Marginal Distributions. Psychometrika, 73, 777-789.
#' @references Yule, G.U. (1912). On the methods of measuring the association between two attributes. Journal of the Royal Statistical Society, 75, 579-652.
#' @export

phi <- function (x) 
{
  stopifnot(prod(dim(x)) == 4 || length(x) == 4)
  if (is.vector(x)) 
    x <- matrix(x, 2)
  rsum <- rowSums(x)
  csum <- colSums(x)
  total <- sum(rsum)
  rsum <- rsum/total # marginal row proportions
  csum <- csum/total # marginal column proportions
  v <- prod(rsum, csum)
  phi <- (x[1, 1]/total - csum[1] * rsum[1])/sqrt(v)
  names(phi) <- NULL
  return(round(phi, 2))
}
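
# Cross-check (not run): phi on a 2 x 2 table of counts equals the Pearson
# correlation of the underlying 0/1 vectors (up to the 2-digit rounding
# applied by the function).
# u <- rbinom(300, 1, 0.5); v <- rbinom(300, 1, 0.5)
# all.equal(phi(table(u, v)), round(cor(u, v), 2))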

#######################################################################
#' @name polyserial.cor
#' @title Polyserial correlation
#' @description The polyserial correlation coefficient is a correlation coefficient
#' used when one variable is continuous and the other is ordinal (discrete ordered categories).
#' @param x a numeric vector representing the continuous variable. 
#' @param y a numeric vector representing the ordinal variable.
#' @details The coefficient is calculated as \deqn{\rho = r_{xy} * \sqrt{(n - 1)/n} * s_y/\sum{\phi(\tau)}}
#' where \eqn{r_{xy}} is the Pearson correlation coefficient, \eqn{s_y} is the 
#' standard deviation of Y, and \eqn{\phi(\tau)} are the ordinates of the normal curve at 
#' the normal equivalents of the cut-point boundaries between the item responses.
#' This function was adapted from the ltm (v1.0) package.
#' @examples
#'  x <- rnorm(100)
#'  y <- sample(1:5,100,replace=TRUE)
#'  cor(x, y) 
#'  polyserial.cor(x, y) 
#' @return the value of the polyserial correlation.
#' @references U.Olsson, F.Drasgow, and N.Dorans (1982). The polyserial correlation coefficient. Psychometrika, 47:337-347.
#' @export

polyserial.cor <- function (x, y) 
{
  min.item <- min(y, na.rm = TRUE) 
  max.item <- max(y, na.rm = TRUE) 
  n.cases <- length(y) 
  
  # Append one dummy observation per possible category so that table() sees
  # every category, then remove the dummy counts again.
  frequency <- table(c(y, min.item:max.item)) - 1 
  frequency <- frequency/sum(frequency) # category proportions
  cums <- cumsum(frequency)             # cumulative proportions
  len <- length(cums) 
  tau <- dnorm(qnorm(cums[-len]))       # normal ordinates at the cut points
  stau <- sum(tau) 
  rxy <- cor(x, y, use = "pairwise") 
  sdy <- sd(y) 
  rps <- rxy * sqrt((n.cases - 1)/n.cases) * sdy/stau 
  rps[rps > 1] <- 1
  rps[rps < -1] <- -1
  return(rps)
}
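
# Comparison sketch (not run): discretizing a latent continuous variable
# attenuates the Pearson correlation; the polyserial estimate corrects for
# this and should be larger in magnitude than cor(x, y).
# z <- rnorm(500)
# y <- cut(z + rnorm(500, sd = 0.5), breaks = c(-Inf, -1, 0, 1, Inf),
#          labels = FALSE) # ordinal version of the latent variable
# x <- z
# cor(x, y)
# polyserial.cor(x, y)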

#######################################################################
#' @name an.parallel
#' @title Parallel Analysis
#' @description Performs Horn's parallel analysis for principal components. 
#' @param x a matrix or data.frame holding the test response data.
#' @param iterations a number indicating the number of random data sets 
#' to be produced in the analysis.
#' @param centile a number between 1 and 99 indicating the centile used in estimating bias.
#' @param seed specifies that the random number generator is to be seeded with the supplied integer.
#' @param mat specifies a correlation matrix to be used directly rather
#' than supplying a data matrix through x. The n argument must also be supplied when 
#' mat is used.
#' @param n the number of observations. Required when the correlation matrix is supplied 
#' with the mat option.
#' @details This is an implementation of Horn's (1965) technique for evaluating the components retained
#' in a principal component analysis (PCA). This procedure is an adaptation of the
#' function paran from the paran package.
#' @return Retained Components a scalar integer representing the number of components retained.
#' @return Adjusted eigenvalues a vector of the adjusted eigenvalue estimates.
#' @return Unadjusted eigenvalues a vector of the eigenvalues of the observed data from
#' an unrotated principal component analysis.
#' @return Bias a vector of the estimated bias of the unadjusted eigenvalues.
#' @examples 
#' data <- simulateTest(model="2PL",items=10,individuals=1000) 
#' an.parallel(data$test, iterations = 100, centile = 99, seed = 12)
#' @references John L. Horn (1965). A rationale and test for the number of factors 
#' in factor analysis. Psychometrika, Volume 30, Number 2, Page 179.
#' @references Dinno A. 2009. Exploring the Sensitivity of Horn's Parallel Analysis to the 
#' Distributional Form of Simulated Data. Multivariate Behavioral Research. 44(3): 362-388
#' @export

an.parallel <- function(x = NA, iterations = 0, centile = 0, seed = 0, 
                        mat = NA, n = NA) 
{
  if (!is.na(mat[[1]][1]) & !is.na(x[[1]][1])) {
    stop("\nYou must supply either x or mat but not both.")
  }
  if (is.na(mat[[1]][1]) & !is.na(x[[1]][1])) {
    P <- length(as.matrix(x)[1, ]) # number of variables in a data matrix
  }
  if (!is.na(mat[[1]][1]) & is.na(x[[1]][1])) {
    P <- length(mat[1, ]) # number of variables in a correlation matrix
  }
  if (!is.na(mat[[1]][1])) {
    if (length(mat[1, ]) != length(mat[, 1])) {
      stop("\nThe matrix provided with the mat argument is not a correlation matrix.\n")
    }
    if (is.element(FALSE, (diag(mat) == rep(1, P)))) {
      stop("\nThe matrix provided with the mat argument is not a correlation matrix.\nParallel analysis is not compatible with the eigendecomposition of a covariance matrix.")
    }
  }
  
  if (!is.na(x[[1]][1])) {
    N <- length(as.matrix(x)[, 1]) 
    if (!is.na(n)) {
      warning("\nThe n argument is only for use with the mat argument. Ignoring n argument.")
    }
    
    R <- cor(x) 
    cat("\nUsing eigendecomposition of correlation matrix.")
  }
  
  if (!is.na(mat[[1]][1])) {
    if (!is.na(mat[[1]][1]) & is.na(n)) {
      stop("\nYou must also provide the sample size when using the matrix argument.")
    }
    
    
    N <- n 
    R <- mat 
    cat("\nUsing eigendecomposition of provided correlation matrix.")
  }
  
  centile <- round(centile) 
  
  if (centile > 99 | centile < 0) {
    stop("\nYou must specify a centile value between 1 and 99.\n(Specifying centile 0 will use the mean.)")
  }
  
  eigenvalues <- eigen(R, only.values = TRUE, EISPACK = FALSE)[[1]]
  
  Ev <- eigenvalues
  model <- "component"
  models <- "components"
  Model <- "Component"
  Models <- "Components"
    
  if (iterations < 1) {
    iterations <- 30 * P
  }
  
  SimEvs <- matrix(NA, iterations, P)
  
  for (k in 1:iterations) {
    
    Sim <- matrix(NA, N, P)
    
    if (seed != 0) {
      set.seed(seed * k)
    }
    
    Sim <- matrix(rnorm(N * P), N, P)
  
    eigenvalues <- eigen(cor(Sim), only.values = TRUE, EISPACK = FALSE)[[1]]
    Evs <- eigenvalues
    SimEvs[k, ] <- Evs
  }
  
  cat("\n\nResults of Horn's Parallel Analysis for ", model, 
      " retention\n", sep = "")
  if (iterations == 1) {
    if (centile == 0) {
      cat("1 iteration, using the mean estimate", "\n", 
          sep = "")
    }
    if (centile != 0) {
      cat("1 iteration, using the ", centile, " centile estimate", 
          "\n", sep = "")
    }
  }
  if (iterations > 1) {
    if (centile == 0) {
      cat(iterations, " iterations, using the mean estimate", 
          "\n", sep = "")
    }
    if (centile != 0 & centile != 50) {
      cat(iterations, " iterations, using the ", centile, 
          " centile estimate", "\n", sep = "")
    }
    if (centile == 50) {
      cat(iterations, " iterations, using the ", centile, 
          " centile (median) estimate", "\n", sep = "")
    }
  }
  cat("\n--------------------------------------------------", 
      "\n")
  cat(Model, "  Adjusted    Unadjusted    Estimated", "\n")
  cat("            Eigenvalue  Eigenvalue    Bias", "\n")
  cat("--------------------------------------------------", 
      "\n")
  
  RndEv = c(1:P) * NA
  
  if (centile > 0) {
    for (p in 1:P) {
      RndEv[[p]] <- quantile(SimEvs[, p], probs = centile/100)[[1]]
    }
  }
  
  if (centile == 0) {
    for (p in 1:P) {
      RndEv[[p]] <- mean(SimEvs[, p])
    }
  }
  
  if (Ev[[1]] < 1 | RndEv[[1]] < 1) {
    cat("No components passed.", "\n")
    cat("--------------------------------------------------", 
        "\n")
    stop("No components passed.")
  }
  
  Bias <- rep(0, P)
  AdjEv <- rep(1, P)
  
  for (p in 1:P) {
    Bias[p] <- RndEv[p] - 1
    AdjEv[p] <- Ev[[p]] - Bias[p]
  }

  y <- NA
  retained <- P # default: all components retained if none falls below 1
  
  for (x in 1:P) {
    y <- x
    
    if (AdjEv[x] <= 1) {
      y <- x - 1
      retained <- y
      break
    }
    
  }
  
  y <- P
  
  for (x in 1:y) {
    if (AdjEv[x] >= 0) {
      AdjSpace = " "
    }
    if (AdjEv[x] < 0) {
      AdjSpace = ""
    }
    if (Ev[[x]] >= 0) {
      EvSpace = " "
    }
    if (Ev[[x]] < 0) {
      EvSpace = ""
    }
    if (Bias[x] >= 0) {
      BiasSpace = " "
    }
    if (Bias[x] < 0) {
      BiasSpace = ""
    }
    if (x > 9) {
      xPad = ""
    }
    if (x <= 9) {
      xPad = " "
    }
    AdjFPad = "   "
    if (round(AdjEv[x]) >= 10) {
      AdjFPad = "  "
    }
    if (round(AdjEv[x]) >= 100) {
      AdjFPad <- " "
    }
    SN <- 8
    if (abs(AdjEv[x]) >= 10) {
      SN <- 9
    }
    if (abs(AdjEv[x]) >= 100) {
      SN <- 10
    }
    if (AdjEv[x] < 0) {
      SN <- SN + 1
    }
    EvFPad = "   "
    if (round(Ev[[x]]) >= 10) {
      EvFPad = "  "
    }
    if (round(Ev[[x]]) >= 100) {
      EvFPad = " "
    }
    EvSN <- 8
    if (abs(Ev[[x]]) >= 10) {
      EvSN <- 9
    }
    if (abs(Ev[[x]]) >= 100) {
      EvSN <- 10
    }
    if (abs(Ev[[x]]) >= 5e-07) {
      EvZPad <- ""
    }
    if (abs(Ev[[x]]) < 5e-07) {
      Ev[[x]] <- 0
      EvZPad <- ".000000"
    }
    BiasSN <- 8
    if (Bias[x] >= 10) {
      BiasSN <- 9
    }
    if (Bias[x] >= 100) {
      BiasSN <- 10
    }
    cat(x, xPad, "      ", AdjFPad, AdjSpace, strtrim(AdjEv[x], 
                                                      SN), EvFPad, EvSpace, strtrim(Ev[[x]], EvSN), 
        EvZPad, "     ", BiasSpace, strtrim(Bias[x], 
                                            BiasSN), "\n", sep = "")
  }
  cat("--------------------------------------------------", 
      "\n")
  cat("\nAdjusted eigenvalues > 1 indicate dimensions to retain.\n(", 
      retained, " ", models, " retained)\n\n", sep = "")
  col = c("black", "red", "blue")
  
  AdjEvCol = col[1]
  EvCol = col[2]
  RndEvCol = col[3]
  AdjEvLty = 1
  EvLty = 1
  RndEvLty = 1
  
  par(yaxs = "i", xaxs = "i", lab = c(P, ceiling(max(AdjEv[1], Ev[1], RndEv[1])), 2))
  plot.default(c(1:P), RndEv, type = "o", main = "Parallel Analysis", 
               xlab = "Components", ylab = "Eigenvalues", pch = 20, 
               col = RndEvCol, lty = 1, lwd = 1, 
               xlim = c(0.5,P + 0.5), ylim = c(min(AdjEv, Ev, RndEv) - 0.5, 
                                               ceiling(max(AdjEv[[1]], Ev[[1]], RndEv[[1]]))))
  abline(h = 1, col = "grey", lwd = 0.5)
  points(c(1:P), AdjEv, type = "o", col = AdjEvCol, lty = 1, 
         pch = 21, bg = "white", lwd = 1)
  points(c(1:P), Ev, type = "o", col = EvCol, lty = 1, 
         pch = 20, lwd = 1)
  if (retained >= 1) {
    points(c(1:retained), AdjEv[1:retained], type = "p", 
           pch = 19, col = AdjEvCol, lty = 1, lwd = 1)
  }
  legend("topright", legend = c("Adjusted Ev (retained)", 
                                "Adjusted Ev (unretained)", "Unadjusted Ev", 
                                "Random Ev"), col = c(AdjEvCol, AdjEvCol, EvCol, 
                                                      RndEvCol), pch = c(19, 21, 20, 20), lty = c(1, 1, 1, 1))
  
  invisible(list(Retained = retained, AdjEv = AdjEv, Ev = Ev, 
                 RndEv = RndEv, Bias = Bias, SimEvs = SimEvs))
}
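
# Usage note (not run): the result is returned invisibly, so assign it to
# inspect the retained components and the simulated eigenvalues.
# data <- simulateTest(model = "2PL", items = 10, individuals = 1000)
# pa   <- an.parallel(data$test, iterations = 100, centile = 99, seed = 12)
# pa$Retained # number of components with adjusted eigenvalue > 1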