R/CalculatePAOrdered.R

Defines functions CalculatePAOrdered

Documented in CalculatePAOrdered

################################################################################
# # Function for parallel analysis of correlation matrices of ordered data
################################################################################
CalculatePAOrdered <- function (dataMatrix, percentiles = 0.99, nReplicates = 200, use = "complete.obs", algorithm = "polycor") {
  # # Obtains a parallel analysis for ordered data
  # #
  # # Arg:
  # #  dataMatrix: matrix or data.frame of ordered variables
  # #  percentiles: vector of percentiles to report
  # #  nReplicates: number of simulations to produce for estimating the eigenvalues distribution under independence
  # #  use: Missing value handling method: If "complete.obs", remove observations with any missing data; if "pairwise.complete.obs", compute each correlation using all observations with valid data for that pair of variables.
  # #  algorithm: string specifying the correlation estimation algorithm. Polychoric correlation estimation method: "polycor" for estimates using the polycor package, "polychoric" for estimates using the C++ function polychoric.
  # #
  # # Ret:
  # #  parallelList: a list with data.frames observed, percentiles and simulated data.
  # #  observed: data.frame containing the observed eigenvalues
  # #  percentiles: data.frame containing the estimated percentiles of the eigenvalues distribution under independence
  # #  simulatedEigenValues: data.frame containing the simulated eigenvalues under independence



  ################################################################################
  # # Data verification
  ################################################################################
  if (!is.matrix(dataMatrix) & !is.data.frame(dataMatrix)) {
    stop("dataMatrix must be a matrix or a data.frame")
  }
  if (!is.data.frame(dataMatrix)) {
    dataMatrix <- data.frame(dataMatrix)
  }

  isOrderedData <- all(sapply(dataMatrix, is.ordered))
  if (!isOrderedData) {
    stop("All variables in dataMatrix must be ordered factors")
  }

  if (!(algorithm %in% c("polycor", "polychoric"))) {
    stop("Unknown algorithm")
  }

  ################################################################################
  # # Data information
  ################################################################################
  nObservations  <- nrow(dataMatrix)
  nVariables     <- ncol(dataMatrix)

  if (algorithm == "polycor") {
    datCorrelation <- hetcor(dataMatrix, pd = TRUE, use = use) # Polychoric
                        # correlations with matrix transformed to the nearest
                        # positive definite matrix
    datEigenValues <- eigen(datCorrelation)$values


  }
  if (algorithm == "polychoric") {
    isBinary = FALSE
    if (use == "complete.obs") {
      dataMatrix <- na.omit(dataMatrix)
    }

    dataMatrixInt <- as.matrix(as.data.frame.list(lapply(dataMatrix,
                                                    function(x)
                                                      as.integer(as.integer(x)-1))))

    result <- .Call(Cpolychoric, dataMatrixInt, isBinary,
                    nReplicates, sfsmisc::nearcor, new.env())
    datEigenValues <- sort(result[[1]], decreasing = TRUE)
  }

  observed <- data.frame(orderEigenValues = 1:nVariables,
                         typeEigenValues  = "Observed",
                         eigenValues      = datEigenValues,
                         stringsAsFactors = TRUE)

  ################################################################################
  # # Simulate correlation matrices under independence
  ################################################################################
  if (algorithm == "polycor") {
    simulatedEigenValues           <- matrix(nrow = nReplicates, ncol = nVariables)
    rownames(simulatedEigenValues) <- paste("iter", 1:nReplicates, sep = "")
    colnames(simulatedEigenValues) <- 1:nVariables

    simulatedData <- matrix(nrow = nObservations, ncol = nVariables)
    simulatedData <- data.frame(simulatedData)
    for (ii in 1:nReplicates) {
      for (jj in 1:nVariables) {
        probVariable         <- prop.table(table(dataMatrix[, jj]))
        simulatedMultinomial <- rmultinom(n = nObservations, size = 1,
                                          prob = probVariable)
        simVector            <- ordered(row(simulatedMultinomial)
                                          [simulatedMultinomial == 1])
        simulatedData[, jj]  <- simVector
      }

      simulatedEigenValues[ii, ] <- eigen(hetcor(simulatedData, pd = TRUE,
                                                 use = use))$values
    }
    simulatedEigenValues <- data.frame(simulatedEigenValues)
  }
  if (algorithm == "polychoric") {
    simulatedEigenValues           <- matrix(result[[2]],ncol=nReplicates)
    simulatedEigenValues           <- as.matrix(as.data.frame.list(tapply(simulatedEigenValues,
                                                                          col(simulatedEigenValues),
                                                                          sort,
                                                                          decreasing = TRUE)))
    simulatedEigenValues           <- data.frame(t(simulatedEigenValues))
    rownames(simulatedEigenValues) <- paste("iter", 1:nReplicates, sep = "")
    colnames(simulatedEigenValues) <- 1:nVariables
  }

  ################################################################################
  # # Obtain percentiles of simulated data
  ################################################################################
  estimatedPercentiles <- sapply(simulatedEigenValues, quantile, percentiles)

  if (length(percentiles) == 1) {
    estimatedPercentiles <- data.frame(orderEigenValues = 1:nVariables,
                                       typeEigenValues  = 100 * percentiles,
                                       eigenValues      = estimatedPercentiles)
    rownames(estimatedPercentiles) <- 1:nVariables
  } else {
    estimatedPercentiles <- data.frame(t(estimatedPercentiles))
    pValues <- grep("^X\\d+\\.$", names(estimatedPercentiles))
    names(estimatedPercentiles)[pValues] <- gsub("^X(\\d+)\\.$", "p.\\1", names(estimatedPercentiles)[pValues])
    estimatedPercentiles <- reshape(estimatedPercentiles, direction = "long",
                                    varying = seq(along = names(estimatedPercentiles)))
    estimatedPercentiles <- data.frame(orderEigenValues = estimatedPercentiles[, "id"],
                                       typeEigenValues  = estimatedPercentiles[, "time"],
                                       eigenValues      = estimatedPercentiles[, "p"])
  }

  ################################################################################
  # # Output
  ################################################################################
  parallelList <- list(observed = observed, percentiles = estimatedPercentiles,
                       simulatedEigenValues = simulatedEigenValues)

  return(parallelList)
}

Try the pcaPA package in your browser

Any scripts or data that you put into this service are public.

pcaPA documentation built on May 29, 2017, 6:53 p.m.