R/CalculatePAOrdered.R
In pcaPA: Parallel Analysis for Ordinal and Numeric Data using Polychoric and Pearson Correlations with S3 Classes

Documented in CalculatePAOrdered

################################################################################
# # Function for parallel analysis of correlation matrices of ordered data
################################################################################
CalculatePAOrdered <- function (dataMatrix, percentiles = 0.99, nReplicates = 200, use = "complete.obs", algorithm = "polycor") {
  # # Obtains a parallel analysis for ordered data
  # #
  # # Arg:
  # #  dataMatrix: matrix or data.frame of ordered variables
  # #  percentiles: vector of percentiles to report
  # #  nReplicates: number of simulations to produce for estimating the eigenvalues distribution under independence
  # #  use: Missing value handling method: If "complete.obs", remove observations with any missing data; if "pairwise.complete.obs", compute each correlation using all observations with valid data for that pair of variables.
  # #  algorithm: string specifying the correlation estimation algorithm. Polychoric correlation estimation method: "polycor" for estimates using the polycor package, "polychoric" for estimates using the C++ function polychoric.
  # #
  # # Ret:
  # #  parallelList: a list with data.frames observed, percentiles and simulated data.
  # #  observed: data.frame containing the observed eigenvalues
  # #  percentiles: data.frame containing the estimated percentiles of the eigenvalues distribution under independence
  # #  simulatedEigenValues: data.frame containing the simulated eigenvalues under independence



  ################################################################################
  # # Data verification
  ################################################################################
  if (!is.matrix(dataMatrix) & !is.data.frame(dataMatrix)) {
    stop("dataMatrix must be a matrix or a data.frame")
  }
  if (!is.data.frame(dataMatrix)) {
    dataMatrix <- data.frame(dataMatrix)
  }

  isOrderedData <- all(sapply(dataMatrix, is.ordered))
  if (!isOrderedData) {
    stop("All variables in dataMatrix must be ordered factors")
  }

  if (!(algorithm %in% c("polycor", "polychoric"))) {
    stop("Unknown algorithm")
  }

  ################################################################################
  # # Data information
  ################################################################################
  nObservations  <- nrow(dataMatrix)
  nVariables     <- ncol(dataMatrix)

  if (algorithm == "polycor") {
    datCorrelation <- hetcor(dataMatrix, pd = TRUE, use = use) # Polychoric
                        # correlations with matrix transformed to the nearest
                        # positive definite matrix
    datEigenValues <- eigen(datCorrelation)$values


  }
  if (algorithm == "polychoric") {
    isBinary = FALSE
    if (use == "complete.obs") {
      dataMatrix <- na.omit(dataMatrix)
    }

    dataMatrixInt <- as.matrix(as.data.frame.list(lapply(dataMatrix,
                                                    function(x)
                                                      as.integer(as.integer(x)-1))))

    result <- .Call(Cpolychoric, dataMatrixInt, isBinary,
                    nReplicates, sfsmisc::nearcor, new.env())
    datEigenValues <- sort(result[[1]], decreasing = TRUE)
  }

  observed <- data.frame(orderEigenValues = 1:nVariables,
                         typeEigenValues  = "Observed",
                         eigenValues      = datEigenValues,
                         stringsAsFactors = TRUE)

  ################################################################################
  # # Simulate correlation matrices under independence
  ################################################################################
  if (algorithm == "polycor") {
    simulatedEigenValues           <- matrix(nrow = nReplicates, ncol = nVariables)
    rownames(simulatedEigenValues) <- paste("iter", 1:nReplicates, sep = "")
    colnames(simulatedEigenValues) <- 1:nVariables

    simulatedData <- matrix(nrow = nObservations, ncol = nVariables)
    simulatedData <- data.frame(simulatedData)
    for (ii in 1:nReplicates) {
      for (jj in 1:nVariables) {
        probVariable         <- prop.table(table(dataMatrix[, jj]))
        simulatedMultinomial <- rmultinom(n = nObservations, size = 1,
                                          prob = probVariable)
        simVector            <- ordered(row(simulatedMultinomial)
                                          [simulatedMultinomial == 1])
        simulatedData[, jj]  <- simVector
      }

      simulatedEigenValues[ii, ] <- eigen(hetcor(simulatedData, pd = TRUE,
                                                 use = use))$values
    }
    simulatedEigenValues <- data.frame(simulatedEigenValues)
  }
  if (algorithm == "polychoric") {
    simulatedEigenValues           <- matrix(result[[2]],ncol=nReplicates)
    simulatedEigenValues           <- as.matrix(as.data.frame.list(tapply(simulatedEigenValues,
                                                                          col(simulatedEigenValues),
                                                                          sort,
                                                                          decreasing = TRUE)))
    simulatedEigenValues           <- data.frame(t(simulatedEigenValues))
    rownames(simulatedEigenValues) <- paste("iter", 1:nReplicates, sep = "")
    colnames(simulatedEigenValues) <- 1:nVariables
  }

  ################################################################################
  # # Obtain percentiles of simulated data
  ################################################################################
  estimatedPercentiles <- sapply(simulatedEigenValues, quantile, percentiles)

  if (length(percentiles) == 1) {
    estimatedPercentiles <- data.frame(orderEigenValues = 1:nVariables,
                                       typeEigenValues  = 100 * percentiles,
                                       eigenValues      = estimatedPercentiles)
    rownames(estimatedPercentiles) <- 1:nVariables
  } else {
    estimatedPercentiles <- data.frame(t(estimatedPercentiles))
    pValues <- grep("^X\\d+\\.$", names(estimatedPercentiles))
    names(estimatedPercentiles)[pValues] <- gsub("^X(\\d+)\\.$", "p.\\1", names(estimatedPercentiles)[pValues])
    estimatedPercentiles <- reshape(estimatedPercentiles, direction = "long",
                                    varying = seq(along = names(estimatedPercentiles)))
    estimatedPercentiles <- data.frame(orderEigenValues = estimatedPercentiles[, "id"],
                                       typeEigenValues  = estimatedPercentiles[, "time"],
                                       eigenValues      = estimatedPercentiles[, "p"])
  }

  ################################################################################
  # # Output
  ################################################################################
  parallelList <- list(observed = observed, percentiles = estimatedPercentiles,
                       simulatedEigenValues = simulatedEigenValues)

  return(parallelList)
}

Any scripts or data that you put into this service are public.

pcaPA documentation built on May 29, 2017, 6:53 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

pcaPA
Parallel Analysis for Ordinal and Numeric Data using Polychoric and Pearson Correlations with S3 Classes

R/CalculatePAOrdered.R
In pcaPA: Parallel Analysis for Ordinal and Numeric Data using Polychoric and Pearson Correlations with S3 Classes

Defines functions CalculatePAOrdered

Documented in CalculatePAOrdered

Try the pcaPA package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

pcaPA Parallel Analysis for Ordinal and Numeric Data using Polychoric and Pearson Correlations with S3 Classes

R/CalculatePAOrdered.R In pcaPA: Parallel Analysis for Ordinal and Numeric Data using Polychoric and Pearson Correlations with S3 Classes

Defines functions CalculatePAOrdered

Documented in CalculatePAOrdered

Try the pcaPA package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

pcaPA
Parallel Analysis for Ordinal and Numeric Data using Polychoric and Pearson Correlations with S3 Classes

R/CalculatePAOrdered.R
In pcaPA: Parallel Analysis for Ordinal and Numeric Data using Polychoric and Pearson Correlations with S3 Classes