Nothing
#' Polychoric Correlation Matrix
#'
#' This function computes a polychoric correlation matrix, which is the estimated
#' Pearson product-moment correlation matrix between underlying normally distributed
#' latent variables which generate the ordinal scores.
#'
#' @param x a matrix or data frame of discrete values.
#' @param smooth logical: if \code{TRUE} and if the polychoric matrix is not
#' positive definite, a simple smoothing algorithm using \code{cor.smooth()}
#' function is applied.
#' @param global logical: if \code{TRUE}, the global values of the tau parameter
#' is used instead of the local values.
#' @param weight a vector of length of the number of observations that specifies
#' the weights to apply to each case. The \code{NULL} case is
#' equivalent of weights of 1 for all cases.
#' @param correct a numeric value indicating the correction value to use to
#' correct for continuity in the case of zero entry. Note that
#' unlike in the \code{polychoric()} function in the \pkg{psych}
#' the default value is 0.
#' @param progress logical: if \code{TRUE}, the progress bar is shown.
#' @param na.rm logical: if \code{TRUE}, missing data are deleted.
#' @param delete logical: if \code{TRUE}, cases with no variance are deleted
#' with a warning before proceeding.
#' @param tri a character string indicating which triangular of the matrix
#' to show on the console, i.e., \code{both} for upper and lower
#' triangular, \code{lower} (default) for the lower triangular,
#' and \code{upper} for the upper triangular.
#' @param as.na a numeric vector indicating user-defined missing values,
#' i.e. these values are converted to \code{NA} before conducting
#' the analysis.
#' @param digits an integer value indicating the number of decimal places to
#' be used for displaying correlation coefficients.
#' @param check logical: if \code{TRUE}, argument specification is checked.
#' @param output logical: if \code{TRUE}, output is shown on the console.
#'
#' @author
#' William Revelle
#'
#' @references
#' Revelle, W. (2018) \emph{psych: Procedures for personality and psychological
#' research}. Northwestern University, Evanston, Illinois, USA,
#' https://CRAN.R-project.org/package=psych Version = 1.8.12.
#'
#' @return
#' Returns an object of class \code{misty.object}, which is a list with following
#' entries:
#' \tabular{ll}{
#' \code{call} \tab function call \cr
#' \code{type} \tab type of analysis \cr
#' \code{data} \tab matrix or data frame specified in \code{x} \cr
#' \code{args} \tab specification of function arguments \cr
#' \code{result} \tab result table \cr
#' }
#'
#' @note
#' This function is based on the \code{polychoric()} function in the \pkg{psych}
#' package by William Revelle.
#'
#' @export
#'
#' @examples
#' dat <- data.frame(x1 = c(1, 1, 3, 2, 1, 2, 3, 2, 3, 1),
#' x2 = c(1, 2, 1, 1, 2, 2, 2, 1, 3, 1),
#' x3 = c(1, 3, 2, 3, 3, 1, 3, 2, 1, 2))
#'
#' # Polychoric correlation matrix
#' cor.poly(dat)
cor.poly <- function(x, smooth = TRUE, global = TRUE, weight = NULL, correct = 0, progress = TRUE,
na.rm = TRUE, delete = TRUE, tri = c("both", "lower", "upper"), digits = 2,
as.na = NULL, check = TRUE, output = TRUE) {
#_____________________________________________________________________________
#
# Initial Check --------------------------------------------------------------
# Package mnormt installed?
if (isTRUE(!requireNamespace("mnormt", quietly = TRUE))) { stop("Package \"mnormt\" is needed for this function to work, please install it.", call. = FALSE ) }
# Check if input 'x' is missing
if (isTRUE(missing(x))) { stop("Please specify a matrix or data frame of discrete values for the argument 'x'.", call. = FALSE) }
# Check if input 'x' is NULL
if (isTRUE(is.null(x))) { stop("Input specified for the argument 'x' is NULL.", call. = FALSE) }
# Matrix or data frame for the argument 'x'?
if (isTRUE(!is.matrix(x) && !is.data.frame(x))) { stop("Please specify a matrix or data frame of discrete values for the argument 'x'.", call. = FALSE) }
# Check input 'check'
if (isTRUE(!is.logical(check))) { stop("Please specify TRUE or FALSE for the argument 'check'.", call. = FALSE) }
#_____________________________________________________________________________
#
# Input Check ----------------------------------------------------------------
if (isTRUE(check)) {
# Check input 'x': Discrete values
if (isTRUE(any(apply(x, 2, function(y) any(na.omit(y) %% 1L != 0L))))) { stop("Please specify a matrix or data frame of discrete values for the argument 'x'.", call. = FALSE) }
# Check input 'smooth'
if (isTRUE(!is.logical(smooth))) { stop("Please specify TRUE or FALSE for the argument 'smooth'.", call. = FALSE) }
# Check input 'global'
if (isTRUE(!is.logical(global))) { stop("Please specify TRUE or FALSE for the argument 'global'.", call. = FALSE) }
# Check input 'na.rm'
if (isTRUE(!is.logical(na.rm))) { stop("Please specify TRUE or FALSE for the argument 'na.rm'.", call. = FALSE) }
# Check input 'progress'
if (isTRUE(!is.logical(progress))) { stop("Please specify TRUE or FALSE for the argument 'progress'.", call. = FALSE) }
# Check input 'delete'
if (isTRUE(!is.logical(delete))) { stop("Please specify TRUE or FALSE for the argument 'delete'.", call. = FALSE) }
# Check input 'tri'
if (isTRUE(any(!tri %in% c("both", "lower", "upper")))) { warning("Character string in the argument 'tri' does not match with \"both\", \"lower\", or \"upper\".", call. = FALSE) }
# Check input 'digits'
if (isTRUE(digits %% 1L != 0L || digits < 0L)) { warning("Specify a positive integer number for the argument 'digits'.", call. = FALSE) }
# Check input 'output'
if (isTRUE(!is.logical(output))) { stop("Please specify TRUE or FALSE for the argument 'output'.", call. = FALSE) }
}
#_____________________________________________________________________________
#
# Data and Arguments ---------------------------------------------------------
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Convert user-missing values into NA ####
if (isTRUE(!is.null(as.na))) {
x <- misty::as.na(x, as.na = as.na, check = check)
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Print triangular ####
tri <- ifelse(all(c("both", "lower", "upper") %in% tri), "lower", tri)
#_____________________________________________________________________________
#
# Functions ------------------------------------------------------------------
#----------------------------------------
cor.smooth <- function (x, eig.tol = 10^-12) {
eigens <- try(eigen(x), TRUE)
if (isTRUE(class(eigens) == as.character("try-error"))) {
warning("There is something wrong with the correlation matrix, i.e., cor.smooth() failed to smooth it because some of the eigenvalues are NA.", call. = FALSE)
} else {
if (isTRUE(min(eigens$values) < .Machine$double.eps)) {
warning("Matrix was not positive definite, smoothing was done.", call. = FALSE)
eigens$values[eigens$values < eig.tol] <- 100L * eig.tol
nvar <- dim(x)[1L]
tot <- sum(eigens$values)
eigens$values <- eigens$values * nvar/tot
cnames <- colnames(x)
rnames <- rownames(x)
x <- eigens$vectors %*% diag(eigens$values) %*% t(eigens$vectors)
x <- cov2cor(x)
colnames(x) <- cnames
rownames(x) <- rnames
}
}
return(x)
}
mcmapply <- function (FUN, ..., MoreArgs = NULL, SIMPLIFY = TRUE, USE.NAMES = TRUE,
mc.preschedule = TRUE, mc.set.seed = TRUE, mc.silent = FALSE,
mc.cores = 1L, mc.cleanup = TRUE, affinity.list = NULL) {
cores <- as.integer(mc.cores)
if (isTRUE(cores < 1L)) {
stop("'mc.cores' must be >= 1", call. = FALSE)
}
if (isTRUE(cores > 1L)) {
stop("'mc.cores' > 1 is not supported on Windows", call. = FALSE)
}
mapply(FUN = FUN, ..., MoreArgs = MoreArgs, SIMPLIFY = SIMPLIFY,
USE.NAMES = USE.NAMES)
}
tableF <- function(x,y) {
minx <- min(x,na.rm = TRUE)
maxx <- max(x,na.rm = TRUE)
miny <- min(y,na.rm = TRUE)
maxy <- max(y,na.rm = TRUE)
maxxy <- (maxx+(minx == 0L))*(maxy + (miny == 0L))
dims <- c(maxx + 1L - min(1, minx), maxy + 1L - min(1L, minx))
bin <- x - minx + (y - miny)*(dims[1]) + max(1L, minx)
ans <- matrix(tabulate(bin, maxxy), dims)
return(ans)
}
tableFast <- function(x, y, minx, maxx, miny, maxy) {
maxxy <- (maxx + (minx == 0L))*(maxy + (minx == 0L))
bin <- x-minx + (y - minx) *maxx + 1
dims <- c(maxx + 1L - min(1L, minx), maxy + 1L - min(1L, miny))
ans <- matrix(tabulate(bin, maxxy), dims)
return(ans)
}
polyBinBvn <- function(rho, rc, cc) {
row.cuts <- c(-Inf, rc,Inf)
col.cuts <- c(-Inf, cc, Inf)
nr <- length(row.cuts) - 1L
nc <- length(col.cuts) - 1L
P <- matrix(0L, nr, nc)
R <- matrix(c(1L, rho, rho,1), 2L, 2L)
for (i in seq_len((nr - 1L))) {
for (j in seq_len((nc - 1L))) {
P[i, j] <- mnormt::sadmvn(lower = c(row.cuts[i], col.cuts[j]),
upper = c(row.cuts[i + 1L], col.cuts[j + 1L]), mean = rep(0L, 2L),
varcov = R)
}
}
P[1L, nc] <- pnorm(rc[1L]) - sum(P[1L, seq_len((nc - 1L))])
P[nr, 1L] <- pnorm(cc[1L]) - sum(P[seq_len((nr - 1L)), 1L])
if (isTRUE(nr >2L)) { for (i in (2L:(nr - 1L))) {P[i, nc] <- pnorm(rc[i]) -pnorm(rc[i - 1L])- sum(P[i, seq_len((nc - 1L))]) }}
if (isTRUE(nc >2L)) { for (j in (2L:(nc - 1L))) {P[nr, j] <- pnorm(cc[j]) - pnorm(cc[j - 1L])-sum(P[seq_len((nr - 1L)), j]) }}
if (isTRUE(nc > 1L)) P[nr, nc] <- 1L - pnorm(rc[nr - 1L]) - sum(P[nr, seq_len((nc - 1L))])
P
}
polyF <- function(rho, rc, cc, tab) {
P <- polyBinBvn(rho, rc, cc)
P[P <= 0L] <- NA
lP <- log(P)
lP[lP == -Inf] <- NA
lP[lP == Inf] <- NA
-sum(tab * lP,na.rm=TRUE) }
wtd.table <- function(x, y, weight) {
tab <- tapply(weight, list(x, y), sum, na.rm = TRUE, simplify = TRUE)
tab[is.na(tab)] <- 0L
return(tab)
}
polyc <- function(x, y = NULL, taux, tauy, global = TRUE, weight = NULL, correct = correct,
gminx, gmaxx, gminy, gmaxy) {
if (is.null(weight)) {
tab <- tableFast(x, y, gminx, gmaxx, gminy, gmaxy)
} else {
tab <- wtd.table(x,y,weight)
}
fixed <- 0L
tot <- sum(tab)
if (isTRUE(tot == 0L)) {
result <- list(rho = NA, objective = NA, fixed = 1L)
return(result)
}
tab <- tab/tot
if (isTRUE(correct > 0L)) {
if (isTRUE(any(tab[] == 0L))) {
fixed <- 1L
tab[tab == 0L] <- correct/tot
}
}
if (isTRUE(global)) {
rho <- optimize(polyF, interval = c(-1, 1), rc = taux, cc = tauy, tab)
} else {
if (isTRUE(!is.na(sum(tab)))) {
zerorows <- apply(tab, 1, function(x) all(x == 0))
zerocols <- apply(tab, 2, function(x) all(x == 0))
zr <- sum(zerorows)
zc <- sum(zerocols)
tab <- tab[!zerorows, , drop = FALSE]
tab <- tab[, !zerocols, drop = FALSE]
csum <- colSums(tab)
rsum <- rowSums(tab)
if (isTRUE(min(dim(tab)) < 2L)) {
rho <- list(objective = NA)
} else {
cc <- qnorm(cumsum(csum)[-length(csum)])
rc <- qnorm(cumsum(rsum)[-length(rsum)])
rho <- optimize(polyF, interval = c(-1L, 1L), rc = rc, cc = cc, tab)
}
} else {
rho <- list(objective = NA, rho= NA)
}
}
if (isTRUE(is.na(rho$objective))) {
result <- list(rho = NA, objective = NA, fixed = fixed)
} else {
result <- list(rho=rho$minimum, objective=rho$objective, fixed=fixed)
}
return(result)
}
polydi <- function(p, d, taup, taud, global = TRUE, ML = FALSE, std.err = FALSE, weight = NULL,
progress = TRUE, na.rm = TRUE, delete = TRUE, correct = 0.5) {
myfun <- function(x, i, j, correct, taup, taud, gminx, gmaxx, gminy, gmaxy, np) {
polyc(x[, i], x[, j], taup[, i], taud[1, (j - np)], global = global, weight = weight,
correct = correct, gminx = gminx, gmaxx = gmaxx, gminy = gminy, gmaxy = gmaxy)
}
matpLower <- function(x, np, nd, taup, taud, gminx, gmaxx, gminy, gmaxy) {
k <- 1
il <- vector()
jl <- vector()
for(i in seq_len(np)) {
for (j in seq_len(nd)) {
il[k] <- i
jl [k] <- j
k <- k+1
}
}
poly <- mcmapply(function(i, j) myfun(x, i, j, correct = correct,
taup = taup, taud = taud, gminx = gminx, gmaxx = gmaxx, gminy = gminy, gmaxy = gmaxy, np = np), il, jl + np)
mat <- matrix(np,nd)
mat <- as.numeric(poly[1, ])
return(mat)
}
if (isTRUE(!is.null(weight))) {
if (isTRUE(length(weight) != nrow(x))) {
stop("Length of the weight vector must match the number of cases.", call. = FALSE)
}
}
cl <- match.call()
np <- dim(p)[2L]
nd <- dim(d)[2L]
if (isTRUE(is.null(np))) np <- 1L
if (isTRUE(is.null(nd))) nd <- 1L
nsub <- dim(p)[1]
p <- as.matrix(p)
d <- as.matrix(d)
nvalues <- max(p, na.rm = TRUE) - min(p, na.rm = TRUE) + 1L
dmin <- apply(d, 2,function(x) min(x, na.rm = TRUE))
dmax <- apply(d, 2,function(x) max(x, na.rm = TRUE))
dvalues <- max(dmax - dmin)
if (isTRUE(dvalues != 1L)) stop("You did not supply a dichotomous variable.", call. = FALSE)
if (isTRUE(nvalues > 8L)) stop("You have more than 8 categories for your items, polychoric is probably not needed.", call. = FALSE)
item.var <- apply(p, 2L, sd, na.rm = na.rm)
bad <- which((item.var <= 0L) | is.na(item.var))
if (isTRUE(length(bad) > 0L && delete)) {
for (baddy in seq_len(length(bad))) {
message( "Item = ",colnames(p)[bad][baddy], " had no variance and was deleted")
}
p <- p[, -bad]
np <- np - length(bad)
}
pmin <- apply(p, 2, function(x) min(x, na.rm = TRUE))
minx <- min(pmin)
p <- t(t(p) - pmin + 1L)
miny <- min(dmin)
d <- t(t(d) - dmin + 1L)
gminx <- gminy <- 1L
pmax <- apply(p,2,function(x) max(x,na.rm = TRUE))
gmaxx <- max(pmax)
if (isTRUE(min(pmax) != max(pmax))) { global <- FALSE
warning("The items do not have an equal number of response alternatives, setting global to FALSE.", call. = FALSE)}
gmaxy <- max(apply(d, 2, function(x) max(x, na.rm = TRUE)))
pfreq <- apply(p, 2, tabulate, nbins = nvalues)
n.obs <- colSums(pfreq)
pfreq <- t(t(pfreq)/n.obs)
taup <- as.matrix(qnorm(apply(pfreq, 2, cumsum))[seq_len(nvalues - 1L), ], ncol = ncol(pfreq))
rownames(taup) <- paste(seq_len(nvalues - 1L))
colnames(taup) <- colnames(p)
dfreq <- apply(d, 2, tabulate, nbins = 2L)
if (isTRUE(nd < 2L)) {
n.obsd <- sum(dfreq)
} else {
n.obsd <- colSums(dfreq)
}
dfreq <- t(t(dfreq)/n.obsd)
taud <- qnorm(apply(dfreq, 2, cumsum))
mat <- matrix(0L, np, nd)
rownames(mat) <- colnames(p)
colnames(mat) <- colnames(d)
x <- cbind(p,d)
mat <- matpLower(x, np, nd, taup, taud, gminx, gmaxx, gminy, gmaxy)
mat <- matrix(mat, np, nd, byrow = TRUE)
rownames(mat) <- colnames(p)
colnames(mat) <- colnames(d)
taud <- t(taud)
result <- list(rho = mat,tau = taud, n.obs = nsub)
class(result) <- c("psych","polydi")
return(result)
}
polytab <- function(tab, correct = TRUE) {
tot <- sum(tab)
tab <- tab/tot
if (isTRUE(correct > 0L)) tab[tab == 0L] <- correct/tot
csum <- colSums(tab)
rsum <- rowSums(tab)
cc <- qnorm(cumsum(csum[-length(csum)]))
rc <- qnorm(cumsum(rsum[-length(rsum)]))
rho <- optimize(polyF, interval = c(-1L, 1L), rc = rc, cc = cc, tab)
result <- list(rho = rho$minimum, objective = rho$objective, tau.row = rc, tau.col = cc)
return(result)
}
myfun <- function(x, i, j, gminx, gmaxx, gminy, gmaxy) {
polyc(x[, i], x[, j], tau[, i], tau[, j], global = global, weight = weight, correct = correct,
gminx = gminx, gmaxx = gmaxx, gminy = gminy, gmaxy = gmaxy)
}
matpLower <- function(x, nvar, gminx, gmaxx, gminy, gmaxy) {
k <- 1
il <- vector()
jl <- vector()
for(i in 2L:nvar) {
for (j in seq_len(i - 1L)) {
il[k] <- i
jl[k] <- j
k <- k + 1
}
}
poly <- mcmapply(function(i, j) myfun(x, i, j, gminx = gminx, gmaxx = gmaxx, gminy = gminy, gmaxy = gmaxy), il, jl)
mat <- diag(nvar)
if (isTRUE(length(dim(poly)) == 2L)) {
mat[upper.tri(mat)] <- as.numeric(poly[1L, ])
mat <- t(mat) + mat
fixed <- as.numeric(poly[3L, ])
diag(mat) <- 1L
fixed <- sum(fixed)
if (isTRUE(fixed > 0L && correct > 0L)) {
warning(fixed ," cell(s) adjusted for 0 values using the correction for continuity.", call. = FALSE)
}
return(mat)
} else {
warning("Something is wrong in polycor.", call. = FALSE)
return(poly)
stop("Something was seriously wrong. Please look at the results.", call. = FALSE)
}
}
if (isTRUE(!is.null(weight))) {
if (isTRUE(length(weight) !=nrow(x))) {
stop("Length of the weight vector must match the number of cases", call. = FALSE)
}
}
nvar <- dim(x)[2L]
nsub <- dim(x)[1L]
if (isTRUE((prod(dim(x)) == 4L) || is.table(x))) {
result <- polytab(x, correct = correct)
} else {
x <- as.matrix(x)
if (isTRUE(!is.numeric(x))) {
x <- matrix(as.numeric(x), ncol = nvar)
message("Non-numeric input converted to numeric.")
}
xt <- table(x)
nvalues <- length(xt)
maxx <- max(x, na.rm = TRUE)
if (isTRUE(maxx > nvalues)) {
xtvalues <- as.numeric(names(xt))
for(i in seq_len(nvalues)) {
x[x == xtvalues[i]] <- i
}
}
nvalues <- max(x, na.rm = TRUE) - min(x, na.rm = TRUE) + 1L
item.var <- apply(x, 2, sd, na.rm = na.rm)
bad <- which((item.var <= 0)|is.na(item.var))
if (isTRUE(length(bad) > 0L && delete)) {
for (baddy in seq_len(length(bad))) {
message("Item = ", colnames(x)[bad][baddy], " had no variance and was deleted.")
}
x <- x[, -bad]
nvar <- nvar - length(bad)
}
xmin <- apply(x, 2L, function(x) min(x, na.rm = TRUE))
xmin <- min(xmin)
x <- t(t(x) - xmin + 1L)
gminx <- gminy <- 1L
xmax <- apply(x, 2L, function(x) max(x, na.rm = TRUE))
xmax <- max(xmax)
gmaxx <- gmaxy <- xmax
if (isTRUE(min(xmax) != max(xmax))) {
global <- FALSE
warning("Items do not have an equal number of response categories, global set to FALSE.", call. = FALSE)
}
xfreq <- apply(x, 2, tabulate, nbins = nvalues)
n.obs <- colSums(xfreq)
xfreq <- t(t(xfreq) / n.obs)
tau <- qnorm(apply(xfreq, 2, cumsum))[seq_len(nvalues - 1L), ]
if (isTRUE(!is.matrix(tau))) tau <- matrix(tau, ncol = nvar)
rownames(tau) <- seq_len(nvalues - 1L)
colnames(tau) <- colnames(x)
mat <- matrix(0L, nvar, nvar)
colnames(mat) <- rownames(mat) <- colnames(x)
mat <- matpLower(x, nvar, gminx, gmaxx, gminy, gmaxy)
if (isTRUE(any(is.na(mat)))) {
message("Some correlations are missing, smoothing turned off.")
smooth <- FALSE
}
if (isTRUE(smooth)) {
mat <- cor.smooth(mat)
}
colnames(mat) <- rownames(mat) <- colnames(x)
#_____________________________________________________________________________
#
# Return Object --------------------------------------------------------------
object <- list(call = match.call(),
type = "cor.poly",
data = x,
args = list(smooth = smooth, global = global, weight = weight, correct = correct,
progress = progress, na.rm = na.rm, delete = delete, digits = digits,
tri = tri, as.na = as.na, check = check, output = output),
result = list(cor = mat, tau = tau))
}
class(object) <- "misty.object"
#_____________________________________________________________________________
#
# Output ---------------------------------------------------------------------
if (isTRUE(output)) { print(object, check = FALSE) }
return(invisible(object))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.