Nothing
#' Compute ML Tetrachoric Correlations
#'
#' Compute ML tetrachoric correlations with optional bias correction and
#' smoothing.
#'
#'
#' @param X Either a matrix or vector of (0/1) binary data.
#' @param y An optional(if X is a matrix) vector of (0/1) binary data.
#' @param BiasCorrect A logical that determines whether bias correction (Brown
#' & Benedetti, 1977) is performed. Default = TRUE.
#' @param stderror A logical that determines whether standard errors are
#' calulated. Default = FALSE.
#' @param Smooth A logical which determines whether the tetrachoric correlation
#' matrix should be smoothed. A smoothed matrix is always positive definite.
#' @param max.iter Maximum number of iterations. Default = 50.
#' @param PRINT A logical that determines whether to print progress updates
#' during calculations. Default = TRUE
#' @return If stderror = FALSE, \code{tetcor} returns a matrix of tetrachoric
#' correlations. If \code{stderror} = TRUE then \code{tetcor} returns a list
#' the first component of which is a matrix of tetrachoric correlations and the
#' second component is a matrix of standard errors (see Hamdan, 1970).
#'
#' @return
#' \item{r}{The tetrachoric correlation matrix}.
#' \item{se}{A matrix of standard errors.}
#' \item{convergence}{(logical) The convergence status of the algorithm. A value of
#' TRUE denotes that the algorithm converged. A value of FALSE denotes that the
#' algorithm did not converge and the returned correlations
#' are Pearson product moments.}
#' \item{Warnings}{A list of warnings.}
#' @author Niels Waller
#' @references Brown, M. B. & Benedetti, J. K. (1977). On the mean and variance
#' of the tetrachoric correlation coefficient. \emph{Psychometrika, 42},
#' 347--355.
#'
#' Divgi, D. R. (1979) Calculation of the tetrachoric correlation coefficient.
#' \emph{Psychometrika, 44}, 169-172.
#'
#' Hamdan, M. A. (1970). The equivalence of tetrachoric and maximum likelihood
#' estimates of rho in 2 by 2 tables. \emph{Biometrika, 57}, 212-215.
#' @keywords Statistics
#' @import mvtnorm
#' @export
#' @examples
#'
#' ## generate bivariate normal data
#' library(MASS)
#' set.seed(123)
#' rho <- .85
#' xy <- mvrnorm(100000, mu = c(0,0), Sigma = matrix(c(1, rho, rho, 1), ncol = 2))
#'
#' # dichotomize at difficulty values
#' p1 <- .7
#' p2 <- .1
#' xy[,1] <- xy[,1] < qnorm(p1)
#' xy[,2] <- xy[,2] < qnorm(p2)
#'
#' print( apply(xy,2,mean), digits = 2)
#' #[1] 0.700 0.099
#'
#' tetcor(X = xy, BiasCorrect = TRUE,
#' stderror = TRUE, Smooth = TRUE, max.iter = 5000)
#'
#' # $r
#' # [,1] [,2]
#' # [1,] 1.0000000 0.8552535
#' # [2,] 0.8552535 1.0000000
#' #
#' # $se
#' # [,1] [,2]
#' # [1,] NA 0.01458171
#' # [2,] 0.01458171 NA
#' #
#' # $Warnings
#' # list()
#'
#'
#'
tetcor<-function (X, y = NULL, BiasCorrect = TRUE, stderror = FALSE,
Smooth = TRUE, max.iter = 5000, PRINT = TRUE)
{
# add: cparam=NULL to argument list
#------------------------------------------------------------#
# Function: tetcor #
# #
# December 16, 2017 #
# Improved fnc speed via FastTable #
# #
# April 27, 2016 #
# fixed se's for cases in which two cells are empty #
# May 1 2016 define cell counts as.numeric to avoid #
# integer overflow #
# #
# warning messages updated Sept 2 2004 #
# #
# Niels Waller, June 9, 2018 #
# Requires library: mvtnorm from CRAN #
# #
# Compute ML estimate of tetrachoric r #
# #
# ARGUMENTS: #
# X : a matrix of binary responses coded 0/1 #
# y : if x is a vector then y must be a vector of 0/1 #
# responses #
# BiasCorrect : a logical that determines whether to apply #
# the Brown & Benedetti bias correction #
# stderror : a logical that determines whether to compute #
# the s.e. of the estimate #
# #
# Smooth : a logical that determines whether to smooth the #
# correlation matrix #
# #
# max.iter : #
# #
# #
# VALUE #
# r : matrix of tetrachoric correlations #
# se : matrix of standard error of r is requested #
# Warnings : a list of correlations that did not converge #
# #
# #
# #
# Initial estimate of r calculated by Divgi's method #
# #
# Divgi, D. R. (1979) Calculation of the tetrachoric #
# correlation coefficient. Psychometrika, 44, 169--172. #
# #
# Bias correction: #
# Brown, M. B. & Benedetti, J. K. (1977). On the mean and #
# variance of the tetrachoric correlation coefficient. #
# Psychometrika, 42, 347--355. #
# #
# S.E. of r_{tet} #
# Hamdan, M. A. (1970). The equivalence of tetrachoric and #
# maximum likelihood estimates of $\rho$ in $2 \times 2$ #
# tables. Biometrika, 57, 212--215. #
#----------------------------------------------------------- #
# initialize warning flag
warning.msg <- list()
x <- X
# Update: September 8, 2014
# use 'cuhre' for bivariate integral
# library(R2Cuba)
if (is.data.frame(x))
x <- as.matrix(x)
if (is.data.frame(y))
y <- as.matrix(y)
if (!is.matrix(x) && is.null(y))
stop("supply both x and y or a matrix-like x")
if(!is.null(y))
x<-cbind(x,y)
if(sum(is.na(x)) > 0)
stop("Missing values are not allowed")
nitems <- ncol(x)
N <- nrow(x)
#--Do any items have zero variance?
zeroVar <- apply(x, 2, var) == 0
if (sum(zeroVar) > 0) {
labs <- paste0("Item", 1:nitems)
stop(paste0("\nSERIOUS ERROR \nThe following item(s) has variance = 0: ",
labs[zeroVar], "\n"))
}
#--Initialize matrices
# an element of seFlagMatrix will be set to 99
# if the aymptotic standard error can not
# be computed due to low cell counts
r <- matrix(0, nitems, nitems)
se <- 99
if(stderror == TRUE) se <- seFlagMatrix <- r
warning.k <- 0
## Function definitions
#-----compute bivariate normal density----------#
bvn <- function(z){
1/(2*pi*sqrt(1-rhat^2)) * exp(-(z[1]^2+z[2]^2-2*rhat*z[1]*z[2])
/(2*(1-rhat^2)) )
}
Fasttable <- function(x, y) {
tt <- sum(x * y)
t1 <- sum(x)
t2 <- sum(y)
matrix(c(length(x)-t1-t2+tt, t1-tt, t2-tt, tt), 2, 2)
}
########################
#--MAIN loop begins here
########################
for(iter.row in 2:nitems){
for(iter.col in 1:(iter.row-1)){
if(PRINT){
cat(c("Working on correlation: ",iter.row, iter.col, "\n"))
}
v1 <- x[,iter.row]
v2 <- x[,iter.col]
# we assume that data are 0/1 so this no longer needed
#v1 <- factor(v1,levels=c("0","1"))
#v2 <- factor(v2,levels=c("0","1"))
abcd <- Fasttable(v1,v2)
# use as.numeric to avoid integer overflow when N is very large
cella <- as.numeric(abcd[2,2])
cellb <- as.numeric(abcd[2,1])
cellc <- as.numeric(abcd[1,2])
celld <- as.numeric(abcd[1,1])
#----Brown and Benedetti bias correction----------#
if(BiasCorrect == TRUE){
# see Brown and Benedetti page 353
# when two cells are zero (either diagonal or off diagonal) assign
# r of plus or minus 1
if(cella == 0 & celld == 0){
r[iter.row,iter.col] <- -1
if(stderror == TRUE){
seFlagMatrix[iter.row, iter.col] <- seFlagMatrix[iter.col, iter.row] <- 99
}
next
}
if(cellc==0 & cellb==0){
r[iter.row, iter.col] <- 1
if(stderror == TRUE){
seFlagMatrix[iter.row, iter.col] <- seFlagMatrix[iter.col, iter.row] <- 99
}
next
}
if(cella == 0 & cellb > 0 & cellc > 0 & celld > 0){ # only cell a is 0.00
cella<-.5; cellb<- cellb - .5; cellc<- cellc - .5; celld<- celld +.5
}
if(cellb == 0 & cella > 0 & cellc > 0 & celld > 0){ # only cell b is 0.00
cellb<-cellb+.5; cella<-cella - .5; cellc<-cellc+.5; celld<-celld-.5
}
if(cellc == 0 & cellb > 0 & cella > 0 & celld > 0){ # only cell c is 0.00
cellc<-cellc+.5; celld<-celld-.5; cella<-cella-.5; cellb<-cellb+.5
}
if(celld == 0 & cellb > 0 & cellc > 0 & cella > 0){ # only cell d is 0.00
celld <- .5; cellb <- cellb - .5; cellc <- cellc - .5; cella <- cella + .5
}
}#-------------end of bias correction----------------------------#
p1 <- (cella + cellb)/N
p2 <- (cellc + cella)/N
p11 <- cella/N
p00 <- celld/N
p01 <- cellc/N
p10 <- cellb/N
#-------Correction for non-zero lower asymptotes
# Currently not working
# Carroll
# if(!is.null(cparam)){
# gi <- cparam[iter.row]
# wi <- 1-gi
# gj <- cparam[iter.col]
# wj <- 1-gj
#
# t00 <- p00/(wi * wj)
#
# t01 <- (wj*p01 * gj*p00)/(wi*wj)
# if(t01 < 0) t01 <-0
#
# t10 <- (wi*p10 - gi*p00)/(wi*wj)
# if(t10 < 0) t10<-0
#
# t11 <- 1 - t00 - t01 - t10
#
# p11 <- t11
# p1 <- 1 - ( (1-p1)/(1-gi) )
# p2 <- 1 - ( (1-p2)/(1-gj) )
#
# cella <- t11 * N
# cellb <- t10 * N
# cellc <- t01 * N
# celld <- t00 * N
#
#} # end correction for guessing
h.x <- qnorm(p1)
k.y <- qnorm(p2)
h.sign <- sign(h.x)
k.sign <- sign(k.y)
hstar <- max(abs(h.x), abs(k.y))
kstar <- min(abs(h.x), abs(k.y))
h2 <- hstar^2
k2 <- kstar^2
Aa <- .5/(1 + (h2+k2) * (.12454 - .27102 * ( 1 - hstar/sqrt(h2+k2) ) ) )
Bb <- .5/(1 + (h2+k2) * (.82281 - 1.03514 * ( kstar/sqrt(h2+k2) ) ) )
Cc <- .07557 * hstar + (hstar-kstar)^2 * (.51141/(hstar + 2.05793) -.07557/hstar)
R <- (cella * celld)/(cellb * cellc)
Dd <- h.sign * k.sign * kstar * (.79289 + 4.28981/(1+3.30231 * hstar))
alpha <- Aa + Bb * (-1 + 1/(1+Cc * (log10(R) - Dd)^2))
rhat<-cos(pi/(1 + R^alpha)) # rhat is start value
if(is.na(rhat > 0)) rhat <- 0 # if x and y have means of exactly .5 rhat is NaN
if( abs(rhat) == 1 ) { # integration breaks down if |rhat| = 1
rhat <-.95 * rhat
}
#-----End Divgi's method for start values -----------------#
#------------------------
# Divgi sends the readers to Pearson 1900 for
# the formula for this derivative. This form is
## reported in Kirk 1973, p. 261 Psychometrika
h.xSq <- h.x^2
k.ySq <- k.y^2
#---1st derivative of likelihood (a/N) wrt r------------#
dLdr<-function(r){
1/(2 * pi * sqrt(1-r^2)) * exp( - (h.xSq + k.ySq - 2 * h.x * k.y * r)/(2 * (1-r^2)) )
}
eps <- 99
iterations <- 0
#-----Newton Raphson Loop to improve initial estimate
converged <- TRUE
while( eps > .00001){
#----integrate over bivariate normal surface to estimate (d/N)
### Old code uses 'cuhre' from the R2Cuba in place of adapt
# adaptOut <- R2Cuba::cuhre(ndim = 2, ncomp = 1, integrand = bvn,
# lower = c(-6, -6), upper = c(h.x, k.y), flags = list(verbose = 0),
# rel.tol = .000001, abs.tol = 0, min.eval = 0, max.eval=max.iter)
# Now using mvtnorm
adaptOut <- mvtnorm::pmvnorm(lower = c(-8, -8),
upper = c(h.x, k.y),
sigma = matrix(c(1, rhat, rhat, 1),2,2))[1]
ep11 <- adaptOut
firstderiv <- dLdr(rhat)
if( firstderiv <= 0.02 ) firstderiv <- firstderiv * 5.5 #pull deriv back if too low
rnew <- rhat - (ep11-p11)/firstderiv
if( abs(rnew) > 1 ) {
rnew <- .99
rhat <- 99 # do not stop during this iteration
}
eps <- abs(rnew - rhat)
iterations <- iterations + 1
rhat <- rnew
if(iterations > max.iter){
converged <- FALSE
warning.k <- warning.k + 1
warn.msg <- paste("WARNING: Correlation ", iter.row, ", ", iter.col,
" failed to converge!", sep="")
# cat(warn.msg,"\n")
warning.msg[warning.k] <- warn.msg
rnew <- 51;
## replace with Pearson
rhat <- cor(x[,c(iter.row,iter.col)])[1,2]
eps <- .00000001
}
}##End while eps > .00001)
#-------Compute ML estimate of standard error-(Hamdan, 1970)----#
# Hamdan, M. A. (1970). The Equivalence of Tetrachoric and Maximum
# Likelihood Estimates of r in 2 x 2 Tables. Biometrika, 57(1), 212-215.
if( stderror ){
se[iter.row,iter.col] <- 1/(N * bvn(c(h.x,k.y))) * (1/cella + 1/cellb + 1/cellc + 1/celld)^-.5
} ## END 0F if(stderror)
#-------------------------------------------------------#
r[iter.row,iter.col]<-rhat
}## END iter over columns
} ## END iter over rows
## fill in upper triangle of r
r <- r + t(r)
diag(r) <- 1
## Smooth tetrachoric R matrix
if(Smooth){
ULU <- eigen(r)
U <- ULU$vectors
L <- ULU$values
## maybe use smoothAPA here
if( min(L) <= 0 ){ # renorm to make matrix positive definite
L[L<=0] <- .0001
Ltot <- sum(L)
L <- nitems*L/Ltot
Lsqrt <- diag(sqrt(L))
Fload <- tcrossprod(U, Lsqrt) #U%*%Lsqrt
r <- tcrossprod(Fload, Fload) #Fload %*% t(Fload)
Dmat <- diag(1/sqrt(diag(r)))
r <- Dmat %*% r %*% Dmat
}
}##End if Smooth
## Return results
if( stderror == FALSE ) {
list(r = r,
se = NA,
convergence = converged,
Warnings = warning.msg)
}
else if ( stderror == TRUE ){
se <- se + t(se)
diag(se) <- NA
## If a standard error cannot be computed it will be reported as
## NA.
se[seFlagMatrix==99] <-NA
list(r = r,
se = se,
convergence = converged,
Warnings = warning.msg)
}
} ## End of tetcor function
######### CHECK #########
# ## generate bivariate normal data
# library(MASS)
# library(microbenchmark)
# rho <- .85
# xy<- mvrnorm(50000, mu = c(0,0), Sigma = matrix(c(1,rho,rho,1),ncol=2))
#
# ## dichotomize
# p1 <- .59
# p2 <- .1
# xy[,1] <- (xy[,1] > p1)
# xy[,2] <- (xy[,2] > p2)
#
#
#
# tetcor(X=xy[,1], y=xy[,2], max.iter=5000,
# stderror = TRUE, PRINT = FALSE)
#
#
# microbenchmark(times = 200,
# tetcor(X=xy[,1], y=xy[,2], max.iter=5000,
# stderror = TRUE, PRINT = FALSE)
# )
#
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.