#' Likelihood Distance
#'
#' Compute likelihood distances between models when removing the \eqn{i_{th}} case. If there are no
#' missing data then the \code{\link{GOF}} will often provide equivalent results. If mirt is used,
#' then the values will be associated with the unique response patterns instead.
#'
#' Note that \code{LD} is not limited to confirmatory factor analysis and
#' can apply to nearly any model being studied
#' where detection of influential observations is important.
#'
#' @aliases LD
#' @param data matrix or data.frame
#' @param model if a single numeric number declares number of factors to extract in
#' exploratory factor analysis (requires complete dataset, i.e., no missing).
#' If \code{class(model)} is a sem (semmod), or lavaan (character),
#' then a confirmatory approach is performed instead. Finally, if the model is defined with
#' \code{mirt::mirt.model()} then distances will be computed for categorical data with the
#' mirt package
#' @param progress logical; display the progress of the computations in the console?
#' @author Phil Chalmers \email{rphilip.chalmers@@gmail.com}
#' @seealso
#' \code{\link{gCD}}, \code{\link{GOF}}, \code{\link{obs.resid}},
#' \code{\link{robustMD}}, \code{\link{setCluster}}
#' @references
#'
#' Chalmers, R. P. & Flora, D. B. (2015). faoutlier: An R Package for Detecting
#' Influential Cases in Exploratory and Confirmatory Factor Analysis.
#' \emph{Applied Psychological Measurement, 39}, 573-574. \doi{10.1177/0146621615597894}
#'
#' Flora, D. B., LaBrish, C. & Chalmers, R. P. (2012). Old and new ideas for data
#' screening and assumption testing for exploratory and confirmatory factor analysis.
#' \emph{Frontiers in Psychology, 3}, 1-21. \doi{10.3389/fpsyg.2012.00055}
#' @keywords cooks
#' @export LD
#' @examples
#'
#' \dontrun{
#'
#' #run all LD functions using multiple cores
#' setCluster()
#'
#' #Exploratory
#' nfact <- 3
#' (LDresult <- LD(holzinger, nfact))
#' (LDresult.outlier <- LD(holzinger.outlier, nfact))
#' plot(LDresult)
#' plot(LDresult.outlier)
#'
#' ## add a progress meter
#' LDresult <- LD(holzinger, nfact, progress = TRUE)
#'
#' #-------------------------------------------------------------------
#' #Confirmatory with sem
#' model <- sem::specifyModel()
#' F1 -> Remndrs, lam11
#' F1 -> SntComp, lam21
#' F1 -> WrdMean, lam31
#' F2 -> MissNum, lam42
#' F2 -> MxdArit, lam52
#' F2 -> OddWrds, lam62
#' F3 -> Boots, lam73
#' F3 -> Gloves, lam83
#' F3 -> Hatchts, lam93
#' F1 <-> F1, NA, 1
#' F2 <-> F2, NA, 1
#' F3 <-> F3, NA, 1
#'
#' (LDresult <- LD(holzinger, model))
#' (LDresult.outlier <- LD(holzinger.outlier, model))
#' plot(LDresult)
#' plot(LDresult.outlier)
#'
#' #-------------------------------------------------------------------
#' #Confirmatory with lavaan
#' model <- 'F1 =~ Remndrs + SntComp + WrdMean
#' F2 =~ MissNum + MxdArit + OddWrds
#' F3 =~ Boots + Gloves + Hatchts'
#'
#' (LDresult <- LD(holzinger, model, orthogonal=TRUE))
#' (LDresult.outlier <- LD(holzinger.outlier, model, orthogonal=TRUE))
#' plot(LDresult)
#' plot(LDresult.outlier)
#'
#' # categorical data with mirt
#' library(mirt)
#' data(LSAT7)
#' dat <- expand.table(LSAT7)
#' model <- mirt.model('F = 1-5')
#' LDresult <- LD(dat, model)
#' plot(LDresult)
#'
#' }
LD <- function(data, model, progress = TRUE, ...)
{
f_numeric <- function(ind, data, model, ...){
res <- factanal(data[-ind, ], model, ...)
Sigma <- res$loadings %*% t(res$loadings) + diag(res$uniquenesses)
sum(dmvnorm(data, sigma=Sigma, log=TRUE))
}
f_sem <- function(ind, data, model, ...){
logLik(sem::sem(model, data=data[-ind, ], ...))
}
f_lavaan <- function(ind, data, model, ...){
lavaan::logLik(lavaan::sem(model, data=data[-ind, ], ...))
}
f_mirt <- function(ind, data, model, large, sv, ...){
large$Freq[[1L]][ind] <- large$Freq[[1L]][ind] - 1L
tmp <- mirt::mirt(data=data, model=model, verbose=FALSE, large=large, pars=sv, ...)
mirt::extract.mirt(tmp, 'logLik')
}
rownames(data) <- 1:nrow(data)
index <- matrix(1L:nrow(data))
if(is.numeric(model)){
if(any(is.na(data)))
stop('Numeric model requires complete dataset (no NA\'s)')
MLmod <- f_numeric(nrow(data) + 1, data=data, model=model, ...)
LR <- myApply(index, MARGIN=1L, FUN=f_numeric, progress=progress,
data=data, model=model, ...)
} else if(class(model) == "semmod"){
MLmod <- sem::sem(model, data=data, ...)
MLmod <- logLik(MLmod)
LR <- myApply(index, MARGIN=1L, FUN=f_sem, progress=progress,
data=data, model=model, ...)
} else if(class(model) == "character"){
MLmod <- lavaan::sem(model, data=data, ...)
MLmod <- lavaan::logLik(MLmod)
LR <- myApply(index, MARGIN=1L, FUN=f_lavaan, progress=progress,
data=data, model=model, ...)
} else if(class(model) == "mirt.model"){
large <- MLmod_full <- mirt::mirt(data=data, model=model, large = 'return')
index <- matrix(1L:length(large$Freq[[1L]]))
MLmod_full <- mirt::mirt(data=data, model=model, verbose = FALSE, large=large, ...)
sv <- mirt::mod2values(MLmod_full)
MLmod <- mirt::extract.mirt(MLmod_full, 'logLik')
index <- matrix(1L:length(large$Freq[[1L]]))
LR <- myApply(index, MARGIN=1L, FUN=f_mirt, progress=progress,
data=data, model=model, large=large, sv=sv, ...)
} else {
stop('model class not supported')
}
deltaX2 <- 2*(MLmod - LR)
if(class(model) != "mirt.model")
names(deltaX2) <- rownames(data)
class(deltaX2) <- 'LD'
deltaX2
}
#' @rdname LD
#' @param x an object of class \code{LD}
#' @param ncases number of extreme cases to display
#' @param digits number of digits to round in the printed result
#' @param ... additional parameters to be passed
#' @export
print.LD <- function(x, ncases = 10, digits = 5, ...)
{
sorted <- x[order(abs(x), decreasing = TRUE)]
if(ncases %% 2 != 0) ncases <- ncases + 1
sorted <- c(sorted[1:(ncases/2)],
sorted[(length(sorted)-(ncases/2 + 1)):length(sorted)])
ret <- matrix(sorted)
rownames(ret) <- names(sorted)
colnames(ret) <- 'LD'
return(print(round(ret, digits)))
}
#' @rdname LD
#' @param y a \code{NULL} value ignored by the plotting function
#' @param type type of plot to use, default displays points and lines
#' @param main the main title of the plot
#' @param ylab the y label of the plot
#' @param absolute logical; use absolute values instead of deviations?
#' @export
plot.LD <- function(x, y = NULL, main = 'Likelihood Distance',
type = c('p','h'), ylab = 'LD', absolute = FALSE, ...)
{
LD <- if(absolute) abs(as.numeric(x)) else as.numeric(x)
ID <- 1:length(x)
dat <- data.frame(LD,ID)
ret <- lattice::xyplot(LD~ID, dat, type = type, main = main, ylab = ylab,
panel = function(...){
panel.xyplot(ID, LD, type='h')
panel.xyplot(ID, LD, type='p')
panel.abline(0)
}, ...)
return(ret)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.