R/itemanalysis.R

Defines functions classifyItemsMirt classifyItems factoranalysis getItemText print.itemText setItemText ggplotICC.RM empICC reliability plot.mapTest print.mapTest mapTest parallelAnalysis frequencies basicDescr dfEFA tryPrintExpr tryCatch.W.E getDataFramesIana getDataFrames

Documented in basicDescr classifyItems classifyItemsMirt dfEFA empICC factoranalysis frequencies getDataFrames getDataFramesIana getItemText ggplotICC.RM mapTest parallelAnalysis plot.mapTest print.mapTest reliability setItemText tryCatch.W.E tryPrintExpr

#' GUI for Item Analysis and Scale Construction
#'
#' Iana is a browser-based GUI for classical item and test analysis, factor analysis, and item response modeling with a focus on items with an ordered-category response format.
#'
#' Iana is a browser-based graphical user interface to R functions for the psychometric analysis of questionnaires and tests with an ordinal response format. Iana tries to integrate the essential statistical analysis steps into a convenient interface. Iana covers classical item and test analysis (reliability, item discrimation), dimensionality tests (parallel analysis and MAP test), principal components and exploratory factor analysis (including factor analysis based on polychoric correlations), one-factor confirmatory analysis, and item response models (partial credit model). Graphical output includes histograms of item and test scores, empirical item characteric curves, and person-item maps, among others.
#'
#' Iana is based on the \href{https://shiny.posit.co}{Shiny Web Framework for R}, which allows a fast bidirectional communication between the browser and R. Iana is "reactive", meaning that its output is instantly updated if any of the inputs (e.g., the selected variables) are changed by the user. This makes it easy to compare the impact of item selection and/or modeling options on the results. Iana comes with some built-in data sets, with which the interface can be tested
#'
#' The basic usage is documented in \code{\link{runiana}}.
#'
#' \tabular{ll}{
#' Package: \tab iana\cr
#' Type: \tab Package\cr
#' Version: \tab 0.1\cr
#' Date: \tab 2019-06-10\cr
#' License: \tab GPL (>= 2)\cr
#' LazyLoad: \tab yes\cr
#' }
#'
#' @name iana-package
#' @aliases iana
#' @importFrom stats cor cov factanal ksmooth loess median na.omit qbeta sd xtabs quantile rnorm
#' @importFrom graphics abline arrows legend par points
#' @importFrom utils read.table
#' @importFrom psych describe principal fa irt.fa fa.parallel KMO skew kurtosi isCorrelation tetrachoric polychoric mixedCor YuleCor smc
#' @importFrom semTools reliability
#' @importFrom mirt mirt
#' @importFrom tidyr drop_na gather
#' @importFrom dplyr select
#' @importFrom rlang .data
#' @import ggplot2 GPArotation lavaan eRm markdown stringr shiny shinythemes shinyAce
#' @docType package
#' @author Michael Hock (\email{michael.hock@@uni-bamberg.de})
#' @references Shiny web framework for R: \url{https://shiny.posit.co}
#' @keywords package
#' @examples
#' \dontrun{runiana()}
NULL


#' Data Frames in Global Environment
#'
#' Returns of a vector with the names of the data frames present in the
#' global environment.
#'
#' @return A character vector with the names of the data frames
#'
#' @author Michael Hock \email{michael.hock@@uni-bamberg.de}
#'
#' @export
getDataFrames <- function() {
    x <- ls(.GlobalEnv)
    x <- x[sapply(x, function(x) is.data.frame(get(x, 1)))]
    x
}

#' Data Frames in Global Environment
#'
#' Returns of a vector with the names of the data frames present in
#' the global environment that contain at least \code{min} cases
#' (after NAs are omitted). If no data frame is found, Iana's example
#' data are loaded.
#'
#' @param min (integer) minimum number of required cases
#'
#' @return A character vector with the names of the data frames
#'
#' @author Michael Hock \email{michael.hock@@uni-bamberg.de}
#'
#' @export
getDataFramesIana <- function(min = 20) {
    x <- ls(.GlobalEnv)
    if (length(x) > 0) {
        x <- x[sapply(x, function(x) is.data.frame(get(x, 1)))]
        if (length(x) > 0) {
            lx <- rep(TRUE, length(x))
            for (i in 1:length(x)) {
                if(nrow(na.omit(get(x[i], 1))) < min) lx[i] <- FALSE
            }
            x <- x[lx]
        }
    }
    
    ### Clean example data
    if (length(x) == 0) {
        load("data/ExampleData.RData", .GlobalEnv)
        x <- ls(.GlobalEnv)
        x <- x[sapply(x, function(x) is.data.frame(get(x, 1)))]
    }
    if (length(x) <= 0) x <- "this should not happen: no data frames found"
    x
}

#' Catch and Save Both Errors and Warnings
#' 
#' Catch and save both errors and warnings, and in the case of
#' a warning, also keep the computed result.
#'
#' @param expr an \R expression to evaluate
#' @return a list with 'value' and 'warning', where
#'   'value' may be an error caught.
#' @author Martin Maechler;
#' Copyright (C) 2010-2012  The R Core Team
tryCatch.W.E <- function(expr) {
    W <- NULL
    w.handler <- function(w){ # warning handler
        W <<- w
        invokeRestart("muffleWarning")
    }
    list(value = withCallingHandlers(tryCatch(expr, error = function(e) e),
        warning = w.handler),
        warning = W)
}

#' Try to Print Results of a Command
#'
#' Try to print the results of a command, catching errors and warnings
#' with \code{\link{tryCatch.W.E}}. Warnings are appended to the
#' results.
#'
#' @param x an expression
#'
#' @author Michael Hock \email{michael.hock@@uni-bamberg.de}
#'
#' @export
tryPrintExpr <- function(x) {
    out <- tryCatch.W.E(x)
    if (inherits(out$value, c("simpleError", "error"))) {
        cat("\n*** ERROR ************************************************************\n")
        cat(str_wrap(str_trim(out$value$message), 70))
        cat("\n**********************************************************************\n\n")
    } else {
        print(out$value)
    }
    if (!is.null(out$warning)) {
        cat("\n*** WARNING **********************************************************\n")
        cat(str_wrap(str_trim(out$warning$message), 70))
        cat("\n**********************************************************************\n\n")
    }
    
}

#' Degrees of Freedom for a Factor Model
#'
#' Returns the degrees of freedom for a exploratory factor model, fit by maximum
#' likelihood. If the model cannot be fitted because it involves too many
#' observed variables a negative value is returned.
#'
#' @param p number of observed variables
#' @param m number of factors to be extracted
#'
#' @return degrees of freedom or a negative value.
#'
#' @author Michael Hock \email{michael.hock@@uni-bamberg.de}
#'
#' @export
dfEFA <- function(p, m) {
    if (p < m) return(-1)
    dof <- 0.5 * ((p - m)^2 - p - m)
    dof
}

#' Basic Descriptive Statistics
#'
#' Return mean, standard deviation, skewness, and kurtosis of a variable. Missing values are removed before the computation.
#'
#' @param x a numeric variable
#'
#' @return a data frame
#'
#' @export
#'
basicDescr <- function(x) {
    # require(psych)
    if (! is.numeric(x)) stop("x must be numeric")
    data.frame(Mean = mean(x, na.rm = TRUE),
        SD = sd(x, na.rm = TRUE),
        Skew = psych::skew(x),
        Kurtosis = psych::kurtosi(x),
        Min = min(x, na.rm = TRUE),
        Max = max(x, na.rm = TRUE))
}

#' Frequencies
#'
#' Returns a data frame representing a table with frequency counts of the 
#' unique values of the variables in x. If present, item stems are appended 
#' as the last row of the table.
#'
#' @param x a data frame
#'
#' @author Michael Hock \email{michael.hock@@uni-bamberg.de}
#'
#' @export
#'
frequencies <- function(x) {
    # needs tidyr::gather
    if (!is.data.frame(x))
        stop("x must be a data frame.")
    it <- getItemText(x)
    names.x <- names(x)
    x <- tidyr::gather(x, key = "item", value = "score", names.x)
    x <- xtabs(~ item + score, data = x)
    x <- as.data.frame(unclass(x))
    # Sort the rows of x according to the orginal item order
    x <- x[match(names.x, rownames(x)),]
    if (!is.null(it)) {
        x$Text <- it
    }
    x
}

#' Parallel analysis
#'
#' Performs a parallel analysis for the variables (usually items) in a data frame.
#'
#' @param x a data frame containing the variables (items) to be analyzed
#' @param fm string specifying the factoring method ("ml", "minres", ...)
#' @param cor string specifying how the correlations are found: "cor" (correlation), "cov" (covariance), "tet" (tetrachoric correlation), "poly" (polychoric correlation)
#' @param n.factors integer specifying the number of factors to plot; if NULL plot all factors
#' @param sim logical; if TRUE, sample from normal distribution, otherwise resample from the data
#' @param onlyFA logical; if TRUE, show only factor analysis results, if FALSE show also results from principal components
#'
# #' @details ...
#'
#' @return a ggplot2 object 
#'
#' @author Computation: William Revelle, see \code{\link{psych}}, specifically \code{\link{fa.parallel}}.
#' @author Plot: Michael Hock, \email{michael.hock@@uni-bamberg.de}
#'
#' @export
#'
parallelAnalysis <- function(x, fm = "ml", cor = "cor", n.factors = NULL, sim = TRUE, onlyFA = FALSE) {
    #     require(ggplot2)
    #     require(psych)
    #     require(tidyr)
    
    # Modified version of fa.parallel from psych, version 1.8.12.
    # Changes:
    # - replacement of mclappy by lapply.
    
    fa.parallel_modified <- function (x, n.obs = NULL, fm = "minres", fa = "both", nfactors = 1,
        main = "Parallel Analysis Scree Plots", n.iter = 20, error.bars = FALSE,
        se.bars = FALSE, SMC = FALSE, ylabel = NULL, show.legend = TRUE,
        sim = TRUE, quant = 0.95, cor = "cor", use = "pairwise",
        plot = TRUE, correct = 0.5)
    {
        cl <- match.call()
        ci <- 1.96
        arrow.len <- 0.05
        nsub <- dim(x)[1]
        nvariables <- dim(x)[2]
        resample <- TRUE
        if ((isCorrelation(x)) && !sim) {
            warning("You specified a correlation matrix, but asked to just resample (sim was set to FALSE).  This is impossible, so sim is set to TRUE")
            sim <- TRUE
            resample <- FALSE
        }
        if (!is.null(n.obs)) {
            nsub <- n.obs
            rx <- x
            resample <- FALSE
            if (dim(x)[1] != dim(x)[2]) {
                warning("You specified the number of subjects, implying a correlation matrix, but do not have a correlation matrix, correlations found ")
                switch(cor, cor = {
                    rx <- cor(x, use = use)
                }, cov = {
                    rx <- cov(x, use = use)
                    covar <- TRUE
                }, tet = {
                    rx <- tetrachoric(x, correct = correct)$rho
                }, poly = {
                    rx <- polychoric(x, correct = correct)$rho
                }, mixed = {
                    rx <- mixedCor(x, use = use, correct = correct)$rho
                }, Yuleb = {
                    rx <- YuleCor(x, , bonett = TRUE)$rho
                }, YuleQ = {
                    rx <- YuleCor(x, 1)$rho
                }, YuleY = {
                    rx <- YuleCor(x, 0.5)$rho
                })
                if (!sim) {
                    warning("You specified a correlation matrix, but asked to just resample (sim was set to FALSE).  This is impossible, so sim is set to TRUE")
                    sim <- TRUE
                    resample <- FALSE
                }
            }
        }
        else {
            if (isCorrelation(x)) {
                warning("It seems as if you are using a correlation matrix, but have not specified the number of cases. The number of subjects is arbitrarily set to be 100  ")
                rx <- x
                nsub = 100
                n.obs = 100
                resample <- FALSE
            }
            else {
                switch(cor, cor = {
                    rx <- cor(x, use = use)
                }, cov = {
                    rx <- cov(x, use = use)
                    covar <- TRUE
                }, tet = {
                    rx <- tetrachoric(x, correct = correct)$rho
                }, poly = {
                    rx <- polychoric(x, correct = correct)$rho
                }, mixed = {
                    rx <- mixedCor(x, use = use, correct = correct)$rho
                }, Yuleb = {
                    rx <- YuleCor(x, , bonett = TRUE)$rho
                }, YuleQ = {
                    rx <- YuleCor(x, 1)$rho
                }, YuleY = {
                    rx <- YuleCor(x, 0.5)$rho
                })
            }
        }
        valuesx <- eigen(rx)$values
        if (SMC) {
            diag(rx) <- smc(rx)
            fa.valuesx <- eigen(rx)$values
        }
        else {
            fa.valuesx <- fa(rx, nfactors = nfactors, rotate = "none",
                fm = fm, warnings = FALSE)$values
        }
        temp <- list(samp = vector("list", n.iter), samp.fa = vector("list",
            n.iter), sim = vector("list", n.iter), sim.fa = vector("list",
                n.iter))
        ## Original:
        # templist <- mclapply(1:n.iter, function(XX) {
        ## New:
        templist <- lapply(1:n.iter, function(XX) {
            if (is.null(n.obs)) {
                bad <- TRUE
                while (bad) {
                    sampledata <- matrix(apply(x, 2, function(y) sample(y,
                        nsub, replace = TRUE)), ncol = nvariables)
                    colnames(sampledata) <- colnames(x)
                    switch(cor, cor = {
                        C <- cor(sampledata, use = use)
                    }, cov = {
                        C <- cov(sampledata, use = use)
                        covar <- TRUE
                    }, tet = {
                        C <- tetrachoric(sampledata, correct = correct)$rho
                    }, poly = {
                        C <- polychoric(sampledata, correct = correct)$rho
                    }, mixed = {
                        C <- mixedCor(sampledata, use = use, correct = correct)$rho
                    }, Yuleb = {
                        C <- YuleCor(sampledata, , bonett = TRUE)$rho
                    }, YuleQ = {
                        C <- YuleCor(sampledata, 1)$rho
                    }, YuleY = {
                        C <- YuleCor(sampledata, 0.5)$rho
                    })
                    bad <- any(is.na(C))
                }
                values.samp <- eigen(C)$values
                temp[["samp"]] <- values.samp
                if (fa != "pc") {
                    if (SMC) {
                        sampler <- C
                        diag(sampler) <- smc(sampler)
                        temp[["samp.fa"]] <- eigen(sampler)$values
                    }
                    else {
                        temp[["samp.fa"]] <- fa(C, fm = fm, nfactors = nfactors,
                            SMC = FALSE, warnings = FALSE)$values
                    }
                }
            }
            if (sim) {
                simdata = matrix(rnorm(nsub * nvariables), nrow = nsub,
                    ncol = nvariables)
                sim.cor <- cor(simdata)
                temp[["sim"]] <- eigen(sim.cor)$values
                if (fa != "pc") {
                    if (SMC) {
                        diag(sim.cor) <- smc(sim.cor)
                        temp[["sim.fa"]] <- eigen(sim.cor)$values
                    }
                    else {
                        fa.values.sim <- fa(sim.cor, fm = fm, nfactors = nfactors,
                            SMC = FALSE, warnings = FALSE)$values
                        temp[["sim.fa"]] <- fa.values.sim
                    }
                }
            }
            replicates <- list(samp = temp[["samp"]], samp.fa = temp[["samp.fa"]],
                sim = temp[["sim"]], sim.fa = temp[["sim.fa"]])
        })
        if (is.null(ylabel)) {
            ylabel <- switch(fa, pc = "eigen values of principal components",
                fa = "eigen values of principal factors", both = "eigenvalues of principal components and factor analysis")
        }
        values <- t(matrix(unlist(templist), ncol = n.iter))
        values.sim.mean = colMeans(values, na.rm = TRUE)
        values.ci = apply(values, 2, function(x) quantile(x, quant))
        if (se.bars) {
            values.sim.se <- apply(values, 2, sd, na.rm = TRUE)/sqrt(n.iter)
        }
        else {
            values.sim.se <- apply(values, 2, sd, na.rm = TRUE)
        }
        ymin <- min(valuesx, values.sim.mean)
        ymax <- max(valuesx, values.sim.mean)
        sim.pcr <- sim.far <- NA
        switch(fa, pc = {
            if (plot) {
                plot(valuesx, type = "b", main = main, ylab = ylabel,
                    ylim = c(ymin, ymax), xlab = "Component Number",
                    pch = 4, col = "blue")
            }
            if (resample) {
                sim.pcr <- values.sim.mean[1:nvariables]
                sim.pcr.ci <- values.ci[1:nvariables]
                sim.se.pcr <- values.sim.se[1:nvariables]
                if (plot) {
                    points(sim.pcr, type = "l", lty = "dashed", pch = 4,
                        col = "red")
                }
            } else {
                sim.pcr <- NA
                sim.se.pc <- NA
            }
            if (sim) {
                if (resample) {
                    sim.pc <- values.sim.mean[(nvariables + 1):(2 *
                            nvariables)]
                    sim.pc.ci <- values.ci[(nvariables + 1):(2 *
                            nvariables)]
                    sim.se.pc <- values.sim.se[(nvariables + 1):(2 *
                            nvariables)]
                } else {
                    sim.pc <- values.sim.mean[1:nvariables]
                    sim.pc.ci <- values.ci[1:nvariables]
                    sim.se.pc <- values.sim.se[1:nvariables]
                }
                if (plot) {
                    points(sim.pc, type = "l", lty = "dotted", pch = 4,
                        col = "red")
                }
                pc.test <- which(!(valuesx > sim.pc.ci))[1] - 1
            } else {
                sim.pc <- NA
                sim.pc.ci <- NA
                sim.se.pc <- NA
                pc.test <- which(!(valuesx > sim.pcr.ci))[1] - 1
            }
            fa.test <- NA
            sim.far <- NA
            sim.fa <- NA
        }, fa = {
            if (plot) {
                plot(fa.valuesx, type = "b", main = main, ylab = ylabel,
                    ylim = c(ymin, ymax), xlab = "Factor Number",
                    pch = 2, col = "blue")
            }
            sim.se.pc <- NA
            if (resample) {
                sim.far <- values.sim.mean[(nvariables + 1):(2 *
                        nvariables)]
                sim.far.ci <- values.ci[(nvariables + 1):(2 * nvariables)]
                sim.se.far <- values.sim.se[(nvariables + 1):(2 *
                        nvariables)]
                if (plot) {
                    points(sim.far, type = "l", lty = "dashed", pch = 2,
                        col = "red")
                }
            }
            if (sim) {
                if (resample) {
                    sim.fa <- values.sim.mean[(3 * nvariables + 1):(4 *
                            nvariables)]
                    sim.fa.ci <- values.ci[(3 * nvariables + 1):(4 *
                            nvariables)]
                    sim.se.fa <- values.sim.se[(3 * nvariables +
                            1):(4 * nvariables)]
                } else {
                    sim.fa <- values.sim.mean[(nvariables + 1):(2 *
                            nvariables)]
                    sim.fa.ci <- values.sim.mean[(nvariables + 1):(2 *
                            nvariables)]
                    sim.se.fa <- values.sim.se[(nvariables + 1):(2 *
                            nvariables)]
                    sim.far <- NA
                    sim.far.ci <- NA
                    sim.se.far <- NA
                }
                if (plot) {
                    points(sim.fa, type = "l", lty = "dotted", pch = 2,
                        col = "red")
                }
                fa.test <- which(!(fa.valuesx > sim.fa.ci))[1] -
                    1
            } else {
                sim.fa <- NA
                fa.test <- which(!(fa.valuesx > sim.far.ci))[1] -
                    1
            }
            sim.pc <- NA
            sim.pcr <- NA
            sim.se.pc <- NA
            pc.test <- NA
        }, both = {
            if (plot) {
                plot(valuesx, type = "b", main = main, ylab = ylabel,
                    ylim = c(ymin, ymax), xlab = "Factor/Component Number",
                    pch = 4, col = "blue")
                points(fa.valuesx, type = "b", pch = 2, col = "blue")
            }
            if (sim) {
                if (resample) {
                    sim.pcr <- values.sim.mean[1:nvariables]
                    sim.pcr.ci <- values.ci[1:nvariables]
                    sim.se.pcr <- values.sim.se[1:nvariables]
                    sim.far <- values.sim.mean[(nvariables + 1):(2 *
                            nvariables)]
                    sim.se.far <- values.sim.se[(nvariables + 1):(2 *
                            nvariables)]
                    sim.far.ci <- values.ci[(nvariables + 1):(2 *
                            nvariables)]
                    sim.pc <- values.sim.mean[(2 * nvariables + 1):(3 *
                            nvariables)]
                    sim.pc.ci <- values.ci[(2 * nvariables + 1):(3 *
                            nvariables)]
                    sim.se.pc <- values.sim.se[(2 * nvariables +
                            1):(3 * nvariables)]
                    sim.fa <- values.sim.mean[(3 * nvariables + 1):(4 *
                            nvariables)]
                    sim.fa.ci <- values.ci[(3 * nvariables + 1):(4 *
                            nvariables)]
                    sim.se.fa <- values.sim.se[(3 * nvariables +
                            1):(4 * nvariables)]
                    pc.test <- which(!(valuesx > sim.pcr.ci))[1] -
                        1
                    fa.test <- which(!(fa.valuesx > sim.far.ci))[1] -
                        1
                } else {
                    sim.pc <- values.sim.mean[1:nvariables]
                    sim.pc.ci <- values.ci[1:nvariables]
                    sim.se.pc <- values.sim.se[1:nvariables]
                    sim.fa <- values.sim.mean[(nvariables + 1):(2 *
                            nvariables)]
                    sim.fa.ci <- values.ci[(nvariables + 1):(2 *
                            nvariables)]
                    sim.se.fa <- values.sim.se[(nvariables + 1):(2 *
                            nvariables)]
                    pc.test <- which(!(valuesx > sim.pc.ci))[1] -
                        1
                    fa.test <- which(!(fa.valuesx > sim.fa.ci))[1] -
                        1
                }
                if (plot) {
                    points(sim.pc, type = "l", lty = "dotted", pch = 4,
                        col = "red")
                    points(sim.fa, type = "l", lty = "dotted", pch = 4,
                        col = "red")
                    points(sim.pcr, type = "l", lty = "dashed", pch = 2,
                        col = "red")
                    points(sim.far, type = "l", lty = "dashed", pch = 2,
                        col = "red")
                }
                pc.test <- which(!(valuesx > sim.pc.ci))[1] - 1
                fa.test <- which(!(fa.valuesx > sim.fa.ci))[1] -
                    1
            } else {
                sim.pcr <- values.sim.mean[1:nvariables]
                sim.pcr.ci <- values.ci[1:nvariables]
                sim.se.pcr <- values.sim.se[1:nvariables]
                sim.far <- values.sim.mean[(nvariables + 1):(2 *
                        nvariables)]
                sim.far.ci <- values.ci[(nvariables + 1):(2 * nvariables)]
                sim.se.far <- values.sim.se[(nvariables + 1):(2 *
                        nvariables)]
                sim.fa <- NA
                sim.pc <- NA
                sim.se.fa <- NA
                sim.se.pc <- NA
                pc.test <- which(!(valuesx > sim.pcr.ci))[1] - 1
                fa.test <- which(!(fa.valuesx > sim.far.ci))[1] -
                    1
            }
            if (resample) {
                if (plot) {
                    points(sim.pcr, type = "l", lty = "dashed", pch = 4,
                        col = "red")
                    points(sim.far, type = "l", lty = "dashed", pch = 4,
                        col = "red")
                }
            }
        })
        if (error.bars) {
            if (!any(is.na(sim.pc))) {
                for (i in 1:length(sim.pc)) {
                    ycen <- sim.pc[i]
                    yse <- sim.se.pc[i]
                    arrows(i, ycen - ci * yse, i, ycen + ci * yse,
                        length = arrow.len, angle = 90, code = 3, col = par("fg"),
                        lty = NULL, lwd = par("lwd"), xpd = NULL)
                }
            }
            if (!any(is.na(sim.pcr))) {
                for (i in 1:length(sim.pcr)) {
                    ycen <- sim.pcr[i]
                    yse <- sim.se.pcr[i]
                    arrows(i, ycen - ci * yse, i, ycen + ci * yse,
                        length = arrow.len, angle = 90, code = 3, col = par("fg"),
                        lty = NULL, lwd = par("lwd"), xpd = NULL)
                }
            }
            if (!any(is.na(sim.fa))) {
                for (i in 1:length(sim.fa)) {
                    ycen <- sim.fa[i]
                    yse <- sim.se.fa[i]
                    arrows(i, ycen - ci * yse, i, ycen + ci * yse,
                        length = arrow.len, angle = 90, code = 3, col = par("fg"),
                        lty = NULL, lwd = par("lwd"), xpd = NULL)
                }
            }
            if (!any(is.na(sim.far))) {
                for (i in 1:length(sim.far)) {
                    ycen <- sim.far[i]
                    yse <- sim.se.far[i]
                    arrows(i, ycen - ci * yse, i, ycen + ci * yse,
                        length = arrow.len, angle = 90, code = 3, col = par("fg"),
                        lty = NULL, lwd = par("lwd"), xpd = NULL)
                }
            }
        }
        if (show.legend && plot) {
            if (is.null(n.obs)) {
                switch(fa, both = {
                    if (sim) {
                        legend("topright", c("  PC  Actual Data", "  PC  Simulated Data",
                            " PC  Resampled Data", "  FA  Actual Data",
                            "  FA  Simulated Data", " FA  Resampled Data"),
                            col = c("blue", "red", "red", "blue", "red",
                                "red"), pch = c(4, NA, NA, 2, NA, NA),
                            text.col = "green4", lty = c("solid", "dotted",
                                "dashed", "solid", "dotted", "dashed"),
                            merge = TRUE, bg = "gray90")
                    } else {
                        legend("topright", c("  PC  Actual Data", " PC  Resampled Data",
                            "  FA  Actual Data", " FA  Resampled Data"),
                            col = c("blue", "red", "blue", "red"), pch = c(4,
                                NA, 2, NA, NA), text.col = "green4", lty = c("solid",
                                    "dashed", "solid", "dashed"), merge = TRUE,
                            bg = "gray90")
                    }
                }, pc = {
                    if (sim) {
                        legend("topright", c("  PC  Actual Data", "  PC  Simulated Data",
                            " PC  Resampled Data"), col = c("blue", "red",
                                "red", "blue", "red", "red"), pch = c(4,
                                    NA, NA, 2, NA, NA), text.col = "green4",
                            lty = c("solid", "dotted", "dashed", "solid",
                                "dotted", "dashed"), merge = TRUE, bg = "gray90")
                    } else {
                        legend("topright", c("  PC  Actual Data", " PC  Resampled Data"),
                            col = c("blue", "red", "red", "blue", "red",
                                "red"), pch = c(4, NA, NA, 2, NA, NA),
                            text.col = "green4", lty = c("solid", "dashed",
                                "solid", "dotted", "dashed"), merge = TRUE,
                            bg = "gray90")
                    }
                }, fa = {
                    if (sim) {
                        legend("topright", c("  FA  Actual Data", "  FA  Simulated Data",
                            " FA  Resampled Data"), col = c("blue", "red",
                                "red", "blue", "red", "red"), pch = c(4,
                                    NA, NA, 2, NA, NA), text.col = "green4",
                            lty = c("solid", "dotted", "dashed", "solid",
                                "dotted", "dashed"), merge = TRUE, bg = "gray90")
                    } else {
                        legend("topright", c("  FA  Actual Data", " FA  Resampled Data"),
                            col = c("blue", "red", "red", "blue", "red",
                                "red"), pch = c(4, NA, NA, 2, NA, NA),
                            text.col = "green4", lty = c("solid", "dashed",
                                "solid", "dotted", "dashed"), merge = TRUE,
                            bg = "gray90")
                    }
                })
            }
            else {
                switch(fa, both = {
                    legend("topright", c("PC  Actual Data", " PC  Simulated Data",
                        "FA  Actual Data", " FA  Simulated Data"),
                        col = c("blue", "red", "blue", "red"), pch = c(4,
                            NA, 2, NA), text.col = "green4", lty = c("solid",
                                "dotted", "solid", "dotted"), merge = TRUE,
                        bg = "gray90")
                }, pc = {
                    legend("topright", c("PC  Actual Data", " PC  Simulated Data"),
                        col = c("blue", "red", "blue", "red"), pch = c(4,
                            NA, 2, NA), text.col = "green4", lty = c("solid",
                                "dotted", "solid", "dotted"), merge = TRUE,
                        bg = "gray90")
                }, fa = {
                    legend("topright", c("FA  Actual Data", " FA  Simulated Data"),
                        col = c("blue", "red", "blue", "red"), pch = c(4,
                            NA, 2, NA), text.col = "green4", lty = c("solid",
                                "dotted", "solid", "dotted"), merge = TRUE,
                        bg = "gray90")
                })
            }
        }
        colnames(values) <- paste0("Sim", 1:ncol(values))
        if (fa != "pc" && plot)
            abline(h = 1)
        results <- list(fa.values = fa.valuesx, pc.values = valuesx,
            pc.sim = sim.pc, pc.simr = sim.pcr, fa.sim = sim.fa,
            fa.simr = sim.far, nfact = fa.test, ncomp = pc.test,
            Call = cl)
        if (fa == "pc") {
            colnames(values)[1:nvariables] <- paste0("C", 1:nvariables)
        }
        else {
            colnames(values)[1:(2 * nvariables)] <- c(paste0("C",
                1:nvariables), paste0("F", 1:nvariables))
            if (sim) {
                if (resample)
                    colnames(values)[(2 * nvariables + 1):ncol(values)] <- c(paste0("CSim",
                        1:nvariables), paste0("Fsim", 1:nvariables))
            }
            results$nfact <- fa.test
        }
        results$ncomp <- pc.test
        results$values <- values
        cat("Parallel analysis suggests that ")
        cat("the number of factors = ", fa.test, " and the number of components = ",
            pc.test, "\n")
        class(results) <- c("psych", "parallel")
        return(invisible(results))
    }
    
    if (!is.data.frame(x)) stop("x must be data frame")
    ### x0 <- psych::fa.parallel(x, fm = fm, cor = cor, sim = sim)
    x0 <- fa.parallel_modified(x, fm = fm, cor = cor, sim = sim)
    
    nfac <- length(x0$pc.values)
    x <- data.frame(Factor = 1:nfac,
        pc = x0$pc.values,
        fa = x0$fa.values)
    if (sim) {
        x$pc.sim = x0$pc.sim
        x$fa.sim = x0$fa.sim        
    } else {
        x$pc.sim = x0$pc.simr
        x$fa.sim = x0$fa.simr        
    }
    
    mydata <- tidyr::gather(x, key = "Source", value = "ev", names(x)[-1])
    mydata$sim <- c(rep("Empirical", 2*nfac), rep("Simulated", 2*nfac))
    mydata$type <- c(rep("Principal Components Analysis", nfac),
        rep("Factor Analysis", nfac),
        rep("Principal Components Analysis", nfac),
        rep("Factor Analysis", nfac))
    
    if (is.null(n.factors)) n.factors  <- nfac
    mydata <- mydata[mydata$Factor <= n.factors,]
    n.factors <- max(mydata$Factor)
    
    # Compute ticks for x-axis
    if (n.factors < 11) {
        myticks <- 1:n.factors
    } else {
        incr <- round(n.factors/10)
        myticks <- seq(from = 1, to = n.factors, by = incr)
    }
    
    # Plot
    if (onlyFA) {
        mydata <- mydata[mydata$type == "Factor Analysis",]
        # For use of '.data' see https://rlang.r-lib.org/reference/dot-data.html
        pl <- ggplot2::ggplot(mydata, aes(.data$Factor, .data$ev, shape=sim, colour=sim)) +
            xlab("Factor")
        
    } else {
        pl <- ggplot2::ggplot(mydata, aes(.data$Factor, .data$ev, shape= sim, colour= sim)) +
            xlab("Factor/Component") +
            facet_wrap(~ type)
        
    }
    pl <- pl +
        ylab("Eigenvalue") +
        geom_point(size = I(4)) +
        geom_line(aes(linetype=sim)) +
        scale_linetype_manual(breaks=c("Empirical", "Simulated"), values=c(1,5)) +
        scale_color_manual(values=c("#000000", "#9999CC")) +
        theme(legend.title=element_blank(),
            legend.position=c(1, 1),
            legend.justification=c(1, 1),
            legend.box.just = "left") +
        scale_x_continuous(breaks = myticks) +
        geom_abline(intercept = 0:1, slope = 0, colour = "#9999CC",
            linetype = "dashed")
    pl
}

#' Velicer's MAP Test
#'
#' Perform Velicer's Minimum Average Partial test for the variables (usually items) in a data frame.
#'
#' @param data a data frame containing the variables (items) to be analyzed
#' @param n the maximum number of factors to consider (default: 20)
#' @param ... ignored
#' @param x a mapTest object
#'
#' @details The \code{plot} method produces a graphical representation of the squared correlations.
#'
#' @return A vector of class 'mapTest' containing the average squared (partial)
#'   correlations. The first element of the vector is the average squared
#'   correlation with no component removed.
#'
#' @author Computation: William Revelle, see \code{\link{psych}}, specifically \code{\link{VSS}}.
#' @author Plot: Michael Hock, \email{michael.hock@@uni-bamberg.de}
#'
#' @references Velicer, W. (1976) Determining the number of components from the matrix of partial correlations. \emph{Psychometrika, 41,} 321-327.
#' @export
#'
#' @examples
#' local({
#' set.seed(1234)
#' #
#' # Simulate 2 factors (f1, f2) underlying 6 items (x1 to x6).
#' #
#' n <- 200
#' f1 <- rnorm(n)
#' f2 <- rnorm(n)
#' Df <- data.frame(x1 = f1 + rnorm(n),
#'                  x2 = f1 + rnorm(n),
#'                  x3 = f1 + rnorm(n),
#'                  x4 = f2 + rnorm(n),
#'                  x5 = f2 + rnorm(n),
#'                  x6 = f2 + rnorm(n))
#' mt <- mapTest(Df)
#' print(mt)
#' plot(mt)
#'
#' #
#' # If we remove 1 item the MAP test suggests the wrong number of factors...
#' #
#' Df2 <- dplyr::select(Df, -6)
#' mt2 <- mapTest(Df2)
#' print(mt2)
#' plot(mt2)
#' factanal(Df2, 2)
#' })
#'
mapTest <- function(data, n = 20, ...) {
    x <- cor(data, use="pairwise")
    nvar <- dim(x)[2]
    
    mean.sqcor <- mean((x[lower.tri(x)])^2)
    mean.sqcor
    
    if (n >= nvar) n1 <- nvar - 1
    else n1 <- n
    
    min.partial <- rep(NA, n1)
    e <- eigen(x)
    evect <- e$vectors
    comp <- evect %*% diag(sqrt(e$values))
    for (i in 1:n1) {
        c11.star <- x - comp[,1:i] %*% t(comp[,1:i])
        d <- diag(1/sqrt(diag(c11.star)))
        rstar <- d %*% c11.star %*% d
        diag(rstar) <- 0
        min.partial[i] <- sum(rstar * rstar)  / (nvar*(nvar-1))
    }
    res <- c(mean.sqcor, min.partial)
    class(res) <- c("mapTest", "numeric")
    return(res)
}

#' @rdname mapTest
#' @method print mapTest
#' @export
print.mapTest <- function(x, ...) {
    cat("Velicer's MAP test reaches its minimum with m =", which.min(x)-1, "components.\nMaximum number of components tested:", length(x) - 1, "\n\n")
    MAP <- data.frame(m = 0:(length(x) - 1), AP = round(x, 3))
    print(MAP, row.names = FALSE)
}

#' Plot mapTest Object
#' 
#' Plot mapTest object, see \code{\link{mapTest}}.
#' 
#' @param x mapTest object
#' @param ... ignored
#' @method plot mapTest
#' @export
#' 
plot.mapTest <- function(x, ...) {
    Component <- 0:(length(x) - 1)
    MAP <- data.frame(Component, x)
    # Compute ticks for x-axis
    myticks <- Component
    
    p <- ggplot(MAP, aes(Component, x)) +
        geom_line() +
        geom_point(size = 5,
            shape = 21,
            fill = "white") +
        geom_abline(intercept = x[1], slope = 0, colour = "darkblue",
            linetype = "dashed") +
        theme(text = element_text(size = 20, colour = "black"),
            axis.title.x = element_text(vjust = 0.2),
            axis.title.y = element_text(vjust = 0.3),
            plot.title = element_text(vjust = 1.5)) +
        xlab("Components Removed") +
        scale_x_continuous(breaks=myticks) # + # Ticks
    p
}

#' Reliability
#'
#' Performs classical item and reliability analysis of set of items.
#'
#' @param x a numeric data frame in which rows represent persons and columns represent items to be analyzed
#' @param invert logical specifying whether items with negative loadings on the first principal component of the data should be inverted (default: TRUE)
#' @param digits number of digits to use in the output
#' @param dfname name of the data frame (only needed by Iana)
#'
#' @details \code{reliability} checks if items have negative loadings on the first principal component of the data and, if so, inverts these items.
#'
#' @author Michael Hock \email{michael.hock@@uni-bamberg.de}
#'
#' @export
reliability <- function(x, invert = TRUE, digits = 3, dfname = NULL) {
    if (is.null(dfname))
        dfname <- deparse(substitute(x))
    if (!is.data.frame(x))
        stop("x must be a data frame.")
    n <- ncol(x)
    if (n < 2)
        stop("At least 2 items are needed.")
    cases.orig <- nrow(x)
    ### Better use tidyr::drop_na? na.omit throws attributes away.
    x <- na.omit(x)
    cases <- nrow(x)
    if (cases < cases.orig) {
        warning("Missing values encountered")
        cat("\nMissing values encountered.\n")
        cat("Using ", cases, " out of ", cases.orig, " cases.\n")
    }
    if (cases < 3)
        stop("At least 3 cases are needed.")
    loadings <- try(principal(x)$loadings[, 1], silent = TRUE)
    if (inherits(loadings, "try-error")) {
        warning("principal component loadings cannot be computed")
    } else {
        if (any(loadings < 0)) {
            neg.items <- names(loadings[loadings < 0])
            cat("WARNING:\nThe following items have loadings < 0 on the first principal component:\n")
            cat(paste(neg.items, collapse = ", "))
            cat("\nYou may want to omit or invert these items.\n\nCode:\n")
            maxx <- max(x) + min(x)
            cat("# Assign the sum of the mininum and the maximum response to maxx.             \nmaxx <-",
                maxx, "\n")
            for (item in neg.items) {
                item <- paste0(dfname, "$", item)
                cat(paste(item, "<- maxx -", item, "\n"))
            }
            cat("# End of code\n")
            if (invert) {
                cat("\nNote: For the following analyses, these items were inverted.\n")
                x[, names(loadings[loadings < 0])] <- maxx - x[,
                    names(loadings[loadings < 0])]
            }
        }
    }
    M <- cov(x)
    alpha <- (n/(n - 1)) * (1 - sum(diag(M))/sum(M))
    M <- cor(x)
    stand.alpha <- (n/(n - 1)) * (1 - sum(diag(M))/sum(M))
    row.sum <- apply(x, 1, sum)
    cov.it <- cov(x, row.sum)
    r.it <- cor(x, row.sum)
    r.it.c <- matrix(nrow = n, ncol = 1)
    cov.it.c <- matrix(nrow = n, ncol = 1)
    alpha.rm <- matrix(nrow = n, ncol = 1)
    if (n > 2) {
        for (i in 1:n) {
            row.sum <- apply(x[, -i], 1, sum)
            cov.it.c[i, 1] <- cov(x[, i], row.sum)
            r.it.c[i, 1] <- cor(x[, i], row.sum)
            M <- cov(x[, -i])
            alpha.rm[i, 1] <- ((n - 1)/(n - 2)) * (1 - sum(diag(M))/sum(M))
        }
    }
    else {
        warning("With only 2 items \"corrected\" statistics cannot be computed.")
    }
    item.means <- colMeans(x)
    item.sds <- apply(x, 2, sd)
    bad <- ifelse(alpha.rm > alpha, "*", "")
    badest <- which.max(alpha.rm)
    bad[badest] <- paste(bad[badest], "<=")
    bad <- str_pad(bad, 4, "right")
    itemstats <- data.frame(M = item.means, SD = item.sds, cov.it,
        cov.it.c, r.it, r.it.c, alpha.rm, bad)
    old.ops <- options(digits = digits)
    on.exit(options(old.ops), add = TRUE)
    cat("\nCronbach's alpha is        ", alpha, "\n")
    cat("Standardized item alpha is ", stand.alpha, "\n")
    cat("\nItem statistics:\n\n")
    print(itemstats)
    cat("\nCOV/r_it: item-total covariance/correlation\nCOV/r_itc: 'corrected' item-total covariance/correlation\n    (with the respective item removed)\nalpha.rm: alpha if item removed\nbad: *item decreases alpha, <= item with mininum contribution to alpha\n")
    cat("\nCases: ", cases, "     Items: ", n, "\n")
    cat("\nStatistics for the total score (sums or means of item scores):\n\n")
    Total <- data.frame(Sum_score = rowSums(x), Mean_score = rowMeans(x))
    print(describe(Total))
    cat("\n")
}

#' Empirical ICCs
#'
#' Plot of empirical ICCs of the items in a data frame.
#'
#' @param x data frame in which rows are subjects and colums are test items
#' @param score total score to use
#' @param method function to use for curve fitting, e.g. "loess" or "lm"
#' @param span degree of smoothing used for loess (see \code{\link{loess}})
#' @param alpha opaqueness of the points in the scatterplot
#' @param jitter amount of jitter
#'
#' @details To examine item characteristics, item scores are plotted against total scores or factor scores. To avoid overplotting, a small amount of jitter is added to overlapping points. In the upper left corner, the correlation of the total/factor score and the item score is given. The lines are locally weighted regression lines, the shaded regions represent 95\% confidence intervals around the expected item scores. If many data points are present it may be useful to decrease the opaqueness of the points and/or to deactivate jitter.
#'
#' @author Michael Hock \email{michael.hock@@uni-bamberg.de}
#'
#' @export
empICC <- function(x,
    score = c("factor.thomson", "factor.bartlett", "mean", "sum"),
    method = "loess",
    span = 0.75,
    alpha = 0,
    jitter = 0.4) {
    
    if (!is.data.frame(x)) stop("x must be a data frame.")
    
    ### x <- na.omit(x)
    
    ###
    if (is.na(jitter)) jitter <- 0.4  #### needed for shiny (otherwise app crashes if values are outside the "allowed" range)
    ###
    
    score <- match.arg(score)
    if (score == "factor.thomson") {
        xlab <- ("Factor Score")
        fm <- factanal(x, 1, scores = "regression")
        scores <- fm$scores[,1]
    } else if (score == "factor.bartlett") {
        xlab <- ("Factor Score")
        fm <- factanal(x, 1, scores = "Bartlett")
        scores <- fm$scores[,1]
    } else if (score == "sum") {
        xlab <- ("Total (Sum) Score")
        scores <- rowSums(x, na.rm = TRUE) ####
    } else {
        xlab <- ("Total (Mean) Score")
        scores <- rowMeans(x, na.rm = TRUE) ####
    }
    
    variable <- factor(names(x))
    x <- cbind(scores, x)
    corrs <- round(cor(x)[-1,1], 2)
    corrs <- sprintf("r==%.2f", corrs)
    corrs <- data.frame(variable, corrs)
    
    # x <- reshape2::melt(x, id.vars = "scores") # original version
    x <- tidyr::gather(x, key = "variable", value = "value", names(x)[-1])
    x.corrs <- min(x$scores)
    y.corrs <- max(x$value) + 0.25 ###
    
    p <- ggplot(x, aes(scores, .data$value)) +
        geom_jitter(width = jitter, height = jitter, alpha = I(alpha)) +
        facet_wrap(~variable) +
        xlab(xlab) + ylab("Item Score") +
        geom_text(aes(x=x.corrs, y=y.corrs, label=corrs),
            data=corrs,
            parse=TRUE, hjust = 0, size = 5) +
        geom_smooth(method = method, span = span) +
        theme(text = element_text(size = 14))
    p
}

#' ICCs for Rasch Model
#'
#' Plot item characteristic curves for a Rasch model.
#'
#' @param object of class RM
#' @param empICC see eRm::plotICC
#' @param empCI not used yet
#' @param xlim see eRm::plotICC
#' @param xlab see eRm::plotICC
#' @param ylab see eRm::plotICC
#'
#' @details This a slightly modified version of plotICC.Rm from package eRm (version 0.15-1). The main  difference is the usage of ggplot2 for plotting.
#'
#' @author Michael Hock \email{michael.hock@@uni-bamberg.de}
#'
#' @export
ggplotICC.RM <- function(object, empICC = NULL, empCI = NULL,
    xlim = c(-4,4), xlab = "Dimension", ylab = "Probability") {
    ### Mik
    plist.internal <- function(object,theta)
        # computes a list of expected probabilities for objects of class Rm
        # with 0th category included!
    {
        
        X <- object$X
        mt_vek <- apply(X, 2, max, na.rm=TRUE) # number of categories - 1 for each item
        mt_ind <- rep(1:length(mt_vek), mt_vek)
        
        #--------compute list matrix of probabilites for fixed theta)
        p.list <- tapply(object$betapar, mt_ind, function(beta.i) {
            beta.i <- c(0, beta.i)
            ind.h <- 0:(length(beta.i)-1)
            theta.h <- ind.h %*% t(theta)
            tb <- exp(theta.h+beta.i)
            denom <- colSums(tb)
            pi.mat <- apply(tb, 1, function(y) {y/denom})
            return(pi.mat)
        })
        return(p.list)
    }
    
    ### Hack to avoid Note in package check
    Theta <- NULL
    Probability <- NULL
    ICC <- NULL
    Dimension <- NULL
    Eigenvalue <- NULL
    ### End of Hack
    
    # plotICC.Rm from eRm 0.15-1
    ############################################################################
    if (object$model != "RM") stop("object must be Rasch model (RM)")
    
    X <- object$X
    #    if (is.null(col)) col <- 1:(max(apply(X,2,max,na.rm=TRUE))+1)
    #    main.arg <- main
    
    # some sanity checks
    
    if (is.null(empICC)) {
        emp.plot <- FALSE
    } else if (!is.element(empICC[[1]], c("raw","loess","tukey","kernel"))) {
        emp.plot <- FALSE
        warning('empICC must be one of "raw","loess","tukey","kernel"')
    } else {
        th.est <- person.parameter(object)
        thetapar <- th.est$thetapar
        if (length(thetapar)!= 1) {
            warning("empirical ICCs are not produced for different NA groups")
            emp.plot <- FALSE
        } else {
            thetapar.u <- unique(round(unlist(thetapar),5))
            if (length(thetapar.u)<4) {
                warning("No empirical ICCs for less the 4 different person parameters")
                emp.plot <- FALSE
            } else
                emp.plot <- TRUE
        }
    }
    
    theta <- seq(xlim[1],xlim[2], by = 0.1) # x-axis
    p.list <- plist.internal(object,theta)  # matrix of probabilities
    th.ord <- order(theta) #### Why this?
    
    textlab <- colnames(object$X)
    ivec <- 1:length(p.list)
    
    p.list <- lapply(p.list,function(x) {x[,-1]})  # Delete 0-probabilites
    p.mat <- matrix(unlist(p.list),ncol=length(p.list))  # matrix with solving probabilities
    text.ylab <- p.mat[(1:length(theta))[theta==median(theta)],]
    
    ### plot for RMs #################
    for (j in 1:length(ivec)) { # runs over items
        i <- ivec[j]
        yp <- as.matrix(p.list[[i]])
        yy <- yp[th.ord,]
        
        ####################################################################
        ### ICC: 'model', 'empirical'
        if (j == 1)
            mikPlotData <- data.frame(
                ICC = factor(rep("Model", length(theta)),
                    levels = c("Empirical", "Model")),
                Item = rep(textlab[i], length(theta)),
                Theta = sort(theta), Probability = yy)
        else
            mikPlotData <- rbind(mikPlotData,
                data.frame(
                    ICC = factor(rep("Model", length(theta)),
                        levels = c("Empirical", "Model")),
                    Item = rep(textlab[i], length(theta)),
                    Theta = sort(theta), Probability = yy))
        ###
        
        ## empirical ICC
        if (emp.plot) {
            freq.table <- as.matrix(table(rowSums(X),X[,i]))
            rel.freq <- freq.table[,2]/rowSums(freq.table)
            idx <- as.numeric(rownames(freq.table))
            xy<-cbind(th.est$pred.list[[1]]$y[idx+1],rel.freq)
            
            
            if(empICC[[1]]=="loess")
                if(!is.null(empICC$smooth)) smooth <- empICC$smooth else smooth <- 0.75
            if(empICC[[1]]=="kernel")
                if(!is.null(empICC$smooth)) smooth<-empICC$smooth else smooth<-0.5
            
            nn <- rowSums(freq.table)
            switch(empICC[[1]],
                "raw"={},
                "loess"={xy[,2]<-loess(xy[,2]~xy[,1],span=smooth)$fitted},
                "tukey"={xy[,2]<-smooth(xy[,2])},
                "kernel"={xy[,2]<-ksmooth(xy[,1],xy[,2],bandwidth=smooth,x.points=xy[,1])[[2]]}
            )
            xy[,2] <- ifelse(xy[,2]>1,1,ifelse(xy[,2]<0,0,xy[,2])) # bounding p in [0,1]
            
            #            if(is.null(empICC$type)) empICC$type <- "p"
            #            if(is.null(empICC$pch)) empICC$pch <- 1
            #            if(is.null(empICC$col)) empICC$col <- "black"
            #            if(is.null(empICC$lty)) empICC$lty <- "solid"
            
            
            # confidence intervals for empirical ICC
            if(!is.null(empCI)) {
                # functions from prop.test()
                p.L <- function(x, n, alpha) {
                    if (x <= 0) 0 else qbeta(alpha, x, n - x + 1)}
                p.U <- function(x, n, alpha) {
                    if (x >= n) 1 else qbeta(1 - alpha, x + 1, n - x)}
                CINT <- function(x, n, conf.level){
                    alpha <- (1 - conf.level)/2
                    c(p.L(x,n, alpha), p.U(x,n, alpha))
                }
                
                if(is.null(empCI$clevel)) empCI$clevel <- 0.95
                #               if(is.null(empCI$col)) empCI$col <- "red"
                #               if(is.null(empCI$lty)) empCI$lty <- "dotted"
                
                
                cyf<-cbind(xy[,2] * nn, nn)
                cy<-apply(cyf,1,function(x) CINT(x[1],x[2],empCI$clevel))
                ### apply(cbind(xy[,1],t(cy)),1,function(x)segments(x[1],x[2],x[1],x[3],lty=empCI$lty,col=empCI$col))
            }
            
            #################################################################
            ## plots the point estimates of the empirical ICC
            # lines(xy[,1],xy[,2],type=empICC$type, pch=empICC$pch, col=empICC$col, lty=empICC$lty, ...)
            ###
            
            len <- length(xy[,1])
            mikPlotData <- rbind(mikPlotData,
                data.frame(
                    ICC = factor(rep("Empirical", len,
                        levels = c("Empirical", "Model"))),
                    Item = rep(textlab[i], len),
                    Theta = xy[,1], Probability = xy[,2]))
        }
    }
    
    myplot <- ggplot(mikPlotData,
        aes(Theta, Probability, colour = ICC)) +
        facet_wrap(~Item) +
        geom_line() +
        xlab(xlab) + ylab(ylab)
    if (emp.plot) {
        onlyEmp <- mikPlotData[as.character(mikPlotData$ICC) == "Empirical",]
        myplot <- myplot +
            geom_point(data = onlyEmp, size = 4,
                shape = 21,
                fill = "white")
    }
    print(myplot)
}

# Item Classification -----------------------------------------------------

#' Associate Text with Variables
#'
#' \code{setItemText} can be used to associate text (usually the item stem or a description of it) with the variables (items) in a data frame.
#'
#' @param x a data frame
#' @param items either a character vector containing the descriptions of the items (columns in the data frame) or the name of a file containing the descriptions for the items. If a file name is given, each of the descriptions must occupy one line and each item in the data frame must have a description.
#'
#' @details Technically, \code{setItemText} sets the \code{item.text} attribute for the variables (items) in a data frame, which corresponds to "variable labels" known form other statistical systems. This text is then displayed along with the variable name in functions such as \code{\link{classifyItems}} as a mnemonic for the content of the item.
#'
#' The text can be specified either as a character vector of the same length as the number of the columns in the data frame or in a text file that contains the descriptions of the items. The file is read via \code{\link{read.table}}, with the separator set to a newline character (i.e., \code{"\n"}) Consequently, each description must occupy exactly one physical line (which may, of course, span several display lines). The number of descriptions in the file and the number of items must be the same.
#'
#' Notice that attributes are lost when data frames are subsetted via \code{\link{subset}}. For preserving the \code{item.text} attribute, \code{\link[dplyr]{select}} or \code{\link[dplyr]{filter}} can be used instead of \code{\link{subset}}.
#'
#' @return A data frame with the \code{item.text} attribute set for variable.
#'
#' @seealso \code{\link{getItemText}} for retrieving \code{item.text} attributes.
#'
#' @author Michael Hock \email{michael.hock@@uni-bamberg.de}
#'
#' @export
#'
setItemText <- function(x, items = NULL) {
    if (!is.data.frame(x)) stop("x must be a data frame.")
    if (is.null(items)) stop("no items supplied")
    if(length(items) > 1) {
        if(!is.character(items) || length(items) != ncol(x))
            stop("'items' must be a character vector of the same length as the number of columns in x or the name of a text file")
    } else {
        if (!file.exists(items)) stop ("specified file does not exist")
        items <- read.table(items, sep="\n", as.is=TRUE)[,1]
        if (length(items) != ncol(x)) stop(paste0("number of items and columns in data frame do not match. Number of items =", length(items), ", number of columns =", ncol(x)))
    }
    for (i in 1:ncol(x))
        attr(x[[i]], "item.text") <- items[i]
    invisible(x)
}

print.itemText <- function(x, ...) {
    print(data.frame(Item = x), right = FALSE)
}

#' Return the Labels Associated with Items
#'
#' Return the labels associated with items in a data frame.
#'
#' @param x a data frame
#'
#' @return a character vector of class 'itemText' containing the labels of the items
#'
#' @author Michael Hock \email{michael.hock@@uni-bamberg.de}
#'
#' @export
getItemText <- function(x) {
    if (!is.data.frame(x)) stop("x must be a data frame")
    itemtext <- sapply(X = x, FUN = attr, which = "item.text")
    if (is.list(itemtext)) return(NULL)
    names(itemtext) <- colnames(x)
    class(itemtext) <- c("itemText", "character")
    return(itemtext)
}

#' Principal Components and Exploratory Factor Analysis
#'
#' Perform principal components or exploratory factor analysis of a set of variables. This is a wrapper around psych::principal, psych::fa, and psych::irt.fa.
#' 
#' @param x a data frame of variables to be analyzed
#' @param nfactors (integer) number of factors to be extracted
#' @param rotate (char) rotation
#' @param fm factoring (char) method. May be any method allowed for psych::fa or "principal" for principal components analysis.
#' @param polychor (logical) use polychoric correlatons (via psych::irt.fa)?
#' @param return.res (logical) if TRUE, return output as a list instead of simply printing the output. This is used by Iana to render the output via renderTable().
#'
#' @return if \code{return.res} is TRUE, a list with components \code{res} (the results of factor analysis) and \code{stats} (the fit statistics)
#' @author Michael Hock \email{michael.hock@@uni-bamberg.de}
#'
#' @export
#
factoranalysis <- function(x, nfactors, rotate = "oblimin", fm = "minres",
    polychor = FALSE, return.res = FALSE) {
    loadNamespace("GPArotation") ### needed for oblimin and some other
    ### rotations in psych::principal
    if (fm == "principal") {
        q <- psych::principal(x, nfactors, rotate = rotate)
        if (!return.res) cat("PRINCIPAL COMPONENTS ANALYSIS\n")
    } else if (polychor) {
        q <- psych::irt.fa(x, nfactors, fm = fm,
            rotate = rotate, plot = FALSE, 
            sort = FALSE)$fa
        if (!return.res) cat("FACTOR ANALYSIS OF POLYCHORIC CORRELATIONS\n")
    } else {
        q <- psych::fa(x, nfactors, fm = fm, rotate = rotate)
        if (!return.res) cat("FACTOR ANALYSIS\n")
    }
    
    if (!return.res) {
        cat("\nMethod:  ", fm)
        cat("\nRotation:", q$rotation)
        cat("\nN:       ", q$n.obs, "\n")
        cat("\nFIT STATISTICS\n\n")
    }
    
    # BIC = chi2 + ln(N)[k(k + 1)/2 - df]
    if (fm == "principal") {
        Statistic = c("Chi-Square", "Degrees of Freedom", "p")
        Value <- c(q$STATISTIC, q$dof, q$PVAL)
    } else {
        Statistic = c("Chi-Square", "Degrees of Freedom", "p",
            "Tucker-Lewis-Index (NNFI)",
            "RMSR",
            "SRMR",
            "RMSEA", "RMSEA Lower Bound (90% CI)", "RMSEA Upper Bound (90% CI)",
            "BIC", "SABIC",
            "Maximum Absolute Residual",
            "Kaiser-Meyer-Olkin (KMO) Factor Adequacy")
        p = nrow(q$residual)
        ### p-1 -> RMSR
        srmr = sqrt( sum( (q$residual[upper.tri(q$residual)])^2 ) / (p * (p+1) / 2) )
        Value <- c(q$STATISTIC, q$dof, q$PVAL,
            q$TLI,
            q$rms,
            srmr,
            q$RMSEA[1], q$RMSEA[2], q$RMSEA[3],
            q$BIC, q$SABIC,
            max(abs(q$residual[upper.tri(q$residual)])),
            KMO(x)$MSA
        )
    }
    Value <- as.character(round(Value, 3))
    if (!is.na(q$PVAL)) {
        if(q$PVAL < .001) Value[3] <- "< .001"
    }
    if (length(Statistic) != length(Value)) {
        stats <- data.frame(Message = "Fit statistics could not be computed.")
    } else {
        stats <- data.frame(Statistic, Value)
    }        
    if (!return.res) {
        print(stats, row.names = FALSE)
        if (fm != "principal") {
            cat("\nRMSEA bounds are for a 90% confidence interval\nKMO is based on Pearson correlations\n")
        }
        return(q)
    } else {
        list(res = q, stats = stats)
    }
}

#' Classify Items
#'
#' Automatically classify the items in a data frame.
#'
#' @param fm a factor model fitted by psych::fa or principal components computed with psych::principal
#' @param Df a data frame containing the items
#' @param min.loading minimum loading of an item to be considered a marker of a factor
#' @param max.loading maximum loading of an item on a secondary factor (i.e., the factor on which the items has its second highest loading) to be considered a marker for the primary factor (i.e., the factor on which the items has its highest loading)
#' @param max.complexity maximum complexity of an item to be considered a marker item. This is only used for factor models (not for principal components)
#' @param itemlength trim item text to given number characters (0 = automatic trimming)
#' @param digits number of digits used in the output
#' @param Df.name name of data frame
#' @param return.res (logical) if TRUE, return output as a list of data frames instead of simply printing the output. This is used by Iana to render the output via renderTable().
#'
#' @author Michael Hock \email{michael.hock@@uni-bamberg.de}
#'
#' @export
### Needs: getItemText from iana, str_trim from stringr

classifyItems <- function(fm, Df, min.loading = 0.4, max.loading = 0.3, max.complexity = 2, itemlength = 0, digits = 2, Df.name = deparse(substitute(Df)), return.res = FALSE) {
    
    if(!inherits(fm, "fa") && !inherits(fm, "principal") ) 
        stop("fm was not computed with psych::fa oder psych::principal")
    
    ### Todo: is Df needed? We now have the names of the items in attr
    ### itemText
    
    max2 <- function(x) {
        y <- sort(x, decreasing=TRUE)
        y[2]
    }
    
    # for irt.fa
    if (exists("fa", fm)) fm <- fm$fa
    
    # These will be rounded below...
    lmat <- fm$loadings
    communality <- fm$communality
    # principal does not compute complexity
    if(inherits(fm, "fa")) complexity <- fm$complexity
    else complexity <- (rep(-1, length(communality)))
    
    F <- apply(abs(lmat), 1, which.max)
    max.absload <- apply(abs(lmat), 1, max)
    max.absload2 <- apply(abs(lmat), 1, max2)
    max.absload2[is.na(max.absload2)] <- 0
    marker <- ifelse(
        (max.absload > min.loading) &
            (max.absload2 < max.loading) &
            (complexity < max.complexity),
        "*", " ")
    varnames <- rownames(lmat)
    lmat <- round(unclass(lmat), digits)
    communality <- round(communality, digits)
    if(inherits(fm, "fa")) complexity <- round(complexity, digits)
    
    # Loadings 
    
    if (!return.res) {
        cat("\nLOADINGS\n\n")
        cat("M = Marker, a_j = Factor loadings\n")
        cat("h2 = Communality, Cmpl = Factorial complexity\n")
    }
    ilength <- getOption("width") -  max(nchar(varnames)) - 3 -
        (ncol(lmat) + 2) * (digits + 4)
    items <- getItemText(Df)

    if (is.null(items)) {
        shortitems <- rep("-", length(communality))
        if (!return.res) {
            cat("\nHint: You can associate the text of the items with the columns \nof the data frame with 'setItemText()'. \nThis would allow to produce an item table.\n")
        }
    } else {
        items <- stringr::str_trim(items)
        if (itemlength == 0) {
            shortitems <- substr(items, 1, ilength)
        } else {
            maxilen <- max(nchar(items))
            if (itemlength > maxilen) itemlength <- maxilen
            shortitems <- substr(items, 1, itemlength)
        }
    }
    
    if(inherits(fm, "principal")) complexity <- (rep("-", length(complexity)))
    
    x <- data.frame(F, marker, lmat, communality, complexity, shortitems)
    x <- x[order(x$F, -max.absload), ]
    colnames(x) <- c(
        "F",
        "M",
        paste0("a_", 1:ncol(lmat)),
        "h2",
        "Cmpl",
        "Item"
    )
    if (return.res) {
        loadingsDf <- x
    } else {
        xl <- split(x, x$F)
        for (i in 1:length(xl)) {
            cat("\nFactor", i, "\n")
            print(xl[[i]][,-1], right = FALSE)
        }
    }
    
    # Markers
    
    mcount <- sum(ifelse(marker == "*", 1, 0))
    if (!return.res) {
        cat("\n", mcount, "of", ncol(Df), "Items were classified as markers.\n")
    }
    
    # Factor Correlations
    
    if (exists("Phi", fm)) {
        corrs <- round(fm$Phi, digits)
        rownames(corrs) <- colnames(corrs) <- paste0("F", 1:ncol(lmat))
        if (!return.res) {
            cat("\nFACTOR CORRELATIONS\n\n")
            print(corrs)
        }
    } else {
        corrs <- diag(ncol(lmat))
        rownames(corrs) <- colnames(corrs) <- paste0("F", 1:ncol(lmat))
    }
    
    # Code snippet
    
    code <- "\n"
    for (i in (1:ncol(lmat))) {
        selected <- row.names(x[(x$F == i) & (x$M == "*"), ])
        selected <- paste(selected, collapse = ", ")
        code <- paste0(code, "F", i, " <- dplyr::select(", Df.name,
            ", ", selected, ")\n")
    }
    if (!return.res) {
        cat("\nNOTE\n\nThe following code may be used to create data frames of items\nassigned to the factors. Some items may need to be inverted.\n", code)
    } else {
        list(factorloadings = loadingsDf, 
            factorcorrelations = corrs, 
            factorcode = str_trim(code))
    }
}


#' Classify Items for MIRT
#'
#' Automatically classify the items in a data frame.
#'
#' @param fm a factor model fitted by mirt::mirt
#' @param Df a data frame containing the items
#' @param min.loading minimum loading of an item to be considered a marker of a factor
#' @param max.loading maximum loading of an item on a secondary factor (i.e., the factor on which the items has its second highest loading) to be considered a marker for the primary factor (i.e., the factor on which the items has its highest loading)
#' @param max.complexity maximum complexity of an item to be considered a marker item
#' @param itemlength trim item text to given number characters (0 = automatic trimming)
#' @param digits number of digits used in the output
#' @param Df.name name of data frame
#'
#' @author Michael Hock \email{michael.hock@@uni-bamberg.de}
#'
#' @export

classifyItemsMirt <- function(fm, Df, min.loading = 0.4, max.loading = 0.3, max.complexity = 100, itemlength = 0, digits = 2, Df.name = deparse(substitute(Df))) {
    
    max2 <- function(x) {
        y <- sort(x, decreasing=TRUE)
        y[2]
    }
    
    fm <- summary(fm)
    # These will be rounded below...
    lmat <- fm$rotF
    communality <- fm$h2
    complexity <- (rowSums(lmat^2))^2 / rowSums(lmat^4) ### check me
    
    F <- apply(abs(lmat), 1, which.max)
    max.absload <- apply(abs(lmat), 1, max)
    max.absload2 <- apply(abs(lmat), 1, max2)
    max.absload2[is.na(max.absload2)] <- 0
    marker <- ifelse(
        (max.absload > min.loading) &
            (max.absload2 < max.loading) &
            (complexity < max.complexity),
        "*", " ")
    varnames <- colnames(Df)
    lmat <- round(unclass(lmat), digits)
    communality <- round(communality, digits)
    complexity <- round(complexity, digits)
    
    cat("\nLOADINGS\n\n")
    #cat("====================\n")
    cat("M = Marker, a_j = Factor loadings\n")
    cat("h2 = Communality, Cmpl = Factorial complexity\n")
    
    ilength <- getOption("width") -  max(nchar(varnames)) - 3 - (ncol(lmat) + 2) * (digits + 4)
    items <- getItemText(Df)
    
    if (is.null(items)) {
        shortitems <- rep("-", length(communality))
        cat("\nHint: You can associate the text of the items with the columns \nof the data frame with 'setItemText()'. \nThis would allow to produce an item table.\n")
    } else {
        items <- stringr::str_trim(items)
        if (itemlength == 0) {
            shortitems <- substr(items, 1, ilength)
        }
        else {
            maxilen <- max(nchar(items))
            if (itemlength > maxilen) itemlength <- maxilen
            shortitems <- substr(items, 1, itemlength)
        }
    }
    
    x <- data.frame(F, marker, lmat, communality, complexity, shortitems)
    rownames(x) <- varnames ######
    x <- x[order(x$F, -max.absload), ]
    colnames(x) <- c(
        "F",
        "M",
        paste0("a_", 1:ncol(lmat)),
        "h2",
        "Cmpl",
        "Item"
    )
    xl <- split(x, x$F)
    for (i in 1:length(xl)) {
        cat("\nFactor", i, "\n")
        print(xl[[i]][,-1], right = FALSE)
    }
    
    # Markers
    mcount <- sum(ifelse(marker == "*", 1, 0))
    cat("\n", mcount, "of", ncol(Df), "Items were classified as markers.\n")
    
    # Factor Correlations
    
    if (exists("Phi", fm)) {
        corrs <- round(fm$Phi, digits)
        rownames(corrs) <- colnames(corrs) <- paste0("F", 1:ncol(lmat))
        cat("\nFACTOR CORRELATIONS\n\n")
        print(corrs)
    }
    
    # Factors
    
    cat("\nFACTORS\n\n")
    # use fm$rotF, not lmat, because lmat now contains the rounded values
    colnames(fm$rotF) <- paste0("F", 1:ncol(fm$rotF))
    ssload <- colSums(fm$rotF^2)
    expl.var <- ssload / nrow(fm$rotF)
    cumsum.expl.var <- cumsum(expl.var)
    tab <- rbind(ssload, expl.var, cumsum.expl.var)
    row.names(tab) <- c("Sum of squared loadings", "Proportion Variance", "Cumulative Variance")
    print(round(tab, digits))
    
    # Code snippet
    
    cat("\nNOTE\n\nThe following code may be used to create data frames of items\nassigned to the factors. Some items may need to be inverted.\n\n")
    
    for (i in (1:ncol(lmat))) {
        selected <- rownames(x[(x$F == i) & (x$M == "*"), ])
        selected <- paste(selected, collapse = ", ")
        cat("F", i, " <- dplyr::select(", Df.name, ", ", selected, ")\n", sep = "")
    }
}
mihock/iana documentation built on Jan. 14, 2024, 8:58 p.m.