MIRT: Multidimensional Item Response Theory

Documented in DIF

#' Differential item functioning statistics
#'
#' This function runs the Wald and likelihood-ratio approaches for testing differential
#' item functioning (DIF). This is primarily a convenience wrapper to the
#' \code{\link{multipleGroup}} function for performing standard DIF procedures. Independent
#' models can be estimated in parallel by defining a parallel object with \code{\link{mirtCluster}},
#' which will help to decrease the runtime. For best results, the baseline model should contain
#' a set of 'anchor' items and have freely estimated hyper-parameters in the focal groups.
#'
#' Generally, the precomputed baseline model should have been
#' configured with two estimation properties: 1) a set of 'anchor' items,
#' where the anchor items have various parameters that have been constrained to be equal
#' across the groups, and 2) contain freely estimated latent mean and variance terms in
#' all but one group (the so-called 'reference' group).
#' These two properties help to fix the metric of the groups so that
#' item parameter estimates do not contain latent distribution characteristics.
#'
#' @aliases DIF
#' @param MGmodel an object returned from \code{\link{multipleGroup}} to be used as the reference
#'   model
#' @param which.par a character vector containing the parameter names which will be inspected for
#'   DIF
#' @param Wald logical; perform Wald tests for DIF instead of likelihood ratio test?
#' @param items2test a numeric vector, or character vector containing the item names, indicating
#'   which items will be tested for DIF. In models where anchor items are known, omit them from
#'   this vector. For example, if items 1 and 2 are anchors in a 10 item test, then
#'   \code{items2test = 3:10} would work for testing the remaining items (important to remember
#'   when using sequential schemes)
#' @param return_models logical; return estimated model objects for further analysis?
#'   Default is FALSE
#' @param scheme type of DIF analysis to perform, either by adding or dropping constraints across
#'   groups. These can be:
#' \describe{
#'   \item{'add'}{parameters in \code{which.par} will be constrained each item one at a time for
#'     items that are specified in \code{items2test}. This is beneficial when examining DIF from a
#'     model with parameters freely estimated across groups, and when inspecting differences via
#'     the Wald test}
#'   \item{'drop'}{parameters in \code{which.par} will be freely estimated for items that are
#'     specified in \code{items2test}. This is useful when supplying an overly restrictive model
#'     and attempting to detect DIF with a slightly less restrictive model}
#'   \item{'add_sequential'}{sequentially loop over the items being tested, and at the end of the
#'     loop treat DIF tests that satisfy the \code{seq_stat} criteria as invariant. The loop is
#'     then re-run on the remaining invariant items to determine if they are now displaying DIF in
#'     the less constrained model, and when no new invariant item is found the algorithm stops and
#'     returns the items that displayed DIF}
#'   \item{'drop_sequential'}{sequentially loop over the items being tested, and at the end of the
#'     loop treat items that violate the \code{seq_stat} criteria as demonstrating DIF. The loop is
#'     then re-run, leaving the items that previously demonstrated DIF as variable across groups,
#'     and the remaining test items that previously showed invariance are re-tested. The algorithm
#'     stops when no more items showing DIF are found and returns the items that displayed DIF}
#' }
#' @param seq_stat select a statistic to test for in the sequential schemes. Potential values are
#'   (in descending order of power) \code{'AIC'}, \code{'AICc'}, \code{'SABIC'}, and \code{'BIC'}.
#'   If a numeric value is input that ranges between 0 and 1, the 'p' value will be tested
#'   (e.g., \code{seq_stat = .05} will test for the difference of p < .05 in the add scheme,
#'   or p > .05 in the drop scheme), along with the specified \code{p.adjust} input
#' @param max_run a number indicating the maximum number of cycles to perform in sequential
#'   searches. The default is to perform search until no further DIF is found
#' @param plotdif logical; create item plots for items that are displaying DIF according to the
#'   \code{seq_stat} criteria? Only available for 'add' type schemes
#' @param type the \code{type} of plot argument passed to \code{plot()}. Default is 'trace', though
#'   another good option is 'infotrace'. For ease of viewing, the \code{facet_item} argument to
#'   mirt's \code{plot()} function is set to \code{TRUE}
#' @param p.adjust string to be passed to the \code{\link{p.adjust}} function to adjust p-values.
#'   Adjustments are located in the \code{adj_pvals} element in the returned list
#' @param verbose logical print extra information to the console?
#' @param ... additional arguments to be passed to \code{\link{multipleGroup}} and \code{plot}
#'
#' @author Phil Chalmers \email{rphilip.chalmers@@gmail.com}
#' @references
#' Chalmers, R., P. (2012). mirt: A Multidimensional Item Response Theory
#' Package for the R Environment. \emph{Journal of Statistical Software, 48}(6), 1-29.
#' \doi{10.18637/jss.v048.i06}
#'
#' Chalmers, R. P., Counsell, A., and Flora, D. B. (2016). It might not
#'   make a big DIF: Improved Differential Test Functioning statistics that account for
#'   sampling variability. \emph{Educational and Psychological Measurement, 76}, 114-140.
#'   \doi{10.1177/0013164415584576}
#' @keywords DIF
#' @seealso \code{\link{multipleGroup}}
#, \code{\link{DTF}}
#' @export DIF
#' @examples
#' \dontrun{
#'
#' #simulate data where group 2 has a smaller slopes and more extreme intercepts
#' set.seed(12345)
#' a1 <- a2 <- matrix(abs(rnorm(15,1,.3)), ncol=1)
#' d1 <- d2 <- matrix(rnorm(15,0,.7),ncol=1)
#' a2[1:2, ] <- a1[1:2, ]/3
#' d1[c(1,3), ] <- d2[c(1,3), ]/4
#' head(data.frame(a.group1 = a1, a.group2 = a2, d.group1 = d1, d.group2 = d2))
#' itemtype <- rep('2PL', nrow(a1))
#' N <- 1000
#' dataset1 <- simdata(a1, d1, N, itemtype)
#' dataset2 <- simdata(a2, d2, N, itemtype, mu = .1, sigma = matrix(1.5))
#' dat <- rbind(dataset1, dataset2)
#' group <- c(rep('D1', N), rep('D2', N))
#'
#' #### no anchors, all items tested for DIF by adding item constrains one item at a time.
#' # define a parallel cluster (optional) to help speed up internal functions
#' mirtCluster()
#'
#' # Information matrix with Oakes' identity (not controlling for latent group differences)
#' # NOTE: Without properly equating the groups the following example code is not testing for DIF,
#'      # but instead reflects a compbination of DIF + latent-trait distribution effects
#' model <- multipleGroup(dat, 1, group, SE = TRUE)
#'
#' #test whether adding slopes and intercepts constraints results in DIF. Plot items showing DIF
#' resulta1d <- DIF(model, c('a1', 'd'), plotdif = TRUE)
#' resulta1d
#'
#' #same as above, but using Wald tests with Benjamini & Hochberg adjustment
#' resulta1dWald <- DIF(model, c('a1', 'd'), Wald = TRUE, p.adjust = 'fdr')
#' resulta1dWald
#' round(resulta1dWald$adj_pvals, 4)
#'
#' #test whether adding only slope constraints results in DIF for all items
#' resulta1 <- DIF(model, 'a1')
#' resulta1
#'
#' #following up on resulta1d, to determine whether it's a1 or d parameter causing DIF
#' (a1s <- DIF(model, 'a1', items2test = 1:3))
#' (ds <- DIF(model, 'd', items2test = 1:3))
#'
#' ####
#' # using items 4 to 15 as anchors to test for DIF after adjusting for latent-trait differences
#' itemnames <- colnames(dat)
#' model_anchor <- multipleGroup(dat, model = 1, group = group,
#'   invariance = c(itemnames[4:15], 'free_means', 'free_var'))
#' anchor <- DIF(model_anchor, c('a1', 'd'), items2test = 1:3)
#' anchor
#'
#' ### drop down approach (freely estimating parameters accross groups) when
#' ### specifying a highly constrained model with estimated latent parameters
#' model_constrained <- multipleGroup(dat, 1, group,
#'   invariance = c(colnames(dat), 'free_means', 'free_var'))
#' dropdown <- DIF(model_constrained, 'd', scheme = 'drop')
#' dropdown
#'
#' ### sequential searches using SABIC as the selection criteria
#' # starting from completely different models
#' model <- multipleGroup(dat, 1, group)
#' stepup <- DIF(model, c('a1', 'd'), scheme = 'add_sequential')
#' stepup
#'
#' #step down procedure for highly constrained model
#' model <- multipleGroup(dat, 1, group, invariance = itemnames)
#' stepdown <- DIF(model, c('a1', 'd'), scheme = 'drop_sequential')
#' stepdown
#' }
DIF <- function(MGmodel, which.par, scheme = 'add', items2test = 1:extract.mirt(MGmodel, 'nitems'),
                seq_stat = 'SABIC', Wald = FALSE, p.adjust = 'none', return_models = FALSE,
                max_run = Inf, plotdif = FALSE, type = 'trace', verbose = TRUE, ...){

    loop_test <- function(item, model, which.par, values, Wald, itemnames, invariance, drop,
                          return_models, ...)
    {
        constrain <- model@Model$constrain
        parnum <- list()
        for(i in seq_len(length(which.par)))
            parnum[[i]] <- values$parnum[values$name == which.par[i] &
                                             values$item == itemnames[item]]
        for(i in length(parnum):1L)
            if(!length(parnum[[i]])) parnum[[i]] <- NULL
        if(!length(parnum))
            stop('Item ', item, ' does not contain any of the parameters defined in which.par.
                 Consider removing it from the item2test input or adding relevant parameters
                 to which.par', call.=FALSE)
        if(Wald){
            wv <- wald(model)
            infoname <- names(wv)
            L <- matrix(0, length(parnum), length(infoname))
            for(i in seq_len(length(parnum))){
                L[i, paste0(which.par[i], '.', parnum[[i]][1L]) == infoname] <- 1
                L[i, paste0(which.par[i], '.', parnum[[i]][2L]) == infoname] <- -1
            }
            res <- wald(model, L)
            return(res)
        }
        if(drop){
            for(j in seq_len(length(parnum))){
                for(i in length(constrain):1L){
                    if(all(parnum[[j]] == sort(constrain[[i]])))
                        constrain[[i]] <- NULL
                }
            }
        } else {
            for(i in seq_len(length(parnum)))
                constrain[[length(constrain) + 1L]] <- parnum[[i]]
        }
        newmodel <- multipleGroup(model@Data$data, model@Model$model, group=model@Data$group,
                                  invariance = invariance, constrain=constrain,
                                  itemtype = model@Model$itemtype, verbose = FALSE, ...)
        aov <- anova(newmodel, model, verbose = FALSE)
        attr(aov, 'parnum') <- parnum
        if(return_models) aov <- newmodel
        return(aov)
    }

    if(missing(MGmodel)) missingMsg('MGmodel')
    if(missing(which.par)) missingMsg('which.par')
    if(!any(sapply(MGmodel@ParObjects$pars, function(x, pick) x@ParObjects$pars[[pick]]@est,
                   pick = MGmodel@Data$nitems + 1L)))
        message('No hyper-parameters were estimated in the DIF model. For effective
                \tDIF testing, freeing the focal group hyper-parameters is recommended.')
    bfactorlist <- MGmodel@Internals$bfactor
    if(!is.null(bfactorlist$Priorbetween[[1L]]))
        stop('bifactor models are currently not supported in this function', call.=FALSE)
    itemnames <- colnames(MGmodel@Data$data)
    if(!any(scheme %in% c('add', 'drop', 'add_sequential', 'drop_sequential')))
        stop('scheme input is not valid', call.=FALSE)
    if(return_models){
        if(Wald)
            stop('return_models argument only valid for likelihood ratio tests', call.=FALSE)
    }
    if(Wald){
        if(scheme != 'add')
            stop('Wald tests are only appropriate when add scheme is used', call.=FALSE)
        if(!MGmodel@Options$SE)
            stop('Information matrix was not calculated', call.=FALSE)
    }
    if(plotdif && any(scheme %in% c('drop', 'drop_sequential')))
        stop('plotdif not supported for dropping schemes', call.=FALSE)
    pval <- 0
    if(is.numeric(seq_stat)){
        pval <- seq_stat
        seq_stat <- 'p'
    } else if(!any(seq_stat %in% c('p', 'AIC', 'AICc', 'SABIC', 'BIC'))){
        stop('Invalid seq_stat input', call.=FALSE)
    }
    if(is.character(items2test)) items2test <- which(items2test %in% itemnames)
    invariance <- MGmodel@Model$invariance
    values <- mod2values(MGmodel)
    drop <- scheme == 'drop' || scheme == 'drop_sequential'
    invariance <- MGmodel@Model$invariance[MGmodel@Model$invariance %in%
                                         c('free_means', 'free_var')]
    if(!length(invariance)) invariance <- ''
    res <- myLapply(X=items2test, FUN=loop_test, model=MGmodel, which.par=which.par, values=values,
                    Wald=Wald, drop=drop, itemnames=itemnames, invariance=invariance,
                    return_models=return_models, ...)
    names(res) <- itemnames[items2test]
    if(scheme %in% c('add_sequential', 'drop_sequential')){
        lastkeep <- rep(TRUE, length(res))
        updatedModel <- MGmodel
        run_number <- 2L
        if(run_number > max_run)
            stop('max_run number must be greater than 1 for sequential searches', call.=FALSE)
        while(TRUE){
            statdiff <- do.call(c, lapply(res, function(x, stat){
                if(stat == 'p') return(x[2L, 'p'])
                return(x[1L, stat] - x[2L, stat])
                }, stat = seq_stat))
            if(seq_stat == 'p'){
                statdiff <- p.adjust(statdiff, p.adjust)
                if(scheme == 'drop_sequential')
                    keep <- statdiff < pval
                else keep <- statdiff > pval
            } else {
                if(scheme == 'drop_sequential')
                    keep <- statdiff > 0
                else keep <- statdiff < 0
            }
            if(all(keep == lastkeep)) break
            lastkeep <- keep
            if(verbose)
                cat(sprintf('\rChecking for DIF in %d more items',
                            ifelse(drop, sum(keep), sum(!keep))))
            if(ifelse(drop, sum(keep), sum(!keep)) == 0) break
            constrain <- updatedModel@Model$constrain
            for(j in seq_len(length(keep))){
                parnum <- list()
                for(i in seq_len(length(which.par)))
                    parnum[[i]] <- values$parnum[values$name == which.par[i] &
                                                     values$item == itemnames[j]]
                for(i in length(parnum):1L)
                    if(!length(parnum[[i]])) parnum[[i]] <- NULL
                if(!length(parnum)) break
                if(keep[j] && !drop){
                    for(i in 1L:length(parnum))
                        constrain[[length(constrain) + 1L]] <- parnum[[i]]
                } else if(!keep[j] && drop){
                    for(j in seq_len(length(parnum))){
                        for(i in length(constrain):1L){
                            if(all(parnum[[j]] == sort(constrain[[i]])))
                                constrain[[i]] <- NULL
                        }
                    }
                }
            }
            updatedModel <- multipleGroup(MGmodel@Data$data, MGmodel@Model$model,
                                          group=MGmodel@Data$group, itemtype=MGmodel@Model$itemtype,
                                          invariance = invariance, constrain=constrain,
                                          verbose = FALSE, ...)
            pick <- !keep
            if(drop) pick <- !pick
            tmp <- myLapply(X=items2test[pick], FUN=loop_test, model=updatedModel,
                            which.par=which.par, values=values, Wald=Wald, drop=drop,
                            itemnames=itemnames, invariance=invariance, return_models=FALSE, ...)
            names(tmp) <- itemnames[items2test][pick]
            for(i in names(tmp))
                res[[i]] <- tmp[[i]]
            if(run_number == max_run) break
            run_number <- run_number + 1L
        }
        if(verbose)
            cat('\nComputing final DIF estimates...\n')
        pick <- !keep
        if(drop) pick <- !pick
        res <- myLapply(X=items2test[pick], FUN=loop_test, model=MGmodel,
                        which.par=which.par, values=values, Wald=Wald, drop=drop,
                        itemnames=itemnames, invariance=invariance, return_models=return_models,
                        ...)
        names(res) <- itemnames[items2test][pick]
    }

    for(i in seq_len(length(res)))
        attr(res[[i]], 'parnum') <- NULL
    if(return_models) return(res)
    if(p.adjust != 'none'){
        if(Wald){
            ps <- do.call(c, lapply(res, function(x) x$p))
        } else {
            ps <- do.call(c, lapply(res, function(x, stat){
                if(stat == 'p') return(x[2L, 'p'])
                return(x[1L, stat] - x[2L, stat])
            }, stat = 'p'))
        }
        ps <- p.adjust(ps, p.adjust)
        res$adj_pvals <- ps
    }
    if(plotdif && any(scheme %in% c('add', 'add_sequential'))){
        if(seq_stat != 'p'){
            statdiff <- do.call(c, lapply(res, function(x, stat){
                if(stat == 'p') return(x[2L, 'p'])
                return(x[1L, stat] - x[2L, stat])
            }, stat = seq_stat))
            keep <- statdiff < 0
        } else {
            statdiff <- res$adj_pvals
            if(is.null(statdiff)){
                if(Wald){
                    statdiff <- do.call(c, lapply(res, function(x) x$p))
                } else {
                    statdiff <- do.call(c, lapply(res, function(x, stat){
                        if(stat == 'p') return(x[2L, 'p'])
                        return(x[1L, stat] - x[2L, stat])
                    }, stat = 'p'))
                }
            }
            statdiff <- p.adjust(statdiff, p.adjust)
            keep <- statdiff > pval
        }
        which.item <- which(!keep)
        if(length(which.item)){
            print(plot(MGmodel, type = type, which.items=which.item, facet_items=TRUE, ...))
        } else {
            message('No DIF items were detected for plotting.')
        }
    }
    return(res)
}