#' (Generalized) Simultaneous Item Bias Test (SIBTEST)
#'
#' Classical test theory approach to detecting unidirectional and bidirectional (with one
#' crossing location) DIF. This family of statistics is intended for unidimensional tests,
#' and applies a regression-corrected matched-total score approach to quantify the response
#' bias between two or more groups. Can be used for DIF, DBF, and DTF testing with two or more
#' discrete groups.
#'
#' SIBTEST is similar to the Mantel-Haenszel approach for detecting DIF but uses a regression
#' correction based on the KR-20/coefficient alpha reliability index to correct the observed
#' differences when the latent trait distributions are not equal.
#' Function supports the standard SIBTEST for dichotomous and polytomous data (compensatory) and
#' supports crossing DIF testing (i.e., non-compensatory/non-uniform) using the asymptotic sampling
#' distribution version of the Crossing-SIBTEST (CSIBTEST) statistic described by
#' Chalmers (2018) and the permutation method described by Li and Stout (1996). This
#' function also supports the multi-group generalizations (GSIBTEST and GCSIBTEST)
#' proposed by Chalmers and Zheng (2023), where users may specify alternative
#' contrast matrices to evaluate specific comparisons between groups as well as
#' perform joint hypothesis tests.
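#'
#' For example, with three groups the default \code{C} argument evaluates to the
#' following two-row contrast matrix (shown here purely as an illustration; any
#' user-supplied \code{C} must contain one column per group):
#'
#' \preformatted{
#' C <- cbind(1, -diag(2))
#' #      [,1] [,2] [,3]
#' # [1,]    1   -1    0
#' # [2,]    1    0   -1
#' }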
#'
#' @param dat integer-based dataset to be tested, containing dichotomous or polytomous responses
#' @param group a (factor) vector indicating group membership
#'   with the same length as the number of rows in \code{dat}
#' @param match_set an integer vector indicating which items to use as the matched set
#'   (i.e., items assumed to contain no DIF). These are analogous to the 'anchor' items used in
#'   likelihood-based methods to locate DIF. If missing, all items other than those in the
#'   \code{suspect_set} will be used
#' @param focal_name name of the focal group; e.g., \code{'focal'}. If not specified then one will be
#'   selected automatically using \code{unique(group)[2]}
#' @param suspect_set an integer vector indicating which items to inspect with SIBTEST. Including only
#'   one value will perform a DIF test, while including more than one will perform a simultaneous
#'   bundle test (DBF); including all non-matched items will perform DTF.
#'   If missing, a simultaneous test using all the items not listed in match_set
#'   will be used (i.e., DTF)
#' @param guess_correction a vector of numbers from 0 to 1 indicating how much to correct the items
#'   for guessing. Its length should be the same as \code{ncol(dat)}
#' @param na.rm logical; remove rows in \code{dat} with any missing values? If \code{TRUE},
#'   rows with missing data will be removed, as well as the corresponding elements in the \code{group}
#'   input
#' @param randomize logical; perform the crossing test for non-compensatory bias
#'   using Li and Stout's (1996) permutation approach? Default is \code{FALSE}, which uses the
#'   ad-hoc mixed degrees of freedom method suggested by Chalmers (2018)
#' @param permute number of permutations to perform when \code{randomize = TRUE}. Default is 1000
#' @param p.adjust.method a character input dictating which \code{method} to use in \code{\link{p.adjust}}
#'   when multiple tests are performed (e.g., when \code{DIF = TRUE}). Default does not apply any
#'   p-value adjustments
#' @param C a contrast matrix to use for pooled testing with more than two groups. Default uses an
#'   effects coding approach, where the last group (last column of the matrix) is treated as the reference
#'   group, and each column is associated with the respective name via \code{unique(group)} (i.e., the first
#'   column is the coefficient for \code{unique(group)[1]}, second column for \code{unique(group)[2]}, and
#'   so on)
#' @param Jmin the minimum number of observations required when splitting the data into focal and
#'   reference groups conditioned on the matched set. Default is 5
#' @param pk_focal logical; use the group weights from the focal group instead of the total
#'   sample? Default is \code{FALSE}, as per Shealy and Stout's recommendation
#' @param correction logical; apply the composite correction for the difference between focal
#'   composite scores using the true-score regression technique? Default is \code{TRUE},
#'   reflecting Shealy and Stout's linear extrapolation method
#' @param DIF logical; should the elements in \code{suspect_set} be treated one at a time
#'   to test for DIF? When \code{TRUE}, all other items are treated as part of the \code{match_set}
#'   unless \code{match_set} is supplied explicitly. Default is \code{FALSE} to allow DBF and DTF tests
#' @param remove_cross logical; remove the subtest information associated with the approximate
#'   crossing location? If \code{TRUE}, this reflects the CSIBTEST definition of Li and Stout (1996);
#'   if \code{FALSE}, this reflects the version of CSIBTEST utilized by Chalmers (2018). Only applicable
#'   in two-group settings (in multi-group settings this is fixed to \code{FALSE})
#' @param pairwise logical; perform pairwise comparisons in multi-group applications?
#' @param details logical; return a data.frame containing the details required to compute SIBTEST?
#' @param plot a character input indicating the type of plot to construct. Options are \code{'none'}
#'   (default), \code{'observed'} for the scaled focal subtest scores against the matched subtest
#'   scores, \code{'weights'} for the proportion weights used (i.e., the proportion of observations at
#'   each matched score), \code{'difference'} for the difference between the scaled focal subtest scores
#'   against the matched subtest scores, and \code{'wdifference'} for the conditional differences multiplied
#'   by each respective weight. Note that the last plot reflects the components used in SIBTEST,
#'   and therefore the sum of these plotted observations will equal the beta coefficient for SIBTEST
#' @param ... additional plotting arguments to be passed
#'
#' @author Phil Chalmers \email{rphilip.chalmers@@gmail.com}
#' @keywords SIBTEST
#' @aliases SIBTEST
#' @export SIBTEST
#'
#' @references
#'
#' Chalmers, R. P. (2018). Improving the Crossing-SIBTEST statistic for
#' detecting non-uniform DIF. \emph{Psychometrika, 83}(2), 376-386.
#'
#' Chalmers, R. P. (2012). mirt: A Multidimensional Item Response Theory
#' Package for the R Environment. \emph{Journal of Statistical Software, 48}(6), 1-29.
#' \doi{10.18637/jss.v048.i06}
#'
#' Chalmers, R. P. & Zheng, G. (2023). Multi-group Generalizations of
#' SIBTEST and Crossing-SIBTEST. \emph{Applied Measurement in Education, 36}(2), 171-191,
#' \doi{10.1080/08957347.2023.2201703}.
#'
#' Chang, H. H., Mazzeo, J. & Roussos, L. (1996). DIF for Polytomously Scored Items: An Adaptation of the
#'   SIBTEST Procedure. \emph{Journal of Educational Measurement, 33}, 333-353.
#'
#' Li, H.-H. & Stout, W. (1996). A new procedure for detection of crossing DIF.
#' \emph{Psychometrika, 61}, 647-677.
#'
#' Shealy, R. & Stout, W. (1993). A model-based standardization approach that separates true
#'   bias/DIF from group ability differences and detects test bias/DTF as well as item bias/DIF.
#'   \emph{Psychometrika, 58}, 159-194.
#'
#' @examples
#'
#' \dontrun{
#'
#' set.seed(1234)
#' n <- 30
#' N <- 500
#' a <- matrix(1, n)
#' d <- matrix(rnorm(n), n)
#' group <- c(rep('reference', N), rep('focal', N*2))
#'
#' ## -------------
#' # groups completely equal
#' dat1 <- simdata(a, d, N, itemtype = 'dich')
#' dat2 <- simdata(a, d, N*2, itemtype = 'dich')
#' dat <- rbind(dat1, dat2)
#'
#' # DIF (all other items as anchors)
#' SIBTEST(dat, group, suspect_set = 6)
#'
#' # Some plots depicting the above tests
#' SIBTEST(dat, group, suspect_set = 6, plot = 'observed')
#' SIBTEST(dat, group, suspect_set = 6, plot = 'weights')
#' SIBTEST(dat, group, suspect_set = 6, plot = 'wdifference')
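#' # conditional differences between the scaled focal and reference subtest scores
#' SIBTEST(dat, group, suspect_set = 6, plot = 'difference')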
#'
#' # Include CSIBTEST with randomization method
#' SIBTEST(dat, group, suspect_set = 6, randomize = TRUE)
#'
#' # remove crossing-location (identical to Li and Stout 1996 definition of CSIBTEST)
#' SIBTEST(dat, group, suspect_set = 6, randomize = TRUE, remove_cross=TRUE)
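#'
#' # return the conditional quantities used to build the statistics
#' SIBTEST(dat, group, suspect_set = 6, details = TRUE)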
#'
#' # DIF (specific anchors)
#' SIBTEST(dat, group, match_set = 1:5, suspect_set = 6)
#' SIBTEST(dat, group, match_set = 1:5, suspect_set = 6, randomize=TRUE)
#'
#' # DBF (all and specific anchors, respectively)
#' SIBTEST(dat, group, suspect_set = 11:30)
#' SIBTEST(dat, group, match_set = 1:5, suspect_set = 11:30)
#'
#' # DTF
#' SIBTEST(dat, group, suspect_set = 11:30)
#' SIBTEST(dat, group, match_set = 1:10) #equivalent
#'
#' # different hyper pars
#' dat1 <- simdata(a, d, N, itemtype = 'dich')
#' dat2 <- simdata(a, d, N*2, itemtype = 'dich', mu = .5, sigma = matrix(1.5))
#' dat <- rbind(dat1, dat2)
#' SIBTEST(dat, group, 6:30)
#' SIBTEST(dat, group, 11:30)
#'
#' # DIF testing with anchors 1 through 5
#' SIBTEST(dat, group, 6, match_set = 1:5)
#' SIBTEST(dat, group, 7, match_set = 1:5)
#' SIBTEST(dat, group, 8, match_set = 1:5)
#'
#' # DIF testing with all other items as anchors
#' SIBTEST(dat, group, 6)
#' SIBTEST(dat, group, 7)
#' SIBTEST(dat, group, 8)
#'
#' ## -------------
#' ## systematic differing slopes and intercepts (clear DTF)
#' dat1 <- simdata(a, d, N, itemtype = 'dich')
#' dat2 <- simdata(a + c(numeric(15), rnorm(n-15, 1, .25)), d + c(numeric(15), rnorm(n-15, 1, 1)),
#'   N*2, itemtype = 'dich')
#' dat <- rbind(dat1, dat2)
#' SIBTEST(dat, group, 6:30)
#' SIBTEST(dat, group, 11:30)
#'
#' # Some plots depicting the above tests
#' SIBTEST(dat, group, suspect_set = 11:30, plot = 'observed')
#' SIBTEST(dat, group, suspect_set = 11:30, plot = 'weights')
#' SIBTEST(dat, group, suspect_set = 11:30, plot = 'wdifference')
#'
#' # DIF testing using valid anchors
#' SIBTEST(dat, group, suspect_set = 6, match_set = 1:5)
#' SIBTEST(dat, group, suspect_set = 7, match_set = 1:5)
#' SIBTEST(dat, group, suspect_set = 30, match_set = 1:5)
#'
#' # test DIF using specific match_set
#' SIBTEST(dat, group, suspect_set = 6:30, match_set = 1:5, DIF=TRUE)
#'
#' # test DIF using all-other-as-anchors method (not typically recommended)
#' SIBTEST(dat, group, suspect_set = 1:30, DIF=TRUE)
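#'
#' # as above, with p-values adjusted across items (any p.adjust() method may be used)
#' SIBTEST(dat, group, suspect_set = 1:30, DIF=TRUE, p.adjust.method = 'fdr')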
#'
#' # randomization method is fairly poor when smaller matched-set used
#' SIBTEST(dat, group, suspect_set = 30, match_set = 1:5, randomize=TRUE)
#' SIBTEST(dat, group, suspect_set = 30, randomize=TRUE)
#'
#' ## ----------------------------------
#' # three group SIBTEST test
#' set.seed(1234)
#' n <- 30
#' N <- 1000
#' a <- matrix(1, n)
#' d <- matrix(rnorm(n), n)
#' group <- c(rep('group1', N), rep('group2', N), rep('group3', N))
#'
#' # groups completely equal
#' dat1 <- simdata(a, d, N, itemtype = 'dich')
#' dat2 <- simdata(a, d, N, itemtype = 'dich')
#' dat3 <- simdata(a, d, N, itemtype = 'dich')
#' dat <- rbind(dat1, dat2, dat3)
#'
#' # omnibus test using effects-coding contrast matrix (default)
#' SIBTEST(dat, group, suspect_set = 6)
#' SIBTEST(dat, group, suspect_set = 6, randomize=TRUE)
#'
#' # explicit contrasts
#' SIBTEST(dat, group, suspect_set = 6, randomize=TRUE,
#'         C = matrix(c(1,-1,0), 1))
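#'
#' # contrast comparing group2 and group3 only (columns ordered via unique(group))
#' SIBTEST(dat, group, suspect_set = 6, randomize=TRUE,
#'         C = matrix(c(0,1,-1), 1))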
#'
#' # test all items for DIF
#' SIBTEST(dat, group, suspect_set = 1:ncol(dat), DIF=TRUE)
#' SIBTEST(dat, group, suspect_set = 16:ncol(dat), DIF=TRUE,
#'         match_set = 1:15) # specific anchors
#'
#' # post-hoc between two groups only
#' pick <- group %in% c('group1', 'group2')
#' SIBTEST(subset(dat, pick), group[pick], suspect_set = 1:ncol(dat), DIF=TRUE)
#'
#' # post-hoc pairwise comparison for all groups
#' SIBTEST(dat, group, suspect_set = 1:ncol(dat), DIF=TRUE, pairwise = TRUE)
#'
#' ## systematic differing slopes and intercepts
#' dat2 <- simdata(a + c(numeric(15), .5,.5,.5,.5,.5, numeric(10)),
#'         d + c(numeric(15), 0,.6,.7,.8,.9, numeric(10)),
#'         N, itemtype = 'dich')
#' dat <- rbind(dat1, dat2, dat3)
#'
#' SIBTEST(dat, group, suspect_set = 16)
#' SIBTEST(dat, group, suspect_set = 16, randomize=TRUE)
#'
#' SIBTEST(dat, group, suspect_set = 19)
#' SIBTEST(dat, group, suspect_set = 19, randomize=TRUE)
#'
#' SIBTEST(dat, group, suspect_set = c(16, 19), DIF=TRUE)
#' SIBTEST(dat, group, suspect_set = c(16, 19), DIF=TRUE, pairwise=TRUE)
#'
#'
#' }
SIBTEST <- function(dat, group, suspect_set, match_set, focal_name = unique(group)[2],
                    guess_correction = 0, Jmin = 5, na.rm = FALSE, randomize = FALSE,
                    C = cbind(1, -diag(length(unique(group)) - 1L)), pairwise = FALSE,
                    DIF = FALSE, p.adjust.method = 'none', permute = 1000, pk_focal = FALSE,
                    correction = TRUE, remove_cross = FALSE, details = FALSE, plot = 'none', ...){

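    # Estimate where the conditional group differences cross the matched-score axis:
    # regress the (weighted) differences on the matched scores and take the root of
    # the fitted line (-intercept/slope). Scores above the estimated crossing point
    # are flagged TRUE; with remove_cross the first flagged score is set to NA so it
    # is excluded from the CSIBTEST computation. In the multi-group case a crossing
    # location is estimated for every pair of groups and the locations are averaged
    # (weighted by pair sample size) within each contrast row of C.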
    find_intersection <- function(diff, weight = NULL, use, scores, remove_cross,
                                  tab_match = NULL, C = NULL){
        if(is.matrix(diff)){
            Ystar <- diff
            ks <- Ns <- numeric(ncol(diff)*(ncol(diff)-1)/2)
            ind <- 1L
            for(i in 1L:ncol(Ystar)){
                for(j in 1L:ncol(Ystar)){
                    if(j > i){
                        weight <- pmax(tab_match[[i]], tab_match[[j]])
                        N <- sum(tab_match[[i]], na.rm=TRUE) +
                            sum(tab_match[[j]], na.rm=TRUE)
                        use <- weight/N > .01
                        use[is.na(use)] <- FALSE
                        k <- scores[use]
                        diff <- Ystar[use, i] - Ystar[use, j]
                        weight <- weight[use]
                        mod <- lm(diff ~ k, weights = weight)
                        cfs <- coef(mod)
                        ks[ind] <- -cfs[1L]/cfs[2L]
                        Ns[ind] <- N
                        ind <- ind + 1L
                    }
                }
            }
            # ks <- sum(ks * (Ns / sum(Ns)))
            ksmat <- Nsmat <- matrix(NA, ncol(C), ncol(C))
            ksmat[lower.tri(ksmat)] <- ks
            Nsmat[lower.tri(ksmat)] <- Ns
            ret <- matrix(FALSE, length(scores), nrow(C))

            # computed weighted ks per contrast
            for(j in 1:nrow(C)){
                whcC <- which(C[j,] != 0)
                wks <- sum(ksmat[whcC, whcC] * Nsmat[whcC, whcC] /
                               sum(Nsmat[whcC, whcC], na.rm = TRUE), na.rm=TRUE)
                ret[,j] <- scores > signif(wks, 1L)
                ## leave crossing location in for easier math
            }
        } else {
            k <- scores[use]
            diff <- diff[use]
            weight <- weight[use]
            mod <- lm(diff ~ k, weights = weight)
            cfs <- coef(mod)
            ks <- -cfs[1L]/cfs[2L]
            ret <- scores > signif(ks, 1L)
            if(remove_cross){
                pick <- which(ret)
                if(length(pick)){
                    ret[min(pick)] <- NA
                } else {
                    ret[length(ret)] <- NA
                }
            }
        }
        ret
    }

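    # pairwise comparisons: call SIBTEST() recursively on each pair of groups and
    # return the results as a named list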
    if(pairwise){
        gnms <- if(is.factor(group)) levels(group) else unique(group)
        ngroups <- length(gnms)
        compare <- vector('list', ngroups*(ngroups-1L)/2L)
        nms <- character(ngroups*(ngroups-1L)/2L)
        ind <- 1L
        for(i in 1L:ngroups){
            for(j in 1L:ngroups){
                if(i < j){
                    pick <- group == gnms[i] | group == gnms[j]
                    compare[[ind]] <- SIBTEST(dat[pick,], group=group[pick],
                                              suspect_set=suspect_set, match_set=match_set,
                                              guess_correction=guess_correction, Jmin=Jmin,
                                              na.rm=na.rm, randomize=randomize, pairwise = FALSE,
                                              DIF=DIF, p.adjust.method=p.adjust.method,
                                              permute=permute, pk_focal=pk_focal,
                                              correction=correction, remove_cross=remove_cross,
                                              details=FALSE, plot='none', ...)
                    nms[ind] <- paste0(gnms[c(i, j)], collapse='_')
                    ind <- ind + 1L
                }
            }
        }
        names(compare) <- nms
        return(compare)
    }

    if(DIF){
        if(missing(suspect_set))
            stop('DIF input requires suspect_set', call.=FALSE)
        if(missing(match_set))
            match_set <- 1L:ncol(dat)
        diftest <- vector('list', length(suspect_set))
        names(diftest) <- colnames(dat)[suspect_set]
        for(i in 1L:length(suspect_set)){
            diftest[[i]] <- SIBTEST(dat=dat, group=group, suspect_set=suspect_set[i],
                                    match_set=match_set[!(match_set %in% suspect_set[i])],
                                    focal_name=focal_name, guess_correction=guess_correction,
                                    Jmin=Jmin, na.rm=na.rm, randomize=randomize,
                                    C=C, DIF=FALSE, p.adjust.method='none',
                                    permute=permute, pk_focal=pk_focal, correction=correction,
                                    remove_cross=remove_cross, details=details, plot=plot, ...)
        }
        if(p.adjust.method != 'none'){
            ps <- sapply(diftest, function(x) x$p)
            psSIB <- p.adjust(ps[1L,], method = p.adjust.method)
            psCSIB <- p.adjust(ps[2L,], method = p.adjust.method)
            padj <- rbind(psSIB, psCSIB)
            for(i in 1L:length(diftest))
                diftest[[i]]$adj_p <- padj[,i]
        }
        return(diftest)
    }

    if(na.rm){
        is_na <- is.na(rowSums(dat))
        dat <- dat[!is_na, , drop=FALSE]
        group <- group[!is_na]
    }
    if(any(is.na(dat)))
        stop(c('SIBTEST does not support datasets with missing values. \n  ',
               'Pass na.rm=TRUE to remove missing rows'), call.=FALSE)
    ngroups <- length(unique(group))
    N <- nrow(dat)
    stopifnot(ngroups >= 2L)
    stopifnot(focal_name %in% group)
    stopifnot(N == length(group))
    stopifnot(Jmin >= 2)
    stopifnot(!(missing(suspect_set) && missing(match_set)))
    index <- 1L:ncol(dat)
    if(missing(match_set)) match_set <- index[-suspect_set]
    else if(missing(suspect_set)) suspect_set <- index[-match_set]
    if(length(guess_correction) > 1L){
        stopifnot(length(guess_correction) == ncol(dat))
    } else guess_correction <- rep(guess_correction, ncol(dat))
    stopifnot(!any(match_set %in% suspect_set))

    if(ngroups == 2L){
        focal_dat <- dat[group == focal_name,]
        focal_match_scores <- rowSums(focal_dat[,match_set, drop=FALSE])
        focal_suspect_scores <- rowSums(focal_dat[,suspect_set, drop=FALSE])
        ref_dat <- dat[group != focal_name,]
        ref_match_scores <- rowSums(ref_dat[,match_set, drop=FALSE])
        ref_suspect_scores <- rowSums(ref_dat[,suspect_set, drop=FALSE])
        tab_scores <- table(rowSums(dat[ ,match_set, drop=FALSE]))
        pkstar <- tab_scores / sum(tab_scores)
        scores <- as.integer(names(pkstar))

        # selection
        tab_focal <- tab_ref <- numeric(length(tab_scores))
        II <- tab_scores > Jmin
        tab1 <- table(focal_match_scores)
        match <- names(II) %in% names(tab1)
        II[match] <- tab1 > Jmin
        tab_focal[match] <- tab1
        tab2 <- table(ref_match_scores)
        match <- names(II) %in% names(tab2)
        II[match] <- II[match] & tab2 > Jmin
        tab_ref[match] <- tab2
        II <- II & scores != min(scores) & scores != max(scores)

        n <- length(match_set)
        Xbar_ref <- mean(ref_match_scores)
        Xbar_focal <- mean(focal_match_scores)
        b_focal <- CA(focal_dat[,match_set, drop=FALSE], guess_correction[match_set])
        b_ref <- CA(ref_dat[,match_set, drop=FALSE], guess_correction[match_set])
        V_ref <- V_focal <- Ybar_ref <- Ybar_focal <- sigma_ref <- sigma_focal <-
            numeric(length(tab_scores))
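        # conditional statistics at each matched total score k: V_* is the
        # regression-corrected matched score (shrunk toward the group mean by the
        # coefficient-alpha reliability and scaled by 1/n), while Ybar_* and sigma_*
        # are the mean and variance of the suspect subtest among examinees with
        # matched score k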
        for(kk in seq_len(length(tab_scores))){
            k <- scores[kk]
            pick_ref <- k == ref_match_scores
            pick_focal <- k == focal_match_scores
            V_ref[kk] <- 1/n * (Xbar_ref + b_ref * (k - Xbar_ref))
            V_focal[kk] <- 1/n * (Xbar_focal + b_focal * (k - Xbar_focal))
            sigma_focal[kk] <- var(focal_suspect_scores[pick_focal])
            sigma_ref[kk] <- var(ref_suspect_scores[pick_ref])
            Ybar_ref[kk] <- mean(ref_suspect_scores[pick_ref])
            Ybar_focal[kk] <- mean(focal_suspect_scores[pick_focal])
        }
        V <- (V_ref + V_focal) / 2
        sigma_focal <- ifelse(is.na(sigma_focal), 0, sigma_focal)
        sigma_ref <- ifelse(is.na(sigma_ref), 0, sigma_ref)
        Ybar_ref <- ifelse(is.nan(Ybar_ref), 0, Ybar_ref)
        Ybar_focal <- ifelse(is.nan(Ybar_focal), 0, Ybar_focal)
        II <- II & sigma_ref != 0 & sigma_focal != 0
        II[scores < mean(guess_correction*ncol(dat))] <- FALSE
        if(pk_focal){
            tmp <- table(rowSums(focal_dat[ ,match_set, drop=FALSE]))
            tmp <- tmp / sum(tmp)
            match <- names(II) %in% names(tmp)
            pkstar[] <- 0
            pkstar[match] <- tmp
        }
        pkstar[!II] <- 0
        pkstar <- pkstar / sum(pkstar)
        tab_focal[tab_focal == 0] <- NA
        tab_ref[tab_ref == 0] <- NA
        sigma_uni <- sqrt(sum(pkstar^2 * (sigma_focal/tab_focal + sigma_ref/tab_ref), na.rm = TRUE))
        ystar_ref_vec <- ystar_focal_vec <- numeric(length(II))
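        # Shealy and Stout's true-score regression correction: extrapolate each
        # group's conditional suspect-subtest mean from its own corrected matched
        # score (V_*) to the pooled value V, using a local slope M estimated from
        # the adjacent score categories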
        for(kk in seq_len(length(tab_scores))){
            if(!II[kk]) next
            if(correction){
                M_ref <- (Ybar_ref[kk+1] - Ybar_ref[kk-1]) / (V_ref[kk+1] - V_ref[kk-1])
                M_focal <- (Ybar_focal[kk+1] - Ybar_focal[kk-1]) / (V_focal[kk+1] - V_focal[kk-1])
                ystar_ref_vec[kk] <- Ybar_ref[kk] + M_ref * (V[kk] - V_ref[kk])
                ystar_focal_vec[kk] <- Ybar_focal[kk] + M_focal * (V[kk] - V_focal[kk])
            } else {
                ystar_ref_vec[kk] <- Ybar_ref[kk]
                ystar_focal_vec[kk] <- Ybar_focal[kk]
            }
        }
        crossvec <- find_intersection(ystar_ref_vec - ystar_focal_vec, pmax(tab_ref, tab_focal),
                                      use = pmax(tab_ref, tab_focal)/N > .01, scores=scores,
                                      remove_cross=remove_cross)

        # compute stats
        beta_uni <- beta_cross <- 0
        for(kk in seq_len(length(tab_scores))){
            if(!II[kk]) next
            beta_uni <- beta_uni + pkstar[kk] * (ystar_ref_vec[kk]- ystar_focal_vec[kk])
            if(is.na(crossvec[kk])) next
            if(!crossvec[kk])
                beta_cross <- beta_cross + pkstar[kk] * (ystar_ref_vec[kk] - ystar_focal_vec[kk])
            else
                beta_cross <- beta_cross + pkstar[kk] * (ystar_focal_vec[kk] - ystar_ref_vec[kk])
        }
        X2_uni <- (beta_uni / sigma_uni)^2
        p_uni <- (1 - pchisq(X2_uni, 1L))
        beta1 <- sum(pkstar[crossvec] * (ystar_focal_vec[crossvec] - ystar_ref_vec[crossvec]))
        beta2 <- sum(pkstar[!crossvec] * (ystar_focal_vec[!crossvec] - ystar_ref_vec[!crossvec]))
        sigma1 <- sqrt(sum(pkstar[crossvec]^2 * (sigma_focal[crossvec]/tab_focal[crossvec] +
                                                     sigma_ref[crossvec]/tab_ref[crossvec]),
                           na.rm = TRUE))
        sigma2 <- sqrt(sum(pkstar[!crossvec]^2 * (sigma_focal[!crossvec]/tab_focal[!crossvec] +
                                                      sigma_ref[!crossvec]/tab_ref[!crossvec]),
                           na.rm = TRUE))
        df <- 0L
        if(sigma1 > 0) df <- df + 1L else sigma1 <- NA
        if(sigma2 > 0) df <- df + 1L else sigma2 <- NA
        X2_cross <- sum((beta1/sigma1)^2, (beta2/sigma2)^2, na.rm = TRUE)
        p_cross <- pchisq(X2_cross, df, lower.tail = FALSE)
        B_vec <- numeric(permute)
        sigma_cross <- NA
        ret <- data.frame(focal_group=focal_name, n_matched_set=length(match_set),
                          n_suspect_set = length(suspect_set),
                          beta = c(beta_uni, beta_cross), SE=c(sigma_uni, NA),
                          X2=c(X2_uni, X2_cross),
                          df=c(1, df), p = c(p_uni, p_cross))
        rownames(ret) <- c('SIBTEST', 'CSIBTEST')
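        # Li and Stout (1996) randomization test: randomly flip the signs of the
        # conditional differences, re-estimate the crossing location, and recompute
        # the crossing statistic for each permutation; the p-value is the proportion
        # of permuted statistics at least as extreme as the observed value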
        if(randomize){
            crossvec <- find_intersection(ystar_ref_vec - ystar_focal_vec,
                                          pmax(tab_ref, tab_focal),
                                          use = pmax(tab_ref, tab_focal)/N > .01,
                                          scores=scores,
                                          remove_cross=remove_cross)
            beta_cross2 <- 0
            for(kk in seq_len(length(tab_scores))){
                if(!II[kk] || is.na(crossvec[kk])) next
                if(!crossvec[kk])
                    beta_cross2 <- beta_cross2 +
                        pkstar[kk] * (ystar_ref_vec[kk] - ystar_focal_vec[kk])
                else
                    beta_cross2 <- beta_cross2 +
                        pkstar[kk] * (ystar_focal_vec[kk] - ystar_ref_vec[kk])
            }
            sigma_cross <- sqrt(sum((pkstar^2 * (sigma_focal/tab_focal +
                                                     sigma_ref/tab_ref))[!is.na(crossvec)],
                                    na.rm = TRUE))
            B <- abs(beta_cross2/sigma_cross)
            for(p in 1L:permute){
                diff <- sample(c(-1,1), length(ystar_ref_vec), replace = TRUE) *
                    (ystar_ref_vec - ystar_focal_vec)
                crossvec <- find_intersection(diff, pmax(tab_ref, tab_focal),
                                              use = pmax(tab_ref, tab_focal)/N > .01,
                                              scores=scores,
                                              remove_cross=remove_cross)
                beta <- 0
                for(kk in 1L:length(tab_scores)){
                    if(!II[kk] || is.na(crossvec[kk])) next
                    if(!crossvec[kk]) beta <- beta + pkstar[kk] * (diff[kk])
                    else beta <- beta + pkstar[kk] * (-diff[kk])
                }
                B_vec[p] <- beta/sigma_cross
            }
            p_cross2 <- mean(abs(B_vec) >= B)
            ret$X2[2] <- ret$df[2] <- NA
            ret$SE[2] <- sigma_cross
            ret$p[2] <- p_cross2
        }
        class(ret) <- c('mirt_df', 'data.frame')
        if(details){
            ret <- data.frame(pkstar=unname(as.numeric(pkstar)),
                              sigma_focal=sigma_focal, sigma_ref=sigma_ref,
                              Y_focal=Ybar_focal, Y_ref=Ybar_ref,
                              Ystar_focal=ystar_focal_vec,
                              Ystar_ref=ystar_ref_vec,
                              row.names = names(pkstar))
            if(randomize) attr(ret, "B_vec") <- B_vec
        }
        if(plot != 'none'){
            ret <- data.frame(total_score = 1:length(pkstar) - 1,
                              pkstar=unname(as.numeric(pkstar)),
                              Ystar=c(ystar_focal_vec, ystar_ref_vec),
                              Ystar_diff =ystar_focal_vec - ystar_ref_vec,
                              Ystar_diff_pkstar =
                                  unname((ystar_focal_vec-ystar_ref_vec)*as.numeric(pkstar)),
                              group = rep(c('focal', 'reference'), each=length(pkstar)))
            if(plot != "freq")
                for(i in 1L:nrow(ret))
                    if(ret$pkstar[i] == 0) ret[i, ] <- NA
            if(plot == "observed")
                return(lattice::xyplot(Ystar ~ total_score, data=ret,
                                       groups = group, xlab='Matched subtest',
                                       ylab = 'Scaled focal subtest', type='b',
                                       auto.key=list(space='right'), ...))
            if(plot == "weights")
                return(lattice::xyplot(pkstar~total_score,
                                       data=subset(ret, group=='focal'),
                                       xlab='Matched subtest',
                                       ylab = 'Proportion', type = 'b', ...))
            if(plot == "difference")
                return(lattice::xyplot(Ystar_diff~total_score,
                                       data=subset(ret, group=='focal'),
                                       xlab='Matched subtest',
                                       ylab = 'Focal subtest difference',
                                       panel = function(x, y, ...){
                                           panel.xyplot(x, y, type = 'b', ...)
                                           panel.abline(h=0, lty=2, col='red')
                                       }, ...))
            if(plot == "wdifference")
                return(lattice::xyplot(Ystar_diff_pkstar~total_score,
                                       data=subset(ret, group=='focal'),
                                       xlab='Matched subtest',
                                       ylab = 'Weighted focal subtest difference',
                                       panel = function(x, y, ...){
                                           panel.xyplot(x, y, type = 'b', ...)
                                           panel.abline(h=0, lty=2, col='red')
                                       }, ...))
            stop('plot argument not supported', call.=FALSE)
        }
    } else {
        # Multi-group

        if(plot != 'none')
            stop('Multi-group plots not currently supported')

        groupnms <- unique(group)
        if(ncol(C) != ngroups)
            stop(sprintf('The C matrix must have %i columns', ngroups), call.=FALSE)
        splt_dat_match <-
            lapply(groupnms, function(x) dat[group == x, match_set, drop=FALSE])
        splt_dat_suspect <-
            lapply(groupnms, function(x) dat[group == x, suspect_set, drop=FALSE])
        splt_match_scores <- lapply(splt_dat_match, rowSums)
        splt_suspect_scores <- lapply(splt_dat_suspect, rowSums)
        tab_scores <- table(rowSums(dat[ ,match_set, drop=FALSE]))
        pkstar <- tab_scores / sum(tab_scores)
        scores <- as.integer(names(pkstar))

        # selection
        splt_tab_match <- lapply(splt_match_scores, function(x){
            tab_full <- numeric(length(tab_scores))
            tab <- table(x)
            match <- names(tab_scores) %in% names(tab)
            tab_full[match] <- tab
            tab_full
        })
        II <- vector('list', nrow(C))
        for(cont in 1L:nrow(C)){
            pick <- ifelse(C[cont,] != 0, TRUE, FALSE)
            II[[cont]] <- colSums(do.call(rbind,
                                          lapply(splt_tab_match,
                                                 function(x) x > Jmin)) * pick) == sum(pick)
            II[[cont]] <- II[[cont]] & scores != min(scores) & scores != max(scores)
        }

        n <- length(match_set)
        Xbars <- sapply(splt_match_scores, mean)
        bs <- sapply(splt_dat_match, CA,
                     guess_correction=guess_correction[match_set])
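        # per-group conditional statistics, analogous to the two-group branch:
        # Vg is the regression-corrected matched score and Ybar/sigma are the
        # conditional mean and variance of the suspect subtest at each matched score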
        cond_stats_list <- lapply(1L:ngroups, function(ind){
            Vg <- Ybar <- sigma <- numeric(length(tab_scores))
            for(kk in seq_len(length(tab_scores))){
                k <- scores[kk]
                pick <- k == splt_match_scores[[ind]]
                Vg[kk] <- 1/n * (Xbars[ind] + bs[ind] * (k - Xbars[ind]))
                sigma[kk] <- var(splt_suspect_scores[[ind]][pick])
                Ybar[kk] <- mean(splt_suspect_scores[[ind]][pick])
            }
            sigma <- ifelse(is.na(sigma), 0, sigma)
            Ybar <- ifelse(is.nan(Ybar), 0, Ybar)
            list(Vg=Vg, sigma=sigma, Ybar=Ybar)
        })
        V <- rowMeans(do.call(cbind, lapply(cond_stats_list, function(x) x$Vg)))
        sigmas <- do.call(cbind, lapply(cond_stats_list, function(x) x$sigma))
        for(cont in 1L:nrow(C)){
            pick <- ifelse(C[cont,] != 0, TRUE, FALSE)
            pick_sigmas <- apply(sigmas, 1, function(x) all(x[pick] > 0))
            II[[cont]] <- II[[cont]] & pick_sigmas
            II[[cont]][scores < mean(guess_correction*ncol(dat))] <- FALSE
        }
        if(any(sapply(II, sum) < 3))
            stop('Too few sum scores used. Consider reducing the Jmin argument ',
                 'or adjusting guess_correction (if necessary)', call.=FALSE)
        pkstar[!apply(do.call(cbind, II), 1L, any)] <- 0
        pkstar <- pkstar / sum(pkstar)
        splt_tab_match <- lapply(splt_tab_match, function(x){
            x[x == 0] <- NA
            x
        })
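        # covariance contribution of the contrasted conditional means at each
        # matched score: C %*% diag(sigma_g / n_gk) %*% t(C), weighted by pkstar^2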
        Sigma_list <- lapply(1L:length(tab_scores), function(i){
            vals <- numeric(ngroups)
            for(g in 1L:ngroups)
                vals[g] <- cond_stats_list[[g]]$sigma[i] / splt_tab_match[[g]][i]
            C %*% diag(vals) %*% t(C) * pkstar[i]^2
        })
        Ystar <- sapply(1L:ngroups, function(ind){
            ystar_vec <- numeric(length(II[[1L]]))
            ystar_vec[] <- NA
            for(kk in seq_len(length(tab_scores))){
                if(!any(sapply(II, function(x) x[kk]))) next
                if(correction){
                    M <- with(cond_stats_list[[ind]], (Ybar[kk+1] - Ybar[kk-1]) /
                                  (Vg[kk+1] - Vg[kk-1]))
                    ystar_vec[kk] <- with(cond_stats_list[[ind]], Ybar[kk] +
                                              M * (V[kk] - Vg[kk]))
                } else {
                    ystar_vec[kk] <- with(cond_stats_list[[ind]], Ybar[kk])
                }
            }
            ystar_vec
        })

        # compute stats; the crossing statistic (beta_cross) is computed below once
        # the crossing locations have been estimated
        Ystar[is.na(Ystar)] <- 0
        beta_uni <- C %*% t(Ystar) %*% pkstar

        sigma_uni <- matrix(0, nrow(C), nrow(C))
        for(i in seq_len(length(tab_scores)))
            if(all(!is.na(Sigma_list[[i]]))) sigma_uni <- sigma_uni + Sigma_list[[i]]
        slv_sigma_uni <- solve(sigma_uni)
        X2_uni <- t(beta_uni) %*% slv_sigma_uni %*% beta_uni
        p_uni <- (1 - pchisq(X2_uni, nrow(C)))

        crossmat_org <- find_intersection(Ystar, scores=scores,
                                          remove_cross=FALSE,
                                          tab_match=splt_tab_match, C=C)
        not_crossmat <- !crossmat_org & !is.na(crossmat_org)
        crossmat <- crossmat_org & !is.na(crossmat_org)
        beta_cross <- numeric(nrow(C))
        for(j in 1L:length(beta_cross))
            beta_cross[j] <- C[j, ,drop=FALSE] %*%
            (t(Ystar[!crossmat[,j], , drop=FALSE]) %*%
                 pkstar[!crossmat[,j]] - t(Ystar[crossmat[,j], , drop=FALSE]) %*%
                 pkstar[crossmat[,j]])
        X2_cross <- t(beta_cross) %*% slv_sigma_uni %*% beta_cross

        if(nrow(C) == 1L){
            ret <- data.frame(n_matched_set=length(match_set),
                              n_suspect_set = length(suspect_set),
                              beta = c(beta_uni, beta_cross),
                              X2=c(X2_uni, X2_cross),
                              df=c(1, NA), p = c(p_uni, NA))
            rownames(ret) <- c('GSIBTEST', 'GCSIBTEST')
            class(ret) <- c('mirt_df', 'data.frame')
        } else {
            mat1 <- rbind(as.vector(beta_uni), beta_cross)
            colnames(mat1) <- paste0('beta_', 1:nrow(C))
            ret <- data.frame(n_matched_set=length(match_set),
                              n_suspect_set = length(suspect_set),
                              mat1, X2=c(X2_uni, X2_cross),
                              df=c(nrow(C), NA), p = c(p_uni, NA))
            rownames(ret) <- c('GSIBTEST', 'GCSIBTEST')
            class(ret) <- c('mirt_df', 'data.frame')
        }
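        # permutation version of GCSIBTEST: flip the sign of each row of Ystar
        # (the same sign applied across groups at a given matched score),
        # re-estimate the crossing locations, and rebuild the quadratic-form statistic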
        if(randomize){
            X2_cross_vec <- numeric(permute)
            for(p in 1L:permute){
                Ystar_p <-  sample(c(-1,1), nrow(Ystar), replace = TRUE) * Ystar
                crossmat_org <- find_intersection(Ystar_p, scores=scores,
                                                  remove_cross=FALSE,
                                                  tab_match=splt_tab_match, C=C)
                not_crossmat <- !crossmat_org & !is.na(crossmat_org)
                crossmat <- crossmat_org & !is.na(crossmat_org)
                beta_cross_p <- numeric(nrow(C))
                for(j in 1L:length(beta_cross_p))
                    beta_cross_p[j] <- C[j, ,drop=FALSE] %*%
                    (t(Ystar_p[!crossmat[,j], , drop=FALSE]) %*%
                         pkstar[!crossmat[,j]] - t(Ystar_p[crossmat[,j], , drop=FALSE]) %*%
                         pkstar[crossmat[,j]])
                X2_cross_vec[p] <- t(beta_cross_p) %*% slv_sigma_uni %*% beta_cross_p
            }
            p_cross2 <- mean(abs(X2_cross_vec) >= X2_cross[1L])
            ret$p[2] <- p_cross2
        }
        if(details){
            # multi-group analogue of the two-group details output
            Ybars <- do.call(cbind, lapply(cond_stats_list, function(x) x$Ybar))
            colnames(Ybars) <- paste0('Y_', groupnms)
            colnames(sigmas) <- paste0('sigma_', groupnms)
            colnames(Ystar) <- paste0('Ystar_', groupnms)
            ret <- data.frame(pkstar=unname(as.numeric(pkstar)),
                              sigmas, Ybars, Ystar,
                              row.names = names(pkstar))
        }
        return(ret)
    }
    ret
}