#' Item fit statistics
#'
#' Computes item-fit statistics for a variety of unidimensional and multidimensional models.
#' Poorly fitting items should be inspected with the empirical plots/tables
#' for unidimensional models; otherwise, \code{\link{itemGAM}} can be used to diagnose
#' where the functional form of the IRT model was misspecified, or models can be refit using
#' more flexible semi-parametric response models (e.g., \code{itemtype = 'spline'}).
#' If the latent trait density was approximated (e.g., Davidian curves, empirical histograms)
#' then passing \code{use_dentype_estimate = TRUE} will use the internally saved quadrature and
#' density components (where applicable). Currently, only the S-X2 statistic is supported for
#' mixture IRT models. Finally, where applicable, the root mean-square error of approximation (RMSEA)
#' is reported to help gauge the magnitude of item misfit.
#'
#' @aliases itemfit
#' @param x a computed model object of class \code{SingleGroupClass},
#' \code{MultipleGroupClass}, or \code{DiscreteClass}
#' @param fit_stats a character vector indicating which fit statistics should be computed.
#' Supported inputs are:
#'
#' \itemize{
#' \item \code{'S_X2'} : Orlando and Thissen's (2000, 2003) and
#' Kang and Chen's (2007) signed chi-squared test (default)
#' \item \code{'Zh'} : Drasgow, Levine, & Williams (1985) Zh
#' \item \code{'X2'} : Bock's (1972) chi-squared method.
#' The default inputs compute Yen's (1981) Q1 variant of the X2 statistic
#' (i.e., uses a fixed \code{group.bins = 10}). However, Bock's group-size variable
#' median-based method can be computed by passing \code{group.fun = median} and
#' modifying the \code{group.size} input to the desired number of bins
#' \item \code{'G2'} : McKinley & Mills (1985) G2 statistic (similar method to Q1,
#' but with the likelihood-ratio test).
#' \item \code{'PV_Q1'} : Chalmers and Ng's (2017) plausible-value variant
#' of the Q1 statistic.
#' \item \code{'PV_Q1*'} : Chalmers and Ng's (2017) plausible-value variant
#' of the Q1 statistic that uses parametric bootstrapping to obtain a suitable empirical
#' distribution.
#' \item \code{'X2*'} : Stone's (2000) fit statistics that require parametric
#' bootstrapping
#' \item \code{'X2*_df'} : Stone's (2000) fit statistics that require parametric
#' bootstrapping to obtain scaled versions of the X2* and degrees of freedom
#' \item \code{'infit'} : Compute the infit and outfit statistics
#' }
#'
#' Note that 'S_X2' and 'Zh' cannot be computed when there are missing response data
#' (i.e., these require multiple-imputation or row-removal techniques).
#'
#' @param which.items an integer vector indicating which items to test for fit.
#' Default tests all possible items
#' @param p.adjust method to use for adjusting all p-values for each respective item fit
#' statistic (see \code{\link{p.adjust}} for available options). Default is \code{'none'}
#' @param mincell the minimum expected cell size to be used in the S-X2 computations. Tables will be
#' collapsed across items first if polytomous, and then across scores if necessary
#' @param mincell.X2 the minimum expected cell size to be used in the X2 computations. Tables will be
#' collapsed if polytomous; however, if this condition cannot be met then the group block will
#' be omitted in the computations
#' @param return.tables logical; return tables when investigating \code{'X2'}, \code{'S_X2'},
#' and \code{'X2*'}?
#' @param group.size approximate size of each group to be used in calculating the \eqn{\chi^2}
#' statistic. The default \code{NA}
#' disables this argument and instead uses the \code{group.bins} input to try to construct
#' equally sized bins
#' @param group.bins the number of bins to use for X2 and G2. For example,
#' setting \code{group.bins = 10} will compute Yen's (1981) Q1 statistic when \code{'X2'} is
#' requested
#' @param group.fun function used when \code{'X2'} or \code{'G2'} are computed. Determines the central
#' tendency measure within each partitioned group. E.g., setting \code{group.fun = median} will
#' obtain the median of each respective ability estimate in each subgroup (this is what was used
#' by Bock, 1972)
#' @param empirical.plot a single numeric value or character of the item name indicating which
#' item to plot (via \code{itemplot}) and overlay with the empirical \eqn{\theta} groupings (see
#' \code{empirical.CI}). Useful for plotting the expected bins based on the \code{'X2'} or
#' \code{'G2'} method
#' @param S_X2.plot argument input is the same as \code{empirical.plot}; however, the resulting image
#' is constructed according to the S-X2 statistic's conditional sum-score information
#' @param S_X2.plot_raw.score logical; use the raw-score information in the plot instead of the latent
#' trait scale score? Default is \code{FALSE}
#' @param empirical.CI a numeric value indicating the width of the empirical confidence interval
#' ranging between 0 and 1 (default of 0 plots no interval). For example, a 95\% confidence
#' interval would be plotted when \code{empirical.CI = .95}. Only applicable to dichotomous items
#' @param empirical.poly.collapse logical; collapse polytomous item categories to form expected scoring
#' functions for empirical plots? Default is \code{FALSE}
#' @param method type of factor score estimation method. See \code{\link{fscores}} for more detail
#' @param na.rm logical; remove rows with any missing values? This is required for methods such
#' as S-X2 because they require the "EAPsum" method from \code{\link{fscores}}
#' @param Theta a matrix of factor scores for each person used for statistics that require
#' empirical estimates. If supplied, arguments typically passed to \code{fscores()} will be
#' ignored and these values will be used instead. Also required when estimating statistics
#' with missing data via imputation
#' @param pv_draws number of plausible-value draws to obtain for PV_Q1 and PV_Q1*
#' @param boot number of parametric bootstrap samples to create for PV_Q1* and X2*
#' @param boot_dfapprox number of parametric bootstrap samples to create for the X2*_df statistic
#' to approximate the scaling factor for X2* as well as the scaled degrees of freedom estimates
#' @param ETrange range of integration nodes for Stone's X2* statistic
#' @param ETpoints number of integration nodes to use for Stone's X2* statistic
# @param impute a number indicating how many imputations to perform (passed to
# \code{\link{imputeMissing}}) when there are missing data present.
# Will return a data.frame object with the mean estimates
# of the stats and their imputed standard deviations
#' @param par.strip.text plotting argument passed to \code{\link[lattice]{lattice}}
#' @param par.settings plotting argument passed to \code{\link[lattice]{lattice}}
#' @param auto.key plotting argument passed to \code{\link[lattice]{lattice}}
#' @param ... additional arguments to be passed to \code{fscores()} and \code{\link[lattice]{lattice}}
#' @author Phil Chalmers \email{rphilip.chalmers@@gmail.com}
#' @keywords item fit
#' @export itemfit
#'
#' @seealso
#' \code{\link{personfit}}, \code{\link{itemGAM}}
#'
#' @references
#'
#' Bock, R. D. (1972). Estimating item parameters and latent ability when responses are scored
#' in two or more nominal categories. \emph{Psychometrika, 37}, 29-51.
#'
#' Chalmers, R. P. (2012). mirt: A Multidimensional Item Response Theory
#' Package for the R Environment. \emph{Journal of Statistical Software, 48}(6), 1-29.
#' \doi{10.18637/jss.v048.i06}
#'
#' Chalmers, R. P. & Ng, V. (2017). Plausible-Value Imputation Statistics for Detecting
#' Item Misfit. \emph{Applied Psychological Measurement, 41}, 372-387.
#' \doi{10.1177/0146621617692079}
#'
#' Drasgow, F., Levine, M. V., & Williams, E. A. (1985). Appropriateness measurement with
#' polychotomous item response models and standardized indices.
#' \emph{British Journal of Mathematical and Statistical Psychology, 38}, 67-86.
#'
#' Kang, T. & Chen, T. T. (2007). An investigation of the performance of the generalized
#' S-X2 item-fit index for polytomous IRT models. ACT Research Report Series.
#'
#' McKinley, R., & Mills, C. (1985). A comparison of several goodness-of-fit statistics.
#' \emph{Applied Psychological Measurement, 9}, 49-57.
#'
#' Orlando, M. & Thissen, D. (2000). Likelihood-based item fit indices for dichotomous item
#' response theory models. \emph{Applied Psychological Measurement, 24}, 50-64.
#'
#' Orlando, M. & Thissen, D. (2003). Further investigation of the performance of S-X2: An item
#' fit index for use with dichotomous item response theory models.
#' \emph{Applied Psychological Measurement, 27}, 289-298.
#'
#' Reise, S. P. (1990). A comparison of item- and person-fit methods of assessing model-data fit
#' in IRT. \emph{Applied Psychological Measurement, 14}, 127-137.
#'
#' Stone, C. A. (2000). Monte Carlo Based Null Distribution for an Alternative Goodness-of-Fit
#' Test Statistic in IRT Models. \emph{Journal of Educational Measurement, 37}, 58-75.
#'
#' Wright B. D. & Masters, G. N. (1982). \emph{Rating scale analysis}. MESA Press.
#'
#' Yen, W. M. (1981). Using simulation results to choose a latent trait model.
#' \emph{Applied Psychological Measurement, 5}, 245-262.
#'
#' @examples
#'
#' \dontrun{
#'
#' P <- function(Theta){exp(Theta^2 * 1.2 - 1) / (1 + exp(Theta^2 * 1.2 - 1))}
#'
#' #make some data
#' set.seed(1234)
#' a <- matrix(rlnorm(20, meanlog=0, sdlog = .1),ncol=1)
#' d <- matrix(rnorm(20),ncol=1)
#' Theta <- matrix(rnorm(2000))
#' items <- rep('2PL', 20)
#' ps <- P(Theta)
#' baditem <- numeric(2000)
#' for(i in 1:2000)
#' baditem[i] <- sample(c(0,1), 1, prob = c(1-ps[i], ps[i]))
#' data <- cbind(simdata(a,d, 2000, items, Theta=Theta), baditem=baditem)
#'
#' x <- mirt(data, 1)
#' raschfit <- mirt(data, 1, itemtype='Rasch')
#' fit <- itemfit(x)
#' fit
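#'
#' # the result is a data.frame, so (as a quick sketch) poorly fitting items
#' # can be flagged by sorting on the S-X2 p-values
#' fit[order(fit$p.S_X2), ]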
#'
#' # p-value adjustment
#' itemfit(x, p.adjust='fdr')
#'
#' # two different fit stats (with/without p-value adjustment)
#' itemfit(x, c('S_X2' ,'X2'), p.adjust='fdr')
#' itemfit(x, c('S_X2' ,'X2'))
#'
#' # Conditional sum-score plot from S-X2 information
#' itemfit(x, S_X2.plot = 1) # good fit
#' itemfit(x, S_X2.plot = 2) # good fit
#' itemfit(x, S_X2.plot = 21) # bad fit
#'
#' itemfit(x, 'X2') # just X2
#' itemfit(x, 'X2', method = 'ML') # X2 with maximum-likelihood estimates for traits
#' itemfit(x, group.bins=15, empirical.plot = 1, method = 'ML') #empirical item plot with 15 points
#' itemfit(x, group.bins=15, empirical.plot = 21, method = 'ML')
#'
#' # PV and X2* statistics (parametric bootstrap stats not run to save time)
#' itemfit(x, 'PV_Q1')
#'
#' if(interactive()) mirtCluster() # improve speed of bootstrap samples by running in parallel
#' # itemfit(x, 'PV_Q1*')
#' # itemfit(x, 'X2*') # Stone's X2* statistic
#' # itemfit(x, 'X2*_df') # Stone's 2000 scaled statistic with df estimate
#'
#' # empirical tables for X2 statistic
#' tabs <- itemfit(x, 'X2', return.tables=TRUE, which.items = 1)
#' tabs
#'
#' #infit/outfit statistics. method='ML' agrees better with eRm package
#' itemfit(raschfit, 'infit', method = 'ML') #infit and outfit stats
#'
#' #same as above, but inputting ML estimates instead (saves time for re-use)
#' Theta <- fscores(raschfit, method = 'ML')
#' itemfit(raschfit, 'infit', Theta=Theta)
#' itemfit(raschfit, empirical.plot=1, Theta=Theta)
#' itemfit(raschfit, 'X2', return.tables=TRUE, Theta=Theta, which.items=1)
#'
#' # fit a new more flexible model for the mis-fitting item
#' itemtype <- c(rep('2PL', 20), 'spline')
#' x2 <- mirt(data, 1, itemtype=itemtype)
#' itemfit(x2)
#' itemplot(x2, 21)
#' anova(x, x2)
#'
#' #------------------------------------------------------------
#'
#' #similar example to Kang and Chen 2007
#' a <- matrix(c(.8,.4,.7, .8, .4, .7, 1, 1, 1, 1))
#' d <- matrix(rep(c(2.0,0.0,-1,-1.5),10), ncol=4, byrow=TRUE)
#' dat <- simdata(a,d,2000, itemtype = rep('graded', 10))
#' head(dat)
#'
#' mod <- mirt(dat, 1)
#' itemfit(mod)
#' itemfit(mod, 'X2') # less useful given inflated Type I error rates
#' itemfit(mod, empirical.plot = 1)
#' itemfit(mod, empirical.plot = 1, empirical.poly.collapse=TRUE)
#'
#' # collapsed tables (see mincell.X2) for X2 and G2
#' itemfit(mod, 'X2', return.tables = TRUE, which.items = 1)
#'
#' mod2 <- mirt(dat, 1, 'Rasch')
#' itemfit(mod2, 'infit', method = 'ML')
#'
#' # massive list of tables for S-X2
#' tables <- itemfit(mod, return.tables = TRUE)
#'
#' #observed and expected total score patterns for item 1 (post collapsing)
#' tables$O[[1]]
#' tables$E[[1]]
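#'
#' # as a rough sketch, signed standardized residuals can be formed from the
#' # collapsed observed/expected tables to locate the offending sum-score cells
#' with(tables, (O[[1]] - E[[1]]) / sqrt(E[[1]]))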
#'
#' # can also select specific items
#' # itemfit(mod, return.tables = TRUE, which.items=1)
#'
#' # fit stats with missing data (run in parallel using all cores)
#' dat[sample(1:prod(dim(dat)), 100)] <- NA
#' raschfit <- mirt(dat, 1, itemtype='Rasch')
#'
#' # use only valid data by removing rows with missing terms
#' itemfit(raschfit, c('S_X2', 'infit'), na.rm = TRUE)
#'
#' # note that X2, G2, PV-Q1, and X2* do not require complete datasets
#' thetas <- fscores(raschfit, method = 'ML') # save for faster computations
#' itemfit(raschfit, c('X2', 'G2'), Theta=thetas)
#' itemfit(raschfit, empirical.plot=1, Theta=thetas)
#' itemfit(raschfit, 'X2', return.tables=TRUE, which.items=1, Theta=thetas)
#'
#'}
#'
itemfit <- function(x, fit_stats = 'S_X2',
which.items = 1:extract.mirt(x, 'nitems'),
na.rm = FALSE, p.adjust = 'none',
group.bins = 10, group.size = NA, group.fun = mean,
mincell = 1, mincell.X2 = 2, return.tables = FALSE,
pv_draws = 30, boot = 1000, boot_dfapprox = 200,
S_X2.plot = NULL, S_X2.plot_raw.score = TRUE,
ETrange = c(-2,2), ETpoints = 11,
empirical.plot = NULL, empirical.CI = .95,
empirical.poly.collapse = FALSE, method = 'EAP', Theta = NULL, #impute = 0,
par.strip.text = list(cex = 0.7),
par.settings = list(strip.background = list(col = '#9ECAE1'),
strip.border = list(col = "black")),
auto.key = list(space = 'right', points=FALSE, lines=TRUE), ...){
impute <- 0
# fn <- function(ind, Theta, obj, vals, ...){
# tmpobj <- obj
# tmpdat <- imputeMissing(obj, Theta[[ind]], warn=FALSE)
# tmpmod <- mirt(tmpdat, model=1, TOL=NA,
# technical=list(customK=obj@Data$K, message=FALSE, warn=FALSE))
# tmpobj@Data <- tmpmod@Data
# whc <- 1L:length(Theta)
# return(itemfit(tmpobj, Theta=Theta[[sample(whc[-ind], 1L)]], ...))
# }
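    # PV_Q1 (Chalmers & Ng, 2017): draw plausible values of the latent traits,
    # compute X2 for each draw, then average the statistics and df across draws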
PV_itemfit <- function(mod, which.items = 1:extract.mirt(mod, 'nitems'),
draws = 100, p.adjust, ...){
pv <- fscores(mod, plausible.draws = draws, ...)
draws <- length(pv)
df.X2 <- Q1 <- matrix(NA, length(which.items), draws)
for (i in seq_len(draws)){
tmp <- itemfit(mod, fit_stats='X2', which.items=which.items,
Theta = pv[[i]], ...)
Q1[,i] <- tmp$X2
df.X2[,i] <- tmp$df.X2
}
Q1_m <- rowMeans(Q1)
df.X2_m <- rowMeans(df.X2)
RMSEA.X2_m <- rmsea(Q1_m, df.X2_m, N=nrow(pv[[1L]]))
p.Q1 <- pchisq(Q1_m, df.X2_m, lower.tail = FALSE)
p.Q1 <- ifelse(df.X2_m == 0, NaN, p.Q1)
ret <- data.frame(PV_Q1=Q1_m, df.PV_Q1=df.X2_m, RMSEA.PV_Q1=RMSEA.X2_m,
p.PV_Q1=p.adjust(p.Q1, method=p.adjust))
ret
}
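    # PV_Q1*: empirical null distribution for the PV_Q1 p-values, built by
    # refitting parametric bootstrap datasets simulated from the fitted model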
boot_PV <- function(mod, org, is_NA, which.items = 1:extract.mirt(mod, 'nitems'),
itemtype, boot = 1000, draws = 30, verbose = FALSE, p.adjust, ...){
pb_fun <- function(ind, mod, N, sv, which.items, draws, itemtype, model, ...){
count <- 0L
K <- extract.mirt(mod, 'K')
while(TRUE){
count <- count + 1L
if(count == 20)
stop('20 consecutive parametric bootstraps failed for PV_Q1*', call.=FALSE)
dat <- simdata(model=mod, N=N)
dat[is_NA] <- NA
if(!all(apply(dat, 2, function(x) length(na.omit(unique(x)))) == K)) next
mod2 <- mirt(dat, model, itemtype=itemtype,
verbose=FALSE, pars=sv, technical=list(warn=FALSE))
if(!extract.mirt(mod2, 'converged')) next
tmp <- PV_itemfit(mod2, which.items=which.items, draws=draws, ...)
ret <- tmp$p.PV_Q1
if(any(is.nan(ret) | is.na(ret))) next
break
}
ret
}
N <- nrow(extract.mirt(mod, 'data'))
retQ1 <- matrix(NA, boot, length(which.items))
stopifnot(nrow(org) == length(which.items))
model <- extract.mirt(mod, 'model')
sv <- mod2values(mod)
retQ1 <- mySapply(1L:boot, pb_fun, progress=verbose,
mod=mod, N=N, sv=sv, itemtype=itemtype,
which.items=which.items, draws=draws, model=model,
p.adjust=p.adjust, ...)
if(nrow(retQ1) == 1L) retQ1 <- t(retQ1)
Q1 <- (1 + rowSums(org$p.PV_Q1 > t(retQ1), na.rm = TRUE)) / (1 + boot)
ret <- data.frame("p.PV_Q1_star"=p.adjust(Q1, method=p.adjust))
ret
}
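    # Stone's (2000) X2*: compare observed E-table counts with model-implied
    # expectations over a fixed quadrature grid; p-values (or scaled stats/df
    # when dfapprox = TRUE) are obtained via parametric bootstrapping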
StoneFit <- function(mod, is_NA, which.items = 1:extract.mirt(mod, 'nitems'), itemtype,
dfapprox = FALSE, boot = 1000, ETrange = c(-2,2), ETpoints = 11,
verbose = FALSE, p.adjust, return.tables, ...){
X2star <- function(mod, which.items, ETrange, ETpoints, itemtype, ...){
sv <- mod2values(mod)
sv$est <- FALSE
nfact <- extract.mirt(mod, 'nfact')
Theta <- thetaComb(seq(ETrange[1L], ETrange[2L], length.out=ETpoints),
nfact=nfact)
dat <- extract.mirt(mod, 'data')
Emod <- mirt(dat, nfact, itemtype=itemtype,
pars=sv, verbose=FALSE,
technical=list(storeEtable=TRUE, customTheta=Theta), ...)
Etable <- Emod@Internals$Etable[[1]]$r1
itemloc <- extract.mirt(mod, 'itemloc')
X2 <- rep(NA, ncol(dat))
if(return.tables)
ret <- vector('list', length(which.items))
for(i in seq_len(length(which.items))){
pick <- itemloc[which.items[i]]:(itemloc[which.items[i]+1L] - 1L)
O <- Etable[ ,pick]
item <- extract.item(mod, which.items[i])
E <- probtrace(item, Theta) * rowSums(O)
X2[which.items[i]] <- sum((O - E)^2 / E, na.rm = TRUE)
if(return.tables)
ret[[i]] <- list(O=O, E=E, Theta=Theta)
}
if(return.tables){
names(ret) <- which.items
return(ret)
}
X2[which.items]
}
pb_fun <- function(ind, is_NA, mod, N, model, itemtype, sv, which.items, ETrange,
ETpoints, ...){
count <- 0L
K <- extract.mirt(mod, 'K')
while(TRUE){
count <- count + 1L
if(count == 20)
stop('20 consecutive parametric bootstraps failed for X2*', call.=FALSE)
dat <- simdata(model=mod, N=N)
dat[is_NA] <- NA
if(!all(apply(dat, 2, function(x) length(na.omit(unique(x)))) == K)) next
mod2 <- mirt(dat, model, itemtype=itemtype, verbose=FALSE, pars=sv,
technical=list(warn=FALSE, omp=FALSE), ...)
if(!extract.mirt(mod2, 'converged')) next
ret <- X2star(mod2, which.items=which.items, ETrange=ETrange,
ETpoints=ETpoints, itemtype=itemtype, ...)
if(any(is.nan(ret) | is.na(ret))) next
break
}
ret
}
N <- nrow(extract.mirt(mod, 'data'))
X2bs <- matrix(NA, boot, length(which.items))
org <- X2star(mod, which.items=which.items, itemtype=itemtype,
ETrange=ETrange, ETpoints=ETpoints,
return.tables=return.tables, ...)
stopifnot(length(org) == length(which.items))
if(return.tables) return(org)
sv <- mod2values(mod)
model <- extract.mirt(mod, 'model')
X2bs <- mySapply(1L:boot, pb_fun, progress=verbose,
mod=mod, N=N, model=model, is_NA=is_NA,
itemtype=itemtype, sv=sv, which.items=which.items,
ETrange=ETrange, ETpoints=ETpoints, ...)
if(nrow(X2bs) == 1L) X2bs <- t(X2bs)
if(dfapprox){
M <- colMeans(X2bs)
V <- apply(X2bs, 2, var)
upsilon <- 2 * M^2 / V
gamma <- M / upsilon
df <- upsilon
for(i in seq_len(length(which.items))){
item <- extract.item(mod, which.items[i])
df[i] <- upsilon[i] - sum(item@est)
}
ret <- data.frame(X2_star_scaled=org/gamma, df.X2_star_scaled=df,
RMSEA.X2_star_scaled=rmsea(org/gamma, df, N=N),
p.X2_star_scaled=p.adjust(pchisq(org/gamma, df, lower.tail=FALSE),
method=p.adjust))
} else {
p <- apply(t(X2bs) > org, 1, mean)
ret <- data.frame(X2_star=org, p.X2_star=p.adjust(p, method=p.adjust))
}
ret
}
if(missing(x)) missingMsg('x')
if(!is.null(empirical.plot))
stopifnot("Model must be unidimensional to use empirical.plot option"=
extract.mirt(x, 'nfact') == 1L)
stopifnot(length(p.adjust) == 1L)
if(return.tables){
stopifnot(length(fit_stats) == 1L)
stopifnot(fit_stats %in% c('X2', 'S_X2', 'X2*'))
}
if(is(x, 'MixedClass'))
stop('MixedClass objects are not supported', call.=FALSE)
if(!is.null(empirical.plot) && return.tables && fit_stats == 'X2')
        stop('Please select either empirical.plot or return.tables, not both', call.=FALSE)
if(!all(fit_stats %in% c('S_X2', 'Zh', 'X2', 'G2', 'infit', 'PV_Q1', 'PV_Q1*', 'X2*', 'X2*_df')))
stop('Unsupported fit_stats element requested', call.=FALSE)
if(any(c('X2', 'G2', 'PV_Q1', 'PV_Q1*') %in% fit_stats) && extract.mirt(x, 'nfact') > 1L)
stop('X2, G2, PV_Q1, or PV_Q1* are for unidimensional models only', call.=FALSE)
S_X2 <- 'S_X2' %in% fit_stats
Zh <- 'Zh' %in% fit_stats
X2 <- 'X2' %in% fit_stats
G2 <- 'G2' %in% fit_stats
infit <- 'infit' %in% fit_stats
if(!is.null(empirical.plot))
which.items <- empirical.plot
which.items <- sort(which.items)
if(!is.null(empirical.plot) || (return.tables && fit_stats == 'X2')){
Zh <- FALSE
if(length(which.items) > 1L)
stop('Plots and tables only supported for 1 item at a time', call.=FALSE)
}
stopifnot(is.numeric(empirical.CI))
if( (return.tables && fit_stats == 'X2') || !is.null(empirical.plot)){
X2 <- TRUE
S_X2 <- Zh <- infit <- G2 <- FALSE
}
J <- ncol(x@Data$data)
if(na.rm) x <- removeMissing(x)
if(na.rm) message('Sample size after row-wise response data removal: ',
nrow(extract.mirt(x, 'data')))
if(nrow(extract.mirt(x, 'data')) == 0L) stop('No data!', call.=FALSE)
if(any(is.na(x@Data$data)) && (Zh || S_X2) && impute == 0)
stop('Only X2, G2, PV_Q1, PV_Q1*, infit, X2*, and X2*_df can be computed with missing data.
Pass na.rm=TRUE to remove missing data row-wise', call.=FALSE)
if(!is.null(Theta)){
if(!is.matrix(Theta)) Theta <- matrix(Theta)
if(nrow(Theta) > nrow(x@Data$data))
Theta <- Theta[-extract.mirt(x, 'completely_missing'), , drop=FALSE]
stopifnot("Theta does not have the correct number of rows" =
nrow(Theta) == nrow(x@Data$data))
}
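    # multiple-group and discrete models: compute fit within each group using
    # group-specific factor scores, then recombine (or arrange plots) below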
if(is(x, 'MultipleGroupClass') || is(x, 'DiscreteClass')){
discrete <- is(x, 'DiscreteClass')
if(discrete)
for(g in seq_len(x@Data$ngroups))
x@ParObjects$pars[[g]]@ParObjects$pars[[J+1L]]@est[] <- FALSE
ret <- vector('list', x@Data$ngroups)
if(is.null(Theta))
Theta <- fscores(x, method=method, full.scores=TRUE, plausible.draws=impute,
rotate = 'none', leave_missing=TRUE, ...)
for(g in seq_len(x@Data$ngroups)){
if(impute > 0L){
tmpTheta <- vector('list', impute)
for(i in seq_len(length(tmpTheta)))
tmpTheta[[i]] <- Theta[[i]][x@Data$groupNames[g] == x@Data$group, , drop=FALSE]
} else tmpTheta <- Theta[x@Data$groupNames[g] == x@Data$group, , drop=FALSE]
tmp_obj <- MGC2SC(x, g)
ret[[g]] <- itemfit(tmp_obj, fit_stats=fit_stats, group.size=group.size, group.bins=group.bins,
group.fun=group.fun, mincell=mincell, mincell.X2=mincell.X2,
empirical.plot=empirical.plot,
S_X2.plot=S_X2.plot, return.tables=return.tables,
S_X2.plot_raw.score = S_X2.plot_raw.score,
Theta=tmpTheta, empirical.CI=empirical.CI, method=method,
impute=impute, discrete=discrete, p.adjust=p.adjust, ...)
}
names(ret) <- x@Data$groupNames
if(!is.null(empirical.plot) || !is.null(S_X2.plot)){
for(g in 1L:length(ret))
ret[[g]]$main <- sprintf("%s (%s)", ret[[g]]$main, names(ret)[g])
return(do.call(gridExtra::grid.arrange, ret))
}
if(extract.mirt(x, 'ngroups') == 1L) return(ret[[1L]])
return(ret)
}
dots <- list(...)
discrete <- dots$discrete
discrete <- ifelse(is.null(discrete), FALSE, discrete)
if(x@Model$nfact > 3L && is.null(dots$QMC) && !discrete)
warning('High-dimensional models should use quasi-Monte Carlo integration. Pass QMC=TRUE',
call.=FALSE)
mixture <- is(x, 'MixtureClass')
    if(mixture && !all(fit_stats == 'S_X2'))
        stop("Only the S_X2 fit statistic is supported for mixture models")
pis <- NULL
if(mixture){
discrete <- TRUE
pis <- extract.mirt(x, 'pis')
}
if((return.tables && fit_stats == 'S_X2') || discrete) Zh <- X2 <- FALSE
ret <- data.frame(item=colnames(x@Data$data)[which.items])
itemloc <- x@Model$itemloc
pars <- x@ParObjects$pars
if(Zh || infit){
if(is.null(Theta))
Theta <- fscores(x, verbose=FALSE, full.scores=TRUE, method=method,
leave_missing=TRUE, rotate = 'none', ...)
prodlist <- attr(pars, 'prodlist')
nfact <- x@Model$nfact + length(prodlist)
fulldata <- x@Data$fulldata[[1L]]
if(any(Theta %in% c(Inf, -Inf))){
for(i in 1L:ncol(Theta)){
tmp <- Theta[,i]
tmp[tmp %in% c(-Inf, Inf)] <- NA
Theta[Theta[,i] == Inf, i] <- max(tmp, na.rm=TRUE) + .1
Theta[Theta[,i] == -Inf, i] <- min(tmp, na.rm=TRUE) - .1
}
}
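        # Zh (Drasgow, Levine, & Williams, 1985): standardize each item's
        # observed log-likelihood by its model-implied mean and variance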
if(Zh){
N <- nrow(Theta)
itemtrace <- matrix(0, ncol=ncol(fulldata), nrow=N)
for (i in which.items)
itemtrace[ ,itemloc[i]:(itemloc[i+1L] - 1L)] <- ProbTrace(x=pars[[i]], Theta=Theta)
log_itemtrace <- log(itemtrace)
LL <- log_itemtrace * fulldata
Lmatrix <- matrix(LL[as.logical(fulldata)], N, J)
mu <- sigma2 <- rep(0, J)
for(item in which.items){
P <- itemtrace[ ,itemloc[item]:(itemloc[item+1L]-1L)]
log_P <- log_itemtrace[ ,itemloc[item]:(itemloc[item+1L]-1L)]
mu[item] <- sum(P * log_P)
for(i in seq_len(ncol(P)))
for(j in seq_len(ncol(P)))
if(i != j)
sigma2[item] <- sigma2[item] + sum(P[,i] * P[,j] *
log_P[,i] * log(P[,i]/P[,j]))
}
tmp <- (colSums(Lmatrix) - mu) / sqrt(sigma2)
ret$Zh <- tmp[which.items]
}
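        # infit/outfit (Wright & Masters, 1982): weighted (infit) and unweighted
        # (outfit) mean-square residuals with approximate standardized z values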
if(infit){
dat_is_na <- apply(extract.mirt(x, 'data'), 2L, is.na)
N <- colSums(!dat_is_na)
attr(x, 'inoutfitreturn') <- TRUE
pf <- personfit(x, method=method, Theta=Theta)
pf$resid[dat_is_na] <- 0
pf$C[dat_is_na] <- 0
z2 <- pf$resid^2 / pf$W
outfit <- colSums(z2, na.rm = TRUE) / N
q.outfit <- sqrt(colSums((pf$C / pf$W^2) / N^2, na.rm=TRUE) - 1 / N)
q.outfit[q.outfit > 1.4142] <- 1.4142
z.outfit <- (outfit^(1/3) - 1) * (3/q.outfit) + (q.outfit/3)
infit <- colSums(pf$W * z2, na.rm=TRUE) / colSums(pf$W, na.rm=TRUE)
q.infit <- sqrt(colSums(pf$C - pf$W^2, na.rm=TRUE) /
colSums(pf$W, na.rm=TRUE)^2)
q.infit[q.infit > 1.4142] <- 1.4142
z.infit <- (infit^(1/3) - 1) * (3/q.infit) + (q.infit/3)
ret$outfit <- outfit[which.items]
ret$z.outfit <- z.outfit[which.items]
ret$infit <- infit[which.items]
ret$z.infit <- z.infit[which.items]
}
}
if(( (X2 || G2) || !is.null(empirical.plot) ||
(return.tables && fit_stats == 'X2')) && x@Model$nfact == 1L){
if(is.null(Theta))
Theta <- fscores(x, verbose=FALSE, full.scores=TRUE, method=method,
rotate = 'none', leave_missing=TRUE, ...)
nfact <- ncol(Theta)
prodlist <- attr(pars, 'prodlist')
fulldata <- x@Data$fulldata[[1L]]
if(any(Theta %in% c(Inf, -Inf))){
for(i in seq_len(ncol(Theta))){
tmp <- Theta[,i]
tmp[tmp %in% c(-Inf, Inf)] <- NA
Theta[Theta[,i] == Inf, i] <- max(tmp, na.rm=TRUE) + .1
Theta[Theta[,i] == -Inf, i] <- min(tmp, na.rm=TRUE) - .1
}
}
ord <- order(Theta[,1L])
fulldata <- fulldata[ord,]
pick <- !is.na(x@Data$data)
pick <- pick[ord, ]
Theta <- Theta[ord, , drop = FALSE]
den <- dnorm(Theta, 0, .5)
den <- den / sum(den)
cumTheta <- cumsum(den)
X2.value <- G2.value <- df.G2 <- df.X2 <- numeric(J)
if(!is.null(empirical.plot)){
if(nfact > 1L) stop('Cannot make empirical plot for multidimensional models', call.=FALSE)
if(!is.numeric(empirical.plot)){
inames <- colnames(x@Data$data)
ind <- 1L:length(inames)
empirical.plot <- ind[inames == empirical.plot]
}
}
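        # partition respondents along the theta ordering: density-weighted bins
        # when group.size is supplied, otherwise group.bins equal-count bins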
for (i in which.items){
if(!is.na(group.size)){
Groups <- rep(20, sum(pick[,i]))
ngroups <- ceiling(sum(pick[,i]) / group.size)
weight <- 1/ngroups
for(q in seq_len(length(Groups)))
Groups[round(cumTheta[pick[,i]],2) >= weight*(q-1) &
round(cumTheta[pick[,i]],2) < weight*q] <- q
} else {
ngroups <- group.bins
Groups <- rep(1:group.bins, each = floor(sum(pick[,i]) / ngroups))
if(sum(pick[,i]) %% ngroups > 0L){
c1 <- sum(pick[,i]) %% ngroups
Groups <- c(rep(1, floor(c1/2)), Groups)
Groups <- c(Groups, rep(ngroups, c1 - floor(c1/2)))
}
}
if(return.tables){
Etable <- vector('list', ngroups)
mtheta_nms <- numeric(ngroups)
}
if(!is.null(empirical.plot))
empirical.plot_points <- matrix(NA, length(unique(Groups)), x@Data$K[empirical.plot] + 2L)
for(j in unique(Groups)){
tmpdat <- fulldata[pick[,i], , drop=FALSE]
dat <- tmpdat[Groups == j, itemloc[i]:(itemloc[i+1] - 1), drop = FALSE]
if(nrow(dat) <= 1L) next
colnames(dat) <- paste0("cat_", sort(unique(extract.mirt(x, "data")[,i])))
r <- colSums(dat)
N <- nrow(dat)
tmpTheta <- Theta[pick[,i], , drop=FALSE]
mtheta <- matrix(group.fun(tmpTheta[Groups == j,]), nrow=1)
if(!is.null(empirical.plot)){
tmp <- r/N
empirical.plot_points[j, ] <- c(mtheta, N, tmp)
}
P <- ProbTrace(x=pars[[i]], Theta=mtheta)
if(return.tables && any(N * P < mincell.X2)){
while(TRUE){
wch <- which(N * P < mincell.X2)
if(!length(wch) || length(r) == 1L) break
for(p in wch){
if(p == 1L){
r[2L] <- r[1L] + r[2L]
P[2L] <- P[1L] + P[2L]
} else {
r[p-1L] <- r[p] + r[p-1L]
P[p-1L] <- P[p] + P[p-1L]
}
}
r <- r[-wch]
P <- P[-wch]
}
if(length(r) == 1L) next
}
E <- N*P
if(return.tables){
Etable[[j]] <- data.frame(Observed=r, Expected=as.vector(E),
z.Residual=as.vector(sqrt((r - E)^2 / E) * sign(r-E)))
mtheta_nms[j] <- mtheta
} else {
X2.value[i] <- X2.value[i] + sum((r - E)^2 / E)
df.X2[i] <- df.X2[i] + length(r) - 1L
tmp <- r * log(r/E)
# replace with 0 is fine:
# https://stats.stackexchange.com/questions/107718/goodness-of-fit-likelihood-ratio-test-with-zero-values
count_nonzero <- sum(is.finite(tmp))
tmp[!is.finite(tmp)] <- 0
if(length(tmp) > 1L){
G2.value[i] <- G2.value[i] + 2*sum(tmp)
df.G2[i] <- df.G2[i] + count_nonzero - 1L
}
}
}
if(return.tables){
names(Etable) <- paste0('theta = ', round(mtheta_nms, 4))
return(Etable)
}
df.X2[i] <- df.X2[i] - sum(pars[[i]]@est)
df.G2[i] <- df.G2[i] - sum(pars[[i]]@est)
}
X2.value[X2.value == 0] <- NA
G2.value[G2.value == 0] <- NA
if(!is.null(empirical.plot)){
K <- x@Data$K[empirical.plot]
EPCI.lower <- EPCI.upper <- NULL
if(K == 2 && empirical.CI != 0){
p.L <- function(x, alpha) if (x[1] == 0) 0 else qbeta(alpha, x[1], x[2] - x[1] + 1)
p.U <- function(x, alpha) if (x[1] == x[2]) 1 else
qbeta(1 - alpha, x[1] + 1, x[2] - x[1])
N <- empirical.plot_points[,2]
O <- empirical.plot_points[,ncol(empirical.plot_points)] * N
EPCI.lower <- apply(cbind(O, N), 1, p.L, (1-empirical.CI)/2)
EPCI.upper <- apply(cbind(O, N), 1, p.U, (1-empirical.CI)/2)
}
theta <- seq(-4,4, length.out=max(c(50, ngroups)))
ThetaFull <- thetaComb(theta, nfact)
empirical.plot_P <- ProbTrace(pars[[empirical.plot]], ThetaFull)
empirical.plot_points <- empirical.plot_points[,-2]
colnames(empirical.plot_points) <- c('theta', paste0('p.', 1:K))
if(empirical.poly.collapse){
score <- 1L:ncol(empirical.plot_P) - 1L
plt1 <- data.frame(ThetaFull, colSums(t(empirical.plot_P) * score))
plt2 <- data.frame(empirical.plot_points[,1], colSums(t(empirical.plot_points[,-1]) * score))
colnames(plt1) <- colnames(plt2) <- c('Theta', "E")
return(xyplot(E~Theta, plt1,
main = paste('Empirical plot for item', empirical.plot),
xlab = expression(theta), ylab=expression(E(theta)),
panel = function(x, y, subscripts, ...){
panel.xyplot(x=x, y=y, type='l',
subscripts=subscripts, ...)
panel.points(cbind(plt2$Theta[subscripts], plt2$E[subscripts]),
col='black', ...)
},
par.strip.text=par.strip.text, par.settings=par.settings, ...))
}
while(nrow(empirical.plot_points) < nrow(empirical.plot_P))
empirical.plot_points <- rbind(empirical.plot_points,
rep(NA, length(empirical.plot_points[1,])))
plt.1 <- data.frame(id = 1:nrow(ThetaFull), Theta=ThetaFull, P=empirical.plot_P)
plt.1 <- reshape(plt.1, varying = 3:ncol(plt.1), direction = 'long', timevar = 'cat')
plt.2 <- data.frame(id = 1:nrow(empirical.plot_points), empirical.plot_points)
plt.2 <- reshape(plt.2, varying = 3:ncol(plt.2), direction = 'long', timevar = 'cat')
plt <- cbind(plt.1, plt.2)
if(K == 2) plt <- plt[plt$cat != 1, ]
plt$cat <- factor(paste0('Category ', plt$cat - 1 + extract.mirt(x, 'mins')[which.items]))
return(xyplot(if(K == 2) P~Theta else P ~ Theta|cat, plt, groups = cat,
main = paste('Empirical plot for item', empirical.plot),
ylim = c(-0.1,1.1), xlab = expression(theta), ylab=expression(P(theta)),
EPCI.lower=EPCI.lower, EPCI.upper=EPCI.upper,
panel = function(x, y, groups, subscripts, EPCI.lower, EPCI.upper, ...){
panel.xyplot(x=x, y=y, groups=groups, type='l',
subscripts=subscripts, ...)
panel.points(cbind(plt$theta[subscripts], plt$p[subscripts]),
col='black', ...)
if(!is.null(EPCI.lower)){
theta <- na.omit(plt$theta)
for(i in 1:length(theta))
panel.lines(c(theta[i], theta[i]), c(EPCI.lower[i],
EPCI.upper[i]),
lty = 2, col = 'red')
}
},
par.strip.text=par.strip.text, par.settings=par.settings, ...))
}
if(X2){
ret$X2 <- X2.value[which.items]
ret$df.X2 <- df.X2[which.items]
ret$RMSEA.X2 <- rmsea(ret$X2, ret$df.X2, N=nrow(fulldata))
ret$p.X2 <- suppressWarnings(pchisq(ret$X2, ret$df.X2, lower.tail=FALSE))
ret$df.X2[ret$df.X2 <= 0] <- 0
ret$p.X2[ret$df.X2 <= 0] <- NaN
ret$p.X2 <- p.adjust(ret$p.X2, method=p.adjust)
}
if(G2){
ret$G2 <- G2.value[which.items]
ret$df.G2 <- df.G2[which.items]
ret$RMSEA.G2 <- rmsea(ret$G2, ret$df.G2, N=nrow(fulldata))
ret$p.G2 <- suppressWarnings(pchisq(ret$G2, ret$df.G2, lower.tail=FALSE))
ret$df.G2[ret$df.G2 <= 0] <- 0
ret$p.G2[ret$df.G2 <= 0] <- NaN
ret$p.G2 <- p.adjust(ret$p.G2, method=p.adjust)
}
}
if(S_X2 || !is.null(S_X2.plot)){
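        # S-X2 (Orlando & Thissen, 2000; Kang & Chen, 2007): observed vs expected
        # response frequencies conditional on the total summed score (via EAPsum)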
dat <- x@Data$data
adj <- x@Data$mins
dat <- t(t(dat) - adj)
S_X2 <- df.S_X2 <- rep(NA, J)
O <- makeObstables(dat, x@Data$K, which.items=which.items)
Nk <- rowSums(O[[which(sapply(O, is.matrix))[1L]]])
dots <- list(...)
QMC <- ifelse(is.null(dots$QMC), FALSE, dots$QMC)
use_dentype_estimate <- ifelse(is.null(dots$use_dentype_estimate),
FALSE, dots$use_dentype_estimate)
quadpts <- dots$quadpts
if(is.null(quadpts) && QMC) quadpts <- 5000L
if(is.null(quadpts)) quadpts <- select_quadpts(x@Model$nfact)
if(x@Model$nfact > 3L && !QMC && method %in% c('EAP', 'EAPsum') && !discrete)
warning('High-dimensional models should use quasi-Monte Carlo integration. Pass QMC=TRUE',
call.=FALSE)
theta_lim <- dots$theta_lim
if(is.null(theta_lim)) theta_lim <- c(-6,6)
gp <- if(mixture) list() else ExtractGroupPars(pars[[length(pars)]])
if(!mixture && x@ParObjects$pars[[extract.mirt(x, 'nitems')+1L]]@dentype == 'custom'){
den_fun <- function(Theta, ...){
obj <- x@ParObjects$pars[[extract.mirt(x, 'nitems')+1L]]
as.vector(obj@den(obj, Theta=Theta))
}
if(is.null(dots$theta_lim) && !is.null(x@Internals$theta_lim))
theta_lim <- x@Internals$theta_lim
} else den_fun <- mirt_dmvnorm
        E <- EAPsum(x, S_X2 = TRUE, gp = gp, CUSTOM.IND=x@Internals$CUSTOM.IND, den_fun=den_fun,
quadpts=quadpts, theta_lim=theta_lim, discrete=discrete, QMC=QMC, mixture=mixture,
pis=pis, which.items=which.items, use_dentype_estimate=use_dentype_estimate)
for(i in which.items)
E[[i]] <- E[[i]] * Nk
if(!is.null(S_X2.plot)){
Osub <- O[[S_X2.plot]]
Osub <- Osub / rowSums(Osub)
Esub <- E[[S_X2.plot]]
Esub[is.na(Esub)] <- 0
Esub <- Esub / rowSums(Esub)
if(!S_X2.plot_raw.score){
stopifnot(extract.mirt(x, 'nfact') == 1L)
fs <- fscores(x, method = 'EAPsum', full.scores=FALSE, verbose=FALSE)
scores <- fs[,'F1']
scores <- scores[-c(1, length(scores))]
} else scores <- 1L:nrow(Osub) + sum(extract.mirt(x, 'mins'))
df <- data.frame(Scores=scores,
y=c(as.numeric(Osub), as.numeric(Esub)),
type = rep(c('observed', 'expected'), each = prod(dim(Osub))),
category=rep(paste0('cat', 1L:ncol(Osub)), each=nrow(Osub)))
return(xyplot(y~Scores|category, df, type='b', group=df$type,
xlab = ifelse(S_X2.plot_raw.score, expression(Sum-Score), expression(theta)),
ylab=expression(P(theta)), auto.key=auto.key, par.strip.text=par.strip.text,
main = paste0("Observed vs Expected Values for Item ", S_X2.plot),
par.settings=par.settings, ...))
}
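        # collapse sparse rows/categories until all expected cells meet mincell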
coll <- collapseCells(O, E, mincell=mincell)
if(return.tables)
return(list(O.org=O[which.items], E.org=E[which.items],
O=coll$O[which.items], E=coll$E[which.items]))
O <- coll$O
E <- coll$E
for(i in seq_len(J)){
if (is.null(dim(O[[i]])) || is.null(E[[i]])) next
S_X2[i] <- sum((O[[i]] - E[[i]])^2 / E[[i]], na.rm = TRUE)
df.S_X2[i] <- if(mixture){
tmp <- sum(sapply(1L:length(pis), function(g)
sum(x@ParObjects$pars[[g]]@ParObjects$pars[[i]]@est)))
sum(!is.na(E[[i]])) - nrow(E[[i]]) - tmp
} else sum(!is.na(E[[i]])) - nrow(E[[i]]) - sum(pars[[i]]@est)
}
df.S_X2 <- if(mixture){
df.S_X2 - sum(sapply(1L:length(pis), function(g)
sum(x@ParObjects$pars[[g]]@ParObjects$pars[[J+1L]]@est)))
} else df.S_X2 - sum(pars[[J+1L]]@est)
df.S_X2[df.S_X2 < 0] <- 0
S_X2[df.S_X2 == 0] <- NaN
ret$S_X2 <- S_X2[which.items]
ret$df.S_X2 <- df.S_X2[which.items]
ret$RMSEA.S_X2 <- rmsea(ret$S_X2, ret$df.S_X2, N=nrow(dat))
ret$p.S_X2 <- suppressWarnings(pchisq(ret$S_X2, ret$df.S_X2, lower.tail=FALSE))
ret$p.S_X2 <- p.adjust(ret$p.S_X2, method=p.adjust)
}
itemtype <- extract.mirt(x, 'itemtype')
if(any(c('PV_Q1', 'PV_Q1*') %in% fit_stats)){
tmp <- PV_itemfit(x, which.items=which.items, draws=pv_draws, itemtype=itemtype,
p.adjust=p.adjust, mincell.X2=mincell.X2, ...)
ret <- cbind(ret, tmp)
}
is_NA <- is.na(x@Data$data)
if('PV_Q1*' %in% fit_stats){
tmp <- boot_PV(x, is_NA=is_NA, org=tmp, which.items=which.items,
itemtype=itemtype, boot=boot, draws=pv_draws, p.adjust=p.adjust,
mincell.X2=mincell.X2, ...)
ret <- cbind(ret, tmp)
}
if('X2*' %in% fit_stats){
tmp <- StoneFit(x, is_NA=is_NA, which.items=which.items, boot=boot, dfapprox=FALSE,
itemtype=itemtype, ETrange=ETrange, ETpoints=ETpoints,
p.adjust=p.adjust, return.tables=return.tables, ...)
if(return.tables){
if(length(which.items) == 1L) tmp <- tmp[[1]]
return(tmp)
}
ret <- cbind(ret, tmp)
}
if('X2*_df' %in% fit_stats){
tmp <- StoneFit(x, is_NA=is_NA, which.items=which.items, boot=boot_dfapprox, dfapprox=TRUE,
itemtype=itemtype, ETrange=ETrange, ETpoints=ETpoints, p.adjust=p.adjust, ...)
ret <- cbind(ret, tmp)
}
ret <- as.mirt_df(ret)
return(ret)
}