#' Item and person estimates and fit statistics.
#'
#' @param scores A data frame or path to CSV file in crasch format with the
#' scoring information. The scores are be coded as integers in terms of the
#' construct levels. Missing responses are coded as \code{NA}.
#' @param itemInfo A data frame or path to CSV file in crasch format with
#' information on each item. There must be \code{I} rows and as many columns
#' as specified in the crasch format documentation. No factor variables are
#' allowed.
#' @param consInfo A data frame or path to CSV file containing labeling
#' information about the construct(s) in crasch format. Contains 1 row per
#' construct.
#' @param varsInfo A data frame containing demographic variable information for
#' respondents. One respondent per row, IDs must match those of
#' \code{scores} data frame. Numeric, character, and factor variables are
#' allowed.
#' @param estPackage A string that identifies which IRT package will be used to
#' conduct estimation of item (and possibly person) parameters. See details.
#' @param retainOrig A logical indicating whether one element of the output
#' object should include the original result object from \code{mirt} or
#' \code{TAM}.
#' @param missingAs0 A logical indicating whether \code{NA}s should be recoded
#' to the lowest level (available on a given item) of the construct.
#' If \code{FALSE}, missing data will remain as missing data.
#' @param longFormat A logical indicating whether the scores object is in long
#' format. If \code{TRUE}, reshaping to wide format will occur internally.
#' @param persMethod Choose from c("EAP", "MLE", "WLE")
#' @param consecutive A logical indicating whether to perform a 'consecutive'
#' analysis in which an independent analysis is conducted for each dimension.
#' If \code{TRUE}, the correlation between any two dimensions will be
#' constrained to be 0. If \code{FALSE}, multidimensional analysis that
#' allows non-zero correlations among dimensions will be conducted.
#' @param writeout A logical indicated whether the estimate objects should be
#' written to your working directory as CSVs. See Return below for the names
#' of each file produced. Each will also include any \code{fileSuffix} that
#' you specify.
#' @param fileSuffix A character string that will be affixed to the end of each
#' file name (if \code{writeout = TRUE}). Use this if you are conducting
#' multiple analyses in the same working directory and do not wish for your
#' existing files to be overwritten.
#'
#' @return A list with the following entries
#' \item{itemPars}{A matrix with the item parameter estimates. If
#' \code{estPackage = "mirt"}, this matrix will be...
#' \code{estPackage = "TAM"}, this matrix will be in the ConQuest
#' parameterization with an overall item `delta' and step `tau's.}
#' (\code{itemGrid.csv})
#' \item{itemSEs}{A matrix with the standard error estimates corresponding to
#' the estimates given in \code{itemPars}.} (\code{itemGrid.csv})
#' \item{itemThres}{A matrix giving the Thurstonian threshold estimates.}
#' (\code{itemGrid.csv})
#' \item{itemFit}{A data frame providing the fit statistics for items and
#' steps.} (\code{itemFit.csv})
#' \item{persPars}{A data frame containing estimates for person locations for
#' each dimension.} (\code{persGrid.csv})
#' \item{persSEs}{A data frame with standard errors for person estimates.}
#' (\code{persGrid.csv})
#' \item{persFit}{If non-consecutive (including unidimensional) analysis is
#' performed, this will be a data frame with person fit statistics. If
#' `consecutive' multidimensional analysis is performed (independent analysis
#' of each dimension), this will be a list with fit statistics for each
#' dimension. True multidimensional fit statistics are not available if
#' \code{TAM} is used. See details for how these are produced.}
#' (\code{persFit.csv})
#' \item{popDist}{A list containing a vector of estimated population means
#' for each dimension (often these are constrained to be 0) and a variance-
#' covariance matrix. Variances appear on the diagonal, covariances
#' elsewhere. `Consecutive' analyses fix all covariances to 0.}
#' (\code{popDist.csv})
#' \item{sepRel}{A vector containing the separation reliabilities for each
#' dimension.} (\code{classicalStats.csv})
#' \item{estSummary}{A list containing information about the estimation
#' settings and model fit. Use the \code{names()} function to see what is
#' provided here.} (\code{estSummary.csv})
#' \item{classicalStats}{A list containing information about each dimension.
#' Notably, Cronbach's Alpha and person separation reliabilities are
#' provided. Statistics are calculated by recoding all missings to 0 as well
#' as by only including complete cases.} (\code{classicalStats.csv})
#' \item{empties} A list of vectors (1 per item) of the categories (if any)
#' that were deemed possible, but no one scored into them.
#' \item{estResults}{If \code{retainOrig = TRUE}, this will contain the FULL
#' output provided by the estimation package. See the documentation for that
#' package for full details. Otherwise, this will be NULL.}
#' \item{scoresOrig}{}
#' \item{scoresRecoded}{Wide format score data, recoded to nonnegative
#' successive integers.}
#' \item{itemInfo}{}
#' \item{consInfo}{}
#' \item{varsInfo}{}
#'
#' @export
#add references -- Constructing Measures, Rasch, PCM
#add details -- specifically about where person parameters come from (TAM or sirt)
# -- where fit statistics come from
# -- where item parameters come from
craschR <- function(scores, itemInfo = NULL, consInfo = NULL, varsInfo = NULL,
estPackage="TAM", retainOrig = FALSE, missingAs0 = FALSE,
longFormat = FALSE, persMethod = "EAP", consecutive = FALSE,
writeout = TRUE, fileSuffix = NULL) {
if ( is.character(scores) ) {
if ( !grepl(".csv$",scores) ) {
stop("scores file must end with '.csv'")
}
scores <- data.frame(read.csv(scores, stringsAsFactors = FALSE, row.names = 1))
}
if ( is.character(itemInfo) ) {
if ( !grepl(".csv$",itemInfo) ) {
stop("itemInfo file must end with '.csv'")
}
itemInfo <- data.frame(read.csv(itemInfo,stringsAsFactors=FALSE))
}
if ( is.character(consInfo) ) {
if ( !grepl(".csv$",consInfo) ) {
stop("consInfo file must end with '.csv'")
}
consInfo <- data.frame(read.csv(consInfo,stringsAsFactors=FALSE))
}
if ( is.character(varsInfo) ) {
if ( !grepl(".csv$",varsInfo) ) {
stop("varsInfo file must end with '.csv'")
}
varsInfo <- data.frame(read.csv(varsInfo))
}
# check that all constructs are reflected in items
if (!(all(consInfo$cons.ID %in% itemInfo$cons.ID))) {
stop(paste('The construct',
consInfo$long.name[which(consInfo$cons.ID %in% itemInfo$cons.ID)],
'has no items mapped to it. \n',
' Remove its row from the consInfo object.'))
}
# check that all items are mapped to a construct in consInfo
if (!(all(itemInfo$cons.ID %in% consInfo$cons.ID))) {
stop(paste('No matching cons.ID in consInfo object for item(s):\n ',
paste(itemInfo$item.name[which(itemInfo$cons.ID %in% consInfo$cons.ID)],
collapse = ", ")))
}
if ( estPackage == "TAM" ) { requireNamespace("TAM", quietly = TRUE) }
startTime = date()
# addition of any input/argument also requires an update of the functions in
# internalChecks.R
checkInput(scores, itemInfo, consInfo, varsInfo, estPackage, retainOrig,
missingAs0, longFormat, persMethod, consecutive, writeout)
checkWrite(writeout, fileSuffix)
# reshape to wide if necessary
if ( longFormat ) {
wide <- reshape(scores[c("resp.ID","item.ID","cat")],timevar="item.ID",
v.names="cat",idvar="resp.ID",direction="wide")
row.names(wide) = wide$resp.ID
wide$resp.ID = NULL
colnames(wide) = itemInfo$item.name[itemInfo$item.ID ==
as.numeric(gsub("cat.","",colnames(wide)))]
} else {
wide <- scores
}
# create standard consInfo if not provided
# this will default to UNIdimensional
if ( is.null(consInfo) ) {
maxScore <- max(wide,na.rm=TRUE)
consInfo <- data.frame(matrix(paste("Level",1:maxScore),
nrow=1))
colnames(consInfo) = paste0("cat",1:maxScore)
consInfo = data.frame(cons.ID = 11111,
long.name = "Construct",
short.name = "cons",
consInfo,
stringsAsFactors = FALSE)
}
# create standard itemInfo if not provided
# this will place all items on the first dimension of consInfo
if ( is.null(itemInfo) ) {
maxScore <- max(wide,na.rm=TRUE)
itemInfo <- data.frame(matrix(rep(TRUE,maxScore * I),
nrow=I,ncol=maxScore))
colnames(itemInfo) = paste0("cat",1:maxScore)
itemInfo = data.frame(item.ID = 1:I,
item.name = as.character(colnames(wide)),
cons.ID = rep(consInfo$cons.ID[1],I),
item.type = rep(NA,I),
fixed = rep(FALSE,I),
itemInfo,
stringsAsFactors = FALSE)
}
# order wide in same order as items are given in itemInfo
wide = wide[itemInfo$item.name]
# addition of any input/argument also requires an update of the functions in
# internalChecks.R
# Note this must run AFTER the wide object is created
checkObjs(wide, itemInfo, consInfo, varsInfo, estPackage, retainOrig,
missingAs0, longFormat, persMethod, consecutive, writeout,
imageType)
I <- nrow(itemInfo) # number of items
N <- nrow(wide) # number of persons
D <- nrow(consInfo) # number of dimensions
if ( missingAs0 ) {
for (i in 1:I) {
wide[is.na(wide[,i]),i] =
min(which(as.logical(itemInfo[i,6:ncol(itemInfo)])))
}
}
scoresOrig <- wide
# recode the scores -- must be sequential nonnegative integers for each item
# there must be a way to vectorize this???
for (i in c(1:I)) {
J = sort(unique(wide[,i]),decreasing=FALSE)
for (j in c(1:length(J))) {
wide[,i][wide[,i]==J[j]] = j - 1
}
}
# check for and flag items with no variability
dropped = noVar(wide)
if (length(dropped) > 0) {
warning(length(dropped),
" item(s) showed no response variability and were dropped from analysis:\n",
paste(paste(" ",itemInfo$item.name[dropped]," (",
itemInfo$item.ID[dropped],")",sep=""),collapse="\n"),
"\n")
itemInfo = itemInfo[-dropped,]
wide = wide[,-dropped]
scoresOrig = scoresOrig[,-dropped]
I = nrow(itemInfo)
}
# TAM estimation
if ( estPackage == "TAM" ) {
Q <- as.matrix(table(ordered(itemInfo$item.ID,levels=itemInfo$item.ID),
ordered(itemInfo$cons.ID,levels=consInfo$cons.ID)))
colnames(Q) = consInfo$short.name[consInfo$cons.ID==as.numeric(colnames(Q))]
rownames(Q) = itemInfo$item.name#[itemInfo$item.ID==as.numeric(rownames(Q))]
if ( consecutive ) {
# anchor covariance between any two dimensions to 0
anch.cov <- cbind(t(combn(1:D,2)),rep(0,choose(D,2)))
warning('Item locations for items scored on different constructs cannot be directly compared.')
} else {
anch.cov <- NULL
}
results <- TAM::tam.mml(wide, irtmodel="PCM2", constraint = "cases",
Q = Q, variance.fixed = anch.cov,
control = list(progress=FALSE))
# maxK = results$maxK
# organize item estimates
itemEsts <- TAM.items(results, itemInfo)
# organize person estimates
persEsts <- TAM.pers(results, wide, itemInfo, itemEsts$Thres, consInfo,
consecutive, persMethod)
# calculate fit statistics for both items and persons
itemFit <- TAM::tam.fit(results,progress = FALSE)$itemfit
colnames(itemFit) = c("item", "outfit", "outfit_t", "outfit_p",
"outfit_pholm", "infit", "infit_t", "infit_p",
"infit_pholm")
if ( consecutive && D > 1 ) {
persFit <- list()
for (d in 1:D) {
persFit[[d]] <- data.frame(matrix(nrow = N, ncol = 4),
row.names = row.names(wide))
persFit[[d]][complete.cases(persEsts$Pars),] <-
sirt::pcm.fit(b = -results$AXsi[which(itemInfo$cons.ID ==
consInfo$cons.ID[d]),-1],
theta = as.matrix(
persEsts$Pars[complete.cases(persEsts$Pars), d]),
dat = wide[complete.cases(persEsts$Pars),
which(itemInfo$cons.ID ==
consInfo$cons.ID[d])])$personfit[,-1]
colnames(persFit[[d]]) <- c("outfit", "outfit_t", "infit", "infit_t")
}
names(persFit) <- consInfo$short.name
} else if ( D == 1 ) {
persFit <- list(data.frame(matrix(nrow = N, ncol = 4),
row.names = row.names(wide)))
persFit[[1]][complete.cases(persEsts$Pars),] <-
sirt::pcm.fit(b = -results$AXsi[,-1],
theta =
as.matrix(persEsts$Pars[complete.cases(persEsts$Pars),
1]),
dat = wide[complete.cases(persEsts$Pars),])$personfit[,-1]
colnames(persFit[[1]]) <- c("outfit", "outfit_t", "infit", "infit_t")
} else {
# ?? multi dimensional person fit ??
warning('Person fit statistics are not available for non-consecutive multi-dimensional analyses using TAM.')
persFit <- NULL
}
# person population parameter estimates
persVar = results$variance
persMean = results$beta
row.names(persVar) <- colnames(persVar) <- colnames(persMean) <-
consInfo$short.name
row.names(persMean) <- "popMean"
# separation reliability
if ( persMethod == "EAP" ) {
sepRel <- results$EAP.rel
} else {
sepRel <- sapply(1:D,function(x) {
# ( SSD - MSE ) / SSD
( var(persEsts$Pars[,x]) - sum(persEsts$SEs[,x]^2)/N ) / var(persEsts$Pars[,x])
})
}
names(sepRel) = consInfo$short.name
# estimation summary items
if ( results$maxK == 2 ) {
IRTmodel = "Rasch"
} else if ( results$maxK > 2 ) {
IRTmodel = "PCM"
} else {
IRTmodel = "ERROR"
}
# sirt estimation
} else if ( estPackage == "mirt" ) {
}
# empty categories (used in subsequent functions)
empties <- empty.cats(scoresOrig,
t(itemInfo[,6:ncol(itemInfo)]))
# classical test statistics (one matrix for each dimension)
classicalStats <- classical.test(wide, itemInfo, consInfo, sepRel)
endTime = date()
# need if estpackage==TAM
estSummary = list(estPackage = estPackage, consecutive = consecutive,
IRTmodel = IRTmodel, startTime = startTime,
endTime = endTime, N = N, I = I, D = D,
numIter = results$iter, numIntPoints = results$nnodes,
deviance = results$deviance, AIC = results$ic$AIC,
BIC = results$ic$BIC,
missingDataPerc = sum(is.na(wide))/prod(dim(wide))*100,
persMethod = persMethod)
if ( writeout ) {
write.csv(cbind(itemEsts$Pars, itemEsts$SEs, itemEsts$Thres),
paste0("itemGrid", fileSuffix, ".csv"))
write.csv(cbind(persEsts$Pars, persEsts$SEs, raw = persEsts$Raw,
max = persEsts$Max), paste0("persGrid", fileSuffix, ".csv"))
write.csv(itemFit, paste0("itemFit", fileSuffix, ".csv"))
write.csv(persFit, paste0("persFit", fileSuffix, ".csv"))
write.csv(rbind(persVar, persMean), paste0("popDist", fileSuffix, ".csv"))
write.csv(classicalStats, paste0("classicalStats", fileSuffix, ".csv"))
write.csv(as.matrix(estSummary), paste0("estSummary", fileSuffix, ".csv"))
# organize empty category info into readable table
rowMax <- max(sapply(empties, length))
if (rowMax > 0) { # only write if there are empty categories
empties0 <- data.frame(item.ID = itemInfo$item.ID,
item.name = itemInfo$item.name,
cons.ID = itemInfo$cons.ID,
category = do.call(rbind, lapply(empties,
function(x) {
length(x) <- rowMax
x })))
if (ncol(empties0) > 4) {
empties0 = reshape(empties0, direction = "long",
varying = 4:ncol(empties0), idvar = "item.ID")
empties0 = empties0[complete.cases(empties0),-4]
} else {
empties0 = empties0[complete.cases(empties0),]
}
write.csv(empties0, paste0("empties", fileSuffix, ".csv"),
row.names = FALSE)
}
}
output = list(itemPars = itemEsts$Pars,
itemSEs = itemEsts$SEs,
itemThres = itemEsts$Thres,
itemFit = itemFit,
persPars = as.data.frame(persEsts$Pars),
persSEs = persEsts$SEs,
persRaw = persEsts$Raw,
persMax = persEsts$Max,
persFit = persFit,
popDist = list(mean = persMean,
var.cov = persVar),
sepRel = sepRel,
estSummary = estSummary,
classicalStats = classicalStats,
empties = empties,
scoresOrig = scoresOrig,
scoresRecoded = wide,
itemInfo = itemInfo,
consInfo = consInfo,
varsInfo = varsInfo)
if ( retainOrig ) {
output$estResults = results
}
return(output)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.