Nothing
#' @title EdSurvey Direct Estimation
#' @description Prepare IRT parameters and score items and then estimate a linear model with direct estimation.
#'
#' @param formula a \ifelse{latex}{\code{formula}}{\code{\link[stats]{formula}}} for the
#' model.
#' @param data an \code{edsurvey.data.frame} for the National Assessment of Educational Progress (NAEP)
#' and the Trends in International Mathematics and Science Study (TIMSS).
#' The attributes \code{dichotParamTab}, \code{polyParamTab}, \code{testData},
#' \code{scoreCard} (for NAEP), and \code{scoreDict} (for TIMSS) must not be \code{NULL}.
#' Use the function \code{setNAEPScoreCard} or \code{setAttributes} to set attributes.
#' @param weightVar a character indicating the weight variable to use.
#' The \code{weightVar} must be one of the weights for the
#' \code{edsurvey.data.frame}. If \code{NULL}, it uses the default
#' for the \code{edsurvey.data.frame}.
#' @param dropOmittedLevels a logical value. When set to the value of \code{TRUE}, drops
#' the levels of all factor variables that are specified
#' in an \code{edsurvey.data.frame}. Use \code{print} on an
#' \code{edsurvey.data.frame} to see the omitted levels.
#' To draw plausible values for the full dataset, the user must set this to \code{FALSE}.
#' @param composite logical; for a NAEP composite, setting to \code{FALSE} fits the model to all items at once,
#' in a single construct, whereas setting to \code{TRUE} fits the model as a NAEP composite
#' (i.e., a weighted average of the subscales). This argument is not applicable for TIMSS which is always
#' fit as an overall (non-composite).
#' @param verbose logical; indicates whether a detailed printout should display during execution, only for NAEP data.
#' @param multiCore allows the \code{foreach} package to be used. This function will setup and take down the cluster.
#' @param numberOfCores the number of cores to be used when using \code{multiCore}. Defaults to 75\% of available cores. Users
#' can check available cores with \code{detectCores()}.
#' @param minNode numeric; minimum integration point in direct estimation; see \ifelse{latex}{\code{mml}}{\code{\link[Dire]{mml}}}.
#' @param maxNode numeric; maximum integration point in direct estimation; see \ifelse{latex}{\code{mml}}{\code{\link[Dire]{mml}}}.
#' @param Q integer; number of integration points per student used when integrating over the levels of the latent outcome construct.
#' @param idVar a variable that is used to explicitly define the name of the student identifier
#' variable to be used from \code{data}. Defaults to \code{NULL}, and \code{sid} is used
#' as the student identifier.
#' @param returnMmlCall logical; when \code{TRUE}, do not process the mml call but instead return it for the user to edit before calling
#'
#' @param omittedLevels this argument is deprecated. Use \code{dropOmittedLevels}
#'
#' @details
#' Typically, models are fit with NAEP data using plausible values to integrate out the uncertainty in the measurement of individual
#' student outcomes. When direct estimation is used, the measurement error is integrated out explicitly using \code{Q} quadrature points.
#' See documentation for \ifelse{latex}{\code{mml}}{\code{\link[Dire]{mml}}} in the \code{Dire} package.
#'
#' The \code{scoreDict} helps turn response categories that are not simple item responses, such as \code{Not Reached} and \code{Multiple},
#' to something coded as inputs for the \code{mml} function in \code{Dire}. How \code{mml} treats these values depends on the test.
#' For NAEP, for a dichotomous item, 8 is scored as the same proportion correct as the guessing parameter for that item, 0 is
#' an incorrect response, an NA does not change the student's score, and 1 is correct. TIMSS does not require a \code{scoreDict}.
#'
#' @return
#' An \code{mml.sdf} object, which is the outcome from \code{mml.sdf}, with the following elements:
#' \item{mml}{an object containing information from the \code{mml} procedure.
#' \code{?mml} can be used for further information.}
#' \item{scoreDict}{the scoring used in the \code{mml} procedure}.
#' \item{itemMapping}{the item mapping used in the \code{mml} procedure}.
#' @references
#' Cohen, J., & Jiang, T. (1999). Comparison of partially measured latent traits across nominal subgroups.
#' \emph{Journal of the American Statistical Association}, \emph{94}(448), 1035--1044. https://doi.org/10.2307/2669917
#'
#' @import NAEPirtparams
#' @importFrom Dire mml
#' @importFrom stats na.omit
#' @importFrom utils flush.console
#' @aliases defaultNAEPScoreCard defaultTIMSSScoreDict
#' @example /man/examples/mml.sdf.R
#' @export
mml.sdf <- function(formula,
data,
weightVar = NULL,
dropOmittedLevels = TRUE,
composite = TRUE,
verbose = 0,
multiCore = FALSE,
numberOfCores = NULL,
minNode = -4,
maxNode = 4,
Q = 34,
idVar = NULL,
returnMmlCall = FALSE,
omittedLevels = deprecated()) {
stopifnot(inherits(verbose, c("numeric", "integer", "logical")))
### check data class
checkDataClass(data, c("edsurvey.data.frame", "light.edsurvey.data.frame"))
if (lifecycle::is_present(omittedLevels)) {
lifecycle::deprecate_soft("4.0.0", "mml.sdf(omittedLevels)", "mml.sdf(dropOmittedLevels)")
dropOmittedLevels <- omittedLevels
}
if (!is.logical(dropOmittedLevels)) stop("The ", sQuote("dropOmittedLevels"), " argument must be logical.")
has_IRTAttributes(data, errorCheck = TRUE) # ensure IRT attributes are set, throw error if not
verbose <- as.numeric(verbose)
# if the weight var is not set, use the default
weightVar <- checkWeightVar(data, weightVar)
# check for parrallel if multiCore True
if (any(grepl("_linking", all.vars(formula), fixed = TRUE))) {
stop("mml.sdf does not support linking error.")
}
idVar <- fixIdVar(data, idVar, verbose)
multiCoreSetup <- setupMulticore(multiCore, numberOfCores, verbose)
if (verbose > 0) {
message("Gathering item information.")
}
# check for no response
if (is.null(getResponse(formula))) {
stop("Please specify response variable. Equation should be of the form: y ~ x")
}
### check for transformations in equations: sqrt(mmat), exp(mmat), mmat^0.5, mmat^(1/3), log(mmat),
if (all(grepl("\\^\\d+|log\\(.*\\)|\\^\\(.*\\)|exp\\(.*\\)|sqrt\\(.*\\)", formula[2:3]))) {
stop("Please transform variable outside of equations.")
}
### check for Strata and Psu Var
checkPsuStrata(data)
strataVar <- getStratumVar(data)
psuVar <- getPSUVar(data)
### check idVar
# if there was no ID, add one
if (FALSE %in% idVar) {
data$sid <- 1:nrow(data)
idVar <- "sid"
}
# be sure tbl data is correctly converted to factor
if (inherits(data, "light.edsurvey.data.frame")) {
data <- lightUnclassCols(data)
}
# check survey
survey <- getAttributes(data, "survey")
if (!survey %in% c("TIMSS", "NAEP")) {
# Other assessments that are not TIMSS or NAEP
stop("This assessment is unsupported by mml.sdf.")
}
# check the survey
theYear <- as.numeric(getAttributes(data, "year"))
theSubject <- all.vars(formula[[2]])
if (length(theSubject) != 1) {
stop("There must be exactly one outcome for mml.sdf. Found:", pasteItems(dQuote(theSubject)))
}
# year check
if (survey == "TIMSS" && !theYear %in% c(2011, 2015, 2019)) {
stop("Item parameters are currently only available for 2011, 2015, and 2019.")
}
# test this is length 1
theSubject <- theSubject[theSubject %in% names(getAttributes(data, "pvvars"))]
if (length(theSubject) < 1) {
stop(paste0("Cannot find subject ", dQuote(all.vars(formula[[2]]))))
}
scoreInfo <- getScoreInfo(data, survey, theSubject)
# check for paramTab items not in data
scoreInfo <- checkParamTabAgainstItems(data, scoreInfo)
# ### create stuItems, stuDat
# # get dependent vars, weight, and items from data
indepVars <- all.vars(formula)
indepVars <- indepVars[!indepVars %in% theSubject]
getDataArgs <- list(
data = data, varnames = c(scoreInfo$itemsUse, theSubject, indepVars, weightVar, strataVar, psuVar, idVar),
dropOmittedLevels = dropOmittedLevels
)
edf <- quietGetData(data, getDataArgs)
# mml uses character id variables, so recast here
edf[[idVar]] <- as.character(edf[[idVar]])
# check completeness
edf <- filterOutIncompleteZeroWeight(edf, indepVars, weightVar, verbose)
scoreCall <- getScoreCall(data, scoreInfo)
# make the score call enviornment
scoreCallEnv <- list2env(scoreInfo)
assign("edf", edf, envir = scoreCallEnv) # add edf to the environment
edf <- eval(scoreCall, envir = scoreCallEnv)
scoreFunction <- scoreCall[[1]]
# return scored data
pvs <- edf[ , c(scoreInfo$itemsUse, idVar)]
pvs$id <- pvs[ , idVar]
pvs[ , idVar] <- NULL
# robust to light or full data frame
data <- mergePVGeneral(data, pvs, idVar)
# make long stuItems from the wide edf
stuItems <- as.data.frame(melt(as.data.table(edf[ , c(scoreInfo$itemsUse, idVar)]),
id.vars = idVar,
measure.vars = c(scoreInfo$itemsUse)
))
# creat stuItems
colnames(stuItems) <- c(idVar, "key", "score")
stuItems <- stuItems[!is.na(stuItems$score), ]
if (nrow(stuItems) == 0) {
stop("No students on this data have valid test data.")
}
# create stuDat
stuDat <- edf[edf[ , idVar] %in% unique(stuItems[ , idVar]), c(idVar, indepVars, weightVar, strataVar, psuVar)]
if (nrow(stuDat) == 0) {
stop("No students with valid test data also have valid covariates.")
}
# no composite in TIMSS
if (composite && survey %in% "TIMSS") {
mc <- missing(composite)
if (mc) {
composite <- FALSE
} else {
# the next line sets missing(composite) to FALSE, so that must be tested above it
testDat <- getAttributes(data, "testData", errorCheck = FALSE)
subTestDat <- testDat[testDat$test %in% theSubject, ]
if (nrow(subTestDat) == 0 || any(is.na(subTestDat$weights))) {
stop("Composite is not recomended for TIMSS.")
}
}
# the user seems to know what they are doing, they added weights. So allow it
}
# setting up cluster for multi-core
startMulticore(multiCoreSetup, verbose = verbose)
if (verbose > 0) {
message("Starting MML Procedure.")
}
waldDenomBaseDof <- waldDof(edf, getStratumVar(data), getPSUVar(data))
clObj <- list(
formula = formula,
stuItems = stuItems,
stuDat = stuDat,
idVar = idVar,
dichotParamTab = scoreInfo$dichotParamTab,
polyParamTab = scoreInfo$polyParamTab,
testScale = scoreInfo$testDat,
Q = Q,
composite = composite,
minNode = minNode,
maxNode = maxNode,
strataVar = strataVar,
PSUVar = psuVar,
weightVar = weightVar,
fast = TRUE,
verbose= max(0,verbose -1),
multiCore = multiCoreSetup$multiCore
)
if (returnMmlCall) {
obj <- structure(
list(
"Call" = clObj,
"survey" = survey,
"getDataArgs" = getDataArgs,
"scoreDict" = scoreInfo$scoreDict,
"idVar" = idVar,
"scoreFunction" = scoreFunction,
"waldDenomBaseDof" = waldDenomBaseDof
),
class = "mml.sdf.precall"
)
return(obj)
}
mmlObj <- do.call(mml, clObj)
# get call
call <- match.call()
# main mml.sdf class
obj <- structure(
list(
"Call" = call,
"mml" = mmlObj,
"survey" = survey,
"getDataArgs" = getDataArgs,
"scoreDict" = scoreInfo$scoreDict,
"idVar" = idVar,
"scoreFunction" = scoreFunction,
"waldDenomBaseDof" = waldDenomBaseDof,
"data" = data
),
class = c("mml.sdf")
)
# append composite class
obj$scoreDict <- scoreInfo$scoreDict
obj$theSubject <- theSubject
if (survey == "NAEP") {
class(obj) <- c("mml.NAEP", class(obj))
} else {
class(obj) <- c("mml.TIMSS", class(obj))
}
# return
return(obj)
}
quietGetData <- function(data, args) {
withCallingHandlers(
{
edf <- do.call(getData, args)
},
warning = function(w) {
if (startsWith(conditionMessage(w), "Updating labels on")) {
invokeRestart("muffleWarning")
}
}
)
}
#' Give error if Strata and PSU var not present on light edsurvey data frame.
#' @param data an \code{edsurvey.data.frame} or \code{light.edsurvey.data.frame} to check for PSU and Strata variables
#' @keywords internal
checkPsuStrata <- function(data) {
strataVar <- getStratumVar(data)
psuVar <- getPSUVar(data)
if (inherits(data, "light.edsurvey.data.frame")) {
if (!strataVar %in% colnames(data)) {
stop(paste0(
"Missing strataVar, ", sQuote(strataVar), ". Please include",
sQuote(strataVar), "in your getData() call."
))
}
if (!psuVar %in% colnames(data)) {
stop(paste0(
"Missing psuVar, ", sQuote(psuVar), ". Please include",
sQuote(psuVar), "in your getData() call."
))
}
}
}
#' @importFrom stats terms.formula
getResponse <- function(form) {
tf <- terms.formula(form)
respIndex <- attr(tf, "response")
if (respIndex == 0) {
return(NULL)
}
return(attr(tf, "variables")[[1 + attr(tf, "response")]])
}
#' Give warning if idVar is null on on light edsurvey dataframe.
#' @param data an \code{edsurvey.data.frame}, \code{light.edsurvey.data.frame}, or \code{data.frame} to look for the \code{idVar} on
#' @param idVar the variable to look for
#' Without an idVar the users won't be able to merge PVs back to the data after running an mml. If there is no idVar, a default is chosen.
#' @keywords internal
checkIdVar <- function(data, idVar) {
if (inherits(data, "light.edsurvey.data.frame")) {
if (is.null(idVar)) {
if (!"ROWID" %in% colnames(data)) {
warning("Student level results can't be merged without a valid ", sQuote("idVar"), ". An arbitrary ID, ", sQuote("SID"), ", will be set.")
idVar <- FALSE
} else {
idVar <- "ROWID"
}
}
} else {
if (inherits(data, "edsurvey.data.frame")) {
if (is.null(idVar)) {
if (!"ROWID" %in% colnames(data$cache)) {
warning("Student level results can't be merged without a valid ", sQuote("idVar"), ". An arbitrary ID, ", sQuote("SID"), ", will be set.")
idVar <- FALSE
} else {
idVar <- "ROWID"
}
} else {
if (!idVar %in% colnames(data)) {
stop(paste0("Cannot find idVar ", dQuote(idVar), " on data"))
}
}
}
}
return(idVar)
}
filterParamTabToSubject <- function(paramTab, survey, subject) {
# identify which TIMSS subtest to use, if necessary
if (survey == "TIMSS") {
# default, so there is a subtest
paramTab$subtest <- paramTab$content_subtest
if (subject %in% paramTab$cognitive_subtest) {
paramTab$subtest <- paramTab$cognitive_subtest
}
paramTab$content_subtest <- NULL
paramTab$cognitive_subtest <- NULL
}
if (!"test" %in% colnames(paramTab)) {
stop(paste0("paramTab does not must contain a column named ", dQuote("test")))
}
if (!"subtest" %in% colnames(paramTab)) {
stop(paste0("paramTab does not must contain a column named ", dQuote("subtest")))
}
paramTab <- paramTab[paramTab$test %in% subject | paramTab$subtest %in% subject, ]
return(paramTab)
}
getScoreCall <- function(data, scoreInfo) {
scoreFunction <- getAttributes(data, "scoreFunction")
if (is.null(scoreFunction)) {
stop("attribute scoreFunction must be set on the data.")
}
if (inherits(scoreFunction, "function")) {
if(!"polyParamTab" %in% names(scoreInfo)) {
scoreCall <- list(scoreFunction, quote(edf), quote(NULL), quote(dichotParamTab), quote(scoreDict))
}else {
scoreCall <- list(scoreFunction, quote(edf), quote(polyParamTab), quote(dichotParamTab), quote(scoreDict))
}
mode(scoreCall) <- "call"
} else {
if(!"polyParamTab" %in% names(scoreInfo)) {
scoreCall <- call(scoreFunction, quote(edf), quote(NULL), quote(dichotParamTab), quote(scoreDict))
}else {
scoreCall <- call(scoreFunction, quote(edf), quote(polyParamTab), quote(dichotParamTab), quote(scoreDict))
}
}
return(scoreCall)
}
getScoreInfo <- function(data, survey, theSubject) {
polyParamTab <- getAttributes(data, "polyParamTab", errorCheck = FALSE)
polyParamTab <- filterParamTabToSubject(polyParamTab, survey, theSubject)
dichotParamTab <- getAttributes(data, "dichotParamTab", errorCheck = FALSE)
dichotParamTab <- filterParamTabToSubject(dichotParamTab, survey, theSubject)
testDat <- getAttributes(data, "testData", errorCheck = FALSE)
scoreDict <- getAttributes(data, "scoreDict", errorCheck = FALSE)
if (!theSubject %in% c(testDat$subtest, testDat$test)) {
stop(paste0("Cannot find ", dQuote(theSubject), " in the testData columns ", dQuote("subtest"), " or ", dQuote("test")))
}
if (!theSubject %in% c(dichotParamTab$subtest, dichotParamTab$test, polyParamTab$subtest, polyParamTab$test)) {
stop(paste0("Cannot find ", dQuote(theSubject), " in either the dichotParamTab nor polyParamTab columns ", dQuote("subtest"), " or ", dQuote("test")))
}
return(list(polyParamTab = polyParamTab, dichotParamTab = dichotParamTab, testDat = testDat, scoreDict = scoreDict))
}
checkParamTabAgainstItems <- function(data, scoreInfo) {
ppt <- scoreInfo$polyParamTab
dpt <- scoreInfo$dichotParamTab
items <- c(ppt$ItemID, dpt$ItemID)
itemsUse <- items[items %in% colnames(data)]
itemsNotInData <- setdiff(items, itemsUse)
if (length(itemsNotInData) > 0) {
ppt <- ppt[ppt$ItemID %in% itemsUse, ]
dpt <- dpt[dpt$ItemID %in% itemsUse, ]
warning(paste0("These items were in the assessment, but not in your data: ", pasteItems(dQuote(itemsNotInData), final = "and ")))
}
if (length(itemsUse) < 1) {
stop(paste0(dQuote("data"), " does not contain any of the required items: ", pasteItems(dQuote(itemsNotInData), final = "and ")))
}
# check poly param tab has score points that agree with parameters
dCols <- grep("d[0-9]", colnames(ppt), value = TRUE)
apparentScorePoints <- apply(ppt[ , dCols, drop = FALSE], 1, function(x) {
length(dCols) - sum(is.na(x))
})
if (!"socrePoints" %in% colnames(ppt)) {
ppt$scorePoints <- apparentScorePoints
}
ppt[is.na(ppt$scorePoints), "scorePoints"] <- 0
if (any(ppt$scorePoints != apparentScorePoints)) {
badIDs <- ppt[ppt$scorePoints != apparentScorePoints, "ItemID"]
ppt <- ppt[ppt$scorePoints == apparentScorePoints, ]
warning(paste0(pasteItems(badIDs), " did not have enough parameters for the max score and will not be used."))
}
# null these out instead of passing a zero length param tab
if (nrow(ppt) == 0) {
ppt <- NULL
}
if (nrow(dpt) == 0) {
dpt <- NULL
}
scoreInfo$polyParamTab <- ppt
scoreInfo$dichotParamTab <- dpt
scoreInfo$itemsUse <- itemsUse
return(scoreInfo)
}
filterOutIncompleteZeroWeight <- function(edf, indepVars, weightVar, verbose = FALSE) {
incomplete <- !complete.cases(edf[ , c(indepVars, weightVar)])
if (any(incomplete)) {
if (verbose) {
message("Removing ", sum(incomplete), " rows with NAs from analysis.")
}
edf <- edf[!incomplete, , drop = FALSE]
}
# remove non-positive (full sample) weights
if (!is.null(weightVar)) {
if (any(edf[ , weightVar] <= 0)) {
posWeights <- edf[ , weightVar] > 0
if (verbose) {
message("Removing ", sum(!posWeights), " rows with nonpositive weight from analysis.")
}
edf <- edf[posWeights, ]
}
}
# check that there is some data to work with
if (nrow(edf) <= 0) {
stop(paste0(
sQuote("data"), " must have more than 0 rows after a call to ",
sQuote("getData"), ", incomplete cases, and non-positive weights are removed."
))
}
return(edf)
}
fixIdVar <- function(data, idVar, verbose) {
# if 1) there is no idVar, 2) ROWID is on the data, 3) ROWID is unique, make it the idVar
if (is.null(idVar)) {
if (is.null(idVar) && "ROWID" %in% colnames(data) && length(unique(data[["ROWID"]])) == length(data[["ROWID"]])) {
idVar <- "ROWID"
} else {
if (verbose > 0) {
message("no idVar set. This is necessary to draw plausible values.")
}
}
}
idVar <- checkIdVar(data, idVar)
return(idVar)
}
# returns TRUE/FALSE if an edsurvey.data.frame or light.edsurvey.data.frame have appropriate IRT attributes set
# set errorCheck = TRUE to throw warning about what part of the irt attributes are missing
has_IRTAttributes <- function(esdf, errorCheck = FALSE) {
# validate inputs
checkDataClass(esdf, c("edsurvey.data.frame", "light.edsurvey.data.frame"))
stopifnot("errorCheck must be a logical value of length 1" = is.logical(errorCheck) && length(errorCheck) == 1)
isNAEP <- grepl("NAEP", getAttributes(esdf, "survey", errorCheck = errorCheck), ignore.case = TRUE)
# these might not be set after rebindAttributes is called, we will throw our own errors here
dichotParam <- getAttributes(esdf, "dichotParamTab", errorCheck = FALSE)
polyParam <- getAttributes(esdf, "polyParamTab", errorCheck = FALSE)
testData <- getAttributes(esdf, "testData", errorCheck = FALSE)
scoreCard <- getAttributes(esdf, "scoreCard", errorCheck = FALSE)
scoreDict <- getAttributes(esdf, "scoreDict", errorCheck = FALSE)
scoreFunction <- getAttributes(esdf, "scoreFunction", errorCheck = FALSE)
eMsg <- paste0("Missing IRT Attributes for this edsurvey.data object. IRT attributes required for use with ", dQuote("mml.sdf"), " function. See ?mml.sdf help for details.")
if (isNAEP) {
eMsg <- paste0(eMsg, " Try setting IRT attributes with the AM .dct file(s) using the ", dQuote("setNAEPScoreCard"), " function.")
}
hasItems <- (inherits(dichotParam, "data.frame") && inherits(polyParam, "data.frame"))
hasTests <- inherits(testData, "data.frame")
hasScore <- (inherits(scoreCard, "data.frame") || inherits(scoreDict, "data.frame"))
hasFunction <- !is.null(scoreFunction)
if (!all(c(hasItems, hasTests, hasScore, hasFunction))) {
if (errorCheck) {
stop(eMsg)
}
return(FALSE)
}
# ALL TESTS PASSED, RETURN TRUE
return(TRUE)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.