#' A utility function computing some commonly used values in IRT
#' @description Compute some commonly used values in IRT such as
#' probability of correct response, expected item scores, and item information.
#' Use \code{what} argument to specify one or more results to return. See \strong{Value} section for details. \cr\cr
#' Run '?MIRTutils-package' for more details, such as the context of the current package and models supported.
#'
#' @param theta a scalar or a vector of examinee ability
#' @param SA_parm a matrix or dataframe of item parameters for standalone items, where columns are
#' a (slope), b1, b2, ..., b_k (difficulty or step difficulty), g (guessing), ItemID, and AssertionID.
#' Columns must follow the above order.
#' See \code{example_SA_parm} for an example. Use \code{?example_SA_parm} for detailed column descriptions
#' @param Cluster_parm a matrix or dataframe of item parameters for cluster items, where columns are
#' a (slope), b (difficulty), cluster variance, cluster position, ItemID, and AssertionID.
#' Columns must follow the above order.
#' See \code{example_Cluster_parm} for an example. Use \code{?example_Cluster_parm} for detailed column descriptions
#' @param Dv scaling factor for IRT model (usually 1 or 1.7)
#' @param n.nodes number of nodes used when integrating out the nuisance dimension
#' @param what a character vector specifying what to compute. Possible values are "prob", "escore", "iteminfo", and "mprob".
#' See \strong{Value} section for details.
#'
#' @note If the test does not have SA items or Cluster items, use default (NULL) for the corresponding data and parameter arguments. \cr\cr
#'
#' @return A list of results specified in \code{what}: \cr
#' * \code{prob}: probability of correct response. When Rasch testlet model is involved, this is conditional probability given both overall and each node of the nuisance dimension
#' * \code{escore}: expected item score
#' * \code{iteminfo}: item information. When Rasch testlet model is involved, this is item info based on marginal prob of raw scores calculated using Lord-Wingerskey algorithm
#' * \code{mprob}: When Rasch testlet model is involved, this is the marginal probability of correct response for each assertion. For unidimensional models, this is the same as prob
#'
#' @author Zhongtian Lin <lzt713@gmail.com>
#' @examples
#' data(example_SA_parm)
#' data(example_Cluster_parm)
#' theta <- seq(-3,3,1)
#' rst <- utility(theta, example_SA_parm, example_Cluster_parm, n.nodes = 11)
#' @export
utility <- function(theta, SA_parm=NULL, Cluster_parm=NULL, Dv=1, n.nodes = 50, what = c("prob","escore","iteminfo","mprob")){
if(is.null(SA_parm) & is.null(Cluster_parm)) {stop("No item found!!!")}
if ("mprob" %in% what && is.null(Cluster_parm)) {warning("No cluster item found. mprob=prob for unidimensional models")}
# -------------------------------------------------
# ------------ Standalone Part --------------------
# -------------------------------------------------
if(is.null(SA_parm)) {
SA_parm_3pl = SA_parm_gpc = NA
probs_SA = escore_SA = info_SA = mprobs_SA = NULL
} else {
# +++ Add SA_parm validity checks later
SA_parm = as.data.frame(SA_parm)
names(SA_parm)[1] = c("a")
names(SA_parm)[(ncol(SA_parm)-2):ncol(SA_parm)] = c("g", "ItemID", "AssertionID")
names(SA_parm)[2:(ncol(SA_parm)-3)] = paste0("b",2:(ncol(SA_parm)-3)-1)
# Separate out 3PL and GPC item par
dich.pos = which(apply(as.matrix(is.na(SA_parm[grepl("^b",names(SA_parm))][,-1])), 1, prod) == 1)
poly.pos = which(apply(as.matrix(is.na(SA_parm[grepl("^b",names(SA_parm))][,-1])), 1, prod) == 0)
SA_parm_3pl = SA_parm[dich.pos,]
SA_parm_gpc = SA_parm[poly.pos,]
if (nrow(SA_parm_3pl) != 0) {
a = SA_parm_3pl$a
b = SA_parm_3pl$b1
g = rep(1,length(theta)) %o% SA_parm_3pl$g
lin_pred_SA = Dv * t(apply(outer(theta, b, "-"), 1, function(x) a*x))
lin_pred_SA = matrix(lin_pred_SA, ncol = length(a))
# conditional prob
probs_SA_3pl = g + (1 - g) * plogis(lin_pred_SA)
colnames(probs_SA_3pl) = SA_parm_3pl$AssertionID
# item info
if ("iteminfo" %in% what) {
info_SA_3pl = t((Dv * a)^2 * t(1 - probs_SA_3pl))/probs_SA_3pl * ((probs_SA_3pl - g)/(1 - g))^2
} else {
info_SA_3pl = NULL
}
# Expected score
if ("escore" %in% what) {
escore_SA_3pl = probs_SA_3pl
} else {
escore_SA_3pl = NULL
}
} else {
probs_SA_3pl = NA
escore_SA_3pl = info_SA_3pl = NULL
}
if (nrow(SA_parm_gpc) != 0) {
a.gpc = SA_parm_gpc[,1]
b.gpc = SA_parm_gpc[,grepl("^b",names(SA_parm_gpc))]
maxscr = rowSums(!is.na(b.gpc))
b.gpc.list = split(b.gpc, seq(nrow(b.gpc)))
b.gpc.list = lapply(b.gpc.list,function(x) x[!is.na(x)])
# theta is a vector of examinee ability
# a is a scalar of the a parameter for one single item
# bs is a vector of all the step difficulty parameters of one single item
# maxscr is the maximum possible score (where possible score starts from 0)
# a dataframe or matrix of step difficulty, where row is item, col is steps
gpcm = function (theta, a, bs, maxscr, Dv=1) {
p = matrix(nrow = length(theta), ncol = length(bs)+1)
denom = sapply(theta, function(x) 1 + sum(exp(cumsum(unlist(Dv * a * (x - bs))))))
for (k in 1:maxscr) {
numer = exp(Dv * a * (k * theta - sum(bs[1:floor(k)])))
p[,k+1] = numer/denom
}
p[,1] = 1 - rowSums(p,na.rm = TRUE)
p
}
# conditional prob
probs_SA_gpc = mapply(gpcm, rep(list(theta),nrow(SA_parm_gpc)), a.gpc, b.gpc.list, maxscr, Dv, SIMPLIFY = FALSE)
names(probs_SA_gpc) = SA_parm_gpc$AssertionID
# item info
if ("iteminfo" %in% what) {
info_SA_gpc = sapply(probs_SA_gpc,
function(y) apply(y, 1,
function(x) (sum((seq_along(x)-1)^2*x) - sum((seq_along(x)-1)*x)^2)))
info_SA_gpc_names = names(probs_SA_gpc)
info_SA_gpc = matrix(info_SA_gpc,nrow = length(theta))
colnames(info_SA_gpc) = info_SA_gpc_names
info_SA_gpc = sweep(info_SA_gpc, 2, (Dv * a.gpc)^2, "*")
} else {
info_SA_gpc= NULL
}
# Expected score
if ("escore" %in% what) {
escore_SA_gpc = sapply(probs_SA_gpc, function(x) rowSums(sweep(x,2,0:(ncol(x)-1),"*")))
escore_SA_gpc_names = names(probs_SA_gpc)
escore_SA_gpc = matrix(escore_SA_gpc,nrow = length(theta))
colnames(escore_SA_gpc) = escore_SA_gpc_names
} else {
escore_SA_gpc = NULL
}
} else {
probs_SA_gpc = NA
escore_SA_gpc = info_SA_gpc = NULL
}
probs_SA = list(probs_SA_3pl = probs_SA_3pl, probs_SA_gpc = probs_SA_gpc)
escore_SA = cbind(escore_SA_3pl, escore_SA_gpc)
info_SA = cbind(info_SA_3pl, info_SA_gpc)
mprobs_SA = probs_SA
}
# -------------------------------------------------
# --------------- Cluster Part --------------------
# -------------------------------------------------
if(is.null(Cluster_parm)) {
probs_CL = info_CL = escore_CL = mprobs_CL = NULL
} else {
gq = statmod::gauss.quad.prob(n.nodes, dist = 'normal', sigma = 1)
nodes = gq$nodes
whts = gq$weights
Cluster_parm = as.data.frame(Cluster_parm)
colnames(Cluster_parm) = c("a","b","cluster_var","position", "ItemID", "AssertionID")
Cluster_parm$position = match(Cluster_parm$position, sort(unique(Cluster_parm$position))) # reorder the position so it starts from 1
probs_CL = info_CL = escore_CL = mprobs_CL = list()
for (k in 1:length(unique(Cluster_parm$position))) {
one_cluster_parm = Cluster_parm[Cluster_parm$position == k,]
mvars = one_cluster_parm$cluster_var
ma = one_cluster_parm$a # a parameters for these assertions
mb = one_cluster_parm$b # b parameters for these assertions
n.ass = length(mb)
rescaled.nodes = nodes %o% sqrt(mvars) # rescaled nodes for these assertions
probs_CL[[k]] = lapply(theta, function(x) plogis(Dv * (ma * (x + t(rescaled.nodes) - mb))))
# line above: list[[cluster]], list[person], row(assertion), col(node)
# item info & escore
if ("iteminfo" %in% what | "escore" %in% what) {
LW_results = lord_wing(cluster_var = mvars, a = ma, b = mb, theta = theta, n.nodes = n.nodes, return_additional = T, Dv=Dv)
# probs_CL[[k]] = lapply(seq(dim(LW_results$probs)[1]), function(x) LW_results$probs[ x, , ])
# # line above: list[[cluster]], list[person], row(node), col(assertion)
prk.marginal = sapply(1:length(LW_results$prk), function(x) as.vector(LW_results$prk[[x]]$W0[-1,] %*% LW_results$whts))
probs = LW_results$probs
qrobs = LW_results$qrobs
rawscores = 0:n.ass
rawscoresXnodes = 0:n.ass %o% rescaled.nodes[,1]
temp1 = apply(log(qrobs), c(1,2), sum)
temp2 = lapply(1:nrow(rawscoresXnodes), function(x) sweep(temp1, 2, rawscoresXnodes[x,], "+"))
temp3 = exp(array(as.numeric(unlist(temp2)), dim=c(dim(temp2[[1]]), length(temp2)))) # every person, every node, every possible raw score
temp4 = temp3 * outer(-apply(probs, c(1,2), sum), rawscores, "+")
numer = apply(temp4, 3, function(x) x %*% whts)
denom = apply(temp3, 3, function(x) x %*% whts)
info_CL[[k]] = -rowSums(t(prk.marginal) * -(numer/denom)^2)
escore_CL[[k]] = colSums(prk.marginal * 0:n.ass)
}
# marginal prob
if ("mprob" %in% what) {
mprobs_CL[[k]] = t(do.call(cbind, lapply(probs_CL[[k]], function(x) x %*% whts)))
}
}
info_CL = do.call(cbind, info_CL)
escore_CL = do.call(cbind, escore_CL)
mprobs_CL = do.call(cbind, mprobs_CL)
names(probs_CL) = unique(Cluster_parm$ItemID)
if ("iteminfo" %in% what) colnames(info_CL) = unique(Cluster_parm$ItemID)
if ("escore" %in% what) colnames(escore_CL) = unique(Cluster_parm$ItemID)
if ("mprob" %in% what) colnames(mprobs_CL) = Cluster_parm$AssertionID
}
# Outputing Results
prob = list(probs_SA_3pl=probs_SA$probs_SA_3pl, probs_SA_gpc=probs_SA$probs_SA_gpc, probs_CL = probs_CL)
iteminfo = as.data.frame(cbind(theta, info_SA, info_CL))
escore = as.data.frame(cbind(theta, escore_SA, escore_CL))
mprob = list(probs_SA_3pl=mprobs_SA$probs_SA_3pl, probs_SA_gpc=mprobs_SA$probs_SA_gpc, mprobs_CL = mprobs_CL)
return_val = list()
if ("prob" %in% what) return_val$prob = prob
if ("escore" %in% what) return_val$escore = escore
if ("iteminfo" %in% what) return_val$iteminfo = iteminfo
if ("mprob" %in% what) return_val$mprob = mprob
return_val
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.