Nothing
#' Lord-Wingersky Recursion Formula
#'
#' @description This function computes the conditional distributions of number-correct (or observed) scores
#' given probabilities of category responses to items or given a set of theta values using Lord and
#' Wingersky recursion formula (1984).
#'
#' @param x A data frame containing the item metadata (e.g., item parameters, number of categories, models ...).
#' See \code{\link{irtfit}}, \code{\link{info}} or \code{\link{simdat}} for more details about the item metadata.
#' This data frame can be easily obtained using the function \code{\link{shape_df}}. If \code{prob = NULL}, this data frame is
#' used in the recursion formula. See below for details.
#' @param theta A vector of theta values where the conditional distribution of observed scores are computed.
#' The theta values are only required when a data frame is specified in the argument \code{x}.
#' @param prob A matrix containing the probability of answering each category of an item. Each row indicates an item and
#' each column represents each category of the item. When the number of categories differs between items, the empty cells
#' should be filled with zeros or NA values. If \code{x = NULL}, this probability matrix is used in the recursion Formula.
#' @param cats A numeric vector specifying the number of categories for each item. For example, a dichotomous
#' item has two categories. This information is only required when a probability matrix is specified in the argument
#' \code{prob}.
#' @param D A scaling factor in IRT models to make the logistic function as close as possible to the normal ogive function
#' (if set to 1.7). Default is 1.
#'
#' @details The Lord and Wingersky recursive algorithm is an efficient way of calculating the compound probabilities
#' of any number-correct scores on a test based on IRT models. This algorithm is particularly useful when computing
#' the IRT model-based observed score distribution for a test.
#'
#' To compute the conditional distributions of observed scores, either the item metadata set specified in \code{x} or
#' the probability matrix specified in \code{prob} can be used.
#'
#' @return When the \code{prob} argument is provided, this function returns a vector of the probabilities of obtaining every
#' observed score on a test. When the \code{x} argument is specified, the function returns a matrix of conditional probabilities
#' across all possible observed scores and theta values.
#'
#' @author Hwanggyu Lim \email{hglim83@@gmail.com}
#'
#' @references
#' Kolen, M. J. & Brennan, R. L. (2004) \emph{Test Equating, Scaling, and Linking} (2nd ed.). New York:
#' Springer.
#'
#' Lord, F. & Wingersky, M. (1984). Comparison of IRT true score and equipercentile observed score equatings.
#' \emph{Applied Psychological Measurement, 8}(4), 453-461.
#'
#' @examples
#' ## example 1: when a matrix of probabilities is used as a data set
#' ## this is an example from Kolen and Brennan (2004, p. 183)
#' # create a matrix of probabilities of getting correct and incorrect answers for three items
#' probs <- matrix(c(.74, .73, .82, .26, .27, .18), nrow = 3, ncol = 2, byrow = FALSE)
#'
#' # create a vector of score categories for the three items
#' cats <- c(2, 2, 2)
#'
#' # compute the conditional distributions of observed scores
#' lwrc(prob = probs, cats = cats)
#'
#' ## example 2: when a matrix of probabilities is used as a data set
#' ## with a mixed-format test
#' # category probabilities for a dichotomous item
#' p1 <- c(0.2, 0.8, 0, 0, 0)
#' # category probabilities for a dichotomous item
#' p2 <- c(0.4, 0.6, NA, NA, NA)
#' # category probabilities for a polytomous item with five categories
#' p3 <- c(0.1, 0.2, 0.2, 0.4, 0.1)
#' # category probabilities for a polytomous item with three categories
#' p4 <- c(0.5, 0.3, 0.2, NA, NA)
#'
#' # rbind the probability vectors
#' p <- rbind(p1, p2, p3, p4)
#'
#' # create a vector of score categories for the four items
#' cats <- c(2, 2, 5, 3)
#'
#' # compute the conditional distributions of observed scores
#' lwrc(prob = p, cats = cats)
#'
#' ## example 3: when a data frame for the item metadata of
#' ## a mixed-format test is used.
#' # import the "-prm.txt" output file from flexMIRT
#' flex_prm <- system.file("extdata", "flexmirt_sample-prm.txt", package = "irtQ")
#'
#' # read item parameters and transform them to item metadata
#' x <- bring.flexmirt(file = flex_prm, "par")$Group1$full_df
#'
#' # compute the conditional distributions of observed scores
#' lwrc(x = x, theta = seq(-4, 4, 0.2), D = 1)
#'
#' @export
lwrc <- function(x = NULL, theta, prob = NULL, cats, D = 1) {
if (is.null(x)) {
max.cat <- ncol(prob)
n.cats <- length(cats)
n.prob <- nrow(prob)
logic <- is.numeric(prob)
if (!logic) stop("All numbers in 'prob' should be numeric.", call. = FALSE)
if (n.prob != n.cats) {
stop(paste0(
"There are ", n.prob, " items in the probability matrix (or data.frame), ",
"whereas there are ", n.cats, " items in the category vector."
), call. = FALSE)
}
if (min(cats) < 2) {
stop("Minimum number of categories for each item is 2", call. = FALSE)
}
# Probabilities for each category at the 1st item
p <- as.double(prob[1, 1:cats[1]])
# Create a temporary vector to contain probabilities
tmp <- c()
# Possible observed score range for a first item
obs.range <- 0:(cats[1] - 1)
# proceed the further analysis when # of the items > 1
if (n.cats > 1) {
# Calculate probabilities to earn an observed score by accumulating over remaining items
for (j in 2:n.prob) { # should start from a 2nd item
# Probability to earn zero score. This is a special case
tmp[1] <- p[1] * prob[j, 1]
# The range of category scores for an added item
cat.range <- 0:(cats[j] - 1)
# Possible minimum and maximum observed scores when the item is added
# but, except zero and perfect score
obs_rg <- range(obs.range)
cat_rg <- range(cat.range)
min.obs <- obs_rg[1]
max.obs <- obs_rg[2]
min.cat <- cat_rg[1]
max.cat <- cat_rg[2]
min.s <- min.obs + min.cat + 1
max.s <- max.obs + max.cat - 1
# possible observed score range except zero and perfect score
poss.score <- min.s:max.s
length.score <- length(poss.score)
# score.mat <- matrix(poss.score, nrow=length(min.cat:max.cat), ncol=length.score, byrow=TRUE)
score.mat <- col(array(NA, c(cats[j], length.score)))
# Difference between the possible observed score and each category score if the added item
poss.diff <- score.mat - min.cat:max.cat
# The difference above should be greater than or equal to the minimum observed score
# where the item is added, and less than or equal to the maximum observed score where
# the item is added
cols <- poss.diff >= min.obs & poss.diff <= max.obs
# Final probability to earn the observed score
prob.score <- c()
for (k in 1:length.score) {
tmp.cols <- which(cols[, k])
tmp.diff <- poss.diff[tmp.cols, k]
prob.score[k] <- sum(p[(tmp.diff + 1)] * prob[j, tmp.cols])
}
tmp[1 + poss.score] <- prob.score
# Probability to earn perfect score. This is a special case.
tmp[max.s + 2] <- p[length(p)] * prob[j, cats[j]]
# Update probabilities
p <- tmp
# Update the range of possible observed scores
# obs.range <- (min.s-1):(max.s+1)
obs.range <- c((min.s - 1), poss.score, (max.s + 1))
# Reset the temporary vector
tmp <- c()
}
}
# return the results
names(p) <- paste0("score.", obs.range)
p
} else {
# confirm and correct all item metadata information
x <- confirm_df(x)
# count N of thetas
n.theta <- length(theta)
# prepare the probability matrices
prepdat <- prep4lw2(x = x, theta = theta, D = D)
# apply the recursion formula
p <- lwRecurive(
prob.cats = prepdat$prob.cat,
cats = prepdat$cats,
n.theta = n.theta
)
# return the results
p
}
}
### --------------------------------------------------------------------------------------------------------------------
# "lwRecursive" function
# Arg:
# prob.cats: (list) each element of a list includes a probability matrix (or data.frame) for an item.
# In each probability matrix, rows indicate theta values (or weights) and columns indicate
# score categories. Thus, each elements in the matrix represents the probability that a person
# who has a certain ability earns a certain score.
# cats: (vector) containes score categories for all items
# n.theta: (integer) number of thetas
#' @importFrom Rfast rowsums
lwRecurive <- function(prob.cats, cats, n.theta) {
# if(length(unique(sapply(prob.cats, nrow))) != 1L) {
# stop("At least, one probability matrix (or data.frame) has the difference number of rows across all marices in a list", call.=FALSE)
# }
# if(length(prob.cats) != length(cats)) {
# stop(paste0("There are ", nrow(prob.cats), " items in the probability matrix (or data.frame) at each element of a list, ",
# "whereas there are ", length(cats), " items in the category vector."), call.=FALSE)
# }
if (min(cats) < 2) {
stop("Minimum number of categories for each item is 2", call. = FALSE)
}
# Probabilities for each category at 1st item
p <- prob.cats[[1]]
# the number of theta values
# n.theta <- nrow(prob.cats[[1]])
# possible observed score range for a first item
obs.range <- 0:(cats[1] - 1)
# proceed the further analysis when # of the items > 1
if (length(cats) > 1) {
# create a temporary matrix to contain all probabilities
tScore.range <- 0:sum(cats - 1)
# tmp <- matrix(0, nrow=n.theta, ncol=length(tScore.range))
tmp <- array(0, c(n.theta, length(tScore.range)))
# Calculate probabilities to earn an observed score by accumulating over remaining items
for (j in 2:length(cats)) {
# Probability to earn zero score. This is a special case.
tmp[, 1] <- p[, 1] * prob.cats[[j]][, 1]
# The range of category scores for an added item
cat.range <- 0:(cats[j] - 1)
# Possible minimum and maximum observed scores when the item is added
# but, except zero and perfect score
obs_rg <- range(obs.range)
cat_rg <- range(cat.range)
min.obs <- obs_rg[1]
max.obs <- obs_rg[2]
min.cat <- cat_rg[1]
max.cat <- cat_rg[2]
min.s <- min.obs + min.cat + 1
max.s <- max.obs + max.cat - 1
# possible observed score range except zero and perfect score
poss.score <- min.s:max.s
length.score <- length(poss.score)
# score.mat <- matrix(poss.score, nrow=length(min.cat:max.cat), ncol=length.score, byrow=TRUE)
# score.mat <- matrix(poss.score, nrow=cats[j], ncol=length.score, byrow=TRUE)
score.mat <- col(array(NA, c(cats[j], length.score)))
# Difference between the possible observed score and each category score if the added item
# poss.diff <- score.mat - min.cat:max.cat
poss.diff <- score.mat - cat.range
# The difference above should be greater than or equal to the minimum observed score
# where the item is added, and less than or equal to the maximum observed score where
# the item is added
cols <- poss.diff >= min.obs & poss.diff <= max.obs
# Final probability to earn the observed score
prob.score <- array(0, c(n.theta, length.score))
for (k in 1:length.score) {
tmp.cols <- cols[, k]
tmp.diff <- poss.diff[tmp.cols, k]
prob.score[, k] <- Rfast::rowsums(p[, (tmp.diff + 1), drop = FALSE] * prob.cats[[j]][, tmp.cols, drop = FALSE])
}
tmp[, 1 + poss.score] <- prob.score
# cols2 <- c(cols)
# diff2 <- poss.diff[cols2]
# cols3 <- rep(c(cat.range + 1), length.score)[cols2]
# prob.score2 <- p[, (diff2 + 1), drop=FALSE] * prob.cats[[j]][, cols3, drop=FALSE]
# length.col <- Rfast::colsums(cols)
# end.col <- cumsum(length.col)
# start.col <- end.col - length.col + 1
# prob.score2 <-
# purrr::map2(.x=start.col, .y=end.col,
# .f=function(x, y) Rfast::rowsums(prob.score2[, x:y])) %>%
# prob.score2 <- do.call(what="cbind", prob.score2)
# prob.score2 <-
# mapply(FUN = function(x, y) {Rfast::rowsums(prob.score2[, x:y])}, x=start.col, y=end.col)
# Probability to earn perfect score. This is a special case.
tmp[, max.s + 2] <- p[, ncol(p)] * prob.cats[[j]][, cats[j]]
# Update the range of possible observed scores
# obs.range <- (min.s-1):(max.s+1)
obs.range <- c((min.s - 1), poss.score, (max.s + 1))
# Update probabilities
# p <- tmp[, 1:length(obs.range), drop=FALSE]
p <- tmp[, obs.range + 1, drop = FALSE]
}
}
# return the results
# colnames(p) <- paste0("score.", 0:sum(cats-1))
colnames(p) <- paste0("score.", obs.range)
rownames(p) <- paste0("theta.", 1:n.theta)
t(p)
}
# "prep4lw2" function
# This function is used only for "lwrc" function
prep4lw2 <- function(x, theta, D) {
# break down the item metadata into several components
elm_item <- breakdown(x)
# compute category probabilities for all items
prob.cats <- trace(elm_item, theta, D = D, tcc = FALSE)$prob.cats
# extract score categories for all items
cats <- elm_item$cats
# Return results
list(prob.cats = prob.cats, cats = cats)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.