#' @title Generate simulated data
#' @description Generates simulated responses to a test given informations
#' about items values of latent traits.
#' @param theta a matrix (or data frame) of already generated latent traits'
#' values
#' @param items a list with test items' specification
#' @param performAssertions a logical value indicating whether function should
#' perform assertions of the other arguments (\code{TRUE} by default, may be
#' changed to \code{FALSE} for a little performance gain)
#' @param tryConvertToNumeric a logical value indicating whether function should
#' try to convert generated responses to numeric values (technically responses
#' are get from rownames of the items' scoring matrices that are character
#' vectors)
#' @return matrix with responses on items
#' \code{\link{generate_item_responses_sqn}},
#' \code{\link{generate_item_responses_sml}}
#' @export
generate_test_responses <- function(theta, items, performAssertions = TRUE,
tryConvertToNumeric = TRUE) {
if (performAssertions) {
if (is.data.frame(theta)) {
theta <- as.matrix(theta)
}
stopifnot("Argument `theta` must be a numeric matrix." =
is.matrix(theta),
"Argument `theta` must be a numeric matrix." =
is.numeric(theta),
"Argument `theta` must have at least one row." = nrow(theta) > 0,
"Argumentr `theta` can't contain NAs." = all(!is.na(theta)),
"Argument `items` must be a list of class 'itemList'" =
is.list(items),
"Each element of `items` must be of class 'rstylesItem'" =
all(sapply(items, inherits, what = "rstylesItem")),
"Argument `tryCovertToNumeric` must be TRUE or FALSE." =
is.logical(tryConvertToNumeric),
"Argument `tryCovertToNumeric` must be TRUE or FALSE." =
length(tryConvertToNumeric) == 1,
"Argument `tryCovertToNumeric` must be TRUE or FALSE." =
tryConvertToNumeric %in% c(FALSE, TRUE))
traitNames <- sapply(items, function(x) {return(names(x$slopes))})
traitNames <- unique(as.vector(traitNames))
traitNames <- sub("[[:digit:]]+$", "", traitNames)
if (!all(traitNames %in% colnames(theta))) {
stop("Items refer to latent traits that don't appear in the provided covariance matrix of latent traits: '",
paste(setdiff(traitNames, colnames(theta)), collapse = "', '"),
"'.")
}
}
responses = matrix(NA_integer_, nrow = nrow(theta), ncol = length(items),
dimnames = list(NULL, names(items)))
for (i in 1L:ncol(responses)) {
scoringMatrix <- items[[i]]$scoringMatrix
# dispatch for speed
if (!is.null(items[[i]]$scoringOnPreviousResponses) && i > 1L) {
for (j in which(!duplicated(responses[, 1L:(i - 1L), drop = FALSE]))) {
scoringMatrixTemp <-
cbind(do.call(items[[i]]$scoringOnPreviousResponses,
list(previousResponses = responses[j, 1L:(i - 1L)],
scoringMatrix = scoringMatrix)),
scoringMatrix)
# line below is an order of magnitude faster than using apply
whichRows <-
colSums(t(responses[, 1L:(i - 1L), drop = FALSE]) ==
responses[j, 1L:(i - 1L)]) == (i - 1L)
if (items[[i]]$mode == 'irtree') {
responses[whichRows, i] <-
generate_item_responses_sqn(theta[whichRows, , drop = FALSE],
scoringMatrixTemp,
items[[i]]$slopes,
items[[i]]$intercepts,
items[[i]]$editResponse,
TRUE)
} else {#gpcm
responses[whichRows, i] <-
generate_item_responses_sml(theta[whichRows, , drop = FALSE],
scoringMatrixTemp,
items[[i]]$slopes,
items[[i]]$intercepts)
}
}
} else {# if there is no scoring on previous responses, there is no need for looping through combinations of previuos responses
if (items[[i]]$mode == 'irtree') {
responses[, i] <- generate_item_responses_sqn(theta, scoringMatrix,
items[[i]]$slopes,
items[[i]]$intercepts,
items[[i]]$editResponse)
} else {#gpcm
responses[, i] <- generate_item_responses_sml(theta, scoringMatrix,
items[[i]]$slopes,
items[[i]]$intercepts)
}
}
}
if (tryConvertToNumeric) {
uniqueResponses <- setdiff(unique(responses), NA)
if (!any(is.na(suppressWarnings(as.numeric(uniqueResponses))))) {
responses <- matrix(as.numeric(responses), nrow = nrow(responses),
dimnames = list(rownames(responses),
colnames(responses)))
}
}
return(responses)
}
#' @title Internals: simulating responding to item in the IRTree way
#' @description Function generates responses using IRTree approach, i.e.
#' 'sequentially`. It goes through consecutive columns of a scoring matrix,
#' calling \code{\link{generate_item_responeses_gpcm}} to get responses at
#' a given node of a tree and the recursively calls itself on subsets of
#' observations with a given response with reduced scoring matrix.
#' @section Limitations:
#' Because function internally relies on calling
#' \code{\link{generate_item_responeses_gpcm}}, no normal ogive models can be
#' used (this may be changed in the future versions).
#' @param theta a matrix of latent traits' values
#' @param scoringMatrix a matrix describing scoring patterns on each latent trait
#' @param slopes a vector of slope parameters for each trait
#' @param intercepts a vector of intercept parameters
#' @param editResponse an optional function returning scoring matrix that should
#' be used instead that provided by \code{scoringMatrix}; this should be function
#' accepting two arguments: \code{response} - generated response (by the model
#' described with the first column of the \code{scoringMatrix}) that is supposed
#' to be \emph{edited} and \code{scoringMatrix} - current scoring matrix (to be
#' replaced)
#' @param decidingOnPreviousResponse a logical value indicating whether first
#' column of provided scoring matrix describes making decision whether to
#' respond on the basis of responses to previous items or not (in this first
#' case \emph{negative} choice shouldn't reduce number of rows in a response
#' matrix)
#' @return vector of responses on item
#' @seealso \code{link{generate_test_responses}},
#' \code{\link{generate_item_responses_sml}},
#' \code{\link{generate_item_responeses_gpcm}}
generate_item_responses_sqn <- function(theta, scoringMatrix, slopes,
intercepts, editResponse = NULL,
decidingOnPreviousResponse = FALSE) {
slopeAtNode <- slopes[which(names(slopes) == colnames(scoringMatrix)[1L])]
interceptsAtNode <-
intercepts[c(1L, grep(paste0("^", names(slopeAtNode),
"_?[[:digit:]]+"), names(intercepts)))]
thetaAtNode <- theta[, sapply(colnames(theta),
function(x, y) {substr(y, 1, nchar(x)) == x},
y = names(slopeAtNode)),
drop = FALSE]
scoringMatrixAtNode <-
matrix(scoringMatrix[!duplicated(scoringMatrix[, 1L]), 1L], ncol = 1,
dimnames = list(NULL, colnames(scoringMatrix)[1L]))
scoringMatrixAtNode <-
scoringMatrixAtNode[order(scoringMatrixAtNode[, 1L]), , drop = FALSE]
rownames(scoringMatrixAtNode) <- scoringMatrixAtNode
responses <- generate_item_responeses_gpcm(
thetaAtNode,
scoringMatrixAtNode * slopeAtNode,
interceptsAtNode)
if (ncol(scoringMatrix) > 1L || is.function(editResponse)) {
responsesBeforeUpdate <- responses
for (r in unique(responses)) {
if (is.function(editResponse)) {
scoringMatrixNextNode <- do.call(editResponse,
list(response = r,
scoringMatrix = scoringMatrix))
} else if (decidingOnPreviousResponse && r == "0") {
scoringMatrixNextNode <- scoringMatrix[, -1L, drop = FALSE]
} else {
scoringMatrixNextNode <- scoringMatrix[scoringMatrix[, 1L] == r,
-1L, drop = FALSE]
}
# dropping columns describing other branches of a tree
scoringMatrixNextNode <-
scoringMatrixNextNode[, c(
!apply(is.na(scoringMatrixNextNode[, -ncol(scoringMatrixNextNode),
drop = FALSE]),
2L, all),
TRUE), drop = FALSE]
interceptsNextNode <-
intercepts[-grep(paste0("^", names(slopeAtNode),
"_?[[:digit:]]+"), names(intercepts))]
nPossibleResponsesNextNode <-
length(setdiff(unique(scoringMatrixNextNode[, 1L]), NA))
if (nPossibleResponsesNextNode > 1L) {
responses[responsesBeforeUpdate == r] <-
generate_item_responses_sqn(theta[responsesBeforeUpdate == r, ,
drop = FALSE],
scoringMatrixNextNode,
slopes[-1L], interceptsNextNode)
} else if (nPossibleResponsesNextNode == 1L) {
# there is no check whether this is vector of length one!
responses[responsesBeforeUpdate == r] <-
rownames(scoringMatrixNextNode)[!is.na(scoringMatrixNextNode[, 1L])]
} else {
# there is no check whether this is vector of length one!
responses[responsesBeforeUpdate == r] <-
rownames(scoringMatrix)[scoringMatrix[, 1L] == r]
}
}
} else {
responsesBeforeUpdate <- responses
for (i in unique(responses)) {
responses[responsesBeforeUpdate == i] <-
rownames(scoringMatrix)[as.character(scoringMatrix[, 1L]) == i]
}
}
return(responses)
}
#' @title Internals: simulating responding to item in the GPCM way
#' @description Function generates responses in the GPCM way with a whole
#' scoring matrix used at once. Only (G)PCM/NRM approach is suitable in such
#' a case, because with complicated scoring matrices there is no guarantee that
#' probabilities of responses are increasing along with order of responses
#' (rows) in a scoring matrix. Consequently, no normal ogive models can be used.
#' @param theta a matrix of latent traits' values
#' @param scoringMatrix a matrix describing scoring patterns on each latent trait
#' @param slopes a vector of slope parameters of each trait
#' @param intercepts a vector of intercept parameters
#' @return a vector of responses on item
#' @seealso \code{link{generate_test_responses}},
#' \code{\link{generate_item_responses_sqn}},
#' \code{\link{generate_item_responeses_gpcm}}
generate_item_responses_sml <- function(theta, scoringMatrix, slopes,
intercepts) {
theta <- theta[, sapply(colnames(scoringMatrix), match,
table = colnames(theta)), drop = FALSE]
slopes <- slopes[sapply(colnames(scoringMatrix), match,
table = names(slopes)), drop = FALSE]
responses <- generate_item_responeses_gpcm(
theta,
scoringMatrix * rep(slopes, each = nrow(scoringMatrix)),
intercepts)
return(responses)
}
#' @title Internals: calculation of (G)PCM probabilities and drawing responses
#' @description Computes probabilities of each response using (G)PCM and draws
#' responses. Be aware that for efficiency reasons it assumes that
#' \strong{\code{theta} and \code{weightsMatrix} have the same number and order
#' of columns} (unlike \code{\link{generate_item_responses_sqn}} and
#' \code{\link{generate_item_responses_sml}} that are used to match theirs
#' arguments \code{theta}, \code{scoringMatrix} and \code{slopes} before
#' passing them to \code{generate_item_responeses_gpcm}).
#' @param theta a matrix of latent traits' values
#' @param weightsMatrix a matrix of slope parameters (being
#' multiplication of a design matrix by discriminations of factors)
#' @param intercepts a vector of intercept parameters
#' @return a vector of responses on item
generate_item_responeses_gpcm <- function(theta, weightsMatrix, intercepts) {
probs <- matrix(NA_real_,
nrow = nrow(theta), ncol = nrow(weightsMatrix),
dimnames = list(NULL, rownames(weightsMatrix)))
for (i in 1L:nrow(weightsMatrix)) {
probs[, i] <-
exp(intercepts[i] + theta %*% t(weightsMatrix[i, , drop = FALSE]))
}
probs <- probs / rowSums(probs)
for (i in 1L:nrow(probs)) {
probs[i, ] <- cumsum(probs[i, ])
}
# contrary to common opinion loop (above) can be faster (than commented code below)
# probs <- t(apply(probs, 1, cumsum))
probs[, ncol(probs)] = stats::runif(nrow(probs), 0, 1)
responses <- vector(mode = "character", length = nrow(probs))
for (i in 1L:nrow(probs)) {
responses[i] <- colnames(probs)[probs[i, ] >= probs[i, ncol(probs)]][1L]
}
# contrary to common opinion loop (above) can be faster (than commented code below)
# responses <- apply(probs, 1,
# function(x) {return(
# names(x)[x >= x[length(x)]][1]
# )})
return(responses)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.