#' Item bank generation
#'
#' This function can be used to generate an item bank.
#' The user can provide a Q-matrix or create one defining a set of arguments.
#' Item quality is sampled from a uniform distribution with mean = \emph{mean.IQ} and range = \emph{range.IQ}. Alternatively, it is possible to provide a matrix with the guessing and slip parameters (\emph{gs.param}) or a list with the success probabilities of each latent group (\emph{catprob.parm}).
#' Item parameters are generated so that the monotonicity constraint is satisfied.
#'
#' @param Q Numeric matrix of length \emph{J} number of items x \emph{K} number of atributes. Q-matrix
#' @param gen.Q A list of arguments to generate a Q-matrix if \code{Q} is not provided. \code{J} = number of items (scalar numeric). \code{K} = number of attributes (scalar numeric). \code{propK.J} = numeric vector summing up to 1 that determines the proportion of 1-attribute, 2-attribute, ..., items. The length of \code{propK.J} determines the maximum number of attributes considered for an item (see \code{Examples} below). \code{nI} = Scalar numeric that sets the minimum number of identity matrices to be included in the Q-matrix. \code{minJ.K} = numeric vector of length \emph{K} that sets the minimum number of items measuring each attribute. \code{max.Kcor} = scalar numeric that sets the maximum positive correlation allowed between two attributes
#' @param mean.IQ Item discrimination (mean for the uniform distribution). \emph{mean.IQ} = \emph{P}(\strong{1}) - \emph{P}(\strong{0}) (Sorrel et al., 2017; Najera et al., in press). Must be a scalar numeric between 0 and 1
#' @param range.IQ Item discrimination (range for the uniform distribution). Must be a scalar numeric between 0 and 1
#' @param gs.parm A matrix or data frame for guessing and slip parameters. The number of columns must be 2, where the first column represents the guessing parameters (or \emph{P}(\strong{0})), and the second column represents slip parameters (or 1-\emph{P}(\strong{1}))
#' @param catprob.parm A list of success probabilities of each latent group for each non-zero category of each item. This argument requires to specify a Q-matrix in \code{Q}
#' @param model A character vector of length \emph{J} with one model for each item, or a single value to be used for all items. The possible options include \code{"DINA"}, \code{"DINO"}, \code{"ACDM"}, and \code{"GDINA"}. One-attribute items will be coded in the output as \code{"GDINA"}
#' @param min.param Scalar numeric. Minimum value for the delta parameter of the principal effects of each attribute. Only usable if \code{model} = \code{"ACDM"} or \code{model} = \code{"GDINA"}
#' @param seed Scalar numeric. A scalar to use with \code{set.seed}
#'
#' @return \code{gen.itembank} returns an object of class \code{gen.itembank}.
#' \describe{
#' \item{simQ}{Generated Q-matrix (only if \code{gen.Q} arguments have been used)}
#' \item{simcatprob.parm}{A list of success probabilities for each latent group in each item}
#' \item{simdelta.parm}{A list of delta parameters for each item}
#' \item{check}{A list that contains the mean.IQ and range.IQ for the item bank so that users can check whether these values match the expected results}
#' \item{specifications}{A list that contains all the specifications}
#' }
#'
#' @references
#'
#' Najera, P., Sorrel, M. A., de la Torre, J., & Abad, F. J. (2020). Improving robustness in Q-matrix validation using an iterative and dynamic procedure. \emph{Applied Psychological Measurement, 44}, 431-446.
#'
#' Sorrel, M. A., Abad, F. J., Olea, J., de la Torre, J., & Barrada, J. R. (2017). Inferential item-fit evaluation in cognitive diagnosis modeling. \emph{Applied Psychological Measurement, 41}, 614-631.
#'
#' @examples
#'####################################
#'# Example 1. #
#'# Generate item bank providing a #
#'# Q-matrix using the G-DINA model #
#'####################################
#'
#' Q <- sim180GDINA$simQ
#' bank <- gen.itembank(Q = Q, mean.IQ = .70, range.IQ = .20, model = "GDINA")
#'
#'####################################
#'# Example 2. #
#'# Generate item bank providing a #
#'# Q-matrix with gs.parm #
#'####################################
#'
#' Q <- sim180GDINA$simQ
#' J <- nrow(Q)
#' gs <- data.frame(g = runif(J, 0.2, 0.4), s = runif(J, 0, 0.2))
#' bank <- gen.itembank(Q = Q, gs.parm = gs, model = "GDINA", min.param = 0.05)
#'
#'####################################
#'# Example 3. #
#'# Generate item bank providing a #
#'# Q-matrix with catprob.parm #
#'####################################
#'
#' Q <- sim180GDINA$simQ[c(1:5, 73:77, 127:131),]
#' catparm.list <- list(J1 = c(0.2, 0.8),
#' J2 = c(0.1, 0.7),
#' J3 = c(0.2, 0.9),
#' J4 = c(0.3, 0.9),
#' J5 = c(0.3, 0.8),
#' J6 = c(0.2, 0.4, 0.5, 0.8),
#' J7 = c(0.1, 0.7, 0.8, 0.9),
#' J8 = c(0.2, 0.3, 0.3, 0.7),
#' J9 = c(0.2, 0.4, 0.4, 0.6),
#' J10 = c(0.3, 0.5, 0.6, 0.9),
#' J11 = c(0.1, 0.3, 0.3, 0.5, 0.4, 0.5, 0.7, 0.8),
#' J12 = c(0.2, 0.6, 0.7, 0.6, 0.7, 0.8, 0.8, 0.9),
#' J13 = c(0.2, 0.6, 0.2, 0.3, 0.6, 0.7, 0.4, 0.9),
#' J14 = c(0.3, 0.4, 0.3, 0.5, 0.5, 0.6, 0.7, 0.9),
#' J15 = c(0.1, 0.1, 0.2, 0.1, 0.2, 0.3, 0.2, 0.8))
#' bank <- gen.itembank(Q = Q, catprob.parm = catparm.list)
#'
#'####################################
#'# Example 4. #
#'# Generate item bank providing a #
#'# Q-matrix using multiple models #
#'####################################
#'
#' Q <- sim180GDINA$simQ
#' K <- ncol(Q)
#' model <- sample(c("DINA", "DINO", "ACDM"), size = nrow(Q), replace = TRUE)
#' bank <- gen.itembank(Q = Q, mean.IQ = .70, range.IQ = .20, model = model)
#'
#'####################################
#'# Example 5. #
#'# Generate item bank without #
#'# providing a Q-matrix (using #
#'# gen.Q arguments) #
#'####################################
#'
#' bank <- gen.itembank(gen.Q = list(J = 150, K = 5, propK.J = c(0.4, 0.3, 0.2, 0.1),
#' nI = 3, minJ.K = 30, max.Kcor = 1),
#' mean.IQ = .80, range.IQ = .10, min.param = 0.1)
#'
#' @export
#'
gen.itembank <- function(Q = NULL,
gen.Q = list(J = NULL, K = NULL, propK.J = NULL, nI = 1, minJ.K = 1, max.Kcor = 1),
mean.IQ = NULL, range.IQ = NULL, gs.parm = NULL, catprob.parm = NULL,
model = "GDINA",
min.param = 0,
seed = NULL)
{
#----------------------------
# Warning end error messages
#----------------------------
if(is.null(Q) & any(sapply(gen.Q, is.null))){stop("Q argument or all gen.Q arguments required.")}
if(!is.null(Q) & all(!sapply(gen.Q, is.null))){warning("If Q argument is specified, gen.Q arguments are ignored.")}
if(!is.null(mean.IQ) & is.null(range.IQ)){stop("Either both or none mean.IQ and range.IQ must be specified.")}
if(is.null(mean.IQ) & !is.null(range.IQ)){stop("Either both or none mean.IQ and range.IQ must be specified.")}
if(!is.null(catprob.parm) & is.null(Q)){stop("Q must be specified if catprob.parm is specified.")}
if(!is.null(catprob.parm)) {if(!all(2^rowSums(Q) == sapply(catprob.parm, length))){stop("Q and catprob.parm do not coincide in the number of attributes measured by, at least, one item.")}}
if(is.null(catprob.parm) & is.null(gs.parm) & is.null(mean.IQ)){stop("Either mean.IQ and range.IQ, catprob.parm, or gs.parm must be specified.")}
if(!is.null(catprob.parm) & (!is.null(gs.parm) | !is.null(mean.IQ))){
warning("More than one item parameter argument was specified. catprob.parm will be used.")
gs.parm <- mean.IQ <- range.IQ <- NULL
}
if(!is.null(gs.parm) & !is.null(mean.IQ)){
warning("More than one item parameter argument was specified. gs.parm will be used.")
mean.IQ <- range.IQ <- NULL
}
if(!is.null(catprob.parm) & min.param > 0){warning("min.param is ignored if catprob.parm is provided.")}
if(!is.null(mean.IQ)){if(mean.IQ > 1 | mean.IQ < 0){stop("mean.IQ must be a value between 0 and 1.")}}
if(!is.null(range.IQ)){if(range.IQ > 1 | range.IQ < 0){stop("range.IQ must be a value between 0 and 1.")}}
if(is.null(Q)){
if(is.null(gen.Q$J)){stop("gen.Q$J argument required.")}
if(is.null(gen.Q$K)){stop("gen.Q$K argument required.")}
if(is.null(gen.Q$propK.J)){stop("gen.Q$propK.J argument required.")}
if(is.null(gen.Q$nI)){gen.Q$nI <- 1}
if(is.null(gen.Q$minJ.K)){gen.Q$minJ.K <- 1}
if(is.null(gen.Q$max.Kcor)){gen.Q$max.Kcor <- 1}
}
#----------------
# Gather objects
#----------------
arg.range.IQ <- range.IQ
if(!is.null(mean.IQ)){
range.IQ <- range.IQ/2
}
arg.gs.parm <- gs.parm
arg.catprob.parm <- catprob.parm
if(!is.null(seed)){set.seed(seed)}
if(is.null(seed)){seed <- sample(1:1000000, size = 1)}
if(!is.null(Q)){
sim.Q <- NULL
arg.Q <- Q
J <- nrow(Q)
K <- ncol(Q)
arg.model <- model
if(length(model) == 1){model <- rep(model, J)}
} else {
arg.Q <- NULL
J <- gen.Q$J
K <- gen.Q$K
sim.Q <- Q <- genQ(J, K, gen.Q$nI, gen.Q$propK.J, T, gen.Q$minJ.K, gen.Q$max.Kcor, seed)$Q
arg.model <- model
if(length(model) == 1){model <- rep(model, J)}
}
if(!is.null(mean.IQ)){
min.P0 <- round(((1 - mean.IQ) / 2) - (range.IQ / 2), 10)
max.P0 <- round(((1 - mean.IQ) / 2) + (range.IQ / 2), 10)
min.P1 <- round(((1 - ((1 - mean.IQ) / 2))) - (range.IQ / 2), 10)
max.P1 <- round(((1 - ((1 - mean.IQ) / 2))) + (range.IQ / 2), 10)
if(min.P0 < 0){
warning("mean.IQ and range.IQ may give probabilities below 0 for P0. Probabilities coherced to be 0 at least.")
min.P0 <- 0
}
if(max.P1 > 1){
warning("mean.IQ and range.IQ may give probabilities above 1 for P1. Probabilities coherced to be 1 at most.")
max.P1 <- 1
}
if(max.P0 > min.P1){
warning("mean.IQ and range.IQ may give to P0 higher probabilities than to P1. Probabilities coherced to be always higher for P1.")
diff <- max.P0 - min.P1
max.P0 <- max.P0 - diff / 2
min.P1 <- min.P1 + diff / 2
}
P <- cbind(P0 = runif(J, min.P0, max.P0), P1 = runif(J, min.P1, max.P1))
} else if(!is.null(gs.parm)){
P <- cbind(gs.parm[,1], 1 - gs.parm[,2])
colnames(P) <- c("P0", "P1")
} else if(!is.null(catprob.parm)){
P <- NULL
}
#--------------------------
# Generate item parameters
#--------------------------
if(!is.null(P)){
catprob.parm <- delta.parm <- list()
for(j in 1:J){
K.j <- sum(Q[j,])
lg.j <- 2^sum(Q[j,])
param <- switch(model[j],
DINA = list(catprob.parm = c(rep(P[j,"P0"], lg.j - 1), P[j,"P1"]),
delta.parm = c(P[j,"P0"], P[j,"P1"] - P[j,"P0"])),
DINO = list(catprob.parm = c(P[j,"P0"], rep(P[j,"P1"], lg.j - 1)),
delta.parm = c(P[j,"P0"], P[j,"P1"] - P[j,"P0"])),
ACDM = gen.ACDMparam(P[j,"P0"], P[j,"P1"], K.j, min.param),
GDINA = gen.GDINAparam(P[j,"P0"], P[j,"P1"], K.j, min.param))
catprob.parm[[j]] <- param$catprob.parm
delta.parm[[j]] <- param$delta.parm
names(catprob.parm[[j]]) <- names(delta.parm[[j]]) <- NULL
}
} else {
delta.parm <- list()
for(j in 1:J){
K.j <- sum(Q[j,])
patt <- GDINA::attributepattern(K.j)
lg.j <- 2^sum(Q[j,])
delta <- catprob.parm[[j]][1]
for(lg in 2:lg.j){
att.diff <- t(apply(patt[-lg,,drop = FALSE], 1, function(x) patt[lg,,drop = FALSE] - x))
nested <- which(apply(att.diff, 1, function(x) all(x >= 0) & any(x == 1)))
delta <- c(delta, catprob.parm[[j]][lg] - sum(delta[nested]))
}
delta.parm[[j]] <- delta
}
names(catprob.parm[[j]]) <- names(delta.parm[[j]]) <- NULL
}
check <- list()
if(!is.null(mean.IQ)){
check[["mean.IQ.bank"]] <- mean(P[,2] - P[,1])
check[["range.IQ.bank"]] <- max(P[,2]-P[,1])-min(P[,2]-P[,1])
}
if(is.null(arg.Q)){
check[["propK.J"]] <- table(rowSums(sim.Q))/nrow(sim.Q)
tmp <- cor(sim.Q)
diag(tmp) <- NA
check[["max.Kcor"]] <- max(tmp, na.rm = TRUE)
}
#----------------
# Return results
#----------------
res <- list()
res$simQ <- sim.Q
res$simcatprob.parm <- catprob.parm
res$simdelta.parm <- delta.parm
res$check <- check
model[which(rowSums(Q) == 1)] <- "GDINA"
res$specifications <- list(Q = arg.Q, gen.Q = gen.Q, mean.IQ = mean.IQ, range.IQ = arg.range.IQ, gs.parm = arg.gs.parm,
catprob.parm = arg.catprob.parm, model = model, min.param = min.param, seed = seed)
class(res) <- "gen.itembank"
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.