library(mirt)
library(TestGardener)
library(foreach)
library(doParallel)
# U is data (1-max for each item)
# folds are number of folds for CV
# R are number of iterations
# mirtModel is mirt model type
# scoreOpt is score option list for TestGardener
# cyc are cycles for TestGardener
# noBasis are number of basis functions for TestGardener
# noBins are number of bins functions for TestGardener
compareModelProbsSim <- function(U, folds, mirtModel="gpcm", thetaMethod="EAP",
scoreOpt, cyc=10, noBasis, noBin) {
# randomize folds
foldIds <- sample(rep(1:folds, length.out=nrow(U)))
itemCorrectMirt <- matrix(0, nrow = folds, ncol = ncol(U))
itemCorrectTG <- matrix(0, nrow = folds, ncol = ncol(U))
loglmirt <- vector("numeric", length = nrow(U))
loglTG <- vector("numeric", length = nrow(U))
res <- list()
for (k in 1:folds) {
u <- U[foldIds!=k, ] # fit with all except for given fold
testu <- U[foldIds==k, ]
# Fit models on u
TGMod <- fitTG(u, cyc, scoreOpt, noBasis, noBin)
mirtMod <- mirt(data = data.frame(u-1), model=1, itemtype = mirtModel)
# predict thetas of testu (foldIds==k)
mirtThetas <- fscores(mirtMod, method = thetaMethod,
response.pattern = testu-1)[, "F1"]
TGThetas <- estimateThetasTG(testu, TGMod$parList[[cyc]]$WfdList, scoreOpt)
TGWfdList <- TGMod$parList[[cyc]]$WfdList
# get prob. sum and likelihood for data in fold
mirtProbs <- matrix(0, nrow = nrow(testu), ncol = ncol(testu))
TGProbs <- matrix(0, nrow = nrow(testu), ncol = ncol(testu))
for (item in 1:ncol(U)) {
extr <- extract.item(mirtMod, item)
mirtItemProbs <- probtrace(extr, mirtThetas)
for (person in 1:nrow(testu)) {
# For GPCM we need to fix Nan for Inf or -Inf
if (mirtModel=="gpcm" && mirtThetas[person] == Inf) {
mirtItemProbs[person, ] <- c(rep(0, length(mirtItemProbs[person, ])-1), 1)
}
if (mirtModel=="gpcm" && mirtThetas[person] == -Inf) {
mirtItemProbs[person, ] <- c(1, rep(0, length(mirtItemProbs[person, ])-1))
}
correctScore <- testu[person, item]
mirtProbs[person, item] <- mirtItemProbs[person, correctScore]
TGProbs[person, item] <- approx(seq(0, 100, len = 101),
TGWfdList[[item]]$Pmatfine[, correctScore],
TGThetas[person],
method = "linear")$y
}
}
loglmirt <- sum(log(mirtProbs))
loglTG <- sum(log(TGProbs))
# store mean percentage for each item from each person (in given fold)
mirtMeans <- colMeans(mirtProbs)
TGMeans <- colMeans(TGProbs)
# weight by number of prediction out of total (smaller folds are weighted less)
res[[length(res)+1]] <- list(mirtMeans*nrow(testu)/nrow(U), TGMeans*nrow(testu)/nrow(U), loglmirt, loglTG)
print(paste("progress: ", k/folds)) # print progress every 10th iteration
}
itemCorrectMirt <- vector("numeric", length = ncol(U))
itemCorrectTG <- vector("numeric", length = ncol(U))
lmirt <- vector("numeric")
lTG <- vector("numeric")
for (fold in 1:length(res)) {
itemCorrectMirt <- itemCorrectMirt + res[[fold]][[1]]
itemCorrectTG <- itemCorrectTG + res[[fold]][[2]]
lmirt <- c(lmirt, res[[fold]][[3]])
lTG <- c(lTG, res[[fold]][[4]])
}
return(list(mirt = itemCorrectMirt, TG = itemCorrectTG, Lmirt = lmirt, LTG = lTG))
}
# U is data (1-max for each item)
# folds are number of folds for CV
# R are number of iterations
# mirtModel is mirt model type
# scoreOpt is score option list for TestGardener
# cyc are cycles for TestGardener
# noBasis are number of basis functions for TestGardener
# noBins are number of bins functions for TestGardener
compareModelProbsSimParallel <- function(U, folds, mirtModel="gpcm", thetaMethod="EAP",
scoreOpt, cyc=10, noBasis, noBin) {
# randomize folds
foldIds <- sample(rep(1:folds, length.out=nrow(U)))
cl <- makeCluster(detectCores(logical = F))
registerDoParallel(cl)
res <- foreach(k = 1:folds, .packages = c('TestGardener', 'mirt'),
.export = c('fitTG', 'estimateThetasTG', 'getPercRank'),
.verbose = T) %dopar% {
u <- U[foldIds!=k, ] # fit with all except for given fold
testu <- U[foldIds==k, ]
# Fit models on u
TGMod <- fitTG(u, cyc, scoreOpt, noBasis, noBin)
mirtMod <- mirt(data = data.frame(u-1), model=1, itemtype = mirtModel)
# predict thetas of testu (foldIds==k)
mirtThetas <- fscores(mirtMod, method = thetaMethod,
response.pattern = testu-1)[, "F1"]
TGThetas <- estimateThetasTG(testu, TGMod$parList[[cyc]]$WfdList, scoreOpt)
TGWfdList <- TGMod$parList[[cyc]]$WfdList
# get prob. sum and likelihood for data in fold
mirtProbs <- matrix(0, nrow = nrow(testu), ncol = ncol(testu))
TGProbs <- matrix(0, nrow = nrow(testu), ncol = ncol(testu))
for (item in 1:ncol(U)) {
extr <- extract.item(mirtMod, item)
mirtItemProbs <- probtrace(extr, mirtThetas)
for (person in 1:nrow(testu)) {
# For GPCM we need to fix Nan for Inf or -Inf
if (mirtModel=="gpcm" && mirtThetas[person] == Inf) {
mirtItemProbs[person, ] <- c(rep(0, length(mirtItemProbs[person, ])-1), 1)
}
if (mirtModel=="gpcm" && mirtThetas[person] == -Inf) {
mirtItemProbs[person, ] <- c(1, rep(0, length(mirtItemProbs[person, ])-1))
}
correctScore <- testu[person, item]
mirtProbs[person, item] <- mirtItemProbs[person, correctScore]
TGProbs[person, item] <- approx(seq(0, 100, len = 101),
TGWfdList[[item]]$Pmatfine[, correctScore],
TGThetas[person],
method = "linear")$y
}
}
loglmirt <- sum(log(mirtProbs))
loglTG <- sum(log(TGProbs))
# store mean percentage for each item from each person (in given fold)
mirtMeans <- colMeans(mirtProbs)
TGMeans <- colMeans(TGProbs)
# weight by number of prediction out of total (smaller folds are weighted less)
list(mirtMeans*nrow(testu)/nrow(U), TGMeans*nrow(testu)/nrow(U), loglmirt, loglTG)
}
stopCluster(cl)
itemCorrectMirt <- vector("numeric", length = ncol(U))
itemCorrectTG <- vector("numeric", length = ncol(U))
lmirt <- vector("numeric")
lTG <- vector("numeric")
for (fold in 1:length(res)) {
itemCorrectMirt <- itemCorrectMirt + res[[fold]][[1]]
itemCorrectTG <- itemCorrectTG + res[[fold]][[2]]
lmirt <- c(lmirt, res[[fold]][[3]])
lTG <- c(lTG, res[[fold]][[4]])
}
return(list(mirt = itemCorrectMirt, TG = itemCorrectTG, Lmirt = lmirt, LTG = lTG))
}
fitTG <- function(U, cyc, scoreOpt, noBasis, noBin) {
optList <- list(itemLab=NULL, optLab=NULL, optScr=scoreOpt)
dataList <- make.dataList(U, NULL, optList, NumBasis = noBasis, nbin = noBin)
initThetas <- dataList$percntrnk
thetaQnt <- dataList$thetaQnt
analyzeResult <- Analyze(initThetas, thetaQnt,
dataList, ncycle=cyc,
itdisp=FALSE)
analyzeResult
}
estimateThetasTG <- function(U, wfdList, optscr) {
optList <- list(itemLab=NULL, optLab=NULL, optScr=optscr)
initThetas <- getPercRank(U, optList)
# U has to be in datalist for thetafun
dataList <- list(U = U)
temp <- thetafun(initThetas,
WfdList = wfdList,
dataList = dataList)$theta_out
}
# Same as make.dataList from testGardener without jittering and other stuff
getPercRank <- function(U, optList) {
N <- nrow(U)
n <- ncol(U)
noption <- matrix(0, n, 1)
scrvec <- matrix(0, N, 1)
itmvec <- matrix(0, n, 1)
for (i in 1:n) {
for (j in 1:N) {
scoreij <- optList$optScr[[i]][U[j, i]]
if (length(scoreij) > 0) {
if (is.na(scoreij))
print(paste("is.na score:", j))
if (is.null(scoreij))
print(paste("is.null score:", j))
}
else {
print(paste("score of length 0:", j))
}
scrvec[j] <- scrvec[j] + scoreij
itmvec[i] <- itmvec[i] + scoreij
}
}
scrjit <- scrvec
scrrnk <- matrix(0, N, 1)
for (j in 1:N) {
scrrnk[j] <- sum(scrjit <= scrjit[j])
}
percntrnk <- 100 * scrrnk/N
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.