R/compareModelProbsSim.R

Defines functions getPercRank estimateThetasTG fitTG compareModelProbsSimParallel compareModelProbsSim

library(mirt)
library(TestGardener)
library(foreach)
library(doParallel)

# U is data (1-max for each item)
# folds are number of folds for CV
# R are number of iterations
# mirtModel is mirt model type
# scoreOpt is score option list for TestGardener
# cyc are cycles for TestGardener
# noBasis are number of basis functions for TestGardener
# noBins are number of bins functions for TestGardener
compareModelProbsSim <- function(U, folds, mirtModel="gpcm", thetaMethod="EAP",
                                 scoreOpt, cyc=10, noBasis, noBin) {
    # randomize folds
    foldIds <- sample(rep(1:folds, length.out=nrow(U)))

    itemCorrectMirt <- matrix(0, nrow = folds, ncol = ncol(U))
    itemCorrectTG <- matrix(0, nrow = folds, ncol = ncol(U))
    loglmirt <- vector("numeric", length = nrow(U))
    loglTG <- vector("numeric", length = nrow(U))
    res <- list()
    for (k in 1:folds) {
        u <- U[foldIds!=k, ] # fit with all except for given fold
        testu <- U[foldIds==k, ]

        # Fit models on u
        TGMod <- fitTG(u, cyc, scoreOpt, noBasis, noBin)
        mirtMod <- mirt(data = data.frame(u-1), model=1, itemtype = mirtModel)

        # predict thetas of testu (foldIds==k)
        mirtThetas <- fscores(mirtMod, method = thetaMethod,
                              response.pattern = testu-1)[, "F1"]
        TGThetas <- estimateThetasTG(testu, TGMod$parList[[cyc]]$WfdList, scoreOpt)

        TGWfdList <- TGMod$parList[[cyc]]$WfdList

        # get prob. sum and likelihood for data in fold
        mirtProbs <- matrix(0, nrow = nrow(testu), ncol = ncol(testu))
        TGProbs <- matrix(0, nrow = nrow(testu), ncol = ncol(testu))
        for (item in 1:ncol(U)) {
            extr <- extract.item(mirtMod, item)
            mirtItemProbs <- probtrace(extr, mirtThetas)
            for (person in 1:nrow(testu)) {
                # For GPCM we need to fix Nan for Inf or -Inf
                if (mirtModel=="gpcm" && mirtThetas[person] == Inf) {
                    mirtItemProbs[person, ] <- c(rep(0, length(mirtItemProbs[person, ])-1), 1)
                }
                if (mirtModel=="gpcm" && mirtThetas[person] == -Inf) {
                    mirtItemProbs[person, ] <- c(1, rep(0, length(mirtItemProbs[person, ])-1))
                }

                correctScore <- testu[person, item]
                mirtProbs[person, item] <- mirtItemProbs[person, correctScore]
                TGProbs[person, item] <- approx(seq(0, 100, len = 101),
                                                TGWfdList[[item]]$Pmatfine[, correctScore],
                                                TGThetas[person],
                                                method = "linear")$y
            }
        }
        loglmirt <- sum(log(mirtProbs))
        loglTG <- sum(log(TGProbs))

        # store mean percentage for each item from each person (in given fold)
        mirtMeans <- colMeans(mirtProbs)
        TGMeans <- colMeans(TGProbs)
        # weight by number of prediction out of total (smaller folds are weighted less)
        res[[length(res)+1]] <- list(mirtMeans*nrow(testu)/nrow(U), TGMeans*nrow(testu)/nrow(U), loglmirt, loglTG)

        print(paste("progress: ", k/folds)) # print progress every 10th iteration
    }
    itemCorrectMirt <- vector("numeric", length = ncol(U))
    itemCorrectTG <- vector("numeric", length = ncol(U))
    lmirt <- vector("numeric")
    lTG <- vector("numeric")
    for (fold in 1:length(res)) {
        itemCorrectMirt <- itemCorrectMirt + res[[fold]][[1]]
        itemCorrectTG <- itemCorrectTG + res[[fold]][[2]]
        lmirt <- c(lmirt, res[[fold]][[3]])
        lTG <- c(lTG, res[[fold]][[4]])
    }

    return(list(mirt = itemCorrectMirt, TG = itemCorrectTG, Lmirt = lmirt, LTG = lTG))
}

# U is data (1-max for each item)
# folds are number of folds for CV
# R are number of iterations
# mirtModel is mirt model type
# scoreOpt is score option list for TestGardener
# cyc are cycles for TestGardener
# noBasis are number of basis functions for TestGardener
# noBins are number of bins functions for TestGardener
compareModelProbsSimParallel <- function(U, folds, mirtModel="gpcm", thetaMethod="EAP",
                                 scoreOpt, cyc=10, noBasis, noBin) {
    # randomize folds
    foldIds <- sample(rep(1:folds, length.out=nrow(U)))

    cl <- makeCluster(detectCores(logical = F))
    registerDoParallel(cl)
    res <- foreach(k = 1:folds, .packages = c('TestGardener', 'mirt'),
                   .export = c('fitTG', 'estimateThetasTG', 'getPercRank'),
                   .verbose = T) %dopar% {
        u <- U[foldIds!=k, ] # fit with all except for given fold
        testu <- U[foldIds==k, ]
        # Fit models on u
        TGMod <- fitTG(u, cyc, scoreOpt, noBasis, noBin)
        mirtMod <- mirt(data = data.frame(u-1), model=1, itemtype = mirtModel)

        # predict thetas of testu (foldIds==k)
        mirtThetas <- fscores(mirtMod, method = thetaMethod,
                              response.pattern = testu-1)[, "F1"]
        TGThetas <- estimateThetasTG(testu, TGMod$parList[[cyc]]$WfdList, scoreOpt)

        TGWfdList <- TGMod$parList[[cyc]]$WfdList

        # get prob. sum and likelihood for data in fold
        mirtProbs <- matrix(0, nrow = nrow(testu), ncol = ncol(testu))
        TGProbs <- matrix(0, nrow = nrow(testu), ncol = ncol(testu))
        for (item in 1:ncol(U)) {
            extr <- extract.item(mirtMod, item)
            mirtItemProbs <- probtrace(extr, mirtThetas)
            for (person in 1:nrow(testu)) {
                # For GPCM we need to fix Nan for Inf or -Inf
                if (mirtModel=="gpcm" && mirtThetas[person] == Inf) {
                    mirtItemProbs[person, ] <- c(rep(0, length(mirtItemProbs[person, ])-1), 1)
                }
                if (mirtModel=="gpcm" && mirtThetas[person] == -Inf) {
                    mirtItemProbs[person, ] <- c(1, rep(0, length(mirtItemProbs[person, ])-1))
                }

                correctScore <- testu[person, item]
                mirtProbs[person, item] <- mirtItemProbs[person, correctScore]
                TGProbs[person, item] <- approx(seq(0, 100, len = 101),
                                                TGWfdList[[item]]$Pmatfine[, correctScore],
                                                TGThetas[person],
                                                method = "linear")$y
            }
        }
        loglmirt <- sum(log(mirtProbs))
        loglTG <- sum(log(TGProbs))

        # store mean percentage for each item from each person (in given fold)
        mirtMeans <- colMeans(mirtProbs)
        TGMeans <- colMeans(TGProbs)
        # weight by number of prediction out of total (smaller folds are weighted less)
        list(mirtMeans*nrow(testu)/nrow(U), TGMeans*nrow(testu)/nrow(U), loglmirt, loglTG)
    }
    stopCluster(cl)

    itemCorrectMirt <- vector("numeric", length = ncol(U))
    itemCorrectTG <- vector("numeric", length = ncol(U))
    lmirt <- vector("numeric")
    lTG <- vector("numeric")
    for (fold in 1:length(res)) {
        itemCorrectMirt <- itemCorrectMirt + res[[fold]][[1]]
        itemCorrectTG <- itemCorrectTG + res[[fold]][[2]]
        lmirt <- c(lmirt, res[[fold]][[3]])
        lTG <- c(lTG, res[[fold]][[4]])
    }

    return(list(mirt = itemCorrectMirt, TG = itemCorrectTG, Lmirt = lmirt, LTG = lTG))
}


fitTG <- function(U, cyc, scoreOpt, noBasis, noBin) {
    optList <- list(itemLab=NULL, optLab=NULL, optScr=scoreOpt)
    dataList <- make.dataList(U, NULL, optList, NumBasis = noBasis, nbin = noBin)
    initThetas  <- dataList$percntrnk
    thetaQnt  <- dataList$thetaQnt
    analyzeResult <- Analyze(initThetas, thetaQnt,
                             dataList, ncycle=cyc,
                             itdisp=FALSE)
    analyzeResult
}

estimateThetasTG <- function(U, wfdList, optscr) {
    optList <- list(itemLab=NULL, optLab=NULL, optScr=optscr)
    initThetas <- getPercRank(U, optList)
    # U has to be in datalist for thetafun
    dataList <- list(U = U)
    temp <- thetafun(initThetas,
                     WfdList = wfdList,
                     dataList = dataList)$theta_out
}

# Same as make.dataList from testGardener without jittering and other stuff
getPercRank <- function(U, optList) {
    N <- nrow(U)
    n <- ncol(U)

    noption <- matrix(0, n, 1)
    scrvec <- matrix(0, N, 1)
    itmvec <- matrix(0, n, 1)
    for (i in 1:n) {
        for (j in 1:N) {
            scoreij <- optList$optScr[[i]][U[j, i]]
            if (length(scoreij) > 0) {
                if (is.na(scoreij))
                    print(paste("is.na score:", j))
                if (is.null(scoreij))
                    print(paste("is.null score:", j))
            }
            else {
                print(paste("score of length 0:", j))
            }
            scrvec[j] <- scrvec[j] + scoreij
            itmvec[i] <- itmvec[i] + scoreij
        }
    }
    scrjit <- scrvec
    scrrnk <- matrix(0, N, 1)
    for (j in 1:N) {
        scrrnk[j] <- sum(scrjit <= scrjit[j])
    }
    percntrnk <- 100 * scrrnk/N
}
joakimwallmark/PolyOptimalIRT documentation built on Dec. 21, 2021, 1:16 a.m.