# R/postpred.R In hplieninger/mpt2irt: Bringing Multinomial Processing Tree Models To Item Response Theory To Investigate Response Styles

#### Documented in global_ORposterior_expectedposterior_predictedpost_prob_irtreeppc_irtreeppc_resp_irtreeprint.ppcstatistic_item_corstatistic_item_scorestatistic_ORstatistic_Q3

if (getRversion() >= "2.15.1")  utils::globalVariables(c("."))

#' Calculate posterior predictive probabilities.
#'
#' Function takes an MCMC list of posterior samples and calculates the
#' model-predicted probabilities. This can either be done for the persons in the
#' sample or for out-of-sample predictions for new persons (\code{new_theta =
#' TRUE}).
#'
#' @param fit_sum List. Output from \code{\link{summarize_irtree_fit}} that
#'   contains \code{mcmc.objects}.
#' @param iter Numeric. Number of iterations to use, the maximum is the total
#'   number of retained iterations (via \code{\link{fit_irtree}}).
#' @param N Numeric. Number of persons for whom posterior predictives should be
#'   drawn. Should be equal to the number of persons in the sample if
#'   \code{new_theta = FALSE}.
#' @param new_theta Logical. Wheter to calculate the probabilities for the
#'   persons in the sample or for out-of-sample predictions for \code{N} 'new'
#'   persons (\code{new_theta = TRUE}).
#' @return Returns an array of probabilities of dimension \code{iter} x \code{N}
#'   x J x 5 (for J items with 5 response categories).
#' @inheritParams fit_irtree
#' @inheritParams runjags::combine.mcmc
# @importFrom magrittr %>%
#' @importFrom rlang .data
#' @import coda
#' @examples
#' \dontrun{
#' J <- 10
#' betas <- cbind(rnorm(J, .5), rnorm(J, .5), rnorm(J, 1.5), rnorm(J, 0))
#' dat <- generate_irtree_ext(N = 20, J = J, betas = betas, beta_ARS_extreme = .5)
#'
#' # fit model
#' res1 <- fit_irtree(dat$X, revItem = dat$revItem, M = 200)
#' res2 <- summarize_irtree_fit(res1)
#'
#' # posterior predictive probabilities
#' res3 <- post_prob_irtree(res2)
#' dim(res3)
#' }
#' @export
post_prob_irtree <- function(fit_sum = NULL,
mcmc.objects = NULL,
iter = 100,
N = NULL,
traitItem = NULL,
revItem = NULL,
fitModel = NULL,
new_theta = FALSE) {

checkmate::assert_list(fit_sum, all.missing = FALSE, min.len = 1,
null.ok = TRUE)
checkmate::assert_list(mcmc.objects, types = "numeric", any.missing = FALSE,
min.len = 1, null.ok = TRUE)
if (is.null(fit_sum) & is.null(mcmc.objects))
stop("At least one of 'fit_sum' and 'mcmc.objects' must be specified.")
if (is.null(mcmc.objects)) {
if ("mcmc" %in% names(fit_sum)) {
mcmc.objects <- fit_sum$mcmc } else { stop("'fit_sum' must contain an element 'mcmc'. Or 'mcmc.objects' ", "must be specified.") } } checkmate::assert_int(iter, lower = 1, upper = nrow(mcmc.objects[[1]])*length(mcmc.objects)) # checkmate::qassert(N, "X1[1,)") checkmate::assert_int(N, lower = 1, null.ok = !is.null(fit_sum$args$N)) checkmate::assert_integerish(revItem, lower = 0, upper = 1, any.missing = FALSE, min.len = 1, null.ok = !is.null(fit_sum$args$revItem)) checkmate::assert_integerish(traitItem, lower = 1, any.missing = FALSE, min.len = 1, null.ok = !is.null(fit_sum$args$traitItem)) checkmate::assert_character(fitModel, min.chars = 1, any.missing = FALSE, len = 1, null.ok = !is.null(fit_sum$args$fitModel)) checkmate::qassert(new_theta, "B1") # if (is.null(revItem)) revItem <- fit_sum$revItem
# if (is.null(traitItem)) traitItem <- fit_sum$traitItem # if (is.null(fitModel)) fitModel <- fit_sum$fitModel
# if (is.null(N)) N <- fit_sum$N if (is.null(revItem)) revItem <- fit_sum$args$revItem if (is.null(traitItem)) traitItem <- fit_sum$args$traitItem if (is.null(fitModel)) fitModel <- fit_sum$args$fitModel if (is.null(N)) N <- fit_sum$args$N J <- length(traitItem) M <- nrow(mcmc.objects[[1]]) mcmc_attr <- attr(mcmc.objects[[1]], "mcpar") mcmc.objects <- coda::as.mcmc.list( lapply(mcmc.objects, coda::mcmc, start = mcmc_attr[1], thin = 1)) thin <- floor(M/iter*length(mcmc.objects)) arsModel <- fitModel %in% c("ext", "ext2") dimen <- 1 + switch(fitModel, "ext" = 3, "ext2" = 3, "2012" = 2, "pcm" = 0, "steps" = 0, "shift" = 3) if (is.null(fit_sum$args$S)) { n.trait <- length(table(traitItem)) S <- dimen - 1 + n.trait } else { S <- fit_sum$args$S } # S_t = S: Number of theta dimensions # S_b1: Number of columns in beta matrix # S_b2: Number of beta dimensions S_b1 <- switch(fitModel, "2012" = 3, "ext" = 4, "ext2" = 5, "pcm" = 4, "steps" = 4, "shift" = 3, S) vars <- c("^Sigma", "^beta", "^theta") if (fitModel %in% c("ext")) { vars <- c(vars, "^beta_ARS_extreme") } fit_mcmc2 <- window(mcmc.objects, thin = thin) %>% runjags::combine.mcmc(., vars = vars) rm(mcmc.objects) reps <- seq_len(nrow(fit_mcmc2)) R <- length(reps) # beta <- runjags::combine.mcmc(fit_mcmc2, vars = "^beta", collapse.chains = F) %>% # tibble::as_tibble() %>% # dplyr::select(., dplyr::matches("beta\$")) %>% # dplyr::select(sapply(1:S_b1, . %>% seq(., by = S_b1, length = J)) %>% # as.vector) %>% # t %>% # matrix %>% # array(dim = c(J, S_b1, length(reps))) tmp1 <- runjags::combine.mcmc(fit_mcmc2, vars = "^beta", collapse.chains = F) %>% as.data.frame %>% dplyr::select(., dplyr::matches("beta\\[")) %>% {suppressMessages(reshape2::melt(.))} tmp2 <- regmatches(tmp1variable, regexpr("[0-9]+,[0-9]+", tmp1variable)) %>% strsplit(",") %>% unlist %>% as.numeric %>% matrix(ncol = 2, byrow = T) %>% data.frame(tmp1, .) tmp3 <- tmp2[order(tmp2X1, tmp2X2), ] beta <- apply( array( matrix(tmp3value, ncol = length(reps), byrow = T), dim = c(S_b1, J, length(reps))), c(1, 3), t) if (new_theta == TRUE) { message("New person parameters are sampled from covariance matrix Sigma.") Sigma <- runjags::combine.mcmc(fit_mcmc2, vars = "^Sigma", collapse.chains = F) theta <- array(NA, c(N, S, R)) for (rrr in reps) theta[, , rrr] <- MASS::mvrnorm(n = N, mu = rep(0, S), Sigma = matrix(Sigma[rrr, ], S, S)) } else { # # sel_theta <- grepl("theta\\[[0-9]+,[0-9]+\$", colnames(fit_mcmc2)) # theta <- runjags::combine.mcmc(fit_mcmc2, vars = "^theta", collapse.chains = F) %>% # tibble::as_tibble() %>% # dplyr::select(., dplyr::matches("theta\\[")) %>% # dplyr::select(sapply(1:S, . %>% seq(., by = S, length = N)) %>% # as.vector) %>% # t %>% # matrix %>% # array(dim = c(N, S, R)) tmp1 <- runjags::combine.mcmc(fit_mcmc2, vars = "^theta", collapse.chains = F) %>% as.data.frame %>% dplyr::select(., dplyr::matches("theta\\[")) %>% {suppressMessages(reshape2::melt(.))} tmp2 <- regmatches(tmp1$variable, regexpr("[0-9]+,[0-9]+", tmp1$variable)) %>% strsplit(",") %>% unlist %>% as.numeric %>% matrix(ncol = 2, byrow = T) %>% data.frame(tmp1, .) tmp3 <- tmp2[order(tmp2$X1, tmp2$X2), ] theta <- apply( array( matrix(tmp3$value, ncol = length(reps), byrow = T),
dim = c(S, N, length(reps))),
c(1, 3), t)
}

if (fitModel %in% c("ext")) {
beta_ARS_extreme <- runjags::combine.mcmc(fit_mcmc2, vars = "^beta_ARS_extreme",
collapse.chains = F)
}

# If user has dplyr installed, use dplyr::progress_estimated(), otherwise
# use txtProgressBar()
p <- FALSE
try(p <- dplyr::progress_estimated(R, 5), silent = TRUE)
if (!is.environment(p)) {
pb <- txtProgressBar(style = 3, char = "zzz ", min = min(reps), max = max(reps))
}

prob <- array(NA_real_, dim = c(length(reps), N, J, 5))
for (rrr in reps) {

if (fitModel %in% c("ext", "2012", "ext2")) {
##### probs for 'ext' and '2012' #####

middle <- pnorm(outer(theta[, 1, rrr], beta[, 1, rrr], "-"))
extreme <- pnorm(outer(theta[, 2, rrr], beta[, 2, rrr], "-"))
acquies <- matrix(0, N, J)
if (arsModel == TRUE)
acquies <- pnorm(outer(theta[, 3, rrr], beta[, 3, rrr], "-"))

latent <- theta[, traitItem + dimen - 1, rrr] -
matrix(beta[, dimen, rrr], N, J, byrow = TRUE)
latent[, revItem == 1] <- -latent[, revItem == 1]
trait <- pnorm(latent)

extreme_a <- matrix(0, N, J)
if (fitModel == "ext") {
extreme_a <- matrix(pnorm(theta[, 2, rrr] - beta_ARS_extreme[rrr, ]), N, J)
}
if (fitModel == "ext2") {
extreme_a <- pnorm(outer(theta[, 2, rrr], beta[, 5, rrr], "-"))
}

prob[rrr, , , 1] <- (1-acquies)*(1-middle)*(1-trait)*extreme
prob[rrr, , , 2] <- (1-acquies)*(1-middle)*(1-trait)*(1-extreme)
prob[rrr, , , 3] <- (1-acquies)*   middle
prob[rrr, , , 4] <- (1-acquies)*(1-middle)*trait*(1-extreme) + acquies*(1-extreme_a)
prob[rrr, , , 5] <- (1-acquies)*(1-middle)*trait*   extreme  + acquies*   extreme_a

} else if (fitModel == "steps") {
##### probs for 'steps' #####
node1 <- pnorm(theta[, traitItem, rrr] -
matrix(beta[, 1, rrr], N, J, byrow = TRUE))
node2 <- pnorm(theta[, traitItem, rrr] -
matrix(beta[, 2, rrr], N, J, byrow = TRUE))
node3 <- pnorm(theta[, traitItem, rrr] -
matrix(beta[, 3, rrr], N, J, byrow = TRUE))
node4 <- pnorm(theta[, traitItem, rrr] -
matrix(beta[, 4, rrr], N, J, byrow = TRUE))

p_catx <- array(NA_real_, dim = c(N, J, 5))
p_catx[ , , 1] = (1-node1);
p_catx[ , , 2] =    node1 *(1-node2);
p_catx[ , , 3] =    node1 *   node2 *(1-node3);
p_catx[ , , 4] =    node1 *   node2 *   node3 *(1-node4);
p_catx[ , , 5] =    node1 *   node2 *   node3 *   node4 ;

prob[rrr, , ,] <- p_catx

prob[rrr, , revItem == 1, 5] <- p_catx[ , revItem == 1, 1]
prob[rrr, , revItem == 1, 4] <- p_catx[ , revItem == 1, 2]
prob[rrr, , revItem == 1, 3] <- p_catx[ , revItem == 1, 3]
prob[rrr, , revItem == 1, 2] <- p_catx[ , revItem == 1, 4]
prob[rrr, , revItem == 1, 1] <- p_catx[ , revItem == 1, 5]

} else if (fitModel == "pcm") {
##### probs for 'pcm' #####
theta_x <- theta[, rep((traitItem + dimen - 1), each = 4),rrr]

betas_x <- beta[, , rrr] %>%
t %>%
matrix(nrow = N, ncol = J*4, byrow = T)

log_arg <- (theta_x - betas_x) %>%
t %>%
matrix(ncol = 4, byrow = T)
p1 <- log_arg %>%
cbind(1, .) %>%
apply(1, . %>% cumsum %>% exp) %>%
t %>%
# exp %>%
magrittr::divide_by(., rowSums(.))

p1[revItem == 1, ] <- p1[revItem == 1, ] %>%
apply(1, rev) %>%
t

prob[rrr, , ,] <- p1 %>%
array(dim = c(J, N, 5)) %>%
apply(c(1, 3), t)

} else if (fitModel == "shift") {
##### probs for 'shift' #####

middle <- pnorm(outer(theta[, 1, rrr], beta[, 1, rrr], "-"))
extreme <- pnorm(outer(theta[, 2, rrr], beta[, 2, rrr], "-"))

trait <- theta[, (traitItem + dimen - 1),rrr] %>%
magrittr::subtract(matrix(beta[, 3, rrr], N, J, byrow = T)) %>%
magrittr::multiply_by(matrix((-1)^revItem, N, J, byrow = T)) %>%
pnorm

prob[rrr, , , 1] <- (1-middle)*(1-trait)*extreme
prob[rrr, , , 2] <- (1-middle)*(1-trait)*(1-extreme)
prob[rrr, , , 3] <-    middle
prob[rrr, , , 4] <- (1-middle)*trait*(1-extreme)
prob[rrr, , , 5] <- (1-middle)*trait*   extreme

}
if (is.environment(p)) {
p$tick()$print()
} else {
setTxtProgressBar(pb, rrr)
}
}
return(prob)
}

#' Calculate expected responses on the basis of posterior predicitive
#' probabilities.
#'
#' The function takes the posterior predicitve probabilities (from
#' \code{\link{post_prob_irtree}}) and calculates for every
#' iteration-person-item combination the expected response. This is needed for
#'
#' @param prob Numeric array of dimension R x N x J x 5 (for N persons, J items
#'   with 5 categories, and R iterations).
#' @return Array of expected responses of dimension R x N x J (for N persons, J
#'   items, and R iterations).
#'
# @export
posterior_expected <- function(prob = NULL) {
# checkmate::assert_array(prob, mode = "numeric", any.missing = FALSE, d = 4)
# checkmate::qassert(prob, "N+[0,1]")
#
# # apply is slow here:
# apply(prob, MARGIN = 1:3, function(p) sum(p * (1:5))) -
d <- dim(prob)
r <- array(matrix(prob, ncol = 5) %*% 1:5, d[1:3])
return(r)
}

#' Draw responses on the basis of posterior predicitive probabilities.
#'
#' The function takes the posterior predicitve probabilities (from
#' \code{\link{post_prob_irtree}}) and draws for every
#' iteration-person-item combination a predicted response.
#'
#' @param prob Numeric array of dimension R x N x J x 5 (for N persons, J items
#'   with 5 categories, and R iterations).
#' @return Array of predicted responses of dimension R x N x J (for N persons, J
#'   items, and R iterations).
#'
# @export
posterior_predicted <- function(prob = NULL) {
checkmate::assert_array(prob, mode = "numeric", any.missing = FALSE, d = 4)
checkmate::qassert(prob, "N+[0,1]")

### apply is unefficient here:
# apply(prob, MARGIN = 1:3, function(p) sample.int(5, 1, prob = p))
d <- dim(prob)
array(sample_pp(matrix(prob, ncol = 5)), d[1:3])
# pp <- array(NA, dim(prob)[1:3])
# for (r in 1:dim(pp)[1]){
#     for (n in 1:dim(pp)[2]){
#         for (j in 1:dim(pp)[3]){
#             pp[r,n,j] <- sample.int(5, 1, prob = prob[r,n,j,])
#         }
#     }
# }
# pp
}

##### low-level implementation of statistics (input: 1 replication) #####
#
# resp: N x J matrix      [observed / predicted responses]
# exp: N x J matrix       [expected response]
# prob: N x J x 5 array   [predicted probabilities]

#' Discrepency measure for PPC: Item-total correlation.
#'
#' This function calculates the polyserial correlation between a person's total
#' score and his or her item response. This discrepency measure is useful to
#' detect misfit due to a missing item-slope/-discrimination parameter. It may
#' not be suitable for the response style models discussed herein, because these
#' models assume that the total score as well as the item response is a
#' composite of target trait and response styles.
#'
#' @references Li, T., Xie, C., & Jiao, H. (2017). Assessing fit of alternative
#'   unidimensional polytomous IRT models using posterior predictive model
#'   checking. Psychological Methods, 22, 397-408. doi:10.1037/met0000082
#' @references Zhu, X., & Stone, C. A. (2012). Bayesian comparison of
#'   alternative graded response models for performance assessment applications.
#'   Educational and Psychological Measurement, 72, 774-799.
#'   doi:10.1177/0013164411434638
#'
#' @param resp Numeric matrix of dimension N x J (for N persons and J items)
#'   containing the observed item responses.
#' @inheritParams fit_irtree
#' @return Vector of length J of polyserial correlations.
#'
# @export
statistic_item_cor <- function(resp = NULL,
revItem = NULL,
traitItem = NULL) {
# if (is.null(revItem)) revItem <- rep(0, ncol(resp))
# if (is.null(traitItem)) traitItem <- rep(1, ncol(resp))

resp[, revItem == 1] <- 6 - resp[, revItem == 1]
scores <- vapply(1:6, function(x) rowSums(resp[, traitItem == x]),
vector("numeric", length = nrow(resp)))
# scores <- rowSums(resp)
# apply(resp, 2, function(r) polycor::polyserial(scores, r))
s <-  vapply(1:ncol(resp),
function(r) polycor::polyserial(scores[, traitItem[r]], resp[, r]), 1)
return(s)
}

#' Discrepency measure for PPC: Yen's Q3.
#'
#' This function calculates Yen's Q3 statistic.
#'
#' @references Li, T., Xie, C., & Jiao, H. (2017). Assessing fit of alternative
#'   unidimensional polytomous IRT models using posterior predictive model
#'   checking. Psychological Methods, 22, 397-408. doi:10.1037/met0000082
#' @references Yen, W. M. (1993). Scaling performance assessments: Strategies
#'   for managing local item dependence. Journal of Educational Measurement, 30,
#'   187-213. doi:10.1111/j.1745-3984.1993.tb00423.x
#' @references Zhu, X., & Stone, C. A. (2012). Bayesian comparison of
#'   alternative graded response models for performance assessment applications.
#'   Educational and Psychological Measurement, 72, 774-799.
#'   doi:10.1177/0013164411434638
#'
#' @param resp Numeric matrix of dimension N x J (for N persons and J items)
#'   containing the responses.
#' @param exp Numeric matrix of dimension N x J (for N persons and J items)
#'   containing the expected responses.
# @inheritParams ppc_irtree
#' @return Vector of length J of polyserial correlations.
#'
# @export
statistic_Q3 <- function(resp = NULL,
exp = NULL){
d <- resp - exp
# # plot correlations of observed/expected/diffs
# hist(mapply(cor, data.frame(t(resp)), data.frame(t(exp))))  # per person
# hist(mapply(cor, data.frame(resp), data.frame(exp)))        # per item
# hist(cor(d))

r <- cor(d)
r1 <- r[lower.tri(r)]
return(r1)

# GDDM (Levy & Svetina, 2011)
# MBC <- combn(ncol(d), 2, FUN = function(idx)
#     mean(d[,idx[1]] * d[,idx[2]]) )
# GDDM <- mean(abs(MBC))
}

#' Discrepency measure for PPC: Global Odds ratio for two items.
#'
#' This function calculates the (natural logarithm of the) global odds ratio of
#' a pair of items, which are dichotomized beforehand.
#'
#' @references Li, T., Xie, C., & Jiao, H. (2017). Assessing fit of alternative
#'   unidimensional polytomous IRT models using posterior predictive model
#'   checking. Psychological Methods, 22, 397-408. doi:10.1037/met0000082
#' @references Zhu, X., & Stone, C. A. (2012). Bayesian comparison of
#'   alternative graded response models for performance assessment applications.
#'   Educational and Psychological Measurement, 72, 774-799.
#'   doi:10.1177/0013164411434638
#'
#' @param x Numeric vector containing the responses to the first item.
#' @param y Numeric vector containing the responses to the second item.
#' @param threshold Numeric vector of length 1. The threshold used for dichotomization.
#' @param constant Numeric vector of length 1. A constant added to all four
#'   frequencies in order to avoid dividing by zero.
# @inheritParams ppc_irtree
#' @return The odds ratio.
#'
# @export
global_OR <- function(x,
y,
threshold = 3,
constant = .1) {
# odds ratio: (as log, numerically more stable and better to plot)
ln <- log(table(factor(x > threshold, levels = c(FALSE, TRUE)),
factor(y > threshold, levels = c(FALSE, TRUE))) + constant)
or <- ln[1,1] + ln[2,2] - (ln[1,2] + ln[2,1])
return(or)
}

#' Discrepency measure for PPC: Global Odds ratio for three or more items.
#'
#' This function calculates the (natural logarithm of the) global odds ratio of
#' pairs of items, which are dichotomized beforehand.
#'
#' @references Li, T., Xie, C., & Jiao, H. (2017). Assessing fit of alternative
#'   unidimensional polytomous IRT models using posterior predictive model
#'   checking. Psychological Methods, 22, 397-408. doi:10.1037/met0000082
#' @references Zhu, X., & Stone, C. A. (2012). Bayesian comparison of
#'   alternative graded response models for performance assessment applications.
#'   Educational and Psychological Measurement, 72, 774-799.
#'   doi:10.1177/0013164411434638
#'
#' @param resp Numeric matrix of dimension N x J (for N persons and J items)
#'   containing the responses.
# @inheritParams ppc_irtree
#' @return Vector of length 'J choose 2' odds ratios.
#'
# @export
#
statistic_OR <- function(resp) {
combn(ncol(resp), 2, FUN = function(idx)
global_OR(resp[,idx[1]], resp[,idx[2]]) )
}

# group: N vector         [group membership for Yen's Q1, e.g., ability]
# not Q1: deviation between response and expected value != expected frequency!
# statistic_Q1 <- function(resp, exp, group){
#     sum((resp - exp)^2 / exp)
# }

#' Discrepency measure for PPC: Item score distribution X2.
#'
#' This function calculates the item score distribution, i.e., the discrepancy
#' between observed frequencies and posterior predictive frequencies for each
#' item using Pearson's X2 statistic.
#'
#' @references Zhu, X., & Stone, C. A. (2012). Bayesian comparison of
#'   alternative graded response models for performance assessment applications.
#'   Educational and Psychological Measurement, 72, 774-799.
#'   doi:10.1177/0013164411434638
#'
#' @param resp Numeric matrix of dimension N x J (for N persons and J items)
#'   containing the responses.
#' @inheritParams ppc_irtree
#' @return Vector of length J containing X2 values.
#'
# @export
statistic_item_score <- function(resp = NULL,
prob = NULL){
# Zhu & Stone (2012, p. 779)
x1 <- apply(prob, c(2:3), sum)
x2 <- t(apply(resp, 2, function(x) table(factor(x, 1:5))))

x3 <- rowSums(((x2 - x1)^2)/x1)
return(x3)
}

##### high-level usage of statistics (input: all replication) #####

#' Posterior predicitve checks and p-values for several discrepency measures.
#'
#' This function takes posterior predictive probabilities (from
#' \code{\link{post_prob_irtree}}), and compares---for several discrepency
#' measures---observations and predictions both descriptively and via posterior
#' predicitive p-values.
#'
#' @references Levy, R. (2011). Posterior predictive model checking for
#'   conjunctive multidimensionality in item response theory. Journal of
#'   Educational and Behavioral Statistics, 36, 672-694.
#'   doi:10.3102/1076998611410213
#' @references Li, T., Xie, C., & Jiao, H. (2017). Assessing fit of alternative
#'   unidimensional polytomous IRT models using posterior predictive model
#'   checking. Psychological Methods, 22, 397-408. doi:10.1037/met0000082
#' @references Sinharay, S., Johnson, M. S., & Stern, H. S. (2006). Posterior
#'   predictive assessment of item response theory models. Applied Psychological
#'   Measurement, 30, 298-321. doi:10.1177/0146621605285517
#' @references Zhu, X., & Stone, C. A. (2012). Bayesian comparison of
#'   alternative graded response models for performance assessment applications.
#'   Educational and Psychological Measurement, 72, 774-799.
#'   doi:10.1177/0013164411434638
#'
#' @param prob Numeric array of dimension R x N x J x 5 (for N persons, J items
#'   with 5 categories, and R iterations).
#' @param statistics Character vector of length >= 1 specifying the discrepency
#'   measures to use.
#' @param X Numeric matrix of dimension N x J containing the observed item responses.
#'
#' \itemize{
#' \item \code{item_cor}: Item-total correlation, i.e., polyserial correlation between response and total score.
#' \item \code{OR}: Odds ratio for (dichotomized) pairs of items.
#' \item \code{Q3}: Yen's Q3 statistic.
#' \item \code{ISD}: Item-score-distribution, i.e., Pearson's X2 of residuals.
#' \item \code{resp}: Observed responses (integer), predicted responses (integer), and expected responses (numeric).
#' }
#' @inheritParams tidyup_irtree_fit
#' @return Returns an object of class \code{ppc}
#' @seealso \code{\link{print.ppc}} for summarizing the results and
#'   \code{\link{ppc_resp_irtree}} for summarizing posterior predictive response
#'   frequencies.
#' @examples
#' \dontrun{
#' J <- 10
#' betas <- cbind(rnorm(J, .5), rnorm(J, .5), rnorm(J, 1.5), rnorm(J, 0))
#' dat <- generate_irtree_ext(N = 20, J = J, betas = betas, beta_ARS_extreme = .5)
#'
#' # fit model
#' res1 <- fit_irtree(dat$X, revItem = dat$revItem, M = 200)
#' res2 <- summarize_irtree_fit(res1)
#'
#' # posterior predictive checking
#' res3 <- post_prob_irtree(res2)
#' res4 <- ppc_irtree(prob = res3, fit = res1)
#' res4
#' }
#'
#' @export
ppc_irtree <- function(prob = NULL,
statistics = c("item_cor", "OR", "Q3", "ISD", "resp"),
fit = NULL,
X = NULL,
revItem = NULL,
traitItem = NULL) {

checkmate::assert_array(prob, mode = "numeric", any.missing = FALSE, d = 4)
checkmate::qassert(prob, "N+[0,1]")
statistics <- match.arg(statistics, several.ok = TRUE)
checkmate::qassert(fit, "l+")
checkmate::assert_matrix(X, mode = "integerish", any.missing = FALSE,
min.rows = 2, min.cols = 2,
null.ok = !is.null(fit$args$traitItem))
checkmate::assert_integerish(X, lower = 1, upper = 5, any.missing = FALSE,
null.ok = !is.null(fit$args$traitItem))
checkmate::assert_integerish(revItem, lower = 0, upper = 1, any.missing = FALSE,
min.len = 1,
null.ok = !is.null(fit$args$revItem))
checkmate::assert_integerish(traitItem, lower = 1, any.missing = FALSE,
min.len = 1,
null.ok = !is.null(fit$args$traitItem))

if (is.null(X)) X <- fit$args$X
if (is.null(revItem)) revItem <- fit$args$revItem
if (is.null(traitItem)) traitItem <- fit$args$traitItem

R <- dim(prob)[1]
J <- dim(prob)[3]

message("Drawing samples and computing statistics for ", R,
" replications.\nThis may take a little time, go get a coffee.")
X.pred <- posterior_predicted(prob)
X.exp  <- posterior_expected(prob)

ppc <- list()

# item-total correlation
if ("item_cor" %in% statistics) {
time1 <- Sys.time()
item_cor <- list(obs = statistic_item_cor(X, revItem = revItem,
traitItem = traitItem),
pred = apply(X.pred, 1, statistic_item_cor,
revItem = revItem, traitItem = traitItem))
item_cor$ppp <- rowMeans(item_cor$obs <= item_cor$pred) ppc$item_cor <- item_cor
time2 <- Sys.time()

message("Finished: Item-total correlation (", round(difftime(time2, time1, units = "mins"), 1), " minutes)")
}

# global odds ratio (OR)
if ("OR" %in% statistics) {
time1 <- Sys.time()
OR <- list(obs = c(statistic_OR(X)),
pred = apply(X.pred, 1, statistic_OR))
OR$ppp <- rowMeans(OR$obs <= OR$pred) ppc$OR <- OR
time2 <- Sys.time()

message("Finished: Odds ratio (", round(difftime(time2, time1, units = "mins"), 1), " minutes)")
}

# Yen's Q3
if ("Q3" %in% statistics) {
time1 <- Sys.time()
Q3 <- list(obs = apply(X.exp, 1, function(e) statistic_Q3(X, e)),
pred = matrix(NA, choose(J, 2), R))
for (r in 1:R) {
Q3$pred[, r] <- statistic_Q3(X.pred[r, , ], X.exp[r, , ]) } Q3$ppp <- rowMeans(Q3$obs <= Q3$pred)
ppc$Q3 <- Q3 time2 <- Sys.time() message("Finished: Yen's Q3 (", round(difftime(time2, time1, units = "mins"), 1), " minutes)") } # # GDDM (Levy & Svetina, 2011) # GDDM <- list(obs = apply(X.exp, 1, function(e) statistic_Q3(X, e)), # pred = rep(NA, R)) # for (r in 1:R) { # GDDM$pred[r] <- statistic_Q3(X.pred[r,,], X.exp[r,,])
# }
# GDDM$ppp <- GDDM$obs <= GDDM$pred # # message("Finished: GDDM") # Item score distribution if ("ISD" %in% statistics) { time1 <- Sys.time() ISD <- list(obs = apply(prob, 1, function(e) statistic_item_score(X, e)), # pred = matrix(NA, J, R) pred = sapply(1:R, function(x) statistic_item_score(X.pred[x, , ], prob[x, , , ])) ) # for (r in 1:R){ # ISD$pred[, r] <- statistic_item_score(X.pred[r,,], prob[r,,,])
# }
ISD$ppp <- rowMeans(ISD$obs <= ISD$pred) ppc$ISD <- ISD
time2 <- Sys.time()

message("Finished: Item score distribution (", round(difftime(time2, time1, units = "mins"), 1), " minutes)")
}

# ppc <- list(item_cor = item_cor, OR = OR, Q3 = Q3
#             , ISD = ISD
#             # , GDDM = GDDM
#             # , X = list(obs = X, pred = X.pred, exp = X.exp)
#             )
if ("resp" %in% statistics) {
ppc$X <- list(obs = X, pred = X.pred, exp = X.exp) } class(ppc) <- "ppc" return(ppc) } #' Print posterior predictive p-values #' #' This function calculates and prints PPP-values for objects of class #' \code{ppc}. More precisely, it prints---across all items---the average of the #' PPP-values as well as the percentage of extreme PPP-values. For example, for #' a data set comprised of 20 items, 190 odds ratios can be calculated, namely, #' one for each pair of items. The present print method calculates the avarage #' PPP-value (should probably be close to 0.5) as well as the percentage of #' those 190 PPP-values that are extreme (should hopefully be close to 0). #' #' @param x List as returned from \code{\link{ppc_irtree}} #' @param na.rm Logical argument passed to \code{\link[base]{mean}} #' \code{\link[stats]{median}}. If \code{na.rm = FALSE}, the present print #' method will return \code{NA} if at least one PPP-value could not be #' calculated for a discrepency measure. Thus this is extremely helpful for #' identifying problems; set it to \code{TRUE} only if you know what you are #' doing. #' @param ... Further arguments passed to \code{\link{print.table}}. #' @return Table of PPP-values #' #' @export print.ppc <- function(x, na.rm = FALSE, ...) { # ppp <- sapply(x[1:3], "[[", "ppp") ppp <- lapply(x[names(x) != "X"], "[[", "ppp") tab <- sapply(ppp, function(p) c(average = mean(p, na.rm = na.rm), median = median(p, na.rm = na.rm), below05 = mean(p <= .05, na.rm = na.rm), above95 = mean(p >= .95, na.rm = na.rm))) print.table(tab, ...) } #' Posterior predicitve checks of response frequencies. #' #' This function summarizes posterior predicted responses (1, ..., 5) for every #' item and category. For a single iteration/replication, this results in a #' response distribution (barplot); across multiple iterations, quantiles (using #' \code{probs}) are calculated. These quantiles should overlap the observed #' frequencies. #' #' @param ppc List. Output from \code{\link{ppc_irtree}} that contains #' posterior predicted responses with dimensions R x N x J (for N persons, J #' items, and R replications/iterations). #' @param probs Numeric. Vector of probabilities (passed to #' \code{\link[stats]{quantile}}) that is used to calculate quantiles of the #' posterior predictive distribution. Argument may be named (used to generate #' colnames). Defaults to \code{c(.025, .975, .16, .84)}. #' @return Returns a data frame containing the specified quantiles of the #' posterior predictive distribution for every category of every item. # @inheritParams fit_irtree # @inheritParams runjags::combine.mcmc # @importFrom magrittr %>% # @importFrom rlang .data # @import coda #' @examples #' \dontrun{ #' J <- 10 #' betas <- cbind(rnorm(J, .5), rnorm(J, .5), rnorm(J, 1.5), rnorm(J, 0)) #' dat <- generate_irtree_ext(N = 20, J = J, betas = betas, beta_ARS_extreme = .5) #' #' # fit model #' res1 <- fit_irtree(dat$X, revItem = dat$revItem, M = 200) #' res2 <- summarize_irtree_fit(res1) #' #' # posterior predictive checking #' res3 <- post_prob_irtree(res2) #' res4 <- ppc_irtree(prob = res3, statistics = "resp", fit = res1) #' res5 <- ppc_resp_irtree(res4) #' #' library(ggplot2) #' ggplot(res5, aes(x = Categ, y = Obs, ymin = q16, ymax = q84)) + #' geom_col() + #' geom_errorbar() + #' geom_point(aes(y = q50)) + #' facet_wrap(~ Item) #' } #' @export ppc_resp_irtree <- function(ppc = NULL, probs = NULL) { if ("ppc" %in% class(ppc)) { try(pred <- ppc$X$pred) try(obs <- ppc$X$obs) } else if (is.list(ppc)) { try(pred <- ppc$pred)
try(obs <- ppc$obs) } else { pred <- ppc } checkmate::assert_array(pred, mode = "integerish", any.missing = FALSE, d = 3) checkmate::qassert(pred, "X+[1,5]") checkmate::assert_numeric(probs, lower = 0, upper = 1, any.missing = FALSE, min.len = 1, unique = TRUE, null.ok = TRUE) if (is.null(probs)) { probs <- c(.025, .975, .16, .84, .50) names(probs) <- c("q025", "q975", "q16", "q84", "q50") } else if (is.null(attr(probs, "names"))) { names(probs) <- paste0("q", probs) } a1 <- apply(pred, c(1,3), function(x) prop.table(table(factor(x, levels = 1:5)))) a2 <- apply(a1, c(1, 3), quantile, probs = probs, names = F) names(dimnames(a2)) <- c("Q", "Categ", "Item") dimnames(a2)$Q <- names(probs)
a3 <- reshape2::melt(a2)
a4 <- reshape2::dcast(a3, value.var = "value",
formula = Item + Categ ~ Q)
a4$Persons <- dim(pred)[2] a4$Samples <- dim(pred)[1]

if (exists("obs", inherits = FALSE)) {
a4 <- apply(obs, 2, function(x) prop.table(table(factor(x, levels = 1:5)))) %>%
reshape2::melt(varnames = c("Categ", "Item"), value.name = "Obs") %>%
dplyr::full_join(a4, by = c("Categ", "Item"))
}

return(a4)
}

hplieninger/mpt2irt documentation built on Aug. 4, 2018, 10:52 a.m.