Plots from Johnson & Kuhn (2014)

This document (nearly) duplicates the examples from Johnson and Kuhn (2014). Minor differences are due to the use of R Markdown.

Necessary Packages

library(ltbayes)
library(ggplot2)
library(gridExtra)
library(reshape2)

Figure 1

Trace plot (a) and estimated posterior density (b) for $\zeta$ based on 5000 simulated realizations from the posterior distribution.

samp <- 5000
burn <- 1000

alph <- c(1.27,1.34,1.14,1,0.67)
beta <- c(1.19,0.59,0.15,-0.59,-2)
gamm <- c(0.1,0.15,0.15,0.2,0.1)

zeta <- postsamp(fmodel3pl, c(0,0,1,1,1), 
    apar = alph, bpar = beta, cpar = gamm,
    control = list(nbatch = samp + burn, scale = 3))

zeta <- data.frame(sample = 1:samp, 
    zeta = zeta$batch[(burn + 1):(samp + burn)])

p <- ggplot(zeta, aes(x = sample, y = zeta))
p <- p + geom_line() + ylab(expression(zeta)) + xlab("Sample")
p <- p + theme_bw() + theme(legend.key = element_blank())
p <- p + theme(axis.text.x = element_text(size = 10, color = "black"))
p <- p + theme(text = element_text(size = 15))
p <- p + theme(axis.text.y = element_text(color = "black"))
p <- p + ggtitle("(a)\n")

p1 <- p

p <- ggplot(zeta, aes(x = zeta)) + geom_density(adjust = 2) + coord_flip()
p <- p + xlab(expression(zeta)) + ylab("Density")
p <- p + theme_bw() + theme(legend.key = element_blank())
p <- p + theme(axis.text.x = element_text(size = 10, color = "black"))
p <- p + theme(text = element_text(size = 15))
p <- p + theme(axis.text.y = element_text(color = "black"))
p <- p + ggtitle("(b)\n")

p2 <- p

grid.arrange(p1, p2, nrow = 1)

Figure 2

Violin plots for 5000 simulated realizations from the posterior distribution of $\zeta$ given each of 32 possible response patterns, stratified by sum score. Points and line segments within each violin plot represent the posterior mean and 95% credibility interval, respectively.

y <- patterns(5, 2, 0:5)
zeta <- vector("list", length = 32)
for (i in 1:32) {
  zeta[[i]] <- postsamp(fmodel3pl, y[i,], apar = alph, bpar = beta, cpar = gamm,
    control = list(nbatch = samp + burn))$batch[(burn + 1):(samp + burn)]
}
zeta <- data.frame(zeta = unlist(zeta))
zeta$pattern <- rep(apply(y, 1, paste, collapse = ""), each = samp)
zeta$sum <- rep(apply(y, 1, sum), each = samp)

p <- ggplot(zeta, aes(x = pattern, y = zeta))
p <- p + geom_violin(trim = FALSE)
p <- p + facet_grid(. ~ sum, scales = "free_x", space = "free_x")
p <- p + theme_bw() + theme(legend.key = element_blank())
p <- p + theme(text = element_text(size = 15)) + xlab("Response Pattern")
p <- p + theme(axis.text.x = element_text(size = 10, angle = 45, 
    vjust = 0.5, color = "black"))
p <- p + theme(axis.text.y = element_text(color = "black")) 
p <- p + ylab(expression(zeta)) + xlab("Reponse Pattern")
p <- p + stat_summary(fun.y = mean, 
   fun.ymin = function(x) quantile(x, probs = 0.025), 
   fun.ymax = function(x) quantile(x, probs = 0.975), size = 0.5)

plot(p)

Figure 3

Violin plots (a) and empirical cumulative distribution functions (b) for 5000 simulated realizations from the posterior distribution of $\zeta$ conditional on sum score. Points and line segments within each violin plot represent the posterior mean and 95% credibility interval, respectively.

zeta <- vector("list", length = 6)
for (i in 1:6) {
    y <- patterns(5, 2, i-1)
    zeta[[i]] <- postsamp(fmodel3pl, y, apar = alph, bpar = beta, cpar = gamm,
        control = list(nbatch = samp + burn))
    zeta[[i]] <- zeta[[i]]$batch[(burn+1):(samp + burn)]
}
zeta <- data.frame(zeta = unlist(zeta))
zeta$sum <- factor(rep(0:5, each = samp))

p <- ggplot(zeta, aes(x = zeta, color = sum, linetype = sum)) + stat_ecdf()
p <- p + ylab("") + xlim(c(-3,3)) + guides(color = guide_legend(title = "Sum"))
p <- p + theme_bw() + theme(legend.key = element_blank())
p <- p + theme(axis.text.x = element_text(size = 10, color = "black"))
p <- p + theme(text = element_text(size = 15))
p <- p + theme(axis.text.y = element_text(color = "black")) 
p <- p + ylab("") + xlab(expression(zeta))
p <- p + guides(linetype = guide_legend(title = "Sum"))
p <- p + ggtitle("(b)\n")

p1 <- p

p <- ggplot(zeta, aes(x = sum, y = zeta))
p <- p + geom_violin(trim = FALSE)
p <- p + theme_bw() + theme(legend.key = element_blank())
p <- p + theme(text = element_text(size = 15)) + xlab("Sum Score")
p <- p + theme(axis.text.x = element_text(size = 10, color = "black"))
p <- p + theme(axis.text.y = element_text(color = "black")) + ylab(expression(zeta))
p <- p + stat_summary(fun.y = mean, 
    fun.ymin = function(x) quantile(x, probs = 0.025), 
    fun.ymax = function(x) quantile(x, probs = 0.975), size = 0.5)
p <- p + ggtitle("(a)\n")

p2 <- p

grid.arrange(p2, p1, nrow = 1)

Figure 4

Estimated density functions and empirical cumulative distribution functions for the posterior distributions of $\zeta$ given $\tilde{Y} < \tilde{y}$ (lower interval) versus $\tilde{Y} \ge \tilde{y}$ (upper interval) for $c = 1, 2, \dots, 5$.

zetal <- vector("list", length = 5)
zetau <- vector("list", length = 5)
for (i in 1:5) {
    zetal[[i]] <- postsamp(fmodel3pl, patterns(5, 2, 0:(i-1)), 
        apar = alph, bpar = beta, cpar = gamm,
        control = list(nbatch = samp + burn))
    zetau[[i]] <- postsamp(fmodel3pl, patterns(5, 2, i:5), 
        apar = alph, bpar = beta, cpar = gamm,
        control = list(nbatch = samp + burn))
    zetal[[i]] <- zetal[[i]]$batch[(burn+1):(samp + burn)]
    zetau[[i]] <- zetau[[i]]$batch[(burn+1):(samp + burn)]
}
zetal <- data.frame(zeta = unlist(zetal))
zetau <- data.frame(zeta = unlist(zetau))
zetal$interval <- "Lower"
zetau$interval <- "Upper"
zetal$cut <- rep(c("y1","y2","y3","y4","y5"), each = samp)
zetau$cut <- rep(c("y1","y2","y3","y4","y5"), each = samp)
zeta <- rbind(zetal, zetau)

p <- ggplot(zeta, aes(x = zeta)) + geom_density(aes(linetype = interval), 
    adjust = 2, show.legend = FALSE)
p <- p + facet_wrap(~ cut, ncol = 5) + xlim(c(-4,4)) + ylab("") + xlab(expression(zeta))
p <- p + stat_ecdf(aes(linetype = interval))
p <- p + theme_bw() + theme(legend.key = element_blank())
p <- p + theme(text = element_text(size = 15))
p <- p + theme(axis.text.x = element_text(size = 10, color = "black"))
p <- p + theme(axis.text.y = element_text(color = "black"))
p <- p + scale_linetype_manual(values = c(6,1), name = "Interval")

plot(p)

Figure 5

Posterior probabilities, conditional on sum score ($\tilde{y}$), of $\zeta$ falling into one of four consecutive intervals defined as $R_1 = {\zeta | \zeta < Q_1}$, $R_2 = {\zeta|Q_1 < \zeta < Q_2}$, $R_3 = {\zeta|Q_2 < \zeta < Q_3}$, and $R_4 = {\zeta|\zeta > Q_4}$ where $Q_1$, $Q_2$, and $Q_3$ are the quartiles of the prior distribution of $\zeta$.

zeta <- vector("list", length = 6)
for (i in 1:6) {
    y <- patterns(5, 2, i-1)
    zeta[[i]] <- postsamp(fmodel3pl, y, apar = alph, bpar = beta, cpar = gamm,
        control = list(nbatch = samp + burn))
    zeta[[i]] <- zeta[[i]]$batch[(burn + 1):(samp + burn)]
    zeta[[i]] <- table(cut(zeta[[i]], c(-Inf, qnorm(c(0.25,0.5,0.75)), Inf)))
}
zeta <- data.frame(zeta = unlist(zeta))
zeta$quartile = factor(rep(1:4, 6))
levels(zeta$quartile) <- c("First","Second","Third","Fourth")
zeta$zeta <- zeta$zeta/samp * 100
zeta$sum <- factor(rep(0:5, each = 4))
zeta$text <- paste(round(zeta$zeta, 1), "%", sep = "")
zeta$side <- factor(ifelse(zeta$zeta > 50, 1, 0))

p <- ggplot(zeta, aes(x = sum, y = quartile, fill = zeta)) + geom_tile()
p <- p + scale_fill_gradient(name = "Probability (%)", low = grey(0.9), 
    high = grey(0.1), breaks = c(0, 50, 100), limits = c(0, 100))
p <- p + geom_text(aes(label = text, color = side)) 
p <- p + scale_color_manual(values = c("black","white"), guide = FALSE)
p <- p + theme_bw() + coord_fixed()
p <- p + theme(text = element_text(size = 10))
p <- p + theme(axis.text = element_text(color = "black"))
p <- p + ylab("") + xlab("Sum")

plot(p)

Figure 6

Posterior probabilities of the form $P(\zeta_A > \zeta_B|\tilde{Y}_A = \tilde{y}_A, \tilde{Y}_B = \tilde{y}_B)$ --- i.e., the posterior probability that examinee A has a larger latent score than examinee B given their sum scores.

prob <- matrix(NA, 6, 6)
for (j in 1:6) {
    for (i in j:6) {
        if (i == j) {
            prob[i,j] <- 0.5
        }      
        else {
            zeta1 <- postsamp(fmodel3pl, patterns(5, 2, i-1), 
                apar = alph, bpar = beta, cpar = gamm,
                control = list(nbatch = samp + burn))
            zeta2 <- postsamp(fmodel3pl, patterns(5, 2, j-1), 
                apar = alph, bpar = beta, cpar = gamm,
                control = list(nbatch = samp + burn))
      zeta1 <- zeta1$batch[(burn + 1):(samp + burn)]
      zeta2 <- zeta2$batch[(burn + 1):(samp + burn)]
      prob[i,j] <- mean(zeta1 > zeta2)
      prob[j,i] <- 1 - prob[i,j]
        }
    }
}
prob <- data.frame(p = as.vector(prob), sumr = factor(rep(0:5, 6)), 
    sumc = factor(rep(0:5, each = 6)))
prob$p <- prob$p * 100
prob$text <- paste(round(prob$p, 1), "%", sep = "")
prob$side <- factor(ifelse(prob$p > 50, 1, 0))

p <- ggplot(prob, aes(x = sumc, y = sumr, fill = p)) + geom_tile()
p <- p + scale_fill_gradient(name = "Probability (%)", low = grey(0.9), 
    high = grey(0.1), breaks = c(0, 50, 100), limits = c(0, 100))
p <- p + geom_text(aes(label = text, color = side)) 
p <- p + scale_color_manual(values = c("black","white"), guide = FALSE)
p <- p + theme_bw() + coord_fixed()
p <- p + theme(text = element_text(size = 10))
p <- p + theme(axis.text = element_text(color = "black"))
p <- p + ylab("Examinee A Sum") + xlab("Examinee B Sum")

plot(p)

Figure 7

Profiles of the unnormalized log-posterior distribution of $\zeta$ given $\tilde{Y} = 0, 1, \dots, 5$ with normal and improper uniform prior distributions.

post <- vector("list", length = 6)
for (i in 1:6) {
    y <- patterns(5, 2, i-1)
    post[[i]] <- posttrace(fmodel3pl, y, apar = alph, bpar = beta, cpar = gamm, 
        zmin = -5, zmax = 5, length = 100)
    post[[i]] <- cbind(post[[i]]$zeta, post[[i]]$post)
}
post <- data.frame(do.call("rbind", post))
names(post) <- c("zeta","post")
post$sum <- factor(rep(0:5, each = 100))

post.norm <- post
post.norm$prior <- "normal"

post <- vector("list", length = 6)
for (i in 1:6) {
    y <- patterns(5, 2, i-1)
    post[[i]] <- posttrace(fmodel3pl, y, apar = alph, bpar = beta, cpar = gamm,
        zmin = -5, zmax = 5, prior = function(z) 1, length = 100)
    post[[i]] <- cbind(post[[i]]$zeta, post[[i]]$post)
}
post <- data.frame(do.call("rbind", post))
names(post) <- c("zeta","post")
post$sum <- factor(rep(0:5, each = 100))

post.unif <- post
post.unif$prior = "uniform"

post <- rbind(post.norm, post.unif)

names(post)[4] <- "Prior"

p <- ggplot(post, aes(x = zeta, y = post, linetype = Prior))
p <- p + geom_line() + facet_wrap(~ sum)
p <- p + xlab(expression(zeta)) + ylab("Log-Posterior Density")
p <- p + theme_bw() + theme(legend.key = element_blank())
p <- p + theme(text = element_text(size = 15))
p <- p + theme(axis.text.x = element_text(size = 10, color = "black"))
p <- p + theme(axis.text.y = element_text(color = "black"))

plot(p)

Figure 8

Profile likelihoods for $\zeta$ given $\tilde{Y} = 2,3,4$ with maximum likelihood estimates and profile likelihood confidence intervals.

post <- post[post$Prior == "uniform",]
post <- post[post$sum %in% c(2,3,4),]

y2 <- profileci(fmodel3pl, patterns(5, 2, 2), apar = alph, 
    bpar = beta, cpar = gamm, lower = FALSE)
y3 <- profileci(fmodel3pl, patterns(5, 2, 3), apar = alph, 
    bpar = beta, cpar = gamm)
y4 <- profileci(fmodel3pl, patterns(5, 2, 4), apar = alph, 
    bpar = beta, cpar = gamm)

names(post)[3] <- "Sum"

p <- ggplot(post, aes(x = zeta, y = post, linetype = Sum))
p <- p + geom_line() + ylim(c(-14,0))
p <- p + xlab(expression(zeta)) + ylab("Log-Posterior Density")
p <- p + theme_bw() + theme(legend.key = element_blank())
p <- p + theme(text = element_text(size = 20))
p <- p + theme(axis.text.x = element_text(size = 15, color = "black"))
p <- p + theme(axis.text.y = element_text(color = "black"))
p <- p + annotate("segment", x = y2$zeta, xend = y2$zeta, 
    y = -14, yend = y2$post, linetype = 1)
p <- p + annotate("segment", x = y2$upper, xend = y2$upper, 
    y = -14, yend = y2$f.upper, linetype = 1)
p <- p + annotate("segment", x = y3$zeta, xend = y3$zeta, 
    y = -14, yend = y3$post, linetype = 3)
p <- p + annotate("segment", x = y3$lower, xend = y3$lower, 
    y = -14, yend = y3$f.lower, linetype = 3)
p <- p + annotate("segment", x = y3$upper, xend = y3$upper, 
    y = -14, yend = y3$f.upper, linetype = 3)
p <- p + annotate("segment", x = y4$zeta, xend = y4$zeta, 
    y = -14, yend = y4$post, linetype = 2)
p <- p + annotate("segment", x = y4$lower, xend = y4$lower, 
    y = -14, yend = y4$f.lower, linetype = 2)
p <- p + annotate("segment", x = y4$upper, xend = y4$upper, 
    y = -14, yend = y4$f.upper, linetype = 2)

plot(p)

Figure 9

Item information functions for five items of a 3-parameter binary logistic model.

zeta <- seq(-3, 3, length = 100)
info <- matrix(NA, 100, 5)
for (j in 1:100) {
    info[j,] <- information(fmodel3pl, c(0,1,1,1,1), zeta = zeta[j], 
        apar = alph, bpar = beta, cpar = gamm)$item
}

matplot(zeta, info, type = "l", ylab = "Information", bty = "n", xlab = expression(zeta))
legend(-3, 0.3, paste("Item", 1:5), lty = 1:5, col = 1:5)

Figure 11

Posterior distribution of $\zeta$ for the hyperbolic cosine model given $\tilde{y} = 1$ (a), $y' = (1,0,0,0,0)$ (b), $y' = (1,1,1,0,0)$ (c), and $y' = (0,1,1,1,0)$ (d).

fmodelhcm <- function(zeta, y, alph, beta, prior = dnorm, ...) {
    if (is.vector(y)) y <- matrix(y, 1, length(y))
    m <- ncol(y)
    n <- nrow(y)
    prob <- matrix(NA, m, 2)
  yprb <- matrix(NA, n, m)
    prob[,1] <- 2*cosh(zeta - beta)
    prob[,2] <- exp(alph)
    prob <- sweep(prob, 1, apply(prob, 1, sum), "/") 
    for (i in 1:n) {
        yprb[i,] <- prob[row(prob) == 1:m & col(prob) == y[i,] + 1]
    }
    return(list(post = log(sum(apply(yprb, 1, prod))) 
        + log(prior(zeta, ...)), prob = prob))
}

dmixnorm <- function(z, pi, m1, m2, s1, s2) {
   return(pi*dnorm(z, m1, s1) + (1 - pi)*dnorm(z, m2, s2))
}

apar <- c(1,1,1,1,1)
bpar <- c(-2,-1,0,1,2)
n <- 10000

tmp.1 <- postsamp(fmodelhcm, patterns(5,2,1), alph = apar, beta = bpar, prior = dmixnorm, 
   m1 = -2, m2 = 2, s1 = 2, s2 = 2, pi = 0.5, control = list(nbatch = n))

tmp.2 <- postsamp(fmodelhcm, c(1,0,0,0,0), alph = apar, beta = bpar, prior = dmixnorm, 
   m1 = -2, m2 = 2, s1 = 2, s2 = 2, pi = 0.5, control = list(nbatch = n))

tmp.3 <- postsamp(fmodelhcm, c(1,1,1,0,0), alph = apar, beta = bpar, prior = dmixnorm, 
   m1 = -2, m2 = 2, s1 = 2, s2 = 2, pi = 0.5, control = list(nbatch = n))

tmp.4 <- postsamp(fmodelhcm, c(0,1,1,1,0), alph = apar, beta = bpar, prior = dmixnorm, 
   m1 = -2, m2 = 2, s1 = 2, s2 = 2, pi = 0.5, control = list(nbatch = n))

tmp <- data.frame(zeta = c(tmp.1$batch, tmp.2$batch, tmp.3$batch, tmp.4$batch), 
    Pattern = rep(letters[1:4], times = c(n,n,n,n*5)))  

p <- ggplot(tmp, aes(x = zeta, color = Pattern, linetype = Pattern)) 
p <- p + geom_density(adjust = 2)
p <- p + theme_bw() + theme(legend.key = element_blank())
p <- p + theme(text = element_text(size = 20))
p <- p + theme(axis.text.x = element_text(size = 15, color = "black"))
p <- p + theme(axis.text.y = element_text(color = "black"))
p <- p + xlab(expression(zeta)) + ylab("Posterior Density")
plot(p)

Figure 12

Item and test Fisher information functions for five items of a hyperbolic cosine model.

zeta <- seq(-3, 3, length = 100)
info <- matrix(NA, 100, 5+1)
for (j in 1:100) {
    tmp <- information(fmodelhcm, c(0,0,0,0,0), zeta = zeta[j], alph = apar, beta = bpar)
    info[j,] <- c(tmp$test, tmp$item)
}

info <- data.frame(cbind(zeta, info))
names(info) <- c("zeta","test",paste("item", 1:5))
info <- melt(info, id.vars = "zeta", variable.name = "Type", value.name = "information")

p <- ggplot(info, aes(x = zeta, y = information, color = Type, linetype = Type))
p <- p + geom_line() + ylab("Information") + xlab(expression(zeta))
p <- p + theme_bw() + theme(legend.key = element_blank())
p <- p + theme(text = element_text(size = 15))
p <- p + theme(axis.text = element_text(size = 15)) 

plot(p)

References

Johnson, T. R. & Kuhn, K. M. (2015). Simulation-Based Bayesian Inference for Latent Traits of Item Response Models: Introduction to the ltbayes Package for R. Behavior Research Methods, 47, 1309--1327.



Try the ltbayes package in your browser

Any scripts or data that you put into this service are public.

ltbayes documentation built on May 2, 2019, 12:40 p.m.