Nothing
##########################################################################
## sample from the posterior distribution of a 2-dimensional pairwise
## comparisons model with Dirichlet process prior in R using
## linked C++ code in Scythe.
##
## Model is:
##
## i = 1,...,n.resp (resondents)
## j = 1,...,n.cand (candidates)
##
## Y_{ijj'} = 1 if i chooses j over j'
## Y_{ijj'} = 0 if i chooses j' over j
## Y_{ijj'} = NA if i chooses neither;
##
## Pr(Y_{ijj'} = 1) = \Phi( z_{i}^{T} [\theta_{j} - \theta_{ j'} ] )
## z_{i}=[cos(gamma_{i}) sin(gamma_{i})]^{T}
##
## gamma_i has a Dirichlet Process prior.
## We use a finite stick breaking process to approximate the infinite
## Dirichlet Process.
##
## theta_j \overset{ind}{\sim} N_{2}(0, I_{2})
## (some theta_js truncated above or below 0, or fixed to constants)
##
##
## candidate IDs in columns 2 to 4 need to begin with a letter
##
## This software is distributed under the terms of the GNU GENERAL
## PUBLIC LICENSE Version 2, June 1991. See the package LICENSE
## file for more information.
##
## Based on 1-d code from KQ 3/17/2015
## Initial 2-d DP code QY 11/14/2019
## Various improvements and modifications KQ and QY 2019 to 2021
## Added to MCMCpack KQ 6/26/2021
##
## Copyright (C) 2003-2007 Andrew D. Martin and Kevin M. Quinn
## Copyright (C) 2007-present Andrew D. Martin, Kevin M. Quinn,
## and Jong Hee Park
##########################################################################
#' Markov Chain Monte Carlo for the Two-Dimensional Pairwise Comparisons
#' Model with Dirichlet Process Prior in Yu and Quinn (2021)
#'
#' This function generates a sample from the posterior distribution of a
#' model for pairwise comparisons data with a probit link. Unlike standard
#' models for pairwise comparisons data, in this model the latent attribute
#' of each item being compared is a vector in two-dimensional Euclidean space.
#'
#' \code{MCMCpaircompare2d} uses the data augmentation approach of Albert and
#' Chib (1993) in conjunction with Gibbs and Metropolis-within-Gibbs steps
#' to fit the model. The user supplies data and a sample from the
#' posterior is returned as an \code{mcmc} object, which can be subsequently
#' analyzed in the \code{coda} package.
#'
#' The simulation is done in compiled C++ code to maximize efficiency.
#'
#' Please consult the \code{coda} package documentation for a comprehensive
#' list of functions that can be used to analyze the posterior sample.
#'
#' The model takes the following form:
#'
#' \deqn{i = 1,...,I \ \ \ \ (raters) }
#' \deqn{j = 1,...,J \ \ \ \ (items) }
#' \deqn{Y_{ijj'} = 1 \ \ if \ \ i \ \ chooses \ \ j \ \ over \ \ j'}
#' \deqn{Y_{ijj'} = 0 \ \ if \ \ i \ \ chooses \ \ j' \ \ over \ \ j}
#' \deqn{Y_{ijj'} = NA \ \ if \ \ i \ \ chooses \ \ neither}
#'
#' \deqn{\Pr(Y_{ijj'} = 1) = \Phi( \mathbf{z}_{i}' [\boldsymbol{\theta}_{j} -
#' \boldsymbol{\theta}_{ j'} ])}
#' \deqn{\mathbf{z}_{i}=[\cos(\gamma_{i}), \ \sin(\gamma_{i})]' }
#'
#' The following priors are assumed:
#' \deqn{\gamma_i \sim G}
#' \deqn{G \sim \mathcal{DP}(\alpha G_0)}
#' \deqn{G_0 = \mathcal{U}nif(0, \pi/2)}
#' \deqn{\alpha \sim \mathcal{G}amma(a_0, b_0)}
#' \deqn{\boldsymbol{\theta}_j \sim
#' \mathcal{N}_{2}(\mathbf{0}, \mathbf{I}_{2})}
#' For identification, some \eqn{\boldsymbol{\theta}_j}s are truncated
#' above or below 0, or fixed to constants.
#'
#'
#' @param pwc.data A data.frame containing the pairwise comparisons data.
#' Each row of \code{pwc.data} corresponds to a single pairwise comparison.
#' \code{pwc.data} needs to have exactly four columns. The first column
#' contains a unique identifier for the rater. Column two contains the unique
#' identifier for the first item being compared. Column three contains the
#' unique identifier for the second item being compared. Column four contains
#' the unique identifier of the item selected from the two items being
#' compared. If a tie occurred, the entry in the fourth column should be NA.
#' \strong{The identifiers in columns 2 through 4 must start with a letter.
#' Examples are provided below.}
#'
#' @param theta.constraints A list specifying possible simple equality or
#' inequality constraints on the item parameters. A
#' typical entry in the list has one of three forms:
#' \code{itemname=list(d,c)} which will constrain the dth dimension of
#' theta for the item named \code{itemname} to be equal to c,
#' \code{itemname=list(d,"+")} which will constrain the dth dimension of
#' theta for the item named \code{itemname} to be positive, and
#' \code{itemname=list(d, "-")} which will constrain the dth dimension of
#' theta for the item named \code{itemname} to be negative.
#'
#' @param burnin The number of burn-in iterations for the sampler.
#'
#' @param mcmc The number of Gibbs iterations for the sampler.
#'
#' @param thin The thinning interval used in the simulation. The number of
#' Gibbs iterations must be divisible by this value.
#'
#' @param verbose A switch which determines whether or not the progress of the
#' sampler is printed to the screen. If \code{verbose} is greater than 0
#' output is printed to the screen every
#' \code{verbose}th iteration.
#'
#' @param seed The seed for the random number generator. If NA, the Mersenne
#' Twister generator is used with default seed 12345; if an integer is passed
#' it is used to seed the Mersenne twister. The user can also pass a list of
#' length two to use the L'Ecuyer random number generator, which is suitable
#' for parallel computation. The first element of the list is the L'Ecuyer
#' seed, which is a vector of length six or NA (if NA a default seed of
#' \code{rep(12345,6)} is used). The second element of list is a positive
#' substream number. See the MCMCpack specification for more details.
#'
#' @param gamma.start The starting value for the gamma vector. This
#' can either be a scalar or a column vector with dimension equal to the number
#' of raters. If this takes a scalar value, then that value will serve as the
#' starting value for all of the gammas. The default value of NA will set the
#' starting value of each gamma parameter to \eqn{\pi/4}.
#'
#'
#' @param theta.start Starting values for the theta. Can be either a numeric
#' scalar, a J by 2 matrix (where J is the number of items compared), or NA.
#' If a scalar, all theta values are set to that value (except elements already
#' specified via theta.contraints. If NA, then non constrained elements of
#' theta are set equal to 0, elements constrained to be positive are set equal
#' to 0.5, elements constrained to be negative are set equal to -0.5 and
#' elements with equality constraints are set to satisfy those constraints.
#'
#' @param store.theta Should the theta draws be returned? Default is TRUE.
#'
#' @param store.gamma Should the gamma draws be returned? Default is TRUE.
#'
#' @param tune Tuning parameter for the random walk Metropolis proposal for
#' each gamma_i. \code{tune} is the width of the uniform proposal centered at
#' the current value of gamma_i. Must be a positive scalar.
#'
#' @param procrustes Should the theta and gamma draws be post-processed with
#' a Procrustes transformation? Default is FALSE. The Procrustes target matrix
#' is derived from the constrained elements of theta. Each row of theta that
#' has both theta values constrained is part of the of the target matrix.
#' Elements with equality constraints are set to those values. Elements
#' constrained to be positive are set to 1. Elements constrained to be negative
#' are set to -1. If \code{procrustes} is set to \code{TRUE} theta.constraints
#' must be set so that there are at least three rows of theta that have both
#' elements of theta constrained.
#'
#' @param alpha.start The starting value for the DP concentration parameter
#' alpha. Must be a positive scalar. Defaults to 1. If \code{alpha.fixed} is
#' set equal to TRUE, then alpha is held fixed at alpha.start.
#'
#' @param cluster.max The maximum number of clusters allowed in the
#' approximation to the DP prior for gamma. Defaults to 100. Must be a
#' positive integer.
#'
#' @param cluster.mcmc The number of additional MCMC iterations that are done
#' to sample each cluster-specific gamma value within one main MCMC iteration.
#' Must be a positive integer. Defaults to 500. Setting this to a lower value
#' speeds runtime at the cost of (possibly) worse mixing.
#'
#' @param alpha.fixed Logical value indicating whether the DP concentration
#' parameter alpha be held fixed (\code{TRUE}) or estimated (\code{FALSE}).
#'
#' @param a0 The shape parameter of the gamma prior for alpha. This is the
#' same parameterization of the gamma distribution as R's internal
#' \code{rgamma()} function. Only relevant if \code{alpha.fixed} is set equal
#' to \code{FALSE}. Defaults to 1.
#'
#' @param b0 The rate parameter of the gamma prior for alpha. This is the
#' same parameterization of the gamma distribution as R's internal
#' \code{rgamma()} function. Only relevant if \code{alpha.fixed} is set equal
#' to \code{FALSE}. Defaults to 1.
#'
#' @param ... further arguments to be passed
#'
#'
#' @return An mcmc object that contains the posterior sample. This object can
#' be summarized by functions provided by the coda package. Most of the column
#' names of the mcmc object are self explanatory. Note however that the columns
#' with names of the form "cluster.[raterID]" give the cluster membership of
#' each rater at each stored MCMC iteration. Because of the possibility of
#' label switching, the particular values of these cluster membership variables
#' are not meaningful. What is meaningful is whether two raters share the same
#' cluster membership value at a particular MCMC iteration. This indicates
#' that those two raters were clustered together during that iteration.
#' Finally, note that the "n.clusters" column gives the number of distinct
#' gamma values at each iteration, i.e. the number of clusters at that
#' iteration.
#'
#' @author Qiushi Yu <yuqiushi@umich.edu> and
#' Kevin M. Quinn <kmq@umich.edu>
#'
#' @seealso \code{\link[coda]{plot.mcmc}},\code{\link[coda]{summary.mcmc}},
#' \code{\link[MCMCpack]{MCMCpaircompare}},
#' \code{\link[MCMCpack]{MCMCpaircompare2dDP}}
#'
#' @references Albert, J. H. and S. Chib. 1993. ``Bayesian Analysis of Binary
#' and Polychotomous Response Data.'' \emph{J. Amer. Statist. Assoc.} 88,
#' 669-679
#'
#' Yu, Qiushi and Kevin M. Quinn. 2021. ``A Multidimensional Pairwise
#' Comparison Model for Heterogeneous Perceptions with an Application to
#' Modeling the Perceived Truthfulness of Public Statements on COVID-19.''
#' University of Michigan Working Paper.
#'
#' Andrew D. Martin, Kevin M. Quinn, and Jong Hee Park. 2011. ``MCMCpack:
#' Markov Chain Monte Carlo in R.'', \emph{Journal of Statistical Software}.
#' 42(9): 1-21. \doi{10.18637/jss.v042.i09}.
#'
#' Daniel Pemstein, Kevin M. Quinn, and Andrew D. Martin. 2007. \emph{Scythe
#' Statistical Library 1.0.} \url{http://scythe.wustl.edu.s3-website-us-east-1.amazonaws.com/}.
#'
#' Martyn Plummer, Nicky Best, Kate Cowles, and Karen Vines. 2006. ``Output
#' Analysis and Diagnostics for MCMC (CODA)'', \emph{R News}. 6(1): 7-11.
#' \url{https://CRAN.R-project.org/doc/Rnews/Rnews_2006-1.pdf}.
#'
#' @keywords models
#'
#' @examples
#' \dontrun{
#' ## a synthetic data example
#' set.seed(123)
#'
#' I <- 65 ## number of raters
#' J <- 50 ## number of items to be compared
#'
#' ## 3 clusters:
#' ## raters 1 to 5 put most weight on dimension 1
#' ## raters 6 to 10 put most weight on dimension 2
#' ## raters 11 to I put substantial weight on both dimensions
#' gamma.true <- c(rep(0.05, 5),
#' rep(1.50, 5),
#' rep(0.7, I-10) )
#' theta1.true <- rnorm(J, m=0, s=1)
#' theta2.true <- rnorm(J, m=0, s=1)
#' theta1.true[1] <- 2
#' theta2.true[1] <- 2
#' theta1.true[2] <- -2
#' theta2.true[2] <- -2
#' theta1.true[3] <- 2
#' theta2.true[3] <- -2
#'
#'
#'
#' n.comparisons <- 125 ## number of pairwise comparisons for each rater
#'
#' ## generate synthetic data according to the assumed model
#' rater.id <- NULL
#' item.1.id <- NULL
#' item.2.id <- NULL
#' choice.id <- NULL
#' for (i in 1:I){
#' for (c in 1:n.comparisons){
#' rater.id <- c(rater.id, i+100)
#' item.numbers <- sample(1:J, size=2, replace=FALSE)
#' item.1 <- item.numbers[1]
#' item.2 <- item.numbers[2]
#' item.1.id <- c(item.1.id, item.1)
#' item.2.id <- c(item.2.id, item.2)
#' z <- c(cos(gamma.true[i]), sin(gamma.true[i]))
#' eta <- z[1] * (theta1.true[item.1] - theta1.true[item.2]) +
#' z[2] * (theta2.true[item.1] - theta2.true[item.2])
#' prob.item.1.chosen <- pnorm(eta)
#' u <- runif(1)
#' if (u <= prob.item.1.chosen){
#' choice.id <- c(choice.id, item.1)
#' }
#' else{
#' choice.id <- c(choice.id, item.2)
#' }
#' }
#' }
#' item.1.id <- paste("item", item.1.id+100, sep=".")
#' item.2.id <- paste("item", item.2.id+100, sep=".")
#' choice.id <- paste("item", choice.id+100, sep=".")
#'
#' sim.data <- data.frame(rater.id, item.1.id, item.2.id, choice.id)
#'
#'
#' ## fit the model (should be run for more than 10500 iterations)
#' posterior <- MCMCpaircompare2dDP(pwc.data=sim.data,
#' theta.constraints=list(item.101=list(1,2),
#' item.101=list(2,2),
#' item.102=list(1,-2),
#' item.102=list(2,-2),
#' item.103=list(1,"+"),
#' item.103=list(2,"-")),
#' verbose=100,
#' burnin=500, mcmc=10000, thin=5,
#' cluster.mcmc=10,
#' store.theta=TRUE, store.gamma=TRUE,
#' tune=0.1)
#'
#'
#'
#'
#'
#' theta1.draws <- posterior[, grep("theta1", colnames(posterior))]
#' theta2.draws <- posterior[, grep("theta2", colnames(posterior))]
#' gamma.draws <- posterior[, grep("gamma", colnames(posterior))]
#'
#' theta1.post.med <- apply(theta1.draws, 2, median)
#' theta2.post.med <- apply(theta2.draws, 2, median)
#' gamma.post.med <- apply(gamma.draws, 2, median)
#'
#' theta1.post.025 <- apply(theta1.draws, 2, quantile, prob=0.025)
#' theta1.post.975 <- apply(theta1.draws, 2, quantile, prob=0.975)
#' theta2.post.025 <- apply(theta2.draws, 2, quantile, prob=0.025)
#' theta2.post.975 <- apply(theta2.draws, 2, quantile, prob=0.975)
#' gamma.post.025 <- apply(gamma.draws, 2, quantile, prob=0.025)
#' gamma.post.975 <- apply(gamma.draws, 2, quantile, prob=0.975)
#'
#'
#'
#' ## compare estimates to truth
#' par(mfrow=c(2,2))
#' plot(theta1.true, theta1.post.med, xlim=c(-2.5, 2.5), ylim=c(-2.5, 2.5),
#' col=rgb(0,0,0,0.3))
#' segments(x0=theta1.true, x1=theta1.true,
#' y0=theta1.post.025, y1=theta1.post.975,
#' col=rgb(0,0,0,0.3))
#' abline(0, 1, col=rgb(1,0,0,0.5))
#'
#' plot(theta2.true, theta2.post.med, xlim=c(-2.5, 2.5), ylim=c(-2.5, 2.5),
#' col=rgb(0,0,0,0.3))
#' segments(x0=theta2.true, x1=theta2.true,
#' y0=theta2.post.025, y1=theta2.post.975,
#' col=rgb(0,0,0,0.3))
#' abline(0, 1, col=rgb(1,0,0,0.5))
#'
#' plot(gamma.true, gamma.post.med, xlim=c(0, 1.6), ylim=c(0, 1.6),
#' col=rgb(0,0,0,0.3))
#' segments(x0=gamma.true, x1=gamma.true,
#' y0=gamma.post.025, y1=gamma.post.975,
#' col=rgb(0,0,0,0.3))
#' abline(0, 1, col=rgb(1,0,0,0.5))
#'
#'
#' ## plot point estimates
#' plot(theta1.post.med, theta2.post.med,
#' xlim=c(-2.5, 2.5), ylim=c(-2.5, 2.5),
#' col=rgb(0,0,0,0.3))
#' for (i in 1:length(gamma.post.med)){
#' arrows(x0=0, y0=0,
#' x1=cos(gamma.post.med[i]),
#' y1=sin(gamma.post.med[i]),
#' col=rgb(1,0,0,0.2), len=0.05, lwd=0.5)
#' }
#'
#' }
#' @export
"MCMCpaircompare2dDP" <- function(pwc.data, theta.constraints=list(),
burnin=1000, mcmc=20000, thin=1,
verbose=0, seed=NA,
gamma.start=NA,
theta.start=NA,
store.theta=TRUE,
store.gamma=FALSE,
tune=0.3, procrustes=FALSE,
alpha.start=1, cluster.max=100,
cluster.mcmc=500,
alpha.fixed=TRUE,
a0=1, b0=1,
...){
## checks
check.offset(list(...))
check.mcmc.parameters(burnin, mcmc, thin)
if (!(procrustes %in% c(TRUE, FALSE))){
cat("procrustes cannot take a value other than TRUE or FALSE.\n")
stop("Please check function arguments and try MCMCpaircompare2dDP() again.\n",
call.=FALSE)
}
## convert all columns to character data
pwc.data[,1] <- as.character(pwc.data[,1])
pwc.data[,2] <- as.character(pwc.data[,2])
pwc.data[,3] <- as.character(pwc.data[,3])
pwc.data[,4] <- as.character(pwc.data[,4])
## check input data
if (ncol(pwc.data) != 4){
cat("pwc.data must have 4 columns. The specified pwc.data does not have 4 columns.\n")
stop("Please check data and try MCMCpaircompare2dDP() again.\n",
call.=FALSE)
}
for (i in 1:nrow(pwc.data)){
if (!(pwc.data[i,4] %in% c(NA, pwc.data[i,2], pwc.data[i,3]))){
cat("pwc.data[", i, ",4] is not in {NA, pwc.data[", i, ",2:3]}.\n", sep="")
stop("Please check data and try MCMCpaircompare2dDP() again.\n",
call.=FALSE)
}
}
## extract key constants from pwc.data
n <- nrow(pwc.data)
n.resp <- length(unique(pwc.data[,1]))
n.cand <- length(unique( c(pwc.data[,2], pwc.data[,3])))
## convert pwc.data into purely numeric matrix
resp.codes <- sort(unique(pwc.data[,1]))
cand.codes <- sort(unique( c(pwc.data[,2], pwc.data[,3]) ))
pwc.data.numeric <- matrix(-999, nrow(pwc.data), 4)
for (p in 1:n){
if (pwc.data[p,1] > 0){
pwc.data.numeric[p,1] <- which(pwc.data[p,1] == resp.codes)
}
if (pwc.data[p,2] > 0){
pwc.data.numeric[p,2] <- which(pwc.data[p,2] == cand.codes)
}
if (pwc.data[p,3] > 0){
pwc.data.numeric[p,3] <- which(pwc.data[p,3] == cand.codes)
}
if (pwc.data[p,4] > 0){
pwc.data.numeric[p,4] <- which(pwc.data[p,4] == cand.codes)
}
}
if (length(cluster.max) > 1){
cat("cluster.max is not a scalar in MCMCpaircompare2dDP().\n")
stop("Please check specification and try MCMCpaircompare2dDP() again.\n",
call.=FALSE)
}
if (!is.numeric(cluster.max)){
cat("cluster.max is non-numeric in MCMCpaircompare2dDP().\n")
stop("Please check specification and try MCMCpaircompare2dDP() again.\n",
call.=FALSE)
}
if (cluster.max <= 0){
cat("cluster.max takes a value <= 0 in MCMCpaircompare2dDP().\n")
stop("Please check specification and try MCMCpaircompare2dDP() again.\n",
call.=FALSE)
}
if (length(cluster.mcmc) > 1){
cat("cluster.mcmc is not a scalar in MCMCpaircompare2dDP().\n")
stop("Please check specification and try MCMCpaircompare2dDP() again.\n",
call.=FALSE)
}
if (!is.numeric(cluster.mcmc)){
cat("cluster.mcmc is non-numeric in MCMCpaircompare2dDP().\n")
stop("Please check specification and try MCMCpaircompare2dDP() again.\n",
call.=FALSE)
}
if (cluster.mcmc <= 0){
cat("cluster.mcmc takes a value <= 0 in MCMCpaircompare2dDP().\n")
stop("Please check specification and try MCMCpaircompare2dDP() again.\n",
call.=FALSE)
}
if (length(a0) > 1){
cat("a0 is not a scalar in MCMCpaircompare2dDP().\n")
stop("Please check specification and try MCMCpaircompare2dDP() again.\n",
call.=FALSE)
}
if (!is.numeric(a0)){
cat("a0 is non-numeric in MCMCpaircompare2dDP().\n")
stop("Please check specification and try MCMCpaircompare2dDP() again.\n",
call.=FALSE)
}
if (a0 <= 0){
cat("a0 takes a value <= 0 in MCMCpaircompare2dDP().\n")
stop("Please check specification and try MCMCpaircompare2dDP() again.\n",
call.=FALSE)
}
if (length(b0) > 1){
cat("b0 is not a scalar in MCMCpaircompare2dDP().\n")
stop("Please check specification and try MCMCpaircompare2dDP() again.\n",
call.=FALSE)
}
if (!is.numeric(b0)){
cat("b0 is non-numeric in MCMCpaircompare2dDP().\n")
stop("Please check specification and try MCMCpaircompare2dDP() again.\n",
call.=FALSE)
}
if (b0 <= 0){
cat("b0 takes a value <= 0 in MCMCpaircompare2dDP().\n")
stop("Please check specification and try MCMCpaircompare2dDP() again.\n",
call.=FALSE)
}
## set up constraints on theta
holder <- build.pairwise.theta.constraints(theta.constraints,
cand.codes, n.cand, 2)
theta.eq.constraints <- holder[[1]]
theta.ineq.constraints <- holder[[2]]
## starting values for theta
theta <- pairwise.theta.start(theta.start, n.cand, 2,
theta.eq.constraints,
theta.ineq.constraints)
## starting values for alpha
if (length(alpha.start) > 1){
cat("alpha.start is not a scalar in MCMCpaircompare2dDP().\n")
stop("Please check specification and try MCMCpaircompare2dDP() again.\n",
call.=FALSE)
}
if (!is.numeric(alpha.start)){
cat("alpha.start is non-numeric in MCMCpaircompare2dDP().\n")
stop("Please check specification and try MCMCpaircompare2dDP() again.\n",
call.=FALSE)
}
if (alpha.start <= 0){
cat("alpha.start takes a value <= 0 in MCMCpaircompare2dDP().\n")
stop("Please check specification and try MCMCpaircompare2dDP() again.\n",
call.=FALSE)
}
## starting values for cluster.gamma
cluster.gamma<-gamma.start
if (all(is.na(gamma.start))){
cluster.gamma <- gamma.start <- rep(pi/4, cluster.max)
}
if (length(gamma.start) < cluster.max){
cluster.gamma <- rep(gamma.start, length.out=cluster.max)
}
if (!is.numeric(gamma.start)){
cat("gamma.start is non-numeric in MCMCpaircompare2dDP().\n")
stop("Please check specification and try MCMCpaircompare2dDP() again.\n",
call.=FALSE)
}
if (min(gamma.start) < 0){
cat("gamma.start takes a value < 0 in MCMCpaircompare2dDP().\n")
stop("Please check specification and try MCMCpaircompare2dDP() again.\n",
call.=FALSE)
}
if (max(gamma.start) > pi/2){
cat("gamma.start takes a value > pi/2 in MCMCpaircompare2dDP().\n")
stop("Please check specification and try MCMCpaircompare2dDP() again.\n",
call.=FALSE)
}
##starting values for judge membership
judge.cluster.membership<-rep(NA, n.resp)
##starting values gamma_i, which depends on judge i's cluster membership
gamma<-rep(NA,n.resp)
## I randomly assign judges to 1:cluster.max clusters with equal likelihood
for(i in 1:n.resp){
judge.cluster.membership[i]<-sample(1:cluster.max, 1)
gamma[i]<-cluster.gamma[judge.cluster.membership[i]]
}
## define holder for posterior sample
if(store.gamma == FALSE & store.theta == TRUE) {
sample <- matrix(data=0, mcmc/thin, 2*n.cand)
}
else if (store.gamma == TRUE & store.theta == FALSE){
#we store judge i's gamma, judge i's cluster membership, and the # of unique clusters in one iteration
sample <- matrix(data=0, mcmc/thin, n.resp*2+1)
}
else if (store.gamma == TRUE & store.theta == TRUE){
sample <- matrix(data=0, mcmc/thin, 2*n.cand + n.resp*2+1)
}
else{
cat("Error: store.gamma == FALSE & store.theta == FALSE.\n")
stop("Please respecify and call MCMCpaircompare() again.\n",
call.=FALSE)
}
#if alpha.fixed=FALSE, we store alpha posterior sample
if(alpha.fixed==FALSE){
sample<-cbind(sample,0)
}
## define holder for the acceptance rate of each gamma
gamma_accept_rate<-rep(0, n.resp)
## seeds
seeds <- form.seeds(seed)
lecuyer <- seeds[[1]]
seed.array <- seeds[[2]]
lecuyer.stream <- seeds[[3]]
## create theta.sub.target and associated row indicator
theta.eq.const.holder <- theta.eq.constraints
theta.ineq.const.holder <- theta.ineq.constraints
theta.eq.const.holder[theta.eq.const.holder[,1] == -999 &
theta.ineq.const.holder[,1] != 0, 1] <-
theta.ineq.const.holder[theta.eq.const.holder[,1] == -999 &
theta.ineq.const.holder[,1] != 0, 1]
theta.eq.const.holder[theta.eq.const.holder[,2] == -999 &
theta.ineq.const.holder[,2] != 0, 2] <-
theta.ineq.const.holder[theta.eq.const.holder[,2] == -999 &
theta.ineq.const.holder[,2] != 0, 2]
theta.sub.target.indic <- ((theta.eq.const.holder[,1] != -999) &
(theta.eq.const.holder[,2] != -999))
theta.sub.target <- theta.eq.const.holder[theta.sub.target.indic,]
if (procrustes == TRUE){
if (nrow(theta.sub.target) < 3){
cat("Error: procrustes == TRUE but theta.constraints has < 3 rows.\n")
stop("Please respecify and call MCMCpaircompare() again.\n",
call.=FALSE)
}
else{
theta.eq.constraints <- 0 * theta.eq.constraints - 999
theta.ineq.constraints <- 0 * theta.ineq.constraints
}
}
## call C++ code to draw sample
posterior <- .C("cMCMCpaircompare2dDP",
sampledata = as.double(sample),
samplerow = as.integer(nrow(sample)),
samplecol = as.integer(ncol(sample)),
pwc.datanumericdata = as.integer(pwc.data.numeric-1),#minus one because respondents and items are indexed from zero in C++
pwc.datanumericrow = as.integer(nrow(pwc.data.numeric)),
pwc.datanumericcol = as.integer(ncol(pwc.data.numeric)),
burnin = as.integer(burnin),
mcmc = as.integer(mcmc),
clustermcmc = as.integer(cluster.mcmc),
thin = as.integer(thin),
lecuyer = as.integer(lecuyer),
seedarray = as.integer(seed.array),
lecuyerstream = as.integer(lecuyer.stream),
verbose = as.integer(verbose),
thetastartdata = as.double(theta),
thetastartrow = as.integer(nrow(theta)),
thetastartcol = as.integer(ncol(theta)),
gammastartdata = as.double(gamma),
gammastartrow = as.integer(length(gamma)),
gammastartcol = as.integer(1),
clustergammastartdata=as.double(cluster.gamma),
clustergammastartrow= as.integer(length(cluster.gamma)),
clustergammastartcol= as.integer(1),
judgeclustermembershipstartdata=as.integer(judge.cluster.membership-1), #minus one because clusters are indexed from zero in C++
judgeclustermembershipstartrow=as.integer(length(judge.cluster.membership)),
judgeclustermembershipstartcol= as.integer(1),
tunevalue=as.double(tune),
thetaeqdata = as.double(theta.eq.constraints),
thetaeqrow = as.integer(nrow(theta.eq.constraints)),
thetaeqcol = as.integer(ncol(theta.eq.constraints)),
thetaineqdata = as.double(theta.ineq.constraints),
thetaineqrow = as.integer(nrow(theta.ineq.constraints)),
thetaineqcol = as.integer(ncol(theta.ineq.constraints)),
storegamma = as.integer(store.gamma),
storetheta = as.integer(store.theta),
gammaacceptrate=as.double(gamma_accept_rate),
alpha=as.double(alpha.start),
clustermax=as.integer(cluster.max),
alphafixed=as.integer(alpha.fixed),
a=as.double(a0),
b=as.double(b0),
PACKAGE="MCMCpack"
)
## undo the C++ indexing by 0
posterior$pwc.datanumericdata <- posterior$pwc.datanumericdata + 1
theta.names <- c(paste("theta1.", cand.codes, sep = ""), paste("theta2.", cand.codes, sep = ""))
## I store theta's in the following way:
## if we have 1,2,3,4,5 cand.codes,
## then theta.names is "theta1.1" "theta1.2" "theta1.3" "theta1.4" "theta1.5"
## "theta2.1" "theta2.2" "theta2.3" "theta2.4" "theta2.5"
gamma.names <- paste("gamma.", resp.codes, sep = "")
cluster.names <- paste("cluster.", resp.codes, sep = "")
## put together matrix and build MCMC object to return
sample <- matrix(posterior$sampledata, posterior$samplerow,
posterior$samplecol,
byrow=FALSE)
output <- mcmc(data=sample, start=burnin+1, end=burnin+mcmc, thin=thin)
names <- NULL
if(store.theta == TRUE) {
names <- c(names, theta.names)
}
if (store.gamma == TRUE){
names <- c(names, gamma.names, cluster.names, "n.clusters")
}
if (alpha.fixed == FALSE){
names <- c(names, "alpha")
}
varnames(output) <- names
if (procrustes){
gammas <- output[, grep("gamma", colnames(output))]
thetas <- output[, grep("theta", colnames(output))]
thetas1 <- thetas[,1:n.cand]
thetas2 <- thetas[,(n.cand+1):ncol(thetas)]
for (iter in 1:nrow(thetas)){
theta.sub <- cbind(thetas1[iter, theta.sub.target.indic],
thetas2[iter, theta.sub.target.indic])
procrust.out <- procrustes(theta.sub, theta.sub.target,
translation=FALSE, dilation=FALSE)
R <- procrust.out$R
theta.mat <- cbind(thetas1[iter,], thetas2[iter,])
theta.mat <- theta.mat %*% R
thetas1[iter, ] <- theta.mat[,1]
thetas2[iter, ] <- theta.mat[,2]
z <- cbind(cos(gammas[iter,]), sin(gammas[iter,]))
z <- t(R) %*% t(z)
gammas[iter,] <- atan2(z[2,], z[1,])
}
# output <- mcmc(data=cbind(thetas1, thetas2, gammas),
# start=burnin+1, end=burnin+mcmc, thin=thin)
output[,1:(2*n.cand)]<-cbind(thetas1, thetas2)
output[,(1+2*n.cand):(n.resp+2*n.cand)]<-gammas
}
attr(output,"title") <-
"MCMCpaircompare2dDP Posterior Sample"
gamma_accept_rate <- posterior$gammaacceptrate
attr(output, "gamma_accept_rate") <- gamma_accept_rate
attr(output, "procrustes") <- procrustes
return(output)
} ## end MCMCpaircompare
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.