## Nothing
##########################################################################
## sample from the posterior distribution of a pairwise comparisons model
## in R using linked C++ code in Scythe.
##
## Model is:
##
## i = 1,...,n.resp (respondents)
## j = 1,...,n.cand (candidates)
##
## Y_{ijj'} = 1 if i chooses j over j'
## Y_{ijj'} = 0 if i chooses j' over j
## Y_{ijj'} = NA if i chooses neither;
##
## Pr(Y_{ijj'} = 1) = \Phi( \alpha_{i} [\theta_{j} - \theta_{ j'} ] )
##
## alpha_i \overset{iid}{\sim} N(a, A^{-1})
## theta_j \overset{ind}{\sim} N(0, 1)
## (some theta_js truncated above or below 0, or fixed to constants)
##
##
## candidate IDs in columns 2 to 4 need to begin with a letter
##
## This software is distributed under the terms of the GNU GENERAL
## PUBLIC LICENSE Version 2, June 1991. See the package LICENSE
## file for more information.
##
## Original code KQ 3/17/2015
## Added to MCMCpack KQ 6/24/2021
##
## Copyright (C) 2003-2007 Andrew D. Martin and Kevin M. Quinn
## Copyright (C) 2007-present Andrew D. Martin, Kevin M. Quinn,
## and Jong Hee Park
##########################################################################
#' Markov Chain Monte Carlo for a Pairwise Comparisons Model with Probit Link
#'
#' This function generates a sample from the posterior distribution of a
#' model for pairwise comparisons data with a probit link. Thurstone's model
#' is a special case of this model when the \eqn{\alpha} parameter is fixed at
#' 1.
#'
#' \code{MCMCpaircompare} uses the data augmentation approach of Albert and
#' Chib (1993). The user supplies data and priors, and a sample from the
#' posterior is returned as an \code{mcmc} object, which can be subsequently
#' analyzed in the \code{coda} package.
#'
#' The simulation is done in compiled C++ code to maximize efficiency.
#'
#' Please consult the \code{coda} package documentation for a comprehensive
#' list of functions that can be used to analyze the posterior sample.
#'
#' The model takes the following form:
#'
#' \deqn{i = 1,...,I \ \ \ \ (raters) }
#' \deqn{j = 1,...,J \ \ \ \ (items) }
#' \deqn{Y_{ijj'} = 1 \ \ if \ \ i \ \ chooses \ \ j \ \ over \ \ j'}
#' \deqn{Y_{ijj'} = 0 \ \ if \ \ i \ \ chooses \ \ j' \ \ over \ \ j}
#' \deqn{Y_{ijj'} = NA \ \ if \ \ i \ \ chooses \ \ neither}
#'
#' \deqn{Pr(Y_{ijj'} = 1) = \Phi( \alpha_{i} [\theta_{j} - \theta_{ j'} ] ) }
#'
#' The following Gaussian priors are assumed:
#' \deqn{\alpha_i \sim \mathcal{N}(a, A^{-1})}
#' \deqn{\theta_j \sim \mathcal{N}(0, 1)}
#' For identification, some \eqn{\theta_j}s are truncated above or below 0,
#' or fixed to constants.
#'
#'
#' @param pwc.data A data.frame containing the pairwise comparisons data.
#' Each row of \code{pwc.data} corresponds to a single pairwise comparison.
#' \code{pwc.data} needs to have exactly four columns. The first column
#' contains a unique identifier for the rater. Column two contains the unique
#' identifier for the first item being compared. Column three contains the
#' unique identifier for the second item being compared. Column four contains
#' the unique identifier of the item selected from the two items being
#' compared. If a tie occurred, the entry in the fourth column should be NA.
#' For applications without raters (such as sports competitions) all entries
#' in the first column should be set to a single value and \code{alpha.fixed}
#' (see below) should be set to \code{TRUE}. \strong{The identifiers in
#' columns 2 through 4 must start with a letter. Examples are provided below.}
#'
#' @param theta.constraints A list specifying possible simple equality or
#' inequality constraints on the item parameters. A typical entry in the
#' list has one of three forms: \code{itemname=c} which will constrain the
#' item parameter for the item named \code{itemname} to be equal to c,
#' \code{itemname="+"} which will constrain the item parameter for the
#' item named \code{itemname} to be positive, and \code{itemname="-"} which
#' will constrain the item parameter for the item named \code{itemname} to
#' be negative.
#'
#' @param alpha.fixed Should alpha be fixed to a constant value of 1 for all
#' raters? Default is FALSE. If set to FALSE, an alpha value is estimated for
#' each rater.
#'
#' @param burnin The number of burn-in iterations for the sampler.
#'
#' @param mcmc The number of Gibbs iterations for the sampler.
#'
#' @param thin The thinning interval used in the simulation. The number of
#' Gibbs iterations must be divisible by this value.
#'
#' @param verbose A switch which determines whether or not the progress of the
#' sampler is printed to the screen. If \code{verbose} is greater than 0
#' output is printed to the screen every
#' \code{verbose}th iteration.
#'
#' @param seed The seed for the random number generator. If NA, the Mersenne
#' Twister generator is used with default seed 12345; if an integer is passed
#' it is used to seed the Mersenne twister. The user can also pass a list of
#' length two to use the L'Ecuyer random number generator, which is suitable
#' for parallel computation. The first element of the list is the L'Ecuyer
#' seed, which is a vector of length six or NA (if NA a default seed of
#' \code{rep(12345,6)} is used). The second element of the list is a positive
#' substream number. See the MCMCpack specification for more details.
#'
#' @param alpha.start The starting value for the alpha vector. This
#' can either be a scalar or a column vector with dimension equal to the number
#' of alphas. If this takes a scalar value, then that value will serve as the
#' starting value for all of the alphas. The default value of NA will set the
#' starting value of each alpha parameter to 1.
#'
#' @param a The prior mean of alpha. Must be a scalar. Default is 0.
#'
#' @param A The prior precision of alpha. Must be a positive scalar.
#' Default is 0.25 (prior variance is 4).
#'
#' @param store.theta Should the theta draws be returned? Default is TRUE.
#'
#' @param store.alpha Should the alpha draws be returned? Default is FALSE.
#'
#' @param ... further arguments to be passed
#'
#'
#' @return An mcmc object that contains the posterior sample. This object can
#' be summarized by functions provided by the coda package.
#'
#'
#' @seealso \code{\link[coda]{plot.mcmc}},\code{\link[coda]{summary.mcmc}},
#' \code{\link[MCMCpack]{MCMCpaircompare2d}},
#' \code{\link[MCMCpack]{MCMCpaircompare2dDP}}
#'
#' @references Albert, J. H. and S. Chib. 1993. ``Bayesian Analysis of Binary
#' and Polychotomous Response Data.'' \emph{J. Amer. Statist. Assoc.} 88,
#' 669-679
#'
#' Yu, Qiushi and Kevin M. Quinn. 2021. ``A Multidimensional Pairwise
#' Comparison Model for Heterogeneous Perception with an Application to
#' Modeling the Perceived Truthfulness of Public Statements on COVID-19.''
#' University of Michigan Working Paper.
#'
#' Andrew D. Martin, Kevin M. Quinn, and Jong Hee Park. 2011. ``MCMCpack:
#' Markov Chain Monte Carlo in R.'', \emph{Journal of Statistical Software}.
#' 42(9): 1-21. \doi{10.18637/jss.v042.i09}.
#'
#' Daniel Pemstein, Kevin M. Quinn, and Andrew D. Martin. 2007. \emph{Scythe
#' Statistical Library 1.0.} \url{http://scythe.wustl.edu.s3-website-us-east-1.amazonaws.com/}.
#'
#' Martyn Plummer, Nicky Best, Kate Cowles, and Karen Vines. 2006. ``Output
#' Analysis and Diagnostics for MCMC (CODA)'', \emph{R News}. 6(1): 7-11.
#' \url{https://CRAN.R-project.org/doc/Rnews/Rnews_2006-1.pdf}.
#'
#' @keywords models
#'
#' @examples
#'
#' \dontrun{
#' ## Euro 2016 example
#' data(Euro2016)
#'
#' posterior1 <- MCMCpaircompare(pwc.data=Euro2016,
#' theta.constraints=list(Ukraine="-",
#' Portugal="+"),
#' alpha.fixed=TRUE,
#' verbose=10000,
#' burnin=10000, mcmc=500000, thin=100,
#' store.theta=TRUE, store.alpha=FALSE)
#'
#' ## alternative identification constraints
#' posterior2 <- MCMCpaircompare(pwc.data=Euro2016,
#' theta.constraints=list(Ukraine="-",
#' Portugal=1),
#' alpha.fixed=TRUE,
#' verbose=10000,
#' burnin=10000, mcmc=500000, thin=100,
#' store.theta=TRUE, store.alpha=FALSE)
#'
#'
#'
#'
#'
#'
#'
#'
#' ## a synthetic data example with estimated rater-specific parameters
#' set.seed(123)
#'
#' I <- 65 ## number of raters
#' J <- 50 ## number of items to be compared
#'
#'
#' ## raters 1 to 5 have less sensitivity to stimuli than raters 6 through I
#' alpha.true <- c(rnorm(5, m=0.2, s=0.05), rnorm(I - 5, m=1, s=0.1))
#' theta.true <- sort(rnorm(J, m=0, s=1))
#'
#' n.comparisons <- 125 ## number of pairwise comparisons for each rater
#'
#' ## generate synthetic data according to the assumed model
#' rater.id <- NULL
#' item.1.id <- NULL
#' item.2.id <- NULL
#' choice.id <- NULL
#' for (i in 1:I){
#' for (c in 1:n.comparisons){
#' rater.id <- c(rater.id, i+100)
#' item.numbers <- sample(1:J, size=2, replace=FALSE)
#' item.1 <- item.numbers[1]
#' item.2 <- item.numbers[2]
#' item.1.id <- c(item.1.id, item.1)
#' item.2.id <- c(item.2.id, item.2)
#' eta <- alpha.true[i] * (theta.true[item.1] - theta.true[item.2])
#' prob.item.1.chosen <- pnorm(eta)
#' u <- runif(1)
#' if (u <= prob.item.1.chosen){
#' choice.id <- c(choice.id, item.1)
#' }
#' else{
#' choice.id <- c(choice.id, item.2)
#' }
#' }
#' }
#' item.1.id <- paste("item", item.1.id+100, sep=".")
#' item.2.id <- paste("item", item.2.id+100, sep=".")
#' choice.id <- paste("item", choice.id+100, sep=".")
#'
#' sim.data <- data.frame(rater.id, item.1.id, item.2.id, choice.id)
#'
#'
#' ## fit the model
#' posterior <- MCMCpaircompare(pwc.data=sim.data,
#' theta.constraints=list(item.101=-2,
#' item.150=2),
#' alpha.fixed=FALSE,
#' verbose=10000,
#' a=0, A=0.5,
#' burnin=10000, mcmc=200000, thin=100,
#' store.theta=TRUE, store.alpha=TRUE)
#'
#' theta.draws <- posterior[, grep("theta", colnames(posterior))]
#' alpha.draws <- posterior[, grep("alpha", colnames(posterior))]
#'
#' theta.post.med <- apply(theta.draws, 2, median)
#' alpha.post.med <- apply(alpha.draws, 2, median)
#'
#' theta.post.025 <- apply(theta.draws, 2, quantile, prob=0.025)
#' theta.post.975 <- apply(theta.draws, 2, quantile, prob=0.975)
#' alpha.post.025 <- apply(alpha.draws, 2, quantile, prob=0.025)
#' alpha.post.975 <- apply(alpha.draws, 2, quantile, prob=0.975)
#'
#' ## compare estimates to truth
#' par(mfrow=c(1,2))
#' plot(theta.true, theta.post.med, xlim=c(-2.5, 2.5), ylim=c(-2.5, 2.5),
#' col=rgb(0,0,0,0.3))
#' segments(x0=theta.true, x1=theta.true,
#' y0=theta.post.025, y1=theta.post.975,
#' col=rgb(0,0,0,0.3))
#' abline(0, 1, col=rgb(1,0,0,0.5))
#'
#' plot(alpha.true, alpha.post.med, xlim=c(0, 1.2), ylim=c(0, 3),
#' col=rgb(0,0,0,0.3))
#' segments(x0=alpha.true, x1=alpha.true,
#' y0=alpha.post.025, y1=alpha.post.975,
#' col=rgb(0,0,0,0.3))
#' abline(0, 1, col=rgb(1,0,0,0.5))
#'
#' }
#'
#' @export
"MCMCpaircompare" <- function(pwc.data, theta.constraints=list(),
                              alpha.fixed=FALSE,
                              burnin=1000, mcmc=20000, thin=1,
                              verbose=0, seed=NA,
                              alpha.start=NA,
                              a=0, A=0.25,
                              store.theta=TRUE,
                              store.alpha=FALSE,
                              ...){

  ## Validates the inputs, recodes the rater / item identifiers in pwc.data
  ## to integer indices, builds the equality / inequality constraint vectors
  ## and starting values for theta and alpha, calls the compiled
  ## "cMCMCpaircompare" routine, and returns the draws as an mcmc object
  ## whose columns are named "theta.<item>" and / or "alpha.<rater>".

  ## checks
  check.offset(list(...))
  check.mcmc.parameters(burnin, mcmc, thin)

  if (!is.logical(alpha.fixed) || length(alpha.fixed) != 1 ||
      is.na(alpha.fixed)){
    cat("alpha.fixed must be a logical value.\n")
    stop("Please check data and try MCMCpaircompare() again.\n",
         call.=FALSE)
  }

  ## check input data
  ## (the column count has to be verified before any column is touched,
  ##  otherwise a malformed pwc.data fails with an unrelated subscript error)
  if (is.null(ncol(pwc.data)) || ncol(pwc.data) != 4){
    cat("pwc.data must have 4 columns. The specified pwc.data does not have 4 columns.\n")
    stop("Please check data and try MCMCpaircompare() again.\n",
         call.=FALSE)
  }
  if (nrow(pwc.data) == 0){
    cat("pwc.data must contain at least one pairwise comparison.\n")
    stop("Please check data and try MCMCpaircompare() again.\n",
         call.=FALSE)
  }

  ## convert all columns to character data
  ## (as.data.frame + [[ ]] gives plain vectors for data.frames, matrices,
  ##  and data.frame subclasses alike)
  pwc.data <- as.data.frame(pwc.data, stringsAsFactors=FALSE)
  rater.ids  <- as.character(pwc.data[[1]])
  item1.ids  <- as.character(pwc.data[[2]])
  item2.ids  <- as.character(pwc.data[[3]])
  choice.ids <- as.character(pwc.data[[4]])

  ## only the choice column may be missing (NA codes a tie); a missing rater
  ## or item could not be mapped to a valid index for the compiled code
  if (any(is.na(rater.ids)) || any(is.na(item1.ids)) ||
      any(is.na(item2.ids))){
    cat("pwc.data columns 1 through 3 cannot contain missing values.\n")
    stop("Please check data and try MCMCpaircompare() again.\n",
         call.=FALSE)
  }

  ## the selected item has to be NA or one of the two items being compared
  choice.ok <- is.na(choice.ids) | choice.ids == item1.ids |
    choice.ids == item2.ids
  if (!all(choice.ok)){
    i <- which(!choice.ok)[1]   ## report the first offending row
    cat("pwc.data[", i, ",4] is not in {NA, pwc.data[", i, ",2:3]}.\n", sep="")
    stop("Please check data and try MCMCpaircompare() again.\n",
         call.=FALSE)
  }

  if (!is.numeric(a) || length(a) != 1 || is.na(a)){
    cat("a must be a scalar.\n")
    stop("Please check specification and try MCMCpaircompare() again.\n",
         call.=FALSE)
  }
  if (!is.numeric(A) || length(A) != 1 || is.na(A) || A <= 0){
    cat("A must be a positive scalar.\n")
    stop("Please check specification and try MCMCpaircompare() again.\n",
         call.=FALSE)
  }

  ## extract key constants from pwc.data
  resp.codes <- sort(unique(rater.ids))
  cand.codes <- sort(unique(c(item1.ids, item2.ids)))
  n.resp <- length(resp.codes)
  n.cand <- length(cand.codes)

  ## convert pwc.data into purely numeric matrix
  ## (1-based positions in the sorted code vectors; ties are coded -999)
  pwc.data.numeric <- cbind(match(rater.ids, resp.codes),
                            match(item1.ids, cand.codes),
                            match(item2.ids, cand.codes),
                            match(choice.ids, cand.codes))
  pwc.data.numeric[is.na(pwc.data.numeric)] <- -999

  ## set up constraints on theta
  ## theta.eq.constraints holds the fixed value (NA = unconstrained) and
  ## theta.ineq.constraints holds +1 / -1 / 0 for each item
  theta.constraints <- as.list(theta.constraints)
  theta.eq.constraints <- matrix(NA_real_, n.cand, 1)
  theta.ineq.constraints <- matrix(0, n.cand, 1)
  rownames(theta.eq.constraints) <- cand.codes
  rownames(theta.ineq.constraints) <- cand.codes

  constraint.names <- names(theta.constraints)
  if (is.null(constraint.names)){
    constraint.names <- rep("", length(theta.constraints))
  }
  for (i in seq_along(theta.constraints)){
    name.i <- constraint.names[i]
    ## only the first element of each entry is used
    constraint.i <- theta.constraints[[i]][1]

    valid.i <- is.atomic(constraint.i) && length(constraint.i) == 1 &&
      !is.na(constraint.i) &&
      (is.numeric(constraint.i) || constraint.i %in% c("+", "-"))
    if (!valid.i){
      cat("theta.constraints entry ", i,
          " must be a number, \"+\", or \"-\".\n", sep="")
      stop("Please respecify and call MCMCpaircompare() again.\n",
           call.=FALSE)
    }

    row.i <- match(name.i, cand.codes)
    if (is.na(row.i)){
      ## a constraint that matches no item cannot help identify the model
      warning("theta.constraints entry '", name.i,
              "' does not match any item in pwc.data and was ignored.",
              call.=FALSE)
      next
    }

    if (is.numeric(constraint.i)){
      theta.eq.constraints[row.i, 1] <- constraint.i
    }
    else if (constraint.i == "+"){
      theta.ineq.constraints[row.i, 1] <- 1
    }
    else{
      theta.ineq.constraints[row.i, 1] <- -1
    }
  }

  ## an item fixed to a value whose sign contradicts its inequality
  ## constraint makes the product negative
  testmat <- theta.ineq.constraints * theta.eq.constraints
  if (any(testmat < 0, na.rm=TRUE)){
    cat("Constraints on theta are logically inconsistent.\n")
    stop("Please respecify and call ", calling.function(), " again.\n")
  }
  ## -999 is the "no equality constraint" sentinel handed to the C++ code
  theta.eq.constraints[is.na(theta.eq.constraints)] <- -999

  ## starting values for theta
  ## 0 by default, +1 / -1 under an inequality constraint, and the fixed
  ## value under an equality constraint (applied last so that it wins when
  ## an item carries both kinds of constraint)
  theta.start <- rep(0, n.cand)
  has.ineq <- theta.ineq.constraints[, 1] != 0
  theta.start[has.ineq] <- theta.ineq.constraints[has.ineq, 1]
  has.eq <- theta.eq.constraints[, 1] != -999
  theta.start[has.eq] <- theta.eq.constraints[has.eq, 1]

  ## starting values for alpha
  ## (the length test keeps a user-supplied vector from reaching if())
  if (length(alpha.start) == 1 && is.na(alpha.start)){
    alpha.start <- rep(1, n.resp)
  }
  if (length(alpha.start) < n.resp){
    alpha.start <- rep(alpha.start, length.out=n.resp)
  }
  if (!is.numeric(alpha.start)){
    cat("alpha.start is non-numeric in MCMCpaircompare().\n")
    stop("Please check specification and try MCMCpaircompare() again.\n",
         call.=FALSE)
  }
  if (alpha.fixed){
    alpha.start <- rep(1, n.resp)
  }

  ## define holder for posterior sample
  ## (theta columns come first, then alpha columns)
  if (store.alpha == FALSE && store.theta == TRUE){
    n.stored <- n.cand
  }
  else if (store.alpha == TRUE && store.theta == FALSE){
    n.stored <- n.resp
  }
  else if (store.alpha == TRUE && store.theta == TRUE){
    n.stored <- n.cand + n.resp
  }
  else{
    cat("Error: store.alpha == FALSE & store.theta == FALSE.\n")
    stop("Please respecify and call MCMCpaircompare() again.\n",
         call.=FALSE)
  }
  sample.holder <- matrix(data=0, mcmc/thin, n.stored)

  ## seeds
  seeds <- form.seeds(seed)
  lecuyer <- seeds[[1]]
  seed.array <- seeds[[2]]
  lecuyer.stream <- seeds[[3]]

  ## call C++ code to draw sample
  ## (indices are shifted by -1 because the C++ code indexes from 0)
  posterior <- .C("cMCMCpaircompare",
                  sampledata = as.double(sample.holder),
                  samplerow = as.integer(nrow(sample.holder)),
                  samplecol = as.integer(ncol(sample.holder)),
                  pwc.datanumericdata = as.integer(pwc.data.numeric-1),
                  pwc.datanumericrow = as.integer(nrow(pwc.data.numeric)),
                  pwc.datanumericcol = as.integer(ncol(pwc.data.numeric)),
                  alphafixed = as.integer(alpha.fixed),
                  burnin = as.integer(burnin),
                  mcmc = as.integer(mcmc),
                  thin = as.integer(thin),
                  lecuyer = as.integer(lecuyer),
                  seedarray = as.integer(seed.array),
                  lecuyerstream = as.integer(lecuyer.stream),
                  verbose = as.integer(verbose),
                  thetastartdata = as.double(theta.start),
                  thetastartrow = as.integer(length(theta.start)),
                  thetastartcol = as.integer(1),
                  astartdata = as.double(alpha.start),
                  astartrow = as.integer(length(alpha.start)),
                  astartcol = as.integer(1),
                  a=as.double(a),
                  A=as.double(A),
                  thetaeqdata = as.double(theta.eq.constraints),
                  thetaeqrow = as.integer(nrow(theta.eq.constraints)),
                  thetaeqcol = as.integer(ncol(theta.eq.constraints)),
                  thetaineqdata = as.double(theta.ineq.constraints),
                  thetaineqrow = as.integer(nrow(theta.ineq.constraints)),
                  thetaineqcol = as.integer(ncol(theta.ineq.constraints)),
                  storealpha = as.integer(store.alpha),
                  storetheta = as.integer(store.theta),
                  PACKAGE="MCMCpack"
                  )

  ## put together matrix and build MCMC object to return
  draws <- matrix(posterior$sampledata, posterior$samplerow,
                  posterior$samplecol,
                  byrow=FALSE)
  output <- mcmc(data=draws, start=burnin+1, end=burnin+mcmc, thin=thin)

  par.names <- NULL
  if (store.theta == TRUE){
    par.names <- c(par.names, paste0("theta.", cand.codes))
  }
  if (store.alpha == TRUE){
    par.names <- c(par.names, paste0("alpha.", resp.codes))
  }
  varnames(output) <- par.names
  attr(output,"title") <-
    "MCMCpaircompare Posterior Sample"
  return(output)

} ## end MCMCpaircompare
## Any scripts or data that you put into this service are public.
## Add the following code to your website.
## For more information on customizing the embed code, read Embedding Snippets.