Nothing
#' Calculate the exact MLE of a Mallows-Binomial distribution using an A* algorithm
#'
#' This function estimates the exact MLE of a Mallows-Binomial distribution using an A* tree search algorithm proposed in Pearce and Erosheva (2022). Algorithm may be very slow when number of objects exceeds 15, but is often still tractable for larger J when consensus is strong.
#'
#' @import gtools
#' @import isotone
#'
#' @param rankings A matrix of rankings, potentially with attribute "assignments" to signify separate
#' reviewer assignments. One ranking per row.
#' @param ratings A matrix of ratings, one row per judge and one column per object.
#' @param M Numeric specifying maximum (=worst quality) integer rating.
#' @param keep_nodes Boolean specifying if function should retain the list of open nodes traversed during A*
#' tree search. Defaults to \code{FALSE}.
#'
#' @return A list with elements \code{pi0}, the estimated consensus ranking MLE, \code{p}, the
#' estimated object quality parameter MLE, \code{theta}, the estimated scale parameter MLE, and
#' \code{numnodes}, number of nodes traversed during algorithm and a measure of computational complexity.
#' If \code{keep_nodes == TRUE}, then the list also contains \code{nodes}, a matrix of open nodes remaining
#' at the end of search. If multiple MLEs are found, \code{pi0}, \code{p}, and \code{theta} are returned a matrix elements, with
#' one row per MLE.
#'
#' @examples
#' data("ToyData1")
#' ASTAR(ToyData1$rankings,ToyData1$ratings,ToyData1$M,keep_nodes=TRUE)
#'
#' @export
ASTAR <- function(rankings,ratings,M,keep_nodes=FALSE){
# calculate constants
I <- nrow(rankings)
J <- ncol(rankings)
if(any(dim(ratings) != c(I,J))){stop("rankings and ratings must have same dimension")}
if(is.null(attr(rankings,"assignments"))){
attr(rankings,"assignments") <- matrix(TRUE,nrow=I,ncol=J)
}
# calculations for updating theta
Ji <- apply(attr(rankings,"assignments"),1,sum) #each judge's total number of assignments
Ri <- apply(rankings,1,function(pi){sum(!is.na(pi))}) #each judge's size of ranking
i_pi <- which(Ri>0 & Ji>0) #which judges provided rankings
t1 <- function(theta,D){theta*D/I} #first log probability term
t2 <- function(theta){sum(psi(theta,Ji[i_pi],Ri[i_pi],log=T))/I} #second log probability term
get_theta <- function(D){ # function used to optimize for theta conditional on some order
# D is the minimum summed kendall distance between some central ordering and the observations.
# As D increases, theta decreases monotonically and the cost increases monotonically.
opt_theta <- function(theta){t1(theta,D=D)+t2(theta)}
result <- optimize(f=opt_theta,interval=c(1e-8,10^8))
return(list(thetahat = result$minimum,objective = result$objective))
}
# calculations for updating p
c1 <- apply(ratings,2,function(x){sum(x,na.rm=T)})/I #set of constants in log binomial density
c2 <- apply(M-ratings,2,function(x){sum(x,na.rm=T)})/I #set of constants in log binomial density
t3 <- function(p){sum(c1*log(1/p)+c2*log(1/(1-p)))} #calculation of log binomial density given p
t3_gradient <- function(p){ -c1/p + c2/(1-p) } # gradient of t3 function, for use in optimization later on
get_p <- function(order){ # constrained optimization function
# order is the top-R portion of a central ordering (i.e., if order = c(5,4), then object 5 must be in first place
# and object 4 must be in second place, the remaining objects must be in places 3:J (any order among those) ).
# initializing the order
R <- length(order)
if(R<J){unordered <- setdiff(1:J,order)}else{unordered <- c()}
order_and_unordered <- c(order,unordered)
# calculation of linear constraints
ui <- rbind(diag(1,nrow=J),diag(-1,nrow=J))
ci <- rep(c(0,-1),each=J)
if(R>1){for(place in 2:R){
ui_addition <- rep(0,J)
ui_addition[c(order[place],order[place-1])] <- c(1,-1)
ui <- rbind(ui,ui_addition)
ci <- c(ci,0)
}}
if(R<J){for(place in (R+1):J){
ui_addition <- rep(0,J)
ui_addition[c(order_and_unordered[place],order[R])] <- c(1,-1)
ui <- rbind(ui,ui_addition)
ci <- c(ci,0)
}}
# get intelligent starting values via isotonic regression
meanratings <- c1/(c1+c2)
order_meanratings <- c(order,setdiff(order(meanratings),order))
start <- gpava(1:J,meanratings[order_meanratings])
start <- start$x[order(order_meanratings)]
start[start==0] <- 1e-7
start[start==1] <- 1-1e-7
# optimize for p conditional on an ordering of parameters
result <- suppressWarnings(constrOptim(theta = start,f = t3,grad = t3_gradient,ui=ui,ci=ci-1e-8,
mu=.1,control = list(reltol = 1e-10)))
return(list(phat = result$par,objective = result$value))
}
# get D (necessary for updating theta)
Q <- getQ(rankings,I,J)*I
get_D <- function(order){
S <- setdiff(1:J,order)
D <- 0
if(length(S)>=2){
D <- D +
sum(apply(combinations(length(S),2,S),1,function(uv){
min(Q[uv[1],uv[2]],Q[uv[2],uv[1]])
}))
}
for(i in 1:length(order)){D <- D + sum(Q[setdiff(1:J,order[1:i]),order[i]])}
return(D)
}
# START ESTIMATION VIA ASTAR
# create matrix of open nodes to continue exploring
open <- matrix(data=c(1:J,rep(NA,J*(J-1))),nrow=J,ncol=J)
open[,J] <- unlist(lapply(1:J,function(order){
round(get_theta(D=get_D(order))$objective + get_p(order)$objective,4)
}))
num_nodes <- J
# create matrix to store complete paths
result <- matrix(NA,nrow=0,ncol=J)
# search through the current tree
while(nrow(open)>0){
which_current <- which.min(open[,J])
current <- c(na.exclude(open[which_current,1:(J-1)]))
if(length(current)==(J-1)){
# print("found a solution!")
curr_min <- open[which_current,J]
which_mins <- which(open[,J] == curr_min)
result <- rbind(result,c(current,curr_min))
open <- open[-which_current,]
if(length(which_mins)==1){break() #only stop if not more possible solutions
}else{#print("still going!")
}
}else{
neighbors <- setdiff(1:J,current)
for(neighbor in neighbors){
order <- c(current,neighbor)
totalcostheuristic <- round(get_theta(D=get_D(order))$objective + get_p(order)$objective,4)
open <- rbind(open,c(order,rep(NA,J-1-length(order)),totalcostheuristic))
}
num_nodes <- num_nodes + length(neighbors)
open <- open[-which_current,]
}
}
# be sure all solutions are equal in their totalcostheuristic (in case of multiple solutions)
result <- result[which(result[,J]==min(result[,J])),,drop=FALSE]
result[,J] <- apply(result[,1:(J-1),drop=FALSE],1,function(pi){setdiff(1:J,pi)}) #complete the rankings
if(nrow(result)>1){
message("There's a tie! Results are shown as a matrix to give multiple solutions.")
results <- t(apply(result,1,function(curr_node){
c(get_p(curr_node)$phat,get_theta(D=get_D(curr_node))$thetahat)
}))
if(keep_nodes){
return(list(pi0=result,
p=results[,1:J],
theta=results[,J+1,drop=FALSE],
num_nodes=num_nodes,
nodes = open))
}else{
return(list(pi0=result,
p=results[,1:J],
theta=results[,J+1,drop=FALSE],
num_nodes=num_nodes))
}
}else{
curr_node <- result[1,]
results <- c(get_p(curr_node)$phat,get_theta(D=get_D(curr_node))$thetahat)
if(keep_nodes){
return(list(pi0=curr_node,
p=results[1:J],
theta=results[J+1],
num_nodes=num_nodes,
nodes = open))
}else{
return(list(pi0=curr_node,
p=results[1:J],
theta=results[J+1],
num_nodes=num_nodes))
}
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.