R/ASTAR.R

Defines functions ASTAR

Documented in ASTAR

#' Calculate the exact MLE of a Mallows-Binomial distribution using an A* algorithm
#'
#' This function estimates the exact MLE of a Mallows-Binomial distribution using an A* tree search algorithm proposed in Pearce and Erosheva (2022). Algorithm may be very slow when number of objects exceeds 15, but is often still tractable for larger J when consensus is strong.
#'
#' @import gtools
#' @import isotone
#'
#' @param rankings A matrix of rankings, potentially with attribute "assignments" to signify separate
#'   reviewer assignments. One ranking per row.
#' @param ratings A matrix of ratings, one row per judge and one column per object.
#' @param M Numeric specifying maximum (=worst quality) integer rating.
#' @param keep_nodes Boolean specifying if function should retain the list of open nodes traversed during A*
#'   tree search. Defaults to \code{FALSE}.
#'
#' @return A list with elements \code{pi0}, the estimated consensus ranking MLE, \code{p}, the
#'   estimated object quality parameter MLE, \code{theta}, the estimated scale parameter MLE, and
#'   \code{numnodes}, number of nodes traversed during algorithm and a measure of computational complexity.
#'   If \code{keep_nodes == TRUE}, then the list also contains \code{nodes}, a matrix of open nodes remaining
#'   at the end of search. If multiple MLEs are found, \code{pi0}, \code{p}, and \code{theta} are returned a matrix elements, with
#'   one row per MLE.
#'
#' @examples
#' data("ToyData1")
#' ASTAR(ToyData1$rankings,ToyData1$ratings,ToyData1$M,keep_nodes=TRUE)
#'
#' @export
ASTAR <- function(rankings,ratings,M,keep_nodes=FALSE){

  # calculate constants
  I <- nrow(rankings)
  J <- ncol(rankings)
  if(any(dim(ratings) != c(I,J))){stop("rankings and ratings must have same dimension")}
  if(is.null(attr(rankings,"assignments"))){
    attr(rankings,"assignments") <- matrix(TRUE,nrow=I,ncol=J)
  }

  # calculations for updating theta
  Ji <- apply(attr(rankings,"assignments"),1,sum) #each judge's total number of assignments
  Ri <- apply(rankings,1,function(pi){sum(!is.na(pi))}) #each judge's size of ranking
  i_pi <- which(Ri>0 & Ji>0) #which judges provided rankings
  t1 <- function(theta,D){theta*D/I} #first log probability term
  t2 <- function(theta){sum(psi(theta,Ji[i_pi],Ri[i_pi],log=T))/I} #second log probability term
  get_theta <- function(D){ # function used to optimize for theta conditional on some order
    # D is the minimum summed kendall distance between some central ordering and the observations.
    # As D increases, theta decreases monotonically and the cost increases monotonically.
    opt_theta <- function(theta){t1(theta,D=D)+t2(theta)}
    result <- optimize(f=opt_theta,interval=c(1e-8,10^8))
    return(list(thetahat = result$minimum,objective = result$objective))
  }

  # calculations for updating p
  c1 <- apply(ratings,2,function(x){sum(x,na.rm=T)})/I #set of constants in log binomial density
  c2 <- apply(M-ratings,2,function(x){sum(x,na.rm=T)})/I #set of constants in log binomial density
  t3 <- function(p){sum(c1*log(1/p)+c2*log(1/(1-p)))} #calculation of log binomial density given p
  t3_gradient <- function(p){ -c1/p + c2/(1-p) } # gradient of t3 function, for use in optimization later on
  get_p <- function(order){ # constrained optimization function
    # order is the top-R portion of a central ordering (i.e., if order = c(5,4), then object 5 must be in first place
    # and object 4 must be in second place, the remaining objects must be in places 3:J (any order among those) ).

    # initializing the order
    R <- length(order)
    if(R<J){unordered <- setdiff(1:J,order)}else{unordered <- c()}
    order_and_unordered <- c(order,unordered)

    # calculation of linear constraints
    ui <- rbind(diag(1,nrow=J),diag(-1,nrow=J))
    ci <- rep(c(0,-1),each=J)
    if(R>1){for(place in 2:R){
      ui_addition <- rep(0,J)
      ui_addition[c(order[place],order[place-1])] <- c(1,-1)
      ui <- rbind(ui,ui_addition)
      ci <- c(ci,0)
    }}
    if(R<J){for(place in (R+1):J){
      ui_addition <- rep(0,J)
      ui_addition[c(order_and_unordered[place],order[R])] <- c(1,-1)
      ui <- rbind(ui,ui_addition)
      ci <- c(ci,0)
    }}

    # get intelligent starting values via isotonic regression
    meanratings <- c1/(c1+c2)
    order_meanratings <- c(order,setdiff(order(meanratings),order))
    start <- gpava(1:J,meanratings[order_meanratings])
    start <- start$x[order(order_meanratings)]
    start[start==0] <- 1e-7
    start[start==1] <- 1-1e-7

    # optimize for p conditional on an ordering of parameters
    result <- suppressWarnings(constrOptim(theta = start,f = t3,grad = t3_gradient,ui=ui,ci=ci-1e-8,
                          mu=.1,control = list(reltol = 1e-10)))
    return(list(phat = result$par,objective = result$value))
  }

  # get D (necessary for updating theta)
  Q <- getQ(rankings,I,J)*I
  get_D <- function(order){
    S <- setdiff(1:J,order)
    D <- 0
    if(length(S)>=2){
      D <- D +
        sum(apply(combinations(length(S),2,S),1,function(uv){
          min(Q[uv[1],uv[2]],Q[uv[2],uv[1]])
        }))
    }
    for(i in 1:length(order)){D <- D + sum(Q[setdiff(1:J,order[1:i]),order[i]])}
    return(D)
  }

  # START ESTIMATION VIA ASTAR

  # create matrix of open nodes to continue exploring
  open <- matrix(data=c(1:J,rep(NA,J*(J-1))),nrow=J,ncol=J)
  open[,J] <- unlist(lapply(1:J,function(order){
    round(get_theta(D=get_D(order))$objective + get_p(order)$objective,4)
  }))
  num_nodes <- J

  # create matrix to store complete paths
  result <- matrix(NA,nrow=0,ncol=J)

  # search through the current tree
  while(nrow(open)>0){
    which_current <- which.min(open[,J])
    current <- c(na.exclude(open[which_current,1:(J-1)]))
    if(length(current)==(J-1)){
      # print("found a solution!")
      curr_min <- open[which_current,J]
      which_mins <- which(open[,J] == curr_min)
      result <- rbind(result,c(current,curr_min))
      open <- open[-which_current,]
      if(length(which_mins)==1){break() #only stop if not more possible solutions
      }else{#print("still going!")
      }
    }else{
      neighbors <- setdiff(1:J,current)
      for(neighbor in neighbors){
        order <- c(current,neighbor)
        totalcostheuristic <- round(get_theta(D=get_D(order))$objective + get_p(order)$objective,4)
        open <- rbind(open,c(order,rep(NA,J-1-length(order)),totalcostheuristic))
      }
      num_nodes <- num_nodes + length(neighbors)
      open <- open[-which_current,]
    }
  }
  # be sure all solutions are equal in their totalcostheuristic (in case of multiple solutions)
  result <- result[which(result[,J]==min(result[,J])),,drop=FALSE]
  result[,J] <- apply(result[,1:(J-1),drop=FALSE],1,function(pi){setdiff(1:J,pi)}) #complete the rankings

  if(nrow(result)>1){
    message("There's a tie! Results are shown as a matrix to give multiple solutions.")
    results <- t(apply(result,1,function(curr_node){
      c(get_p(curr_node)$phat,get_theta(D=get_D(curr_node))$thetahat)
    }))
    if(keep_nodes){
      return(list(pi0=result,
                  p=results[,1:J],
                  theta=results[,J+1,drop=FALSE],
                  num_nodes=num_nodes,
                  nodes = open))
    }else{
      return(list(pi0=result,
                  p=results[,1:J],
                  theta=results[,J+1,drop=FALSE],
                  num_nodes=num_nodes))
    }
  }else{
    curr_node <- result[1,]
    results <- c(get_p(curr_node)$phat,get_theta(D=get_D(curr_node))$thetahat)
    if(keep_nodes){
      return(list(pi0=curr_node,
                  p=results[1:J],
                  theta=results[J+1],
                  num_nodes=num_nodes,
                  nodes = open))
    }else{
      return(list(pi0=curr_node,
                  p=results[1:J],
                  theta=results[J+1],
                  num_nodes=num_nodes))
    }
  }

}

Try the rankrate package in your browser

Any scripts or data that you put into this service are public.

rankrate documentation built on April 12, 2025, 1:46 a.m.