R/rank.analysis.R

Defines functions exact_rank_prob

Documented in exact_rank_prob

#' @title Probabilistic centrality rankings
#' @description  Performs a complete and exact rank analysis of a given partial ranking.
#' This includes rank probabilities, relative rank probabilities and expected ranks.
#'
#' @importFrom Rcpp evalCpp
#' @useDynLib netrankr
#'
#' @param P A partial ranking as matrix object calculated with [neighborhood_inclusion]
#'    or [positional_dominance].
#' @param only.results Logical. return only results (default) or additionally
#'     the ideal tree and lattice if \code{FALSE}.
#' @param verbose Logical. should diagnostics be printed. Defaults to \code{FALSE}.
#' @param force Logical. If \code{FALSE} (default), stops the analysis if the partial
#'     ranking has more than 40 elements and less than 0.4 comparable pairs.
#'     Only change if you know what you are doing.
#' @details The function derives rank probabilities from a given partial ranking
#' (for instance returned by [neighborhood_inclusion] or [positional_dominance]). This includes the
#' calculation of expected ranks, (relative) rank probabilities and the number of possible rankings.
#'  Note that the set of rankings grows exponentially in the number of elements and the exact
#'  calculation becomes infeasible quite quickly and approximations need to be used.
#'  See `vignette("benchmarks")` for guidelines and [approx_rank_relative],
#'  [approx_rank_expected], and [mcmc_rank_prob] for approximative methods.
#' @return
#' \item{lin.ext}{Number of possible rankings that extend `P`.}
#' \item{mse}{Array giving the equivalence classes of `P`.}
#' \item{rank.prob}{Matrix containing rank probabilities: \code{rank.prob[u,k]} is the probability that u has rank k.}
#' \item{relative.rank}{Matrix containing relative rank probabilities: \code{relative.rank[u,v]} is the probability that u is ranked lower than v.}
#' \item{expected.rank}{Expected ranks of nodes in any centrality ranking.}
#' \item{rank.spread}{Standard deviation of the ranking probabilities.}
#' \item{topo.order}{Random ranking used to build the lattice of ideals (if \code{only.results = FALSE}).}
#' \item{tree}{Adjacency list (incoming) of the tree of ideals (if \code{only.results = FALSE}).}
#' \item{lattice}{Adjacency list (incoming) of the lattice of ideals (if \code{only.results = FALSE}).}
#' \item{ideals}{List of order ideals (if \code{only.results = FALSE}).}
#' In all cases, higher numerical ranks imply a higher position in the ranking. That is,
#' the lowest ranked node has rank 1.
#' @author David Schoch, Julian Müller
#' @references De Loof, K. 2009. Efficient computation of rank probabilities in posets.
#' *Phd thesis*, Ghent University.
#'
#' De Loof, K., De Meyer, H. and De Baets, B., 2006. Exploiting the
#' lattice of ideals representation of a poset. *Fundamenta Informaticae*, 71(2,3):309-321.
#'
#' @seealso [approx_rank_relative], [approx_rank_expected], [mcmc_rank_prob]
#' @examples
#' P <- matrix(c(0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, rep(0, 10)), 5, 5, byrow = TRUE)
#' P
#' res <- exact_rank_prob(P)
#'
#' # a warning is displayed if only one ranking is possible
#' tg <- threshold_graph(20, 0.2)
#' P <- neighborhood_inclusion(tg)
#' res <- exact_rank_prob(P)
#' @export
exact_rank_prob <- function(P, only.results = TRUE, verbose = FALSE, force = FALSE) {
  if (!inherits(P, "Matrix") & !is.matrix(P)) {
    stop("P must be a dense or spare matrix")
  }
  if (!is.binary(P)) {
    stop("P is not a binary matrix")
  }
  # convert to dense matrix-----------------------------------------
  # exact_rank_prob only works with small matrices. Hence, we can safely convert
  # to a dense matrix
  P <- as.matrix(P)
  # Check for names ------------------------------------------------
  if (is.null(rownames(P)) & is.null(colnames(P))) {
    rownames(P) <- colnames(P) <- paste0("V", seq_len(nrow(P)))
  }
  n_full <- nrow(P)
  P_full <- P
  # Equivalence classes ------------------------------------------------
  MSE <- which((P + t(P)) == 2, arr.ind = TRUE)
  if (length(MSE) >= 1) {
    MSE <- t(apply(MSE, 1, sort))
    MSE <- MSE[!duplicated(MSE), ]
    g <- igraph::graph.empty()
    g <- igraph::add.vertices(g, nrow(P))
    g <- igraph::add.edges(g, c(t(MSE)))
    g <- igraph::as.undirected(g)
    MSE <- igraph::clusters(g)$membership
    equi <- which(duplicated(MSE))
    P <- P[-equi, -equi]
  } else {
    MSE <- seq_len(nrow(P))
  }
  if (length(unique(MSE)) == 1) {
    stop("all elements are structurally equivalent and have the same rank")
  }
  # names <- rownames(P)
  # number of Elements
  nElem <- nrow(P)

  # check for linear order ---------------------------------------------
  if (comparable_pairs(P) == 1) {
    warning("P is already a ranking.\nExpected Ranks correspond to the only possible ranking.")
    expected_full <- rank(colSums(P_full), ties.method = "max")
    rank.spread_full <- rep(0, nrow(P_full))
    mrp_full <- P_full
    mrp_full[mrp_full == t(mrp_full)] <- 0
    rp_full <- matrix(0, nrow(P_full), nrow(P_full))
    for (i in seq_len(nrow(P_full))) {
      rp_full[i, expected_full[i]] <- 1
    }
    # add names
    rownames(rp_full) <- rownames(mrp_full) <- colnames(mrp_full) <- rownames(P_full)
    names(expected_full) <- names(rank.spread_full) <- rownames(P_full)
    colnames(rp_full) <- seq_len(ncol(rp_full))
    res <- list(
      lin.ext = 1,
      mse = MSE,
      rank.prob = rp_full,
      relative.rank = mrp_full,
      expected.rank = expected_full,
      rank.spread = rank.spread_full,
      topo.order = NULL,
      tree = NULL,
      lattice = NULL,
      ideals = NULL
    )
    class(res) <- "netrankr_full"
    return(res)
  }

  # sanity check if applicable ------------------------------------------------
  if (nrow(P) > 40 & comparable_pairs(P) < 0.4 & force == F) {
    stop("Input data too big. Use approximations or set `force=TRUE` if you know what you are doing")
  }
  # Prepare Data structures---------------------
  topo.order <- as.vector(igraph::topological.sort(igraph::graph_from_adjacency_matrix(P, "directed")))

  P <- P[topo.order, topo.order]
  ImPred <- igraph::get.adjlist(igraph::graph_from_adjacency_matrix(P, "directed"), "in")
  ImPred <- lapply(ImPred, function(x) as.vector(x) - 1)

  ImSucc <- igraph::get.adjlist(igraph::graph_from_adjacency_matrix(P, "directed"), "out")
  ImSucc <- lapply(ImSucc, function(x) as.vector(x) - 1)
  # TREEOFIDEALS ----------------------------------------------------
  if (verbose == TRUE) {
    print("building tree of ideals")
  }
  tree <- treeOfIdeals(ImPred)
  nIdeals <- length(tree$label)
  if (verbose == TRUE) {
    print("tree of ideals built")
  }
  Ek <- sapply(0:(nElem - 1), function(x) {
    which(tree$label == x) - 1
  })
  # tree$child=lapply(tree$child,function(x) {idx=order(tree$label[x+1],decreasing=T);x[idx]})
  if (verbose == TRUE) {
    print("building lattice of Ideals")
  }
  latofI <- LatticeOfIdeals(tree$child, tree$parent, Ek, nElem, nIdeals)

  if (verbose == TRUE) {
    print("lattice of ideals built")
  }
  ideallist <- listingIdeals(ImSucc, nElem, nIdeals)
  # ideallist=lapply(ideallist,sort)

  if (verbose == TRUE) {
    print("ideals listed")
  }

  if (verbose == TRUE) {
    print(paste("No of ideals:", nIdeals))
    print("Calculating Rank Probabilities")
  }

  res <- rankprobs(latofI, ideallist, nElem, nIdeals)
  if (verbose == TRUE) {
    print(paste("No. of possible Rankings: ", res$linext))
  }
  res$rp <- res$rp[order(topo.order), ]
  res$mrp <- res$mrp[order(topo.order), order(topo.order)]

  ############################### END
  expected <- res$rp %*% seq_len(nElem)
  rank.spread <- rowSums((matrix(rep(seq_len(nElem), each = nElem), nElem, nElem) - c(expected))^2 * res$rp)
  expected <- c(expected)

  ###############################
  # Insert equivalent nodes again ----
  rp_full <- matrix(0, n_full, ncol(res$rp))
  mrp_full <- matrix(0, n_full, n_full)
  expected_full <- c(0, n_full)
  rank.spread_full <- rep(0, n_full)
  for (i in sort(unique(MSE))) {
    idx <- which(MSE == i)
    if (length(idx) > 1) {
      group.head <- i
      rp_full[idx, ] <- do.call(rbind, replicate(length(idx), res$rp[group.head, ], simplify = FALSE))
      mrp_full[idx, ] <- do.call(rbind, replicate(length(idx), res$mrp[group.head, MSE], simplify = FALSE))
      rank.spread_full[idx] <- rank.spread[group.head]
    } else if (length(idx) == 1) {
      rp_full[idx, ] <- res$rp[i, ]
      mrp_full[idx, ] <- res$mrp[i, MSE]
      rank.spread_full[idx] <- rank.spread[i]
    }
  }
  expected_full <- expected[MSE]
  for (val in sort(unique(expected_full), decreasing = TRUE)) {
    idx <- which(expected_full == val)
    expected_full[idx] <- expected_full[idx] + sum(duplicated(MSE[expected_full <= val]))
  }
  # add names
  rownames(rp_full) <- rownames(mrp_full) <- colnames(mrp_full) <- rownames(P_full)
  names(expected_full) <- names(rank.spread_full) <- rownames(P_full)
  colnames(rp_full) <- seq_len(ncol(rp_full))

  ###############################
  if (only.results) {
    res <- list(
      lin.ext = res$linext,
      mse = MSE,
      rank.prob = rp_full,
      relative.rank = t(mrp_full),
      expected.rank = expected_full,
      rank.spread = sqrt(rank.spread_full),
      topo.order = NULL,
      tree = NULL,
      lattice = NULL,
      ideals = NULL
    )
    class(res) <- "netrankr_full"
    return(res)
  } else {
    res <- list(
      lin.ext = res$linext,
      mse = MSE,
      rank.prob = rp_full,
      relative.rank = t(mrp_full),
      expected.rank = expected_full,
      rank.spread = sqrt(rank.spread_full),
      topo.order = topo.order,
      tree = tree,
      lattice = latofI,
      ideals = ideallist
    )
    class(res) <- "netrankr_full"
    return(res)
  }
}

Try the netrankr package in your browser

Any scripts or data that you put into this service are public.

netrankr documentation built on Sept. 27, 2022, 1:07 a.m.