#' Extract candidate pairs from a pairwise comparison matrix
#'
#' Reshapes the matrix produced by \code{\link{pairwise_compare}} into a long
#' data frame holding one row per scored pair of documents.
#'
#' @param m A matrix, as returned by \code{\link{pairwise_compare}}.
#' @param directional Use the same value that was supplied to
#'   \code{\link{pairwise_compare}}. When \code{FALSE}, the pairs are passed
#'   through \code{sort_df_by_rows} before the final column-wise sort.
#' @return A data frame with one row for every non-\code{NA} cell of \code{m}.
#'   Columns \code{a} and \code{b} hold the IDs of the two documents in the
#'   pair (taken from the row and column names of \code{m}, i.e. the IDs of the
#'   corpus given to the comparison function), and \code{score} holds the value
#'   the comparison function returned for that pair. The result carries the
#'   classes \code{"textreuse_candidates"}, \code{"tbl_df"}, \code{"tbl"} and
#'   \code{"data.frame"}.
#' @examples
#' dir <- system.file("extdata/legal", package = "textreuse")
#' corpus <- TextReuseCorpus(dir = dir)
#'
#' m1 <- pairwise_compare(corpus, ratio_of_matches, directional = TRUE)
#' pairwise_candidates(m1, directional = TRUE)
#'
#' m2 <- pairwise_compare(corpus, jaccard_similarity)
#' pairwise_candidates(m2)
#' @export
pairwise_candidates <- function(m, directional = FALSE) {
  assert_that(is.matrix(m))

  # Linear positions of the populated cells, then their (row, column)
  # coordinates within the matrix.
  filled <- which(!is.na(m))
  coords <- arrayInd(filled, dim(m))

  candidates <- data.frame(
    a = rownames(m)[coords[, 1]],
    b = colnames(m)[coords[, 2]],
    score = m[filled],
    stringsAsFactors = FALSE
  )

  # Non-directional comparisons get an extra row-wise normalisation step
  # before the final ordering.
  if (!directional) {
    candidates <- sort_df_by_rows(candidates)
  }
  candidates <- sort_df_by_columns(candidates)

  class(candidates) <- c("textreuse_candidates", "tbl_df", "tbl", "data.frame")
  candidates
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.