#' BLEU (Bilingual Evaluation Understudy) modified n-gram precision.
#'
#' Evaluate a machine translation candidate against a set of reference
#' sentences. The score is the clipped (modified) n-gram precision for a
#' single n-gram order `n`: every distinct candidate n-gram is credited at most
#' as many times as it occurs in the candidate, and at most as many times as it
#' occurs in the reference that contains it most often. The total credit is
#' divided by the number of n-grams in the candidate. No brevity penalty and no
#' averaging over several n-gram orders is applied.
#'
#' @param candidate Candidate sentence (a single string).
#'
#' @param reference Vector of reference sentences.
#' @param n ngram size, default = 1.
#'
#' @return A double in `[0, 1]` indicating the BLEU score between the candidate
#' and the set of references. `0` is returned when the candidate yields no
#' n-grams of size `n` or when no reference is supplied.
#'
#' @importFrom tokenizers tokenize_ngrams
#' @importFrom purrr map_int map_dbl
#'
#' @examples
#' cand <- "the cat the cat on the mat"
#' reference <- c("the cat is on the mat", "there is a cat on the mat")
#' bleu_vec(cand, reference, 2)
#' @export
bleu_vec <- function(candidate, reference, n = 1) {
  # `simplify = TRUE` only yields a plain character vector for one document.
  if (length(candidate) != 1L) {
    stop("`candidate` must be a single sentence.", call. = FALSE)
  }

  cand_ngrams <- tokenize_ngrams(candidate, simplify = TRUE, n = n)
  ref_ngrams <- tokenize_ngrams(reference, n = n)

  # Drop missing n-grams defensively so they cannot turn the score into NA.
  cand_ngrams <- cand_ngrams[!is.na(cand_ngrams)]
  cand_total <- length(cand_ngrams)
  if (cand_total == 0L) {
    return(0)
  }

  unique_ngrams <- unique(cand_ngrams)
  # Occurrences of each distinct n-gram inside the candidate itself.
  cand_counts <- tabulate(
    match(cand_ngrams, unique_ngrams),
    nbins = length(unique_ngrams)
  )

  # Highest occurrence count of each n-gram in any single reference.
  # `max(0, ...)` keeps the result finite when `reference` is empty.
  max_ref_counts <- map_dbl(unique_ngrams, function(ngram) {
    per_reference <- map_int(ref_ngrams, function(ref) {
      sum(ref == ngram, na.rm = TRUE)
    })
    max(0, per_reference)
  })

  # Clip: an n-gram cannot earn more credit than it has candidate occurrences,
  # otherwise the precision could exceed 1.
  clipped <- pmin(cand_counts, max_ref_counts)
  sum(clipped) / cand_total
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.