# R/page_distance.R

#' Compute the distance between consecutive revisions
#'
#' Each revision is tokenized into words once, then every revision is compared
#' with the one that follows it using the chosen similarity function.
#'
#' @param revs A character vector containing all the revisions of the page. See \code{page_content} function.
#' @param mode The name of a \code{textreuse} similarity function: \describe{
#' \item{jaccard_similarity}{Evaluate the distance using Jaccard similarity}
#' \item{jaccard_dissimilarity}{\code{1 - jaccard_similarity}}
#' \item{jaccard_bag_similarity}{The measure treats revisions as bags of words and not as sequences of words}
#' \item{ratio_of_matches}{The ratio between the number of items in a revision that are also in the previous revision and the number of items in that revision.}
#' } For more details, see \code{?`similarity-functions`} after loading \code{textreuse}.
#'
#' @return A numeric vector of length \code{max(length(revs) - 1, 0)}. For a
#'   given \code{i}, \code{page_distance(revs)[i]} gives the value of the
#'   measure selected by \code{mode} between \code{revs[i]} and
#'   \code{revs[i + 1]}. When \code{revs} holds fewer than two revisions there
#'   is nothing to compare and \code{numeric(0)} is returned. For more
#'   information about each measure, see the \code{textreuse} package.
#' @export
#'
#' @import foreach
#' @importFrom textreuse jaccard_similarity jaccard_dissimilarity jaccard_bag_similarity ratio_of_matches
#'
page_distance <- function(revs, mode = "jaccard_similarity") {
  stopifnot(is.character(mode), length(mode) == 1L)

  # Only accept functions, so that a non-function object that happens to share
  # the name cannot be picked up and fail later with an obscure error.
  similarity <- get(mode, mode = "function")

  n_revs <- length(revs)
  if (n_revs < 2L) {
    # No pair of consecutive revisions: keep the documented return type.
    return(numeric(0))
  }

  # Tokenize every revision exactly once; each interior revision takes part in
  # two comparisons (with its predecessor and with its successor).
  tokens <- lapply(revs, tokenize_words)

  vapply(
    seq_len(n_revs - 1L),
    function(i) similarity(tokens[[i]], tokens[[i + 1L]]),
    numeric(1)
  )
}
# cafeine05/WikiSocio documentation built on May 13, 2019, 10:39 a.m.