R/tif2token.R

Defines functions tif2token

Documented in tif2token

#' Convert a TIF corpus to a named list of tokens (via the corpus package)
#'
#' Tokenizes the `text` column of `tif` with [corpus::text_tokens()] and
#' names each element of the result after the matching `doc_id` value.
#'
#' The token filter is built with [corpus::text_filter()] using:
#' * `map_case = FALSE`, so the original letter case is preserved;
#' * `combine = c(corpus::abbreviations_en, "Gov.", "Sen.")`, i.e. the
#'   English abbreviation list shipped with corpus plus two extra titles;
#' * `connector = "_"`, the character used to join combined tokens.
#'
#' @name tif2token
#' @param tif A data frame (or list) in Text Interchange Format: it must
#'   contain a `doc_id` element and a `text` element, matched by exact name.
#' @return A list with one element per entry of `tif$text`, as returned by
#'   [corpus::text_tokens()], named by `tif$doc_id`.
#'
#' @examples
#' tif <- data.frame(
#'   doc_id = c("d1", "d2"),
#'   text = c("Gov. Smith spoke first.", "Sen. Jones replied.")
#' )
#' tif2token(tif)
#'
#' @export
#' @rdname tif2token
#'
#'
tif2token <- function(tif) {
  # Fail early with a clear message instead of silently returning an
  # unnamed list (missing doc_id) or tokenizing NULL (missing text).
  required <- c("doc_id", "text")
  missing_cols <- setdiff(required, names(tif))
  if (length(missing_cols) > 0) {
    stop(
      "`tif` is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  token_filter <- corpus::text_filter(
    map_case = FALSE,
    combine = c(corpus::abbreviations_en, "Gov.", "Sen."),
    connector = "_"
  )

  # `[[` matches names exactly; `$` would partially match other columns.
  tokens <- corpus::text_tokens(tif[["text"]], filter = token_filter)
  names(tokens) <- tif[["doc_id"]]
  tokens
}
jaytimm/text2df documentation built on July 21, 2023, 1:58 a.m.