R/tif2sentence.R

Defines functions tif2sentence

Documented in tif2sentence

#' Split the documents of a TIF data frame into sentences
#'
#' Passes the `text` column of `tif`, named by `doc_id`, to
#' [corpus::text_split()] and returns one row per resulting text segment.
#' Sentence breaks are suppressed after the abbreviations listed in
#' `corpus::abbreviations_en` and, additionally, after "Gov." and "Sen.".
#'
#' @param tif A data frame with (at least) the columns `doc_id` and `text`.
#' @param concat Logical scalar. If `TRUE`, the document id and the
#'   within-document sentence index are pasted together into a single
#'   `doc_id` of the form `"<doc_id>.<index>"`. If `FALSE` (the default),
#'   they are returned as the separate columns `doc_id` and `sentence_id`.
#' @return A data frame with one row per sentence. With `concat = TRUE` it
#'   has the columns `doc_id` and `text`; otherwise it has the columns
#'   `doc_id`, `sentence_id` and `text`. `text` is always a character vector.
#'
#' @export
tif2sentence <- function(tif, concat = FALSE) {
  # Name each text by its document id so the split result can be traced back
  # to the originating document.
  docs <- tif$text
  names(docs) <- tif$doc_id

  sent_filter <- corpus::text_filter(
    sent_suppress = c(corpus::abbreviations_en, "Gov.", "Sen.")
  )
  sentences <- corpus::text_split(docs, filter = sent_filter)

  # Convert the text column returned by corpus to a plain character vector.
  sentences$text <- as.character(sentences$text)

  if (concat) {
    sentences$doc_id <- paste0(sentences$parent, ".", sentences$index)
    sentences[, c("doc_id", "text")]
  } else {
    # Rename by name rather than by position, matching how the concat branch
    # addresses the `parent` and `index` columns.
    cols <- names(sentences)
    cols[cols == "parent"] <- "doc_id"
    cols[cols == "index"] <- "sentence_id"
    names(sentences) <- cols
    sentences
  }
}
jaytimm/text2df documentation built on July 21, 2023, 1:58 a.m.