# R/main.R

# Extract the text of a PDF, broken into lines.
#
# Args:
#   pdf_file: passed straight through to pdf_text().
#
# Returns:
#   A list with one character vector per string returned by pdf_text()
#   (presumably one string per page -- confirm against pdf_text()'s docs).
#   Each vector holds that string split on literal newline characters.
extract_Text <- function(pdf_file) {
  txt <- pdf_text(pdf_file)
  # Spell out TRUE: `T` is an ordinary variable and can be reassigned, which
  # would silently change (or break) how strsplit() interprets the separator.
  strsplit(txt, "\n", fixed = TRUE)
}


# Collect PDF metadata and the font table, each rendered as JSON.
#
# Args:
#   pdf_file: passed straight through to pdf_info() and pdf_fonts().
#
# Returns:
#   A list with two elements:
#     info  - JSON rendering of the pdf_info() result
#     fonts - JSON rendering of the pdf_fonts() result
#   Both are produced by jsonlite::toJSON() with auto_unbox = TRUE and
#   pretty = TRUE.
extract_Info <- function(pdf_file) {
  # Both results are serialised with the same options, so share one helper.
  as_pretty_json <- function(obj) {
    jsonlite::toJSON(obj, auto_unbox = TRUE, pretty = TRUE)
  }

  list(
    info = as_pretty_json(pdf_info(pdf_file)),
    fonts = as_pretty_json(pdf_fonts(pdf_file))
  )
}

# Render the PDF's table of contents as JSON.
#
# Args:
#   pdf_file: passed straight through to pdf_toc().
#
# Returns:
#   FALSE when pdf_toc() yields a zero-length result; otherwise the
#   jsonlite::toJSON() rendering of that result (auto_unbox = TRUE,
#   pretty = TRUE). Callers must therefore check for FALSE before treating
#   the value as JSON.
extract_Section <- function(pdf_file) {
  toc <- pdf_toc(pdf_file)

  # Plain guard clause: the previous try() wrapper protected nothing and only
  # obscured the early return.
  if (length(toc) == 0) {
    return(FALSE)
  }

  jsonlite::toJSON(x = toc, auto_unbox = TRUE, pretty = TRUE)
}
# artpulsion/readpdf documentation built on May 13, 2019, 5:21 p.m.