Extract Text


# rmarkdown::render(
#     system.file("extract_text_app/app/extract_text.Rmd", package = "textreadr")
# )

library(readr)
library(shinyWidgets)
library(readxl)
library(shiny)
library(shinyjs)
library(magrittr)
library(dplyr)
library(textreadr)
library(tesseract)
````





Inputs {.sidebar  data-width=250} 
-------------------------------------


```r
# Upload control for the document to extract text from. The accepted
# extensions are the same ones the `mydata` reactive knows how to read.
fileInput(
    inputId = 'file_input',
    label = tags$b('Text File'),
    accept = c('.doc', '.docx', '.pdf', '.rtf', '.html')
)
# Reactive holding the uploaded document parsed into a tibble.
#
# Waits (via `req`) until a file has been uploaded, then dispatches on the
# file extension to the matching textreadr reader and converts the result
# with `dplyr::as_tibble()`. Any other extension raises an error.
mydata <- reactive({

    req(input$file_input)
    in_file <- input$file_input

    # Lower-case the extension so that an upper- or mixed-case one
    # (e.g. "PDF", "Docx") reaches its reader instead of the error branch.
    ext <- tolower(tools::file_ext(in_file$datapath))

    switch(ext,
        pdf = textreadr::read_pdf(in_file$datapath),
        doc = textreadr::read_doc(in_file$datapath),
        docx = textreadr::read_docx(in_file$datapath),
        rtf = textreadr::read_rtf(in_file$datapath),
        html = textreadr::read_html(in_file$datapath),
        # Unnamed last argument is the fallback for every other extension.
        stop("file type unsupported", call. = FALSE)
    ) %>%
    dplyr::as_tibble()

})
# Download handler behind the 'downloadData' button: writes the parsed
# upload (`mydata()`) as a CSV file whose name carries the current timestamp.
output$downloadData <- downloadHandler(
    filename = function() {
        # Format the timestamp explicitly (YYYY-MM-DD_HH.MM.SS) rather than
        # relying on the implicit character conversion of Sys.time(), so the
        # file name never picks up fractional seconds or other
        # version-dependent formatting. No ':' is used because it is not
        # allowed in file names on some platforms.
        sprintf('text_file_%s.csv', format(Sys.time(), '%Y-%m-%d_%H.%M.%S'))
    },
    content = function(con) {
        # `con` is the temporary file path Shiny asks us to write to.
        # The target is passed positionally because readr renamed this
        # argument from `path` to `file` and deprecated `path`; the
        # positional form works with both spellings.
        readr::write_csv(mydata(), con)
    }
)

# Sidebar placeholders: an instructions slot, a line break, and the slot
# that receives the download controls.
tags$div(
    uiOutput(outputId = 'next_instructions'),
    tags$br(),
    uiOutput(outputId = 'download_ui')
)

# Fills the 'download_ui' slot with a caption and a download button.
# `req(mydata())` keeps the slot empty until an upload has been parsed.
output$download_ui <- renderUI({
    req(mydata())

    caption <- HTML('<span style = "color:black;padding-top: 125px;"><b>Download .csv File</b></span>')
    button <- downloadButton(outputId = 'downloadData', label = 'Download')

    tags$div(caption, button)
})

Column
-------------------------------------

### Uploaded Data (truncated to 50 rows)

# Preview table of the parsed upload: every column shown as text, triple
# underscores in column names collapsed to one, first 50 rows only.
renderTable({
    preview <- mydata()

    # Assigning into `preview[]` keeps the data-frame structure while
    # replacing each column with its character form.
    preview[] <- lapply(preview, as.character)
    names(preview) <- gsub('___', '_', colnames(preview))

    head(preview, 50)
}, digits = 2)


Try the textreadr package in your browser

Any scripts or data that you put into this service are public.

textreadr documentation built on Oct. 9, 2021, 5:06 p.m.