# Nothing
# HTML TO TXT ------------------------------------------------------------------
# Read HTML files and reduce each of them to a single line of plain text.
#
# Args:
#   x: character vector of paths to HTML files.
#
# Returns:
#   A list with one character string per element of `x` (same order). In each
#   string subscripts (other than "rep"), HTML tags and line breaks are
#   removed, runs of whitespace are collapsed to one space, and the character
#   references for <, =, >, (, ), thin/non-breaking space, the minus sign and
#   chi are replaced by plain ASCII ("<", "=", ">", "(", ")", " ", "-", "X").
getHTML <- function(x){

  # Apply a set of literal (fixed = TRUE) replacements to every string.
  replace_fixed <- function(strings, patterns, replacement){
    for (pattern in patterns){
      strings <- lapply(strings, gsub, pattern = pattern,
                        replacement = replacement, fixed = TRUE)
    }
    strings
  }

  # Read each file in one go; the size is given in bytes, hence useBytes.
  strings <- lapply(x, function(fileName){
    con <- file(fileName)
    on.exit(close(con), add = TRUE)
    readChar(con, file.info(fileName)$size, useBytes = TRUE)
  })

  # Remove subscripts (except for p_rep)
  strings <- lapply(strings, gsub, pattern = "<sub>(?!rep).*?</sub>",
                    replacement = "", perl = TRUE)

  # Remove HTML tags. This must happen BEFORE the character references are
  # decoded, otherwise a decoded "<" ... ">" pair would be stripped as a tag.
  strings <- lapply(strings, gsub, pattern = "<(.|\n)*?>", replacement = "")

  # Replace html codes:
  # from: https://dev.w3.org/html5/html-author/charref
  # Matching is literal and case-sensitive, so both spellings of the
  # hexadecimal references are listed.
  strings <- replace_fixed(
    strings,
    c("&lt;", "&LT;", "&#60;", "&#x0003C;", "&#x3c;", "&#x3C;"), "<")
  strings <- replace_fixed(
    strings,
    c("&equals;", "&#61;", "&#x0003D;", "&#x3d;", "&#x3D;"), "=")
  strings <- replace_fixed(
    strings,
    c("&gt;", "&GT;", "&#62;", "&#x0003E;", "&#x3e;", "&#x3E;"), ">")
  strings <- replace_fixed(strings, c("&lpar;", "&#40;"), "(")
  strings <- replace_fixed(strings, c("&rpar;", "&#41;"), ")")
  strings <- replace_fixed(
    strings,
    c("&thinsp;", "&#8201;", "&nbsp;", "&#160;"), " ")

  # Drop line breaks and collapse the remaining whitespace.
  strings <- lapply(strings, gsub, pattern = "\n", replacement = "")
  strings <- lapply(strings, gsub, pattern = "\r", replacement = "")
  strings <- lapply(strings, gsub, pattern = "\\s+", replacement = " ")

  # Minus sign (U+2212) and chi (U+03C7 / U+03A7): handle the character
  # references as well as the literal glyphs. The glyphs are written as \u
  # escapes so this file does not depend on its own encoding.
  strings <- replace_fixed(
    strings,
    c("&minus;", "&#x02212;", "&#x2212;", "&#8722;", "\u2212"), "-")
  strings <- replace_fixed(
    strings,
    c("&chi;", "&#x003C7;", "&#x3c7;", "&#x3C7;", "&#967;", "\u03c7"), "X")
  strings <- replace_fixed(
    strings,
    c("&Chi;", "&#x003A7;", "&#x3a7;", "&#x3A7;", "&#935;", "\u03a7"), "X")

  strings
}
# PDF TO TXT -------------------------------------------------------------------
# Convert PDF files to plain text with the external `pdftotext` program.
#
# Args:
#   x: character vector of paths to PDF files.
#
# Returns:
#   A character vector of the same length as `x`. Each element holds the
#   text of the corresponding PDF with all carriage returns and newlines
#   removed, or "" (with a warning) when the conversion failed.
#
# Side effects:
#   pdftotext writes a text file next to every PDF (extension replaced by
#   ".txt"); these files are left in place.
getPDF <- function(x){

  # Path of the text file belonging to a PDF: a trailing ".pdf" (any case)
  # is replaced by ".txt"; any other name simply gets ".txt" appended, so the
  # output path can never coincide with the input path.
  txt_path <- function(pdf){
    if (grepl("\\.pdf$", pdf, ignore.case = TRUE)) {
      sub("\\.pdf$", ".txt", pdf, ignore.case = TRUE)
    } else {
      paste0(pdf, ".txt")
    }
  }

  txtfiles <- character(length(x))

  # seq_along() instead of 1:length(x): an empty `x` must yield character(0)
  # rather than iterating over c(1, 0).
  for (i in seq_along(x)){
    converted <- FALSE

    if (!is.na(x[i])) {
      fileName <- txt_path(x[i])
      # Quote both paths for the shell and pass the output file explicitly,
      # so the file read below is exactly the one pdftotext was asked to
      # write.
      status <- system2(
        "pdftotext",
        args = c("-q", "-enc", "ASCII7", shQuote(x[i]), shQuote(fileName))
      )
      # A non-zero exit status means no fresh output was produced; without
      # this check a stale text file from an earlier run would be read.
      converted <- isTRUE(status == 0) && file.exists(fileName)
    }

    if (converted) {
      strings <- readChar(fileName, file.info(fileName)$size)
      # remove carriage returns and new lines
      strings <- gsub(x = strings, pattern = "[\r\n]", replacement = "")
      # save result in vector
      txtfiles[i] <- strings
    } else {
      warning(paste("Failure in file", x[i]))
      txtfiles[i] <- ""
    }
  }

  txtfiles
}
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.