# Register the column names that read_transcript() uses unquoted inside
# mutate(), so that R CMD check does not report them as undefined global
# variables. The call is namespace-qualified so that it does not depend on
# 'utils' being attached when the package code is evaluated.
utils::globalVariables(c("doc_id", "txt_id"))
#' Create Text Files from Word
#'
#' Read the text of one or more Word documents and save each one as a
#' plain text file.
#'
#' @param destdir Destination path. Where the files will be saved. When
#' missing or \code{NULL}, the current working directory is used.
#' @param docxfiles Character vector containing the filepath(s) of
#' the Word documents to be converted.
#' @param ... Arguments passed to \code{\link[readtext]{readtext}}.
#'
#' @details When \code{destdir} is not supplied, the function uses the
#' current working directory as the location for saving any files converted
#' from Word. A document that cannot be read raises a warning and is
#' skipped; an error is raised only when no document could be read at all.
#' The name of each text file is derived from the document's \code{doc_id}
#' by replacing a trailing \code{.doc} or \code{.docx} extension with
#' \code{.txt} and then making the name safe for use as a file name.
#'
#' @import stringr
#' @importFrom dplyr mutate
#' @importFrom readtext readtext
#' @importFrom purrr map_dfr
#' @importFrom purrr map2_chr
#' @importFrom purrr walk2
#'
#' @return Invisibly, a character vector containing the paths of the
#' generated text files.
#'
#' @export
read_transcript <- function(destdir, docxfiles, ...) {
  # 'missing()' has to be tested first: evaluating a missing argument, as
  # 'is.null(destdir)' would do, is itself an error.
  if (missing(destdir) || is.null(destdir)) {
    destdir <- getwd()
  }
  if (!dir.exists(destdir)) {
    stop(sprintf("Directory '%s' does not exist", basename(destdir)))
  }
  destdir <- normalizePath(destdir)

  # Read every document, forwarding '...' to readtext(). A file that cannot
  # be read is reported with a warning and contributes no rows.
  docdt <- map_dfr(docxfiles, function(x) {
    tryCatch(
      readtext(x, ...),
      error = function(e) {
        warning(
          sprintf("Could not read '%s': %s", x, conditionMessage(e)),
          call. = FALSE
        )
        NULL
      }
    )
  })
  if (nrow(docdt) == 0L) {
    stop("No files were read")
  }

  # Derive the name of each text file from the document identifier.
  docdt <- docdt %>%
    mutate(
      txt_id = str_replace(doc_id, "(.+)(\\.docx?$)", "\\1.txt"),
      txt_id = .makeSafeNames(txt_id)
    )

  # Defined here so that it can find 'destdir'. It writes one document's
  # text to its file and returns the path of that file.
  write_text_file <- function(txt, fname) {
    fpath <- file.path(destdir, fname)
    cat(txt, file = fpath)
    normalizePath(fpath, winslash = "/")
  }

  paths <- map2_chr(docdt$text, docdt$txt_id, write_text_file)
  invisible(paths)
}
# Turn strings into names suitable for files: whitespace is trimmed and
# squished, each remaining whitespace character and each of ? ! * ^ & is
# replaced by a hyphen, and runs of hyphens are collapsed into one.
# When 'to.lower' is TRUE the result is additionally lower-cased.
.makeSafeNames <- function(str, to.lower = FALSE) {
  tidied <- str_squish(str_trim(str))
  hyphenated <- str_replace_all(tidied, "\\s|\\?|\\!|\\*|\\^|&", '-')
  collapsed <- str_replace_all(hyphenated, '-{2,}', '-')
  if (!to.lower) {
    return(collapsed)
  }
  str_to_lower(collapsed)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.