Nothing
#' Tesseract Engine
#'
#' Create an OCR engine for a given language and control parameters. This can be used by
#' the [ocr] and [ocr_data] functions to recognize text.
#'
#' Tesseract control parameters can be set either via a named list in the
#' `options` parameter, or in a `config` file text file which contains the parameter name
#' followed by a space and then the value, one per line. Use [tesseract_params()] to list
#' or find parameters. Note that that some parameters are only supported in certain versions
#' of libtesseract, and that invalid parameters can sometimes cause libtesseract to crash.
#'
#' @export
#' @return no return value, called for side effects
#' @rdname tesseract
#' @family tesseract
#' @param language string with language for training data. Usually defaults to `eng`
#' @param datapath path with the training data for this language. Default uses
#' the system library.
#' @param configs character vector with files, each containing one or more parameter
#' values. These config files can exist in the current directory or one of the standard
#' tesseract config files that live in the tessdata directory. See details.
#' @param options a named list with tesseract parameters. See details.
#' @param cache speed things up by caching engines
tesseract <- local({
store <- new.env()
function(language = "eng", datapath = NULL, configs = NULL, options = NULL, cache = TRUE) {
datapath <- normalizePath(as.character(datapath), mustWork = TRUE)
language <- as.character(language)
configs <- as.character(configs)
options <- as.list(options)
if (isTRUE(cache)) {
key <- digest::digest(list(language, datapath, configs, options))
if (is.null(store[[key]])) {
ptr <- tesseract_engine(datapath, language, configs, options)
assign(key, ptr, store)
}
store[[key]]
} else {
tesseract_engine(datapath, language, configs, options)
}
}
})
#' @export
#' @return no return value, called for side effects
#' @rdname tesseract
#' @param filter only list parameters containing a particular string
#' @examples
#' tesseract_params("smooth")
tesseract_params <- function(filter = "") {
tmp <- print_params(tempfile())
on.exit(unlink(tmp))
df <- parse_params(tmp)
subset <- grepl(filter, paste(df$param, df$desc), ignore.case = TRUE)
df_as_tibble(df[subset, ])
}
#' @export
#' @return list with information about the tesseract engine
#' @rdname tesseract
tesseract_info <- function() {
info <- engine_info_internal(tesseract())
config <- tesseract_config()
list(
datapath = info$datapath,
available = info$available,
version = config$version,
configs = list.files(file.path(info$datapath, "configs"))
)
}
parse_params <- function(path) {
utils::read.delim(path,
header = FALSE, quote = "",
col.names = c("param", "default", "desc"), stringsAsFactors = FALSE
)
}
tesseract_engine <- function(datapath, language, configs, options) {
# Tesseract::read_config_file first checks for local file, then in tessdata
lapply(configs, function(confpath) {
if (file.exists(confpath)) {
params <- tryCatch(utils::read.table(confpath, quote = ""), error = function(e) {
bail("Failed to parse config file '%s': %s", confpath, e$message)
})
ok <- validate_params(params$V1)
if (any(!ok)) {
bail("Unsupported Tesseract parameter(s): [%s] in %s", paste(params$V1[!ok], collapse = ", "), confpath)
}
}
})
opt_names <- as.character(names(options))
opt_values <- as.character(options)
ok <- validate_params(opt_names)
if (any(!ok)) {
bail("Unsupported Tesseract parameter(s): [%s]", paste(opt_names[!ok], collapse = ", "))
}
tesseract_engine_internal(datapath, language, configs, opt_names, opt_values)
}
bail <- function(...) {
stop(sprintf(...), call. = FALSE)
}
df_as_tibble <- function(df) {
stopifnot(is.data.frame(df))
class(df) <- c("tbl_df", "tbl", "data.frame")
df
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.