| Transkribus | R Documentation |
Connect to Transkribus, inspect collections, documents and pages, and perform handwritten text recognition
JSESSIONID: character string with the JSESSIONID to use once logged in
new(): Log in with your Transkribus user and password
Transkribus$new( url = "https://transkribus.eu/TrpServer/rest/auth/login", user, password )
url: character string with the url to use in the call to the Transkribus API
user: character string with your Transkribus user in order to connect
password: character string with your Transkribus password in order to connect
map_name_to_id(): Map labels to identifiers
Transkribus$map_name_to_id(
name,
type = c("collection", "document", "htr-model"),
collection
)
name: character string with the label to map to the identifier
type: type of mapping: one of 'collection', 'document' or 'htr-model'
collection: id of the collection
list_collections(): List all collections you have access to
Transkribus$list_collections( url = "https://transkribus.eu/TrpServer/rest/collections/list" )
url: character string with the url to use in the call to the Transkribus API
list_collection(): List the content (the documents) of a collection
Transkribus$list_collection( url = "https://transkribus.eu/TrpServer/rest/collections/%s/list", collection )
url: character string with the url to use in the call to the Transkribus API
collection: id of the collection
create_collection(): Create a collection
Transkribus$create_collection( url = "https://transkribus.eu/TrpServer/rest/collections/createCollection", label )
url: character string with the url to use in the call to the Transkribus API
label: character string with the name of the collection to create
delete_collection(): Delete a collection
Transkribus$delete_collection(
url = "https://transkribus.eu/TrpServer/rest/collections/{collection}",
collection
)
url: character string with the url to use in the call to the Transkribus API
collection: id of the collection
list_document(): List the content (the pages) of a document
Transkribus$list_document(
url = "https://transkribus.eu/TrpServer/rest/collections/%s/%s/fulldoc",
collection,
document,
type = c("pages", "raw")
)
url: character string with the url to use in the call to the Transkribus API
collection: id of the collection
document: id of the document
type: character string with the type of extraction, either 'pages' or 'raw'. Defaults to 'pages'
list_dictionaries(): Retrieve the set of dictionaries containing possible letters as output
Transkribus$list_dictionaries( url = "https://transkribus.eu/TrpServer/rest/recognition/dicts" )
url: character string with the url to use in the call to the Transkribus API
list_models(): Retrieve all HTR/OCR models you have access to within a collection
Transkribus$list_models( url = "https://transkribus.eu/TrpServer/rest/recognition/%s/list", collection )
url: character string with the url to use in the call to the Transkribus API
collection: id of the collection
list_job(): List all jobs or get the information of one specific job
Transkribus$list_job( url = "https://transkribus.eu/TrpServer/rest/jobs/list", job )
url: character string with the url to use in the call to the Transkribus API
job: id of the job to get the information of; leave it out to list all jobs
transcribe(): Transcribe a set of pages with a model
Transkribus$transcribe(
url = "https://transkribus.eu/TrpServer/rest/recognition/{collection}/{model}/htrCITlab?id={document}&pages={page}&dict={dictionary}",
collection,
document,
page,
model,
dictionary
)
url: character string with the url to use in the call to the Transkribus API
collection: id of the collection
document: id of the document
page: id of the page to transcribe
model: id of the Transkribus model to use
dictionary: character string with the dictionary (set of letters) to use
upload(): Upload a set of images in a collection
Transkribus$upload(
url = c("https://transkribus.eu/TrpServer/rest/uploads?collId={collection}",
"https://transkribus.eu/TrpServer/rest/uploads/{uploadId}"),
collection,
data,
document,
author = "R-API",
trace = TRUE
)
url: character vector with the urls to use in the calls to the Transkribus API
collection: id of the collection
data: a character vector with the full path(s) to the image files on disk
document: the title of the document
author: the author of the document
trace: logical indicating to show progress
layout(): Perform layout analysis on all pages of a document in a collection
Transkribus$layout(
url = "https://transkribus.eu/TrpServer/rest/LA?collId={collection}",
collection,
document,
doBlockSeg = FALSE,
doLineSeg = TRUE,
doPolygonToBaseline = FALSE,
doBaselineToPolygon = FALSE
)
url: character string with the url to use in the call to the Transkribus API
collection: id of the collection
document: id of the document
doBlockSeg: if TRUE, existing layout will be deleted, if FALSE keep existing text block regions
doLineSeg: if TRUE, detect lines in text blocks, if FALSE keep existing lines
doPolygonToBaseline: if TRUE, inspect line polygons and add baselines, if FALSE keep existing baselines
doBaselineToPolygon: if TRUE, extrapolate new line polygons from baselines, if FALSE do not extrapolate
clone(): The objects of this class are cloneable with this method.
Transkribus$clone(deep = FALSE)
deep: Whether to make a deep clone.
library(madoc.utils)
api <- Transkribus$new(user = "jan.wijffels@vub.ac.be",
password = Sys.getenv("TRANSKRIBUS_PWD"))
## Get pages of a collection
collections <- api$list_collections()
collections
id_collection <- sample(collections$colId, size = 1)
documents <- api$list_collection(collection = id_collection)
documents
id_document <- sample(documents$docId, size = 1)
pages <- api$list_document(collection = id_collection, document = id_document)
pages
## Create a collection, upload some images to the collection, delete it again
id <- api$create_collection(label = "example-collection")
img <- c(system.file(package = "madoc.utils", "extdata", "alto-example.jpg"),
system.file(package = "madoc.utils", "extdata", "example.png"))
api$upload(data = img, collection = id, document = paste("Upload", Sys.time()), author = "R-API")
api$list_collection(collection = id)
api$delete_collection(collection = id)
## Look at relevant models and dictionaries
dicts <- api$list_dictionaries()
grep(dicts, pattern = "Dutch", ignore.case = TRUE, value = TRUE)
models <- api$list_models(collection = id_collection)
str(models)
dutch <- grep(models$language, pattern = "Dutch", ignore.case = TRUE, value = TRUE)
dutch <- subset(models, language %in% dutch)
dutch <- c("Dutch Mountains (18th Century)", "IJsberg", "Dutch Notarial Model 18th Century")
dutch <- subset(models, name %in% dutch)
dutch <- subset(models, name %in% "Dutch Mountains (18th Century)" & provider == "CITlabPlus")
str(dutch)
id_model <- dutch$htrId
## Inspect jobs
jobs <- api$list_job()
jobs
## Not run:
id <- api$create_collection(label = "test-collection")
img <- c(system.file(package = "madoc.utils", "extdata", "example.png"),
system.file(package = "madoc.utils", "extdata", "alto-example.jpg"))
api$upload(data = img, collection = id, document = "Doc with 2 images", author = "R-API")
##
## This section shows how to transcribe using the API
## >> note that this consumes Transkribus credits
##
## Inspect one image and transcribe it
##
## - id_model <- 21683 ## Dutch Mountains HTR+
id_collection <- id
docs <- api$list_collection(collection = id_collection)
id_document <- docs$docId
pages <- api$list_document(collection = id_collection, document = id_document)
page <- head(pages, n = 1)
id_job <- api$transcribe(collection = id_collection, document = id_document,
page = 1,
model = 21683,
dictionary = "Combined_Dutch_Model_M1.dict")
x <- read_pagexml(page$page_xml)
##
## A random document from a collection
##
library(magick)
pages <- api$list_document(collection = id_collection, document = id_document)
page <- tail(pages, n = 1)
page
img <- image_read(page$thumbUrl)
img <- image_read(page$url)
image_resize(img, "x600")
id_job <- api$transcribe(collection = id_collection, document = id_document, page = page$pageNr,
model = id_model, dictionary = "Combined_Dutch_Model_M1.dict")
api$list_job(job = id_job)
## After the job has finished, we have a Page-XML file which we can read in
pages <- api$list_document(collection = id_collection, document = id_document)
page <- tail(pages, n = 1)
img <- image_read(page$url)
x <- read_pagexml(page$page_xml)
bl <- image_draw_baselines(img, x = x$baseline, col = "darkgreen", lwd = 4)
image_resize(bl, "x900")
bl <- image_crop_baselineareas(img,
x = setNames(x$baseline, x$id),
textregion = x$points,
extend = FALSE, overview = FALSE)
bl <- image_rbind(bl, color = "red", geometry = "2x2")
image_resize(bl, "x900")
api$delete_collection(collection = id)
## End(Not run)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.