Nothing
#' Export a transcript object to a 'ELAN' .eaf file
#'
#' Advice: In most situations it is more convenient to use \code{act::corpus_export} for exporting annotation files.
#'
#' The .eaf file will be written to the file specified in \code{outputPath}.
#' If \code{outputPath} is left empty, the function will return the contents of the .eaf itself.
#'
#' @param t Transcript object; transcript to be exported.
#' @param outputPath Character string; path where .eaf file will be saved.
#' @param filterTierNames Vector of character strings; names of tiers to be included. If left unspecified, all tiers will be exported.
#' @param filterSectionStartsec Double; start of selection in seconds.
#' @param filterSectionEndsec Double; end of selection in seconds.
#' @param createMediaLinks Logical; if \code{TRUE} media links will be created.
#'
#' @return Contents of the .eaf file (only if \code{outputPath} is left empty)
#' @export
#'
#' @seealso \code{corpus_export}, \code{export_exb}, \code{export_printtranscript}, \code{export_rpraat}, \code{export_srt}, \code{export_textgrid}
#'
#' @example inst/examples/export_eaf.R
#'
export_eaf <- function(t,
outputPath=NULL,
filterTierNames=NULL,
filterSectionStartsec = NULL,
filterSectionEndsec = NULL,
createMediaLinks=TRUE) {
if (missing(t)) {stop("Transcript object in parameter 't' is missing.") } else { if (!methods::is(t, "transcript")) {stop("Parameter 't' needs to be a transcript object.") } }
#--- check if output folder exists
if (!is.null(outputPath)) {
if (!dir.exists(dirname(outputPath))) {
stop("Output folder does not exist. Modify parameter 'outputPath'.")
}
}
# t<- meinkorpus@transcripts[["ARG_I_CHI_Santi"]]
# outputPath<-NULL
# filterTierNames<-NULL
# filterSectionStartsec <- NULL
# filterSectionEndsec <- NULL
# createMediaLinks <- TRUE
# t<- meinkorpus@transcripts[[7]]
#=== Get data
#--- Filter and cure transcript
t <- act::transcripts_filter_single(t, filterTierNames=filterTierNames, filterSectionStartsec = filterSectionStartsec, filterSectionEndsec = filterSectionEndsec)
t <- act::transcripts_cure_single(t, annotationsWithReversedTimes=TRUE, overlappingAnnotations=TRUE, annotationsWithTimesBelowZero=TRUE, missingTiers=TRUE, showWarning=TRUE)
# --- Convert Point tiers to interval tiers
if ('TextTier' %in% t@tiers$type) {
tempcorpus <- methods::new("corpus")
tempcorpus@transcripts <- list(t)
names(tempcorpus@transcripts) <- t@name
tempcorpus <- act::tiers_convert(tempcorpus, pointToInterval=TRUE)
t <- tempcorpus@transcripts[[1]]
}
#---get annotations
myAnnotations <- t@annotations
#--- get only relevant columns
myCols <- c("tier.name", "startSec","endSec","content")
if (!all(myCols %in% colnames(myAnnotations))) {
stop(paste("Missing columns. Annotations needs to contain: ", paste(myCols, collapse = " ", sep="")))
}
myAnnotations <- myAnnotations[,myCols]
#convert annotations to html safe characters
myAnnotations$content <- XML::xmlValue(XML::xmlTextNode(as.vector(myAnnotations$content)))
myAnnotations$tier.name <- XML::xmlValue(XML::xmlTextNode(as.vector(myAnnotations$tier.name)))
#--- generate EAF-XML-document
myEAF <- "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
myEAF <- append(myEAF, "<ANNOTATION_DOCUMENT AUTHOR=\"\" DATE=\"2018-05-04T19:33:08+01:00\" FORMAT=\"3.0\" VERSION=\"3.0\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:noNamespaceSchemaLocation=\"http://www.mpi.nl/tools/elan/EAFv3.0.xsd\">" )
#--- generate media header
myEAF <- append(myEAF, " <HEADER MEDIA_FILE=\"\" TIME_UNITS=\"milliseconds\">")
if (createMediaLinks==TRUE) {
if (length(t@media.path)>0) {
for (mediaPath in t@media.path ) {
myMimeType <- "unknown"
if (tools::file_ext(mediaPath) %in% c("wav")) {
myMimeType <-"audio/x-wav"
} else if (tools::file_ext(mediaPath) %in% c("mp3", "aif", "aiff")) {
myMimeType <-"audio/*"
} else if (tools::file_ext(mediaPath) %in% c("mp4", "mov", "mpg")) {
myMimeType <-"video/mp4"
}
myEAF <- append(myEAF, sprintf(" <MEDIA_DESCRIPTOR MEDIA_URL=\"file:///%s\" MIME_TYPE=\"%s\"/>", mediaPath, myMimeType))
}
}
}
#<PROPERTY NAME="URN">urn:nl-mpi-tools-elan-eaf:cfb1957f-d6fa-4f7d-b124-6a74700d014d</PROPERTY>
#<PROPERTY NAME="lastUsedAnnotationId">21</PROPERTY>
myEAF <- append(myEAF, " </HEADER>")
if (nrow(myAnnotations)>0) {
#--- generate time order
myAnnotations$annotationID <- paste("a",1:nrow(myAnnotations),sep="")
myAnnotations$TIME_SLOT_REF1 <- paste("ts",1:nrow(myAnnotations),sep="")
myAnnotations$TIME_SLOT_REF2 <- paste("ts", (nrow(myAnnotations)+1):(nrow(myAnnotations)*2),sep="")
myAnnotations$startSec <- as.integer(myAnnotations$startSec*1000)
myAnnotations$endSec <- as.integer(myAnnotations$endSec*1000)
myEAF <- append(myEAF, " <TIME_ORDER>")
myEAF <- append(myEAF, sprintf(" <TIME_SLOT TIME_SLOT_ID=\"%s\" TIME_VALUE=\"%s\"/>", myAnnotations$TIME_SLOT_REF1, myAnnotations$startSec))
myEAF <- append(myEAF, sprintf(" <TIME_SLOT TIME_SLOT_ID=\"%s\" TIME_VALUE=\"%s\"/>", myAnnotations$TIME_SLOT_REF2, myAnnotations$endSec))
myEAF <- append(myEAF, " </TIME_ORDER>")
} else {
myEAF <- append(myEAF, " <TIME_ORDER>")
myEAF <- append(myEAF, " </TIME_ORDER>")
}
#iterate through all tierNames
if (nrow(t@tiers)>0) {
tierNr <- 1
#myEAF <- append(myEAF, sprintf(" <TIER LINGUISTIC_TYPE_REF=\"praat\" TIER_ID=\"%s\"/>", t@tiers$name, sep="\n"))
for (tierNr in 1:nrow(t@tiers)) {
#--- get annotations within tier
annotations.tier <- myAnnotations[myAnnotations$tier.name==t@tiers$name[tierNr],]
if (nrow(annotations.tier)==0) {
#--- generate tier AND close
myEAF <- append(myEAF, sprintf(" <TIER LINGUISTIC_TYPE_REF=\"praat\" TIER_ID=\"%s\"/>", t@tiers$name[tierNr]))
} else {
#--- generate tier
myEAF <- append(myEAF, sprintf(" <TIER LINGUISTIC_TYPE_REF=\"praat\" TIER_ID=\"%s\">", t@tiers$name[tierNr]))
if (t@tiers$type[tierNr] == "IntervalTier")
{
} else if (t@tiers$type[tierNr] == "TextTier") {
#--- add end times
annotations.tier$endSec <- annotations.tier$startSec + 100
}
#--- check for overlap of intervals, if there are more than one intervals
if (nrow(annotations.tier)>1) {
#get intervals whose endSec is bigger then the startSec of the following
overlaps <- annotations.tier$endSec[1:nrow(annotations.tier)-1]>annotations.tier$startSec[2:nrow(annotations.tier)]
#if there are
if (any(overlaps==TRUE))
{
#get the indices of those intervals
overlaps <- c(1:length(overlaps))[overlaps]
#replace endSec with startSec of the following interval
annotations.tier$endSec[overlaps]<- annotations.tier$startSec[overlaps+1]
}
}
annotations <- paste( " <ANNOTATION>",
sprintf(" <ALIGNABLE_ANNOTATION ANNOTATION_ID=\"%s\" TIME_SLOT_REF1=\"%s\" TIME_SLOT_REF2=\"%s\">", annotations.tier$annotationID, annotations.tier$TIME_SLOT_REF1, annotations.tier$TIME_SLOT_REF2),
sprintf(" <ANNOTATION_VALUE>%s</ANNOTATION_VALUE>", annotations.tier$content ),
" </ALIGNABLE_ANNOTATION>",
" </ANNOTATION>", sep="\n")
myEAF <- append(myEAF, annotations)
myEAF <- append(myEAF, " </TIER>")
}
}
}
#---
myEAF <- append(myEAF, " <LINGUISTIC_TYPE GRAPHIC_REFERENCES=\"false\" LINGUISTIC_TYPE_ID=\"praat\" TIME_ALIGNABLE=\"true\"/>")
myEAF <- append(myEAF, " <CONSTRAINT DESCRIPTION=\"Time subdivision of parent annotation\'s time interval, no time gaps allowed within this interval\" STEREOTYPE=\"Time_Subdivision\"/>")
myEAF <- append(myEAF, " <CONSTRAINT DESCRIPTION=\"Symbolic subdivision of a parent annotation. Annotations refering to the same parent are ordered\" STEREOTYPE=\"Symbolic_Subdivision\"/>")
myEAF <- append(myEAF, " <CONSTRAINT DESCRIPTION=\"1-1 association with a parent annotation\" STEREOTYPE=\"Symbolic_Association\"/>")
myEAF <- append(myEAF, " <CONSTRAINT DESCRIPTION=\"Time alignable annotations within the parent annotation's time interval, gaps are allowed\" STEREOTYPE=\"Included_In\"/>")
myEAF <- append(myEAF, "</ANNOTATION_DOCUMENT>")
if (is.null(outputPath)) {
return(myEAF)
} else {
#---write to file
fileConn <- file(outputPath, open="wb")
myEAF <- stringr::str_flatten(myEAF, collapse="\n")
writeBin(charToRaw(myEAF), fileConn, endian="little")
close(fileConn)
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.