# R/class_transcript.R
# In act: Aligned Corpus Toolkit

# Zero-row template for the annotations table of a transcript.
# Every column is created with its final type (integer, character or double),
# so no coercion is needed after construction. The char.* columns are integer
# position columns, one start/end pair per combination of orig/norm content
# and bytime/bytier ordering.
.emptyAnnotations <- data.frame(
	annotationID           = integer(0),
	tier.name              = character(0),
	startSec               = double(0),
	endSec                 = double(0),
	content                = character(0),
	content.norm           = character(0),
	char.orig.bytime.start = integer(0),
	char.orig.bytime.end   = integer(0),
	char.norm.bytime.start = integer(0),
	char.norm.bytime.end   = integer(0),
	char.orig.bytier.start = integer(0),
	char.orig.bytier.end   = integer(0),
	char.norm.bytier.start = integer(0),
	char.norm.bytier.end   = integer(0),
	row.names              = character(0),
	stringsAsFactors       = FALSE
)

# Zero-row template for the tiers table of a transcript:
# two character columns (name, type) and one integer column (position).
.emptyTiers <- data.frame(
	name             = character(0),
	type             = character(0),
	position         = integer(0),
	stringsAsFactors = FALSE
)

#' transcript object
#' 
#' A transcript object contains the annotations of a loaded annotation file and some metadata.
#' In addition, it contains information that is automatically generated by the act package and that is needed by some functions (e.g. the full text search).
#' 
#' Some of the slots are defined by the user.
#' Other slots are \code{[READ ONLY]}, which means that they can be accessed by the user but 
#' should not be changed. They contain values that are filled in when you execute functions 
#' on the object.
#' 
#' In a newly created object the time stamp slots do not yet hold time values: 
#' \code{normalization.systime} and \code{fulltext.systime} start as empty character vectors 
#' and \code{modification.systime} starts as \code{FALSE}.
#' 
#' @slot name Character string; \code{[READ ONLY]} Name of the transcript, generated from the annotation file name.
#' @slot file.path Character string; \code{[READ ONLY]} Original location of the annotation file.
#' @slot file.encoding Character string; \code{[READ ONLY]} Encoding applied to the file when reading.
#' @slot file.type Character string; \code{[READ ONLY]} Type of the original annotation file/object, e.g. 'eaf' or 'textgrid' for files and 'rpraat' for a rPraat .TextGrid object.
#' @slot file.content Character string; \code{[READ ONLY]} Content of the original annotation file/object.
#' @slot import.result Character string; \code{[READ ONLY]} Information about the success of the import of the annotation file.
#' @slot load.message Character string; \code{[READ ONLY]} Messages about errors that may have occurred while importing the annotation file.
#' @slot length.sec Double; \code{[READ ONLY]} Duration of the transcript in seconds.
#' @slot tiers Data.frame; \code{[READ ONLY]} Table with the tiers. To modify the tiers it is highly recommended to use functions of the package to ensure consistency of the data.
#' @slot annotations Data.frame; Table with the annotations.
#' @slot media.path Character string; Path(s) to the media files that correspond to this transcript object.
#' @slot normalization.systime POSIXct; Time of the last normalization.
#' @slot fulltext.systime POSIXct; \code{[READ ONLY]} Time of the last creation of the full texts.
#' @slot fulltext.filter.tier.names Vector of character strings; Names of the tiers that were included in the full text.
#' @slot fulltext.bytime.orig Character string; \code{[READ ONLY]} Full text of the transcript based on the ORIGINAL content of the annotations, sorting the annotations by TIME.
#' @slot fulltext.bytime.norm Character string; \code{[READ ONLY]} Full text of the transcript based on the NORMALIZED content of the annotations, sorting the annotations by TIME.
#' @slot fulltext.bytier.orig Character string; \code{[READ ONLY]} Full text of the transcript based on the ORIGINAL content of the annotations, sorting the annotations first by TIERS and then by time.
#' @slot fulltext.bytier.norm Character string; \code{[READ ONLY]} Full text of the transcript based on the NORMALIZED content of the annotations, sorting the annotations first by TIERS and then by time.
#' @slot modification.systime POSIXct; \code{[READ ONLY]} Time of the last modification of the transcript. Only modifications made after importing the annotation file by applying functions of the package are recorded. Manual changes of the transcript by the user are not tracked!
#' @slot history List; \code{[READ ONLY]} History of the modifications made to the transcript object.
#' @export
#'
#' @examples 
#' library(act)
#' 
#' examplecorpus@transcripts[[1]]
#' 
methods::setClass(
	"transcript",
	slots = c(
		# origin of the data
		name                       = "character",
		file.path                  = "character",
		file.encoding              = "character",
		file.type                  = "character",
		file.content               = "ANY",

		# outcome of the import
		import.result              = "character",
		load.message               = "character",

		# actual transcript data
		length.sec                 = "numeric",
		tiers                      = "ANY",
		annotations                = "ANY",
		media.path                 = "character",

		# generated information and bookkeeping
		normalization.systime      = "ANY",
		fulltext.systime           = "ANY",
		fulltext.filter.tier.names = "character",
		fulltext.bytime.orig       = "character",
		fulltext.bytime.norm       = "character",
		fulltext.bytier.orig       = "character",
		fulltext.bytier.norm       = "character",
		modification.systime       = "ANY",
		history                    = "list"
	),
	prototype = list(
		name                       = "",
		file.path                  = "",
		file.encoding              = "",
		file.type                  = "",
		file.content               = character(),

		import.result              = "",
		load.message               = "",

		length.sec                 = 0,
		# zero-row templates defined at the top of this file
		tiers                      = .emptyTiers,
		annotations                = .emptyAnnotations,
		media.path                 = character(),

		normalization.systime      = character(),
		fulltext.systime           = character(),
		fulltext.filter.tier.names = character(),
		fulltext.bytime.orig       = "",
		fulltext.bytime.norm       = "",
		fulltext.bytier.orig       = "",
		fulltext.bytier.norm       = "",
		modification.systime       = FALSE,
		history                    = list()
	)
)

# Prints a human-readable overview of a transcript object to the console.
#
# Scalar slots are printed in single quotes; slots that may be large
# (file.content, media.path, fulltext.filter.tier.names, history) are only
# summarized by their length together with a "[check directly]" hint.
# A second section prints counts obtained from act::info_summarized().
#
# Arguments:
#   object  A transcript object.
#
# Returns NULL invisibly; the function is used for its printing side effect.
transcript_show <- function (object) {
	# Wraps a slot value in single quotes. Values with several elements are
	# concatenated into one string, a zero-length value prints as ''.
	quoted <- function(value) {
		paste0("'", value, "'", collapse = "")
	}

	file_content_info <- if (length(object@file.content) == 0) {
		"[empty]"
	} else {
		"[check directly]"
	}

	# Single-space separated, like the "name(s)" and "message(s)" lines below.
	media_path_info <- if (length(object@media.path) == 0) {
		"[empty]"
	} else {
		paste("[check directly]", length(object@media.path), "path(s)")
	}

	cat("  transcript object", fill=TRUE)
	cat("    name                       : ", quoted(object@name), fill=TRUE)
	cat("    length.sec                 : ", object@length.sec, fill=TRUE)
	cat("    tiers                      : ", nrow(object@tiers), fill=TRUE)
	cat("    annotations                : ", nrow(object@annotations), fill=TRUE)
	cat("\n")
	cat("    file.path                  : ", quoted(object@file.path), fill=TRUE)
	cat("    file.encoding              : ", quoted(object@file.encoding), fill=TRUE)
	cat("    file.type                  : ", quoted(object@file.type), fill=TRUE)
	cat("    file.content               : ", file_content_info, fill=TRUE)
	cat("\n")
	cat("    import.result              : ", quoted(object@import.result), fill=TRUE)
	cat("    load.message               : ", quoted(object@load.message), fill=TRUE)
	cat("    media.path                 : ", media_path_info, fill=TRUE)
	cat("\n")
	cat("    normalization.systime      : ", quoted(object@normalization.systime), fill=TRUE)
	cat("    fulltext.systime           : ", quoted(object@fulltext.systime), fill=TRUE)
	cat("    fulltext.filter.tier.names : ", '[check directly]', length(object@fulltext.filter.tier.names), "name(s)", fill=TRUE)
	cat("    modification.systime       : ", quoted(object@modification.systime), fill=TRUE)
	cat("    history                    : ", '[check directly]', length(object@history), "message(s)", fill=TRUE)

	cat("\n")
	cat("  Aggregated info from act::info_summarized():", fill=TRUE)
	info <- act::info_summarized(object)
	cat("    tier.count                 : ", info$tier.count, fill=TRUE)
	# info$tier.names is deliberately not printed here.
	cat("    annotations.count          : ", info$annotations.count, fill=TRUE)
	cat("    words.org.count            : ", info$words.org.count, fill=TRUE)
	cat("    words.norm.count           : ", info$words.norm.count, fill=TRUE)

	invisible(NULL)
}

# Register transcript_show as the S4 "show" method for the "transcript" class.
methods::setMethod(
	f          = "show",
	signature  = "transcript",
	definition = transcript_show
)