Nothing
#' Import annotation files into corpus object
#'
#' Scans all path specified in if \code{x@paths.annotation.files} for annotation files.
#' Supported file formats will be loaded as transcript objects into the corpus object.
#' All previously loaded transcript objects will be deleted.
#'
#' If \code{assignMedia=TRUE} the paths defined in \code{x@paths.media.files} will be scanned for media files.
#' Based on their file names the media files and annotations files will be matched.
#' Only the the file types set in \code{options()$act.fileformats.audio} and \code{options()$act.fileformats.video} will be recognized.
#' You can modify these options to recognize other media types.
#'
#' See \code{@import.results} of the corpus object to check the results of importing the files.
#' To get a detailed overview of the corpus object use \code{act::info(x)}, for a summary use \code{act::info_summarized(x)}.
#'
#' @param x Corpus object.
#' @param createFullText Logical; if \code{TRUE} full text will be created.
#' @param assignMedia Logical; if \code{TRUE} the folder(s) specified in \code{@paths.media.files} of your corpus object will be scanned for media.
#'
#' @return Corpus object.
#'
#' @seealso \link{corpus_new}, \link{examplecorpus}
#'
#' @export
#'
#' @example inst/examples/corpus_import.R
#'
corpus_import <- function(x,
createFullText = TRUE,
assignMedia = TRUE) {
#createFullText <- TRUE
#assignMedia <- TRUE
#x<-a
if (missing(x)) {stop("Corpus object in parameter 'x' is missing.") } else { if (!methods::is(x,"corpus") ) {stop("Parameter 'x' needs to be a corpus object.") } }
#--- check if files and folders exist
paths <- x@paths.annotation.files
paths.dont.exist <- which(!file.exists(paths))
message <- c()
if (length(paths.dont.exist)>0) {
message <- c(message, sprintf("%s of %s path(s) in 'x@paths.annotation.files' do not exist.", length(paths.dont.exist), length(x@paths.annotation.files)))
m <- stringr::str_c(" ",paths[paths.dont.exist], collapse="\n")
message <- stringr::str_c(message,"\n", m, collapse="\n")
paths <- paths[-paths.dont.exist]
}
if (length(paths)==0) {
message <- c(message, " No existing input paths.")
message <- paste(message, sep='\n', collapse='\n')
stop(message)
}
#--- make list of all file paths
paths.new <- c()
for (path in paths) {
#remove tailing slashes first
path <- gsub("/*$", "", path , perl=TRUE)
#if it is a directory
if(dir.exists(path)) {
#get all files in folders
paths.sub <- list.files(path, recursive=getOption("act.import.scanSubfolders", TRUE), ignore.case=TRUE, full.names=TRUE)
paths.new <- c(paths.new, paths.sub)
} else {
#it must be a file
paths.new <- c(paths.new, path)
}
}
#include/exclude
if (length(x@import.names.include)!=0) {
paths.new <- paths.new[grep(pattern=x@import.names.include, basename(paths.new))]
}
if (length(x@import.names.exclude)!=0) {
paths.new <- paths.new[!grepl(pattern=x@import.names.exclude, basename(paths.new))]
}
#--- get only supported file formats
supportedFileFormats <- "(?i)\\.(eaf|exb|textgrid)"
paths.new <- unlist(paths.new[stringr::str_which(string=paths.new, pattern=supportedFileFormats, ) ])
if (length(paths.new)==0) {
stop("No annotation files found. Please check 'x@paths.annotation.files'.")
}
#--- make the names
transcript.names <- basename(paths.new)
transcript.names <- tools::file_path_sans_ext(transcript.names)
transcript.names.info <- helper_transcriptNames_make (transcriptNames = transcript.names,
searchPatterns = x@import.names.modify$searchPatterns,
searchReplacements = x@import.names.modify$searchReplacements,
toUpperCase = x@import.names.modify$toUpperCase,
toLowerCase = x@import.names.modify$toLowerCase,
trim = x@import.names.modify$trim,
defaultForEmptyNames = x@import.names.modify$defaultForEmptyNames
)
results <- data.frame( file.name = basename(paths.new),
transcript.name = transcript.names.info$names.ok.ids,
status = "load",
message = "",
duplicated = duplicated(transcript.names),
file.path = paths.new,
stringsAsFactors = FALSE)
#--- how to deal with double transcripts
if (any(results$duplicated)) {
if (x@import.skip.double.files) {
#skip duplicates
results$status[results$duplicated] <- "skipped"
results$message[results$duplicated] <- "Non-unique transcript names"
} else {
transcript.name.old <- results$transcript.name
results$transcript.name <- make.unique(results$transcript.name)
results$message[results$transcript.name!=transcript.name.old] <- "Renamed because of non-unique transcript names"
}
}
#--- read files
#set progress bar
helper_progress_set("Importing files", nrow(results))
test <- list()
counter <- 0
i<- 1
if (nrow(results) > 0) {
for (i in 1:nrow(results) ) {
#update progress bar
helper_progress_tick()
#import only the files that are to be loaded (and not skipped)
if (results$status[i]=="load") {
#import file
new.transcript <- act::import(filePath=results$file.path[i],
transcriptName=results$transcript.name[i])
if (is.null(new.transcript)) {
results$status[i] <- "error"
results$message[i] <- paste (results$message[i], "Unknown error", sep=", ", collapse=", ")
} else {
#get the result from this file
if (new.transcript@import.result=="ok") {
#importing was ok
results$status[i] <- "ok"
#if there is a message
if (new.transcript@load.message!="") {
if (results$message[i]=="") {
results$message[i] <- new.transcript@load.message
}
results$message[i] <- paste (results$message[i], new.transcript@load.message, sep=", ", collapse=", ")
}
#add to the list
counter <- counter+1
test[[counter]] <- new.transcript
} else {
#load error
results$status[i] <- "error"
results$message[i] <- paste (results$message[i], new.transcript@load.message, sep=", ", collapse=", ")
}
}
}
}
}
#--- add results to corpus
#View(results)
#test
x@import.results <- results
#--- update history
x@history[[length(x@history)+1]] <- list(
modification ="corpus_import",
systime = Sys.time(),
results = "See 'x@import.results'"
)
#--- add transcripts to corpus
if (length(test)==0) {
stop("No annotation files found in input path(s).")
} else {
x <- act::transcripts_add(x,
test,
createFullText=createFullText,
assignMedia=assignMedia)
}
#--- show warnings
if (length(message)>0){
warning(paste(message,sep="\n", collapse="\n"))
}
return(x)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.