Nothing
#' Search and replace contents of annotations using a matrix
#'
#' This functions performs a search and replace in the contents of an annotation.
#' A simple matrix consisting of two columns will be used.
#' The first column of the matrix needs to contain the search string, the second column the replacement string.
#' The matrix needs to be in CSV format.
#'
#' @seealso [matrix_load()] for loading the matrix
#' and [matrix_save()] for saving the matrix to a CSV file.
#'
#' If only certain transcripts or tiers should be affected set the parameter \code{filterTranscriptNames}.
#' In case that you want to select transcripts by using regular expressions use the function \code{act::search_makefilter} first.
#'
#'
#' @param x Corpus object.
#' @param path_replacementMatrixCSV Character string; path to replacement matrix (a CSV file).
#' @param filterTranscriptNames Vector of character strings; names of the transcripts to be included.
#'
#' @return Corpus object.
#' @export
#'
#'@seealso \link{media_delete}, \link{media_getPathToExistingFile}
#'
#' @example inst/examples/annotations_matrix.R
#'
annotations_matrix <- function(x,
path_replacementMatrixCSV,
filterTranscriptNames=NULL) {
if (missing(x)) {stop("Corpus object in parameter 'x' is missing.") } else { if (!methods::is(x,"corpus") ) {stop("Parameter 'x' needs to be a corpus object.") } }
#=== get the transcript names
#if none are given, take all names
if (is.null(filterTranscriptNames)) {
filterTranscriptNames <- NULL
} else if (length(filterTranscriptNames)==0) {
filterTranscriptNames <- NULL
} else if (length(filterTranscriptNames)==1) {
if (filterTranscriptNames[1]=="") { filterTranscriptNames <- NULL }
}
if (is.null(filterTranscriptNames)) { filterTranscriptNames <- names(x@transcripts) }
#=== load the matrix
act_replacementMatrix <- matrix_load(path_replacementMatrixCSV)
#=== check matrix
if (is.null(act_replacementMatrix)) { stop("Normalization matrix not read.") }
if ("search" %in% colnames(act_replacementMatrix)==FALSE) { stop("Column 'search' is missing in normalization matrix CSV file. File needs to contain colums 'search' and 'replace'")}
if ("replace" %in% colnames(act_replacementMatrix)==FALSE) { stop("Column 'replace' is missing in normalization matrix CSV file. File needs to contain colums 'search' and 'replace'") }
#=== check
if (is.null(x@transcripts)) { stop("No transcripts found in corpus object x.") }
#replace NA by empty string
act_replacementMatrix$replace[is.na(act_replacementMatrix$replace)] <- ""
#=== create named vector for replacement
mymatrix <- as.character(act_replacementMatrix$replace)
names(mymatrix) <- act_replacementMatrix$search
#=== check if the matrix works
out <- tryCatch(
{
#This is the 'try' part
stringr::str_replace_all("test string", mymatrix)
},
error=function(cond) {
#this is the error part
NULL
}
)
if (is.null(out)) { stop("Replacement matrix seems to be containing invalid regular expressions.") }
#=== do the replacement
annotations_modified_nr <- 0
transcripts_modified_ids <- c()
if (length(mymatrix)<1) {
warning("Replacement matrix is empty.")
} else {
#set progress bar
helper_progress_set("Processing",length(filterTranscriptNames))
for (i in filterTranscriptNames) {
#update progress bar
helper_progress_tick()
#towower
x@transcripts[[i]]@annotations$content <- stringr::str_to_lower(x@transcripts[[i]]@annotations$content)
#replace
annotations_modified_nr <- annotations_modified_nr+length(which(stringr::str_detect(x@transcripts[[i]]@annotations$content, "update.*_B")))
if (annotations_modified_nr>0) {
x@transcripts[[i]]@annotations$content <- stringr::str_replace_all(x@transcripts[[i]]@annotations$content, mymatrix)
#HISTORY transcript
x@transcripts[[i]]@modification.systime <- Sys.time()
x@transcripts[[i]]@history[[length(x@transcripts[[i]]@history)+1]] <- list(
modification = "annotations_matrix",
systime = Sys.time(),
annotations.modified.count = annotations_modified_nr
)
#increase counters for corpus object
transcripts_modified_ids <- c(transcripts_modified_ids, i)
}
}
}
#HISTORY corpus
x@history[[length(x@history)+1]] <- list(
modification = "annotations_matrix",
systime = Sys.time(),
transcripts.modified.count = length(transcripts_modified_ids),
transcripts.modified.ids = transcripts_modified_ids,
annotations.modified.count = annotations_modified_nr
)
return (x)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.